aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-11 22:29:00 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-11 22:29:00 -0400
commitded0e250b58a27af6034a8ab9226cbcdf7c0d847 (patch)
tree3487d4920f55ec245b1493c114a48acae5f72489
parentf0c032d81f58c99b63a6e57cf883e923db910928 (diff)
parent9a07826f99034202dad589285a47132685d9538b (diff)
Merge tag 'ntb-4.3' of git://github.com/jonmason/ntb
Pull NTB fixes from Jon Mason: "NTB bug and documentation fixes, new device IDs, performance improvements, and adding a mailing list to MAINTAINERS for NTB" * tag 'ntb-4.3' of git://github.com/jonmason/ntb: NTB: Fix range check on memory window index NTB: Improve index handling in B2B MW workaround NTB: Fix documentation for ntb_peer_db_clear. NTB: Fix documentation for ntb_link_is_up NTB: Use unique DMA channels for TX and RX NTB: Remove dma_sync_wait from ntb_async_rx NTB: Clean up QP stats info NTB: Make the transport list in order of discovery NTB: Add PCI Device IDs for Broadwell Xeon NTB: Add flow control to the ntb_netdev NTB: Add list to MAINTAINERS
-rw-r--r--MAINTAINERS2
-rw-r--r--drivers/net/ntb_netdev.c77
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_intel.c39
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_intel.h3
-rw-r--r--drivers/ntb/ntb_transport.c126
-rw-r--r--include/linux/ntb.h9
-rw-r--r--include/linux/ntb_transport.h1
7 files changed, 210 insertions, 47 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 310da4295c70..030d8724b476 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7396,6 +7396,7 @@ NTB DRIVER CORE
7396M: Jon Mason <jdmason@kudzu.us> 7396M: Jon Mason <jdmason@kudzu.us>
7397M: Dave Jiang <dave.jiang@intel.com> 7397M: Dave Jiang <dave.jiang@intel.com>
7398M: Allen Hubbe <Allen.Hubbe@emc.com> 7398M: Allen Hubbe <Allen.Hubbe@emc.com>
7399L: linux-ntb@googlegroups.com
7399S: Supported 7400S: Supported
7400W: https://github.com/jonmason/ntb/wiki 7401W: https://github.com/jonmason/ntb/wiki
7401T: git git://github.com/jonmason/ntb.git 7402T: git git://github.com/jonmason/ntb.git
@@ -7407,6 +7408,7 @@ F: include/linux/ntb_transport.h
7407NTB INTEL DRIVER 7408NTB INTEL DRIVER
7408M: Jon Mason <jdmason@kudzu.us> 7409M: Jon Mason <jdmason@kudzu.us>
7409M: Dave Jiang <dave.jiang@intel.com> 7410M: Dave Jiang <dave.jiang@intel.com>
7411L: linux-ntb@googlegroups.com
7410S: Supported 7412S: Supported
7411W: https://github.com/jonmason/ntb/wiki 7413W: https://github.com/jonmason/ntb/wiki
7412T: git git://github.com/jonmason/ntb.git 7414T: git git://github.com/jonmason/ntb.git
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index d8757bf9ad75..a9acf7156855 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -61,11 +61,21 @@ MODULE_VERSION(NTB_NETDEV_VER);
61MODULE_LICENSE("Dual BSD/GPL"); 61MODULE_LICENSE("Dual BSD/GPL");
62MODULE_AUTHOR("Intel Corporation"); 62MODULE_AUTHOR("Intel Corporation");
63 63
64/* Time in usecs for tx resource reaper */
65static unsigned int tx_time = 1;
66
67/* Number of descriptors to free before resuming tx */
68static unsigned int tx_start = 10;
69
70/* Number of descriptors still available before stop upper layer tx */
71static unsigned int tx_stop = 5;
72
64struct ntb_netdev { 73struct ntb_netdev {
65 struct list_head list; 74 struct list_head list;
66 struct pci_dev *pdev; 75 struct pci_dev *pdev;
67 struct net_device *ndev; 76 struct net_device *ndev;
68 struct ntb_transport_qp *qp; 77 struct ntb_transport_qp *qp;
78 struct timer_list tx_timer;
69}; 79};
70 80
71#define NTB_TX_TIMEOUT_MS 1000 81#define NTB_TX_TIMEOUT_MS 1000
@@ -136,11 +146,42 @@ enqueue_again:
136 } 146 }
137} 147}
138 148
149static int __ntb_netdev_maybe_stop_tx(struct net_device *netdev,
150 struct ntb_transport_qp *qp, int size)
151{
152 struct ntb_netdev *dev = netdev_priv(netdev);
153
154 netif_stop_queue(netdev);
155 /* Make sure to see the latest value of ntb_transport_tx_free_entry()
156 * since the queue was last started.
157 */
158 smp_mb();
159
160 if (likely(ntb_transport_tx_free_entry(qp) < size)) {
161 mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time));
162 return -EBUSY;
163 }
164
165 netif_start_queue(netdev);
166 return 0;
167}
168
169static int ntb_netdev_maybe_stop_tx(struct net_device *ndev,
170 struct ntb_transport_qp *qp, int size)
171{
172 if (netif_queue_stopped(ndev) ||
173 (ntb_transport_tx_free_entry(qp) >= size))
174 return 0;
175
176 return __ntb_netdev_maybe_stop_tx(ndev, qp, size);
177}
178
139static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data, 179static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
140 void *data, int len) 180 void *data, int len)
141{ 181{
142 struct net_device *ndev = qp_data; 182 struct net_device *ndev = qp_data;
143 struct sk_buff *skb; 183 struct sk_buff *skb;
184 struct ntb_netdev *dev = netdev_priv(ndev);
144 185
145 skb = data; 186 skb = data;
146 if (!skb || !ndev) 187 if (!skb || !ndev)
@@ -155,6 +196,15 @@ static void ntb_netdev_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
155 } 196 }
156 197
157 dev_kfree_skb(skb); 198 dev_kfree_skb(skb);
199
200 if (ntb_transport_tx_free_entry(dev->qp) >= tx_start) {
201 /* Make sure anybody stopping the queue after this sees the new
202 * value of ntb_transport_tx_free_entry()
203 */
204 smp_mb();
205 if (netif_queue_stopped(ndev))
206 netif_wake_queue(ndev);
207 }
158} 208}
159 209
160static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb, 210static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
@@ -163,10 +213,15 @@ static netdev_tx_t ntb_netdev_start_xmit(struct sk_buff *skb,
163 struct ntb_netdev *dev = netdev_priv(ndev); 213 struct ntb_netdev *dev = netdev_priv(ndev);
164 int rc; 214 int rc;
165 215
216 ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
217
166 rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len); 218 rc = ntb_transport_tx_enqueue(dev->qp, skb, skb->data, skb->len);
167 if (rc) 219 if (rc)
168 goto err; 220 goto err;
169 221
222 /* check for next submit */
223 ntb_netdev_maybe_stop_tx(ndev, dev->qp, tx_stop);
224
170 return NETDEV_TX_OK; 225 return NETDEV_TX_OK;
171 226
172err: 227err:
@@ -175,6 +230,23 @@ err:
175 return NETDEV_TX_BUSY; 230 return NETDEV_TX_BUSY;
176} 231}
177 232
233static void ntb_netdev_tx_timer(unsigned long data)
234{
235 struct net_device *ndev = (struct net_device *)data;
236 struct ntb_netdev *dev = netdev_priv(ndev);
237
238 if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) {
239 mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time));
240 } else {
241 /* Make sure anybody stopping the queue after this sees the new
242 * value of ntb_transport_tx_free_entry()
243 */
244 smp_mb();
245 if (netif_queue_stopped(ndev))
246 netif_wake_queue(ndev);
247 }
248}
249
178static int ntb_netdev_open(struct net_device *ndev) 250static int ntb_netdev_open(struct net_device *ndev)
179{ 251{
180 struct ntb_netdev *dev = netdev_priv(ndev); 252 struct ntb_netdev *dev = netdev_priv(ndev);
@@ -197,8 +269,11 @@ static int ntb_netdev_open(struct net_device *ndev)
197 } 269 }
198 } 270 }
199 271
272 setup_timer(&dev->tx_timer, ntb_netdev_tx_timer, (unsigned long)ndev);
273
200 netif_carrier_off(ndev); 274 netif_carrier_off(ndev);
201 ntb_transport_link_up(dev->qp); 275 ntb_transport_link_up(dev->qp);
276 netif_start_queue(ndev);
202 277
203 return 0; 278 return 0;
204 279
@@ -219,6 +294,8 @@ static int ntb_netdev_close(struct net_device *ndev)
219 while ((skb = ntb_transport_rx_remove(dev->qp, &len))) 294 while ((skb = ntb_transport_rx_remove(dev->qp, &len)))
220 dev_kfree_skb(skb); 295 dev_kfree_skb(skb);
221 296
297 del_timer_sync(&dev->tx_timer);
298
222 return 0; 299 return 0;
223} 300}
224 301
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c
index 87751cfd6f4f..865a3e3cc581 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.c
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.c
@@ -190,14 +190,17 @@ static inline int pdev_is_xeon(struct pci_dev *pdev)
190 case PCI_DEVICE_ID_INTEL_NTB_SS_SNB: 190 case PCI_DEVICE_ID_INTEL_NTB_SS_SNB:
191 case PCI_DEVICE_ID_INTEL_NTB_SS_IVT: 191 case PCI_DEVICE_ID_INTEL_NTB_SS_IVT:
192 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: 192 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
193 case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
193 case PCI_DEVICE_ID_INTEL_NTB_PS_JSF: 194 case PCI_DEVICE_ID_INTEL_NTB_PS_JSF:
194 case PCI_DEVICE_ID_INTEL_NTB_PS_SNB: 195 case PCI_DEVICE_ID_INTEL_NTB_PS_SNB:
195 case PCI_DEVICE_ID_INTEL_NTB_PS_IVT: 196 case PCI_DEVICE_ID_INTEL_NTB_PS_IVT:
196 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: 197 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
198 case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
197 case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF: 199 case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
198 case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB: 200 case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
199 case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT: 201 case PCI_DEVICE_ID_INTEL_NTB_B2B_IVT:
200 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: 202 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
203 case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
201 return 1; 204 return 1;
202 } 205 }
203 return 0; 206 return 0;
@@ -237,7 +240,7 @@ static inline int ndev_ignore_unsafe(struct intel_ntb_dev *ndev,
237 240
238static int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx) 241static int ndev_mw_to_bar(struct intel_ntb_dev *ndev, int idx)
239{ 242{
240 if (idx < 0 || idx > ndev->mw_count) 243 if (idx < 0 || idx >= ndev->mw_count)
241 return -EINVAL; 244 return -EINVAL;
242 return ndev->reg->mw_bar[idx]; 245 return ndev->reg->mw_bar[idx];
243} 246}
@@ -572,10 +575,13 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf,
572 "Connection Topology -\t%s\n", 575 "Connection Topology -\t%s\n",
573 ntb_topo_string(ndev->ntb.topo)); 576 ntb_topo_string(ndev->ntb.topo));
574 577
575 off += scnprintf(buf + off, buf_size - off, 578 if (ndev->b2b_idx != UINT_MAX) {
576 "B2B Offset -\t\t%#lx\n", ndev->b2b_off); 579 off += scnprintf(buf + off, buf_size - off,
577 off += scnprintf(buf + off, buf_size - off, 580 "B2B MW Idx -\t\t%u\n", ndev->b2b_idx);
578 "B2B MW Idx -\t\t%d\n", ndev->b2b_idx); 581 off += scnprintf(buf + off, buf_size - off,
582 "B2B Offset -\t\t%#lx\n", ndev->b2b_off);
583 }
584
579 off += scnprintf(buf + off, buf_size - off, 585 off += scnprintf(buf + off, buf_size - off,
580 "BAR4 Split -\t\t%s\n", 586 "BAR4 Split -\t\t%s\n",
581 ndev->bar4_split ? "yes" : "no"); 587 ndev->bar4_split ? "yes" : "no");
@@ -1484,7 +1490,7 @@ static int xeon_setup_b2b_mw(struct intel_ntb_dev *ndev,
1484 pdev = ndev_pdev(ndev); 1490 pdev = ndev_pdev(ndev);
1485 mmio = ndev->self_mmio; 1491 mmio = ndev->self_mmio;
1486 1492
1487 if (ndev->b2b_idx >= ndev->mw_count) { 1493 if (ndev->b2b_idx == UINT_MAX) {
1488 dev_dbg(ndev_dev(ndev), "not using b2b mw\n"); 1494 dev_dbg(ndev_dev(ndev), "not using b2b mw\n");
1489 b2b_bar = 0; 1495 b2b_bar = 0;
1490 ndev->b2b_off = 0; 1496 ndev->b2b_off = 0;
@@ -1776,6 +1782,13 @@ static int xeon_init_ntb(struct intel_ntb_dev *ndev)
1776 else 1782 else
1777 ndev->b2b_idx = b2b_mw_idx; 1783 ndev->b2b_idx = b2b_mw_idx;
1778 1784
1785 if (ndev->b2b_idx >= ndev->mw_count) {
1786 dev_dbg(ndev_dev(ndev),
1787 "b2b_mw_idx %d invalid for mw_count %u\n",
1788 b2b_mw_idx, ndev->mw_count);
1789 return -EINVAL;
1790 }
1791
1779 dev_dbg(ndev_dev(ndev), 1792 dev_dbg(ndev_dev(ndev),
1780 "setting up b2b mw idx %d means %d\n", 1793 "setting up b2b mw idx %d means %d\n",
1781 b2b_mw_idx, ndev->b2b_idx); 1794 b2b_mw_idx, ndev->b2b_idx);
@@ -1843,6 +1856,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
1843 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: 1856 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
1844 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: 1857 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
1845 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: 1858 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
1859 case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
1860 case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
1861 case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
1846 ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP; 1862 ndev->hwerr_flags |= NTB_HWERR_SDOORBELL_LOCKUP;
1847 break; 1863 break;
1848 } 1864 }
@@ -1857,6 +1873,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
1857 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: 1873 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
1858 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: 1874 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
1859 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: 1875 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
1876 case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
1877 case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
1878 case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
1860 ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP; 1879 ndev->hwerr_flags |= NTB_HWERR_SB01BASE_LOCKUP;
1861 break; 1880 break;
1862 } 1881 }
@@ -1878,6 +1897,9 @@ static int xeon_init_dev(struct intel_ntb_dev *ndev)
1878 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX: 1897 case PCI_DEVICE_ID_INTEL_NTB_SS_HSX:
1879 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX: 1898 case PCI_DEVICE_ID_INTEL_NTB_PS_HSX:
1880 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX: 1899 case PCI_DEVICE_ID_INTEL_NTB_B2B_HSX:
1900 case PCI_DEVICE_ID_INTEL_NTB_SS_BDX:
1901 case PCI_DEVICE_ID_INTEL_NTB_PS_BDX:
1902 case PCI_DEVICE_ID_INTEL_NTB_B2B_BDX:
1881 ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14; 1903 ndev->hwerr_flags |= NTB_HWERR_B2BDOORBELL_BIT14;
1882 break; 1904 break;
1883 } 1905 }
@@ -1996,7 +2018,7 @@ static inline void ndev_init_struct(struct intel_ntb_dev *ndev,
1996 ndev->ntb.ops = &intel_ntb_ops; 2018 ndev->ntb.ops = &intel_ntb_ops;
1997 2019
1998 ndev->b2b_off = 0; 2020 ndev->b2b_off = 0;
1999 ndev->b2b_idx = INT_MAX; 2021 ndev->b2b_idx = UINT_MAX;
2000 2022
2001 ndev->bar4_split = 0; 2023 ndev->bar4_split = 0;
2002 2024
@@ -2234,14 +2256,17 @@ static const struct pci_device_id intel_ntb_pci_tbl[] = {
2234 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)}, 2256 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
2235 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)}, 2257 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_IVT)},
2236 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)}, 2258 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_HSX)},
2259 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BDX)},
2237 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)}, 2260 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_JSF)},
2238 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)}, 2261 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_SNB)},
2239 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)}, 2262 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_IVT)},
2240 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)}, 2263 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_HSX)},
2264 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_PS_BDX)},
2241 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)}, 2265 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_JSF)},
2242 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)}, 2266 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_SNB)},
2243 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)}, 2267 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_IVT)},
2244 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)}, 2268 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_HSX)},
2269 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_SS_BDX)},
2245 {0} 2270 {0}
2246}; 2271};
2247MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl); 2272MODULE_DEVICE_TABLE(pci, intel_ntb_pci_tbl);
diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.h b/drivers/ntb/hw/intel/ntb_hw_intel.h
index 7ddaf387b679..ea0612f797df 100644
--- a/drivers/ntb/hw/intel/ntb_hw_intel.h
+++ b/drivers/ntb/hw/intel/ntb_hw_intel.h
@@ -67,6 +67,9 @@
67#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E 67#define PCI_DEVICE_ID_INTEL_NTB_PS_HSX 0x2F0E
68#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F 68#define PCI_DEVICE_ID_INTEL_NTB_SS_HSX 0x2F0F
69#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E 69#define PCI_DEVICE_ID_INTEL_NTB_B2B_BWD 0x0C4E
70#define PCI_DEVICE_ID_INTEL_NTB_B2B_BDX 0x6F0D
71#define PCI_DEVICE_ID_INTEL_NTB_PS_BDX 0x6F0E
72#define PCI_DEVICE_ID_INTEL_NTB_SS_BDX 0x6F0F
70 73
71/* Intel Xeon hardware */ 74/* Intel Xeon hardware */
72 75
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index 1c6386d5f79c..6e3ee907d186 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -119,7 +119,8 @@ struct ntb_transport_qp {
119 struct ntb_transport_ctx *transport; 119 struct ntb_transport_ctx *transport;
120 struct ntb_dev *ndev; 120 struct ntb_dev *ndev;
121 void *cb_data; 121 void *cb_data;
122 struct dma_chan *dma_chan; 122 struct dma_chan *tx_dma_chan;
123 struct dma_chan *rx_dma_chan;
123 124
124 bool client_ready; 125 bool client_ready;
125 bool link_is_up; 126 bool link_is_up;
@@ -297,7 +298,7 @@ static LIST_HEAD(ntb_transport_list);
297 298
298static int ntb_bus_init(struct ntb_transport_ctx *nt) 299static int ntb_bus_init(struct ntb_transport_ctx *nt)
299{ 300{
300 list_add(&nt->entry, &ntb_transport_list); 301 list_add_tail(&nt->entry, &ntb_transport_list);
301 return 0; 302 return 0;
302} 303}
303 304
@@ -452,7 +453,7 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
452 453
453 out_offset = 0; 454 out_offset = 0;
454 out_offset += snprintf(buf + out_offset, out_count - out_offset, 455 out_offset += snprintf(buf + out_offset, out_count - out_offset,
455 "NTB QP stats\n"); 456 "\nNTB QP stats:\n\n");
456 out_offset += snprintf(buf + out_offset, out_count - out_offset, 457 out_offset += snprintf(buf + out_offset, out_count - out_offset,
457 "rx_bytes - \t%llu\n", qp->rx_bytes); 458 "rx_bytes - \t%llu\n", qp->rx_bytes);
458 out_offset += snprintf(buf + out_offset, out_count - out_offset, 459 out_offset += snprintf(buf + out_offset, out_count - out_offset,
@@ -470,11 +471,11 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
470 out_offset += snprintf(buf + out_offset, out_count - out_offset, 471 out_offset += snprintf(buf + out_offset, out_count - out_offset,
471 "rx_err_ver - \t%llu\n", qp->rx_err_ver); 472 "rx_err_ver - \t%llu\n", qp->rx_err_ver);
472 out_offset += snprintf(buf + out_offset, out_count - out_offset, 473 out_offset += snprintf(buf + out_offset, out_count - out_offset,
473 "rx_buff - \t%p\n", qp->rx_buff); 474 "rx_buff - \t0x%p\n", qp->rx_buff);
474 out_offset += snprintf(buf + out_offset, out_count - out_offset, 475 out_offset += snprintf(buf + out_offset, out_count - out_offset,
475 "rx_index - \t%u\n", qp->rx_index); 476 "rx_index - \t%u\n", qp->rx_index);
476 out_offset += snprintf(buf + out_offset, out_count - out_offset, 477 out_offset += snprintf(buf + out_offset, out_count - out_offset,
477 "rx_max_entry - \t%u\n", qp->rx_max_entry); 478 "rx_max_entry - \t%u\n\n", qp->rx_max_entry);
478 479
479 out_offset += snprintf(buf + out_offset, out_count - out_offset, 480 out_offset += snprintf(buf + out_offset, out_count - out_offset,
480 "tx_bytes - \t%llu\n", qp->tx_bytes); 481 "tx_bytes - \t%llu\n", qp->tx_bytes);
@@ -489,15 +490,32 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count,
489 out_offset += snprintf(buf + out_offset, out_count - out_offset, 490 out_offset += snprintf(buf + out_offset, out_count - out_offset,
490 "tx_err_no_buf - %llu\n", qp->tx_err_no_buf); 491 "tx_err_no_buf - %llu\n", qp->tx_err_no_buf);
491 out_offset += snprintf(buf + out_offset, out_count - out_offset, 492 out_offset += snprintf(buf + out_offset, out_count - out_offset,
492 "tx_mw - \t%p\n", qp->tx_mw); 493 "tx_mw - \t0x%p\n", qp->tx_mw);
493 out_offset += snprintf(buf + out_offset, out_count - out_offset, 494 out_offset += snprintf(buf + out_offset, out_count - out_offset,
494 "tx_index - \t%u\n", qp->tx_index); 495 "tx_index (H) - \t%u\n", qp->tx_index);
496 out_offset += snprintf(buf + out_offset, out_count - out_offset,
497 "RRI (T) - \t%u\n",
498 qp->remote_rx_info->entry);
495 out_offset += snprintf(buf + out_offset, out_count - out_offset, 499 out_offset += snprintf(buf + out_offset, out_count - out_offset,
496 "tx_max_entry - \t%u\n", qp->tx_max_entry); 500 "tx_max_entry - \t%u\n", qp->tx_max_entry);
501 out_offset += snprintf(buf + out_offset, out_count - out_offset,
502 "free tx - \t%u\n",
503 ntb_transport_tx_free_entry(qp));
497 504
498 out_offset += snprintf(buf + out_offset, out_count - out_offset, 505 out_offset += snprintf(buf + out_offset, out_count - out_offset,
499 "\nQP Link %s\n", 506 "\n");
507 out_offset += snprintf(buf + out_offset, out_count - out_offset,
508 "Using TX DMA - \t%s\n",
509 qp->tx_dma_chan ? "Yes" : "No");
510 out_offset += snprintf(buf + out_offset, out_count - out_offset,
511 "Using RX DMA - \t%s\n",
512 qp->rx_dma_chan ? "Yes" : "No");
513 out_offset += snprintf(buf + out_offset, out_count - out_offset,
514 "QP Link - \t%s\n",
500 qp->link_is_up ? "Up" : "Down"); 515 qp->link_is_up ? "Up" : "Down");
516 out_offset += snprintf(buf + out_offset, out_count - out_offset,
517 "\n");
518
501 if (out_offset > out_count) 519 if (out_offset > out_count)
502 out_offset = out_count; 520 out_offset = out_count;
503 521
@@ -535,6 +553,7 @@ static struct ntb_queue_entry *ntb_list_rm(spinlock_t *lock,
535 } 553 }
536 entry = list_first_entry(list, struct ntb_queue_entry, entry); 554 entry = list_first_entry(list, struct ntb_queue_entry, entry);
537 list_del(&entry->entry); 555 list_del(&entry->entry);
556
538out: 557out:
539 spin_unlock_irqrestore(lock, flags); 558 spin_unlock_irqrestore(lock, flags);
540 559
@@ -1206,7 +1225,7 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
1206{ 1225{
1207 struct dma_async_tx_descriptor *txd; 1226 struct dma_async_tx_descriptor *txd;
1208 struct ntb_transport_qp *qp = entry->qp; 1227 struct ntb_transport_qp *qp = entry->qp;
1209 struct dma_chan *chan = qp->dma_chan; 1228 struct dma_chan *chan = qp->rx_dma_chan;
1210 struct dma_device *device; 1229 struct dma_device *device;
1211 size_t pay_off, buff_off, len; 1230 size_t pay_off, buff_off, len;
1212 struct dmaengine_unmap_data *unmap; 1231 struct dmaengine_unmap_data *unmap;
@@ -1219,18 +1238,18 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset)
1219 goto err; 1238 goto err;
1220 1239
1221 if (len < copy_bytes) 1240 if (len < copy_bytes)
1222 goto err_wait; 1241 goto err;
1223 1242
1224 device = chan->device; 1243 device = chan->device;
1225 pay_off = (size_t)offset & ~PAGE_MASK; 1244 pay_off = (size_t)offset & ~PAGE_MASK;
1226 buff_off = (size_t)buf & ~PAGE_MASK; 1245 buff_off = (size_t)buf & ~PAGE_MASK;
1227 1246
1228 if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) 1247 if (!is_dma_copy_aligned(device, pay_off, buff_off, len))
1229 goto err_wait; 1248 goto err;
1230 1249
1231 unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT); 1250 unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
1232 if (!unmap) 1251 if (!unmap)
1233 goto err_wait; 1252 goto err;
1234 1253
1235 unmap->len = len; 1254 unmap->len = len;
1236 unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset), 1255 unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset),
@@ -1273,12 +1292,6 @@ err_set_unmap:
1273 dmaengine_unmap_put(unmap); 1292 dmaengine_unmap_put(unmap);
1274err_get_unmap: 1293err_get_unmap:
1275 dmaengine_unmap_put(unmap); 1294 dmaengine_unmap_put(unmap);
1276err_wait:
1277 /* If the callbacks come out of order, the writing of the index to the
1278 * last completed will be out of order. This may result in the
1279 * receive stalling forever.
1280 */
1281 dma_sync_wait(chan, qp->last_cookie);
1282err: 1295err:
1283 ntb_memcpy_rx(entry, offset); 1296 ntb_memcpy_rx(entry, offset);
1284 qp->rx_memcpy++; 1297 qp->rx_memcpy++;
@@ -1373,8 +1386,8 @@ static void ntb_transport_rxc_db(unsigned long data)
1373 break; 1386 break;
1374 } 1387 }
1375 1388
1376 if (i && qp->dma_chan) 1389 if (i && qp->rx_dma_chan)
1377 dma_async_issue_pending(qp->dma_chan); 1390 dma_async_issue_pending(qp->rx_dma_chan);
1378 1391
1379 if (i == qp->rx_max_entry) { 1392 if (i == qp->rx_max_entry) {
1380 /* there is more work to do */ 1393 /* there is more work to do */
@@ -1441,7 +1454,7 @@ static void ntb_async_tx(struct ntb_transport_qp *qp,
1441{ 1454{
1442 struct ntb_payload_header __iomem *hdr; 1455 struct ntb_payload_header __iomem *hdr;
1443 struct dma_async_tx_descriptor *txd; 1456 struct dma_async_tx_descriptor *txd;
1444 struct dma_chan *chan = qp->dma_chan; 1457 struct dma_chan *chan = qp->tx_dma_chan;
1445 struct dma_device *device; 1458 struct dma_device *device;
1446 size_t dest_off, buff_off; 1459 size_t dest_off, buff_off;
1447 struct dmaengine_unmap_data *unmap; 1460 struct dmaengine_unmap_data *unmap;
@@ -1634,14 +1647,27 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
1634 dma_cap_set(DMA_MEMCPY, dma_mask); 1647 dma_cap_set(DMA_MEMCPY, dma_mask);
1635 1648
1636 if (use_dma) { 1649 if (use_dma) {
1637 qp->dma_chan = dma_request_channel(dma_mask, ntb_dma_filter_fn, 1650 qp->tx_dma_chan =
1638 (void *)(unsigned long)node); 1651 dma_request_channel(dma_mask, ntb_dma_filter_fn,
1639 if (!qp->dma_chan) 1652 (void *)(unsigned long)node);
1640 dev_info(&pdev->dev, "Unable to allocate DMA channel\n"); 1653 if (!qp->tx_dma_chan)
1654 dev_info(&pdev->dev, "Unable to allocate TX DMA channel\n");
1655
1656 qp->rx_dma_chan =
1657 dma_request_channel(dma_mask, ntb_dma_filter_fn,
1658 (void *)(unsigned long)node);
1659 if (!qp->rx_dma_chan)
1660 dev_info(&pdev->dev, "Unable to allocate RX DMA channel\n");
1641 } else { 1661 } else {
1642 qp->dma_chan = NULL; 1662 qp->tx_dma_chan = NULL;
1663 qp->rx_dma_chan = NULL;
1643 } 1664 }
1644 dev_dbg(&pdev->dev, "Using %s memcpy\n", qp->dma_chan ? "DMA" : "CPU"); 1665
1666 dev_dbg(&pdev->dev, "Using %s memcpy for TX\n",
1667 qp->tx_dma_chan ? "DMA" : "CPU");
1668
1669 dev_dbg(&pdev->dev, "Using %s memcpy for RX\n",
1670 qp->rx_dma_chan ? "DMA" : "CPU");
1645 1671
1646 for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { 1672 for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
1647 entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); 1673 entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node);
@@ -1676,8 +1702,10 @@ err2:
1676err1: 1702err1:
1677 while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) 1703 while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
1678 kfree(entry); 1704 kfree(entry);
1679 if (qp->dma_chan) 1705 if (qp->tx_dma_chan)
1680 dma_release_channel(qp->dma_chan); 1706 dma_release_channel(qp->tx_dma_chan);
1707 if (qp->rx_dma_chan)
1708 dma_release_channel(qp->rx_dma_chan);
1681 nt->qp_bitmap_free |= qp_bit; 1709 nt->qp_bitmap_free |= qp_bit;
1682err: 1710err:
1683 return NULL; 1711 return NULL;
@@ -1701,12 +1729,27 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
1701 1729
1702 pdev = qp->ndev->pdev; 1730 pdev = qp->ndev->pdev;
1703 1731
1704 if (qp->dma_chan) { 1732 if (qp->tx_dma_chan) {
1705 struct dma_chan *chan = qp->dma_chan; 1733 struct dma_chan *chan = qp->tx_dma_chan;
1734 /* Putting the dma_chan to NULL will force any new traffic to be
1735 * processed by the CPU instead of the DAM engine
1736 */
1737 qp->tx_dma_chan = NULL;
1738
1739 /* Try to be nice and wait for any queued DMA engine
1740 * transactions to process before smashing it with a rock
1741 */
1742 dma_sync_wait(chan, qp->last_cookie);
1743 dmaengine_terminate_all(chan);
1744 dma_release_channel(chan);
1745 }
1746
1747 if (qp->rx_dma_chan) {
1748 struct dma_chan *chan = qp->rx_dma_chan;
1706 /* Putting the dma_chan to NULL will force any new traffic to be 1749 /* Putting the dma_chan to NULL will force any new traffic to be
1707	 * processed by the CPU instead of the DMA engine	1750	 * processed by the CPU instead of the DMA engine
1708 */ 1751 */
1709 qp->dma_chan = NULL; 1752 qp->rx_dma_chan = NULL;
1710 1753
1711 /* Try to be nice and wait for any queued DMA engine 1754 /* Try to be nice and wait for any queued DMA engine
1712 * transactions to process before smashing it with a rock 1755 * transactions to process before smashing it with a rock
@@ -1843,7 +1886,7 @@ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
1843 entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q); 1886 entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
1844 if (!entry) { 1887 if (!entry) {
1845 qp->tx_err_no_buf++; 1888 qp->tx_err_no_buf++;
1846 return -ENOMEM; 1889 return -EBUSY;
1847 } 1890 }
1848 1891
1849 entry->cb_data = cb; 1892 entry->cb_data = cb;
@@ -1954,21 +1997,34 @@ EXPORT_SYMBOL_GPL(ntb_transport_qp_num);
1954unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp) 1997unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp)
1955{ 1998{
1956 unsigned int max; 1999 unsigned int max;
2000 unsigned int copy_align;
1957 2001
1958 if (!qp) 2002 if (!qp)
1959 return 0; 2003 return 0;
1960 2004
1961 if (!qp->dma_chan) 2005 if (!qp->tx_dma_chan && !qp->rx_dma_chan)
1962 return qp->tx_max_frame - sizeof(struct ntb_payload_header); 2006 return qp->tx_max_frame - sizeof(struct ntb_payload_header);
1963 2007
2008 copy_align = max(qp->tx_dma_chan->device->copy_align,
2009 qp->rx_dma_chan->device->copy_align);
2010
1964 /* If DMA engine usage is possible, try to find the max size for that */ 2011 /* If DMA engine usage is possible, try to find the max size for that */
1965 max = qp->tx_max_frame - sizeof(struct ntb_payload_header); 2012 max = qp->tx_max_frame - sizeof(struct ntb_payload_header);
1966 max -= max % (1 << qp->dma_chan->device->copy_align); 2013 max -= max % (1 << copy_align);
1967 2014
1968 return max; 2015 return max;
1969} 2016}
1970EXPORT_SYMBOL_GPL(ntb_transport_max_size); 2017EXPORT_SYMBOL_GPL(ntb_transport_max_size);
1971 2018
2019unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp)
2020{
2021 unsigned int head = qp->tx_index;
2022 unsigned int tail = qp->remote_rx_info->entry;
2023
2024 return tail > head ? tail - head : qp->tx_max_entry + tail - head;
2025}
2026EXPORT_SYMBOL_GPL(ntb_transport_tx_free_entry);
2027
1972static void ntb_transport_doorbell_callback(void *data, int vector) 2028static void ntb_transport_doorbell_callback(void *data, int vector)
1973{ 2029{
1974 struct ntb_transport_ctx *nt = data; 2030 struct ntb_transport_ctx *nt = data;
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index b02f72bb8e32..f798e2afba88 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -522,10 +522,9 @@ static inline int ntb_mw_clear_trans(struct ntb_dev *ntb, int idx)
522 * @speed: OUT - The link speed expressed as PCIe generation number. 522 * @speed: OUT - The link speed expressed as PCIe generation number.
523 * @width: OUT - The link width expressed as the number of PCIe lanes. 523 * @width: OUT - The link width expressed as the number of PCIe lanes.
524 * 524 *
525 * Set the translation of a memory window. The peer may access local memory 525 * Get the current state of the ntb link. It is recommended to query the link
526 * through the window starting at the address, up to the size. The address 526 * state once after every link event. It is safe to query the link state in
527 * must be aligned to the alignment specified by ntb_mw_get_range(). The size 527 * the context of the link event callback.
528 * must be aligned to the size alignment specified by ntb_mw_get_range().
529 * 528 *
530 * Return: One if the link is up, zero if the link is down, otherwise a 529 * Return: One if the link is up, zero if the link is down, otherwise a
531 * negative value indicating the error number. 530 * negative value indicating the error number.
@@ -795,7 +794,7 @@ static inline int ntb_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
795} 794}
796 795
797/** 796/**
798 * ntb_peer_db_clear() - clear bits in the local doorbell register 797 * ntb_peer_db_clear() - clear bits in the peer doorbell register
799 * @ntb: NTB device context. 798 * @ntb: NTB device context.
800 * @db_bits: Doorbell bits to clear. 799 * @db_bits: Doorbell bits to clear.
801 * 800 *
diff --git a/include/linux/ntb_transport.h b/include/linux/ntb_transport.h
index 2862861366a5..7243eb98a722 100644
--- a/include/linux/ntb_transport.h
+++ b/include/linux/ntb_transport.h
@@ -83,3 +83,4 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len);
83void ntb_transport_link_up(struct ntb_transport_qp *qp); 83void ntb_transport_link_up(struct ntb_transport_qp *qp);
84void ntb_transport_link_down(struct ntb_transport_qp *qp); 84void ntb_transport_link_down(struct ntb_transport_qp *qp);
85bool ntb_transport_link_query(struct ntb_transport_qp *qp); 85bool ntb_transport_link_query(struct ntb_transport_qp *qp);
86unsigned int ntb_transport_tx_free_entry(struct ntb_transport_qp *qp);