author     Logan Gunthorpe <logang@deltatee.com>    2019-05-23 18:30:56 -0400
committer  Jon Mason <jdmason@kudzu.us>             2019-06-13 09:02:33 -0400
commit     26b3a37b928457ba2cd98eaf6d7b0feca5a30fa6 (patch)
tree       828a124eb32052fd1603bb77f072a4b3fab1d27a
parent     d217e07b32a6750e44f529e0218898c024b2c637 (diff)
NTB: Introduce MSI library
The NTB MSI library allows passing MSI interrupts across a memory window. This offers similar functionality to doorbells or messages, except it will often have much better latency and the client can potentially use significantly more remote interrupts than typical hardware provides for doorbells. (This can be important in setups with many ports.)

The library utilizes one memory window per peer and uses the highest-indexed memory windows. Before any other ntb_msi function may be used, the user must call ntb_msi_init(). It may then set up and tear down the memory windows when the link state changes using ntb_msi_setup_mws() and ntb_msi_clear_mws().

The peer which receives the interrupt must call ntbm_msi_request_irq() to assign the interrupt handler (this function is functionally similar to devm_request_irq()), and the returned descriptor must be transferred to the peer, which can use it to trigger the interrupt. The triggering peer, once it has received the descriptor, can trigger the interrupt by calling ntb_msi_peer_trigger().

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Allen Hubbe <allenbh@gmail.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
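For orientation, the flow described above might look as follows in a client. This is a minimal sketch only: the "my_*" names and the descriptor exchange are assumptions, not part of this patch, and error unwinding is trimmed.

#include <linux/interrupt.h>
#include <linux/ntb.h>

struct my_client {
	struct ntb_dev *ntb;
	struct ntb_msi_desc self_desc;	/* handed to the peer */
	struct ntb_msi_desc peer_desc;	/* received from the peer */
};

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	/* The peer wrote our MSI message through its outbound window. */
	return IRQ_HANDLED;
}

/* At probe time, before any other ntb_msi call: */
static int my_client_probe(struct my_client *mc)
{
	return ntb_msi_init(mc->ntb, NULL);
}

/* From a work function after a link-up event: */
static int my_client_link_work(struct my_client *mc)
{
	int ret, irq;

	ret = ntb_msi_setup_mws(mc->ntb);
	if (ret)
		return ret;

	/* Claim an unused MSI vector; self_desc is what the peer needs. */
	irq = ntbm_msi_request_irq(mc->ntb, my_irq_handler, "my_client",
				   mc, &mc->self_desc);
	if (irq < 0)
		return irq;

	/*
	 * Hand self_desc to the peer by some other means (scratchpads,
	 * message registers, etc.); the transport is up to the client.
	 */
	return 0;
}

/* Once the peer's descriptor has arrived, interrupt that peer: */
static void my_kick_peer(struct my_client *mc, int peer)
{
	ntb_msi_peer_trigger(mc->ntb, peer, &mc->peer_desc);
}

Note that the descriptor passed to ntbm_msi_request_irq() must outlive the IRQ: the library updates it if the underlying MSI message changes and then invokes the optional desc_changed callback given to ntb_msi_init().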
-rw-r--r--  drivers/ntb/Kconfig   |  11
-rw-r--r--  drivers/ntb/Makefile  |   3
-rw-r--r--  drivers/ntb/msi.c     | 415
-rw-r--r--  include/linux/ntb.h   |  73
4 files changed, 501 insertions(+), 1 deletion(-)
diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
index 95944e52fa36..5760764052be 100644
--- a/drivers/ntb/Kconfig
+++ b/drivers/ntb/Kconfig
@@ -12,6 +12,17 @@ menuconfig NTB
 
 if NTB
 
+config NTB_MSI
+	bool "MSI Interrupt Support"
+	depends on PCI_MSI
+	help
+	  Support using MSI interrupt forwarding instead of (or in addition to)
+	  hardware doorbells. MSI interrupts typically offer lower latency
+	  than doorbells and more MSI interrupts can be made available to
+	  clients. However this requires an extra memory window and support
+	  in the hardware driver for creating the MSI interrupts.
+
+	  If unsure, say N.
 source "drivers/ntb/hw/Kconfig"
 
 source "drivers/ntb/test/Kconfig"
diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
index 537226f8e78d..cc27ad2ef150 100644
--- a/drivers/ntb/Makefile
+++ b/drivers/ntb/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_NTB) += ntb.o hw/ test/
 obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
 
 ntb-y := core.o
+ntb-$(CONFIG_NTB_MSI) += msi.o
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
new file mode 100644
index 000000000000..9dddf133658f
--- /dev/null
+++ b/drivers/ntb/msi.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/ntb.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
+MODULE_DESCRIPTION("NTB MSI Interrupt Library");
+
+struct ntb_msi {
+	u64 base_addr;
+	u64 end_addr;
+
+	void (*desc_changed)(void *ctx);
+
+	u32 __iomem *peer_mws[];
+};
+
+/**
+ * ntb_msi_init() - Initialize the MSI context
+ * @ntb:	NTB device context
+ *
+ * This function must be called before any other ntb_msi function.
+ * It initializes the context for MSI operations and maps
+ * the peer memory windows.
+ *
+ * This function reserves the last N outbound memory windows (where N
+ * is the number of peers).
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_init(struct ntb_dev *ntb,
+		 void (*desc_changed)(void *ctx))
+{
+	phys_addr_t mw_phys_addr;
+	resource_size_t mw_size;
+	size_t struct_size;
+	int peer_widx;
+	int peers;
+	int ret;
+	int i;
+
+	peers = ntb_peer_port_count(ntb);
+	if (peers <= 0)
+		return -EINVAL;
+
+	struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers;
+
+	ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
+	if (!ntb->msi)
+		return -ENOMEM;
+
+	ntb->msi->desc_changed = desc_changed;
+
+	for (i = 0; i < peers; i++) {
+		peer_widx = ntb_peer_mw_count(ntb) - 1 - i;
+
+		ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr,
+					   &mw_size);
+		if (ret)
+			goto unroll;
+
+		ntb->msi->peer_mws[i] = devm_ioremap(&ntb->dev, mw_phys_addr,
+						     mw_size);
+		if (!ntb->msi->peer_mws[i]) {
+			ret = -EFAULT;
+			goto unroll;
+		}
+	}
+
+	return 0;
+
+unroll:
+	for (i = 0; i < peers; i++)
+		if (ntb->msi->peer_mws[i])
+			devm_iounmap(&ntb->dev, ntb->msi->peer_mws[i]);
+
+	devm_kfree(&ntb->dev, ntb->msi);
+	ntb->msi = NULL;
+	return ret;
+}
+EXPORT_SYMBOL(ntb_msi_init);
+
+/**
+ * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows
+ * @ntb:	NTB device context
+ *
+ * This function sets up the required inbound memory windows. It should be
+ * called from a work function after a link up event.
+ *
+ * Over the entire network, this function will reserve the last N
+ * inbound memory windows for each peer (where N is the number of peers).
+ *
+ * ntb_msi_init() must be called before this function.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_setup_mws(struct ntb_dev *ntb)
+{
+	struct msi_desc *desc;
+	u64 addr;
+	int peer, peer_widx;
+	resource_size_t addr_align, size_align, size_max;
+	resource_size_t mw_size = SZ_32K;
+	resource_size_t mw_min_size = mw_size;
+	int i;
+	int ret;
+
+	if (!ntb->msi)
+		return -EINVAL;
+
+	desc = first_msi_entry(&ntb->pdev->dev);
+	addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);
+
+	for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+		if (peer_widx < 0)
+			return peer_widx;
+
+		ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align,
+				       NULL, NULL);
+		if (ret)
+			return ret;
+
+		addr &= ~(addr_align - 1);
+	}
+
+	for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+		if (peer_widx < 0) {
+			ret = peer_widx;
+			goto error_out;
+		}
+
+		ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL,
+				       &size_align, &size_max);
+		if (ret)
+			goto error_out;
+
+		mw_size = round_up(mw_size, size_align);
+		mw_size = max(mw_size, size_max);
+		if (mw_size < mw_min_size)
+			mw_min_size = mw_size;
+
+		ret = ntb_mw_set_trans(ntb, peer, peer_widx,
+				       addr, mw_size);
+		if (ret)
+			goto error_out;
+	}
+
+	ntb->msi->base_addr = addr;
+	ntb->msi->end_addr = addr + mw_min_size;
+
+	return 0;
+
+error_out:
+	for (i = 0; i < peer; i++) {
+		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+		if (peer_widx < 0)
+			continue;
+
+		ntb_mw_clear_trans(ntb, i, peer_widx);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(ntb_msi_setup_mws);
+
+/**
+ * ntb_msi_clear_mws() - Clear all inbound memory windows
+ * @ntb:	NTB device context
+ *
+ * This function tears down the resources used by ntb_msi_setup_mws().
+ */
+void ntb_msi_clear_mws(struct ntb_dev *ntb)
+{
+	int peer;
+	int peer_widx;
+
+	for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+		peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+		if (peer_widx < 0)
+			continue;
+
+		ntb_mw_clear_trans(ntb, peer, peer_widx);
+	}
+}
+EXPORT_SYMBOL(ntb_msi_clear_mws);
+
+struct ntb_msi_devres {
+	struct ntb_dev *ntb;
+	struct msi_desc *entry;
+	struct ntb_msi_desc *msi_desc;
+};
+
+static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry,
+			    struct ntb_msi_desc *msi_desc)
+{
+	u64 addr;
+
+	addr = entry->msg.address_lo +
+		((uint64_t)entry->msg.address_hi << 32);
+
+	if (addr < ntb->msi->base_addr || addr >= ntb->msi->end_addr) {
+		dev_warn_once(&ntb->dev,
+			      "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n",
+			      entry->irq, addr, ntb->msi->base_addr,
+			      ntb->msi->end_addr);
+		return -EFAULT;
+	}
+
+	msi_desc->addr_offset = addr - ntb->msi->base_addr;
+	msi_desc->data = entry->msg.data;
+
+	return 0;
+}
+
+static void ntb_msi_write_msg(struct msi_desc *entry, void *data)
+{
+	struct ntb_msi_devres *dr = data;
+
+	WARN_ON(ntb_msi_set_desc(dr->ntb, entry, dr->msi_desc));
+
+	if (dr->ntb->msi->desc_changed)
+		dr->ntb->msi->desc_changed(dr->ntb->ctx);
+}
+
+static void ntbm_msi_callback_release(struct device *dev, void *res)
+{
+	struct ntb_msi_devres *dr = res;
+
+	dr->entry->write_msi_msg = NULL;
+	dr->entry->write_msi_msg_data = NULL;
+}
+
+static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry,
+				   struct ntb_msi_desc *msi_desc)
+{
+	struct ntb_msi_devres *dr;
+
+	dr = devres_alloc(ntbm_msi_callback_release,
+			  sizeof(struct ntb_msi_devres), GFP_KERNEL);
+	if (!dr)
+		return -ENOMEM;
+
+	dr->ntb = ntb;
+	dr->entry = entry;
+	dr->msi_desc = msi_desc;
+
+	devres_add(&ntb->dev, dr);
+
+	dr->entry->write_msi_msg = ntb_msi_write_msg;
+	dr->entry->write_msi_msg_data = dr;
+
+	return 0;
+}
+
+/**
+ * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt
+ * @ntb:	NTB device context
+ * @handler:	Function to be called when the IRQ occurs
+ * @thread_fn:	Function to be called in a threaded interrupt context. NULL
+ *		for clients which handle everything in @handler
+ * @devname:	An ascii name for the claiming device, dev_name(dev) if NULL
+ * @dev_id:	A cookie passed back to the handler function
+ *
+ * This function assigns an interrupt handler to an unused
+ * MSI interrupt and returns the descriptor used to trigger
+ * it. The descriptor can then be sent to a peer to trigger
+ * the interrupt.
+ *
+ * The interrupt resource is managed with devres so it will
+ * be automatically freed when the NTB device is torn down.
+ *
+ * If an IRQ allocated with this function needs to be freed
+ * separately, ntbm_free_irq() must be used.
+ *
+ * Return: IRQ number assigned on success, otherwise a negative error number.
+ */
+int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
+				  irq_handler_t thread_fn,
+				  const char *name, void *dev_id,
+				  struct ntb_msi_desc *msi_desc)
+{
+	struct msi_desc *entry;
+	struct irq_desc *desc;
+	int ret;
+
+	if (!ntb->msi)
+		return -EINVAL;
+
+	for_each_pci_msi_entry(entry, ntb->pdev) {
+		desc = irq_to_desc(entry->irq);
+		if (desc->action)
+			continue;
+
+		ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler,
+						thread_fn, 0, name, dev_id);
+		if (ret)
+			continue;
+
+		if (ntb_msi_set_desc(ntb, entry, msi_desc)) {
+			devm_free_irq(&ntb->dev, entry->irq, dev_id);
+			continue;
+		}
+
+		ret = ntbm_msi_setup_callback(ntb, entry, msi_desc);
+		if (ret) {
+			devm_free_irq(&ntb->dev, entry->irq, dev_id);
+			return ret;
+		}
+
+
+		return entry->irq;
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(ntbm_msi_request_threaded_irq);
+
+static int ntbm_msi_callback_match(struct device *dev, void *res, void *data)
+{
+	struct ntb_dev *ntb = dev_ntb(dev);
+	struct ntb_msi_devres *dr = res;
+
+	return dr->ntb == ntb && dr->entry == data;
+}
+
+/**
+ * ntbm_msi_free_irq() - free an interrupt
+ * @ntb:	NTB device context
+ * @irq:	Interrupt line to free
+ * @dev_id:	Device identity to free
+ *
+ * This function should be used to manually free IRQs allocated with
+ * ntbm_request_[threaded_]irq().
+ */
+void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id)
+{
+	struct msi_desc *entry = irq_get_msi_desc(irq);
+
+	entry->write_msi_msg = NULL;
+	entry->write_msi_msg_data = NULL;
+
+	WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release,
+			       ntbm_msi_callback_match, entry));
+
+	devm_free_irq(&ntb->dev, irq, dev_id);
+}
+EXPORT_SYMBOL(ntbm_msi_free_irq);
+
+/**
+ * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer
+ * @ntb:	NTB device context
+ * @peer:	Peer index
+ * @desc:	MSI descriptor data which triggers the interrupt
+ *
+ * This function triggers an interrupt on a peer. It requires
+ * the descriptor structure to have been passed from that peer
+ * by some other means.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+			 struct ntb_msi_desc *desc)
+{
+	int idx;
+
+	if (!ntb->msi)
+		return -EINVAL;
+
+	idx = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]);
+
+	iowrite32(desc->data, &ntb->msi->peer_mws[peer][idx]);
+
+	return 0;
+}
+EXPORT_SYMBOL(ntb_msi_peer_trigger);
+
+/**
+ * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt
+ * @ntb:	NTB device context
+ * @peer:	Peer index
+ * @desc:	MSI descriptor data which triggers the interrupt
+ * @msi_addr:	Physical address to trigger the interrupt
+ *
+ * This function allows using DMA engines to trigger an interrupt
+ * (for example, trigger an interrupt to process the data after
+ * sending it). To trigger the interrupt, write @desc.data to the address
+ * returned in @msi_addr
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+		      struct ntb_msi_desc *desc,
+		      phys_addr_t *msi_addr)
+{
+	int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer;
+	phys_addr_t mw_phys_addr;
+	int ret;
+
+	ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL);
+	if (ret)
+		return ret;
+
+	if (msi_addr)
+		*msi_addr = mw_phys_addr + desc->addr_offset;
+
+	return 0;
+}
+EXPORT_SYMBOL(ntb_msi_peer_addr);
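ntb_msi_peer_addr() above exists so that a DMA engine, rather than the CPU, can fire the interrupt by writing desc->data to the returned address after a payload transfer. The following sketch shows how a client might queue such a write with the dmaengine API; it assumes the channel supports immediate-data writes, and the mapping direction, attributes, and error handling are illustrative only, not prescribed by this patch.

#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>
#include <linux/ntb.h>

/* Hypothetical helper: queue a 32-bit MSI write behind earlier DMA work. */
static int my_dma_kick_peer(struct ntb_dev *ntb, int peer,
			    struct ntb_msi_desc *desc, struct dma_chan *chan)
{
	struct device *dma_dev = chan->device->dev;
	struct dma_async_tx_descriptor *tx;
	phys_addr_t msi_addr;
	dma_addr_t dma_addr;
	int ret;

	ret = ntb_msi_peer_addr(ntb, peer, desc, &msi_addr);
	if (ret)
		return ret;

	/* The target is MMIO behind the bridge, so map it as a resource. */
	dma_addr = dma_map_resource(dma_dev, msi_addr, sizeof(u32),
				    DMA_FROM_DEVICE, 0);
	if (dma_mapping_error(dma_dev, dma_addr))
		return -EIO;

	/* NULL here means the channel cannot do immediate-data writes. */
	tx = dmaengine_prep_dma_imm_data(chan, dma_addr, desc->data, 0);
	if (!tx) {
		dma_unmap_resource(dma_dev, dma_addr, sizeof(u32),
				   DMA_FROM_DEVICE, 0);
		return -ENXIO;
	}

	dmaengine_submit(tx);
	dma_async_issue_pending(chan);

	/*
	 * A real client would keep dma_addr and unmap it from its DMA
	 * completion callback rather than leaking the mapping here.
	 */
	return 0;
}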
diff --git a/include/linux/ntb.h b/include/linux/ntb.h
index bed421b9579b..8c13538aeffe 100644
--- a/include/linux/ntb.h
+++ b/include/linux/ntb.h
@@ -58,9 +58,11 @@
 
 #include <linux/completion.h>
 #include <linux/device.h>
+#include <linux/interrupt.h>
 
 struct ntb_client;
 struct ntb_dev;
+struct ntb_msi;
 struct pci_dev;
 
 /**
@@ -426,6 +428,10 @@ struct ntb_dev {
 	spinlock_t ctx_lock;
 	/* block unregister until device is fully released */
 	struct completion released;
+
+#ifdef CONFIG_NTB_MSI
+	struct ntb_msi *msi;
+#endif
 };
 #define dev_ntb(__dev) container_of((__dev), struct ntb_dev, dev)
 
@@ -1627,4 +1633,71 @@ static inline int ntb_peer_highest_mw_idx(struct ntb_dev *ntb, int pidx)
 	return ntb_mw_count(ntb, pidx) - ret - 1;
 }
 
+struct ntb_msi_desc {
+	u32 addr_offset;
+	u32 data;
+};
+
+#ifdef CONFIG_NTB_MSI
+
+int ntb_msi_init(struct ntb_dev *ntb, void (*desc_changed)(void *ctx));
+int ntb_msi_setup_mws(struct ntb_dev *ntb);
+void ntb_msi_clear_mws(struct ntb_dev *ntb);
+int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
+				  irq_handler_t thread_fn,
+				  const char *name, void *dev_id,
+				  struct ntb_msi_desc *msi_desc);
+void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id);
+int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+			 struct ntb_msi_desc *desc);
+int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+		      struct ntb_msi_desc *desc,
+		      phys_addr_t *msi_addr);
+
+#else /* not CONFIG_NTB_MSI */
+
+static inline int ntb_msi_init(struct ntb_dev *ntb,
+			       void (*desc_changed)(void *ctx))
+{
+	return -EOPNOTSUPP;
+}
+static inline int ntb_msi_setup_mws(struct ntb_dev *ntb)
+{
+	return -EOPNOTSUPP;
+}
+static inline void ntb_msi_clear_mws(struct ntb_dev *ntb) {}
+static inline int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb,
+						irq_handler_t handler,
+						irq_handler_t thread_fn,
+						const char *name, void *dev_id,
+						struct ntb_msi_desc *msi_desc)
+{
+	return -EOPNOTSUPP;
+}
+static inline void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq,
+				     void *dev_id) {}
+static inline int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+				       struct ntb_msi_desc *desc)
+{
+	return -EOPNOTSUPP;
+}
+static inline int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+				    struct ntb_msi_desc *desc,
+				    phys_addr_t *msi_addr)
+{
+	return -EOPNOTSUPP;
+
+}
+
+#endif /* CONFIG_NTB_MSI */
+
+static inline int ntbm_msi_request_irq(struct ntb_dev *ntb,
+				       irq_handler_t handler,
+				       const char *name, void *dev_id,
+				       struct ntb_msi_desc *msi_desc)
+{
+	return ntbm_msi_request_threaded_irq(ntb, handler, NULL, name,
+					     dev_id, msi_desc);
+}
+
 #endif