aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/ntb/ntb_hw.c
diff options
context:
space:
mode:
author: Jon Mason <jon.mason@intel.com>, 2012-11-16 21:27:12 -0500
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>, 2013-01-17 22:11:14 -0500
commit: fce8a7bb5b4bfb8a27324703fd5b002ee9247e90 (patch)
tree: 03ea4f4939d399265ecfa5f11081895a969115e7 /drivers/ntb/ntb_hw.c
parent: ea8a83a4b718f78a8ea2ce3f0237e78a23f8f12b (diff)
PCI-Express Non-Transparent Bridge Support
A PCI-Express non-transparent bridge (NTB) is a point-to-point PCIe bus connecting 2 systems, providing electrical isolation between the two subsystems. A non-transparent bridge is functionally similar to a transparent bridge except that both sides of the bridge have their own independent address domains. The host on one side of the bridge will not have visibility of the complete memory or I/O space on the other side of the bridge. To communicate across the non-transparent bridge, each NTB endpoint has one (or more) apertures exposed to the local system. Writes to these apertures are mirrored to memory on the remote system. Communications can also occur through the use of doorbell registers that initiate interrupts to the alternate domain, and scratch-pad registers accessible from both sides. The NTB device driver is needed to configure these memory windows, doorbell, and scratch-pad registers as well as use them in such a way that they can be turned into a viable communication channel to the remote system. ntb_hw.[ch] determines the usage model (NTB to NTB or NTB to Root Port) and abstracts away the underlying hardware to provide access and a common interface to the doorbell registers, scratch pads, and memory windows. These hardware interfaces are exported so that other, non-mainlined kernel drivers can access them. ntb_transport.[ch] also uses the exported interfaces in ntb_hw.[ch] to set up one or more communication channels and provide a reliable way of transferring data from one side to the other, which it then exports so that "client" drivers can access them. These client drivers are used to provide a standard kernel interface (i.e., Ethernet device) to NTB, such that Linux can transfer data from one system to the other in a standard way.
Signed-off-by: Jon Mason <jon.mason@intel.com>
Reviewed-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/ntb/ntb_hw.c')
-rw-r--r-- drivers/ntb/ntb_hw.c 1157
1 files changed, 1157 insertions, 0 deletions
diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
new file mode 100644
index 000000000000..facad51fbc7a
--- /dev/null
+++ b/drivers/ntb/ntb_hw.c
@@ -0,0 +1,1157 @@
1/*
2 * This file is provided under a dual BSD/GPLv2 license. When using or
3 * redistributing this file, you may do so under either license.
4 *
5 * GPL LICENSE SUMMARY
6 *
7 * Copyright(c) 2012 Intel Corporation. All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * BSD LICENSE
14 *
15 * Copyright(c) 2012 Intel Corporation. All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 *
21 * * Redistributions of source code must retain the above copyright
22 * notice, this list of conditions and the following disclaimer.
23 * * Redistributions in binary form must reproduce the above copy
24 * notice, this list of conditions and the following disclaimer in
25 * the documentation and/or other materials provided with the
26 * distribution.
27 * * Neither the name of Intel Corporation nor the names of its
28 * contributors may be used to endorse or promote products derived
29 * from this software without specific prior written permission.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
34 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
35 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
37 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 *
43 * Intel PCIe NTB Linux driver
44 *
45 * Contact Information:
46 * Jon Mason <jon.mason@intel.com>
47 */
48#include <linux/debugfs.h>
49#include <linux/init.h>
50#include <linux/interrupt.h>
51#include <linux/module.h>
52#include <linux/pci.h>
53#include <linux/slab.h>
54#include "ntb_hw.h"
55#include "ntb_regs.h"
56
57#define NTB_NAME "Intel(R) PCI-E Non-Transparent Bridge Driver"
58#define NTB_VER "0.24"
59
60MODULE_DESCRIPTION(NTB_NAME);
61MODULE_VERSION(NTB_VER);
62MODULE_LICENSE("Dual BSD/GPL");
63MODULE_AUTHOR("Intel Corporation");
64
/*
 * Connection topologies. The setup routines compare these values against
 * the connection-type field extracted from the PPD register, so the
 * numeric values are spelled out explicitly.
 */
enum {
	NTB_CONN_CLASSIC = 0,
	NTB_CONN_B2B = 1,
	NTB_CONN_RP = 2,
};
70
/*
 * Device type, chosen by the setup routines from the PPD device-type bit:
 * bit clear selects USD, bit set selects DSD.
 * NOTE(review): presumably upstream/downstream device - confirm.
 */
enum {
	NTB_DEV_USD = 0,
	NTB_DEV_DSD = 1,
};
75
/*
 * Hardware flavour: SNB_HW is set by ntb_xeon_setup(), BWD_HW by
 * ntb_bwd_setup(). Register widths and interrupt handling differ per type.
 */
enum {
	SNB_HW = 0,
	BWD_HW = 1,
};
80
/*
 * Translate memory window 0,1 to BAR 2,4.
 * The argument is parenthesized so that expressions such as
 * MW_TO_BAR(a + b) expand with the intended precedence.
 */
#define MW_TO_BAR(mw)	((mw) * 2 + 2)
83
84static DEFINE_PCI_DEVICE_TABLE(ntb_pci_tbl) = {
85 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_BWD)},
86 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_JSF)},
87 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_CLASSIC_JSF)},
88 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_RP_JSF)},
89 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_RP_SNB)},
90 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_B2B_SNB)},
91 {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_NTB_CLASSIC_SNB)},
92 {0}
93};
94MODULE_DEVICE_TABLE(pci, ntb_pci_tbl);
95
96/**
97 * ntb_register_event_callback() - register event callback
98 * @ndev: pointer to ntb_device instance
99 * @func: callback function to register
100 *
101 * This function registers a callback for any HW driver events such as link
102 * up/down, power management notices and etc.
103 *
104 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
105 */
106int ntb_register_event_callback(struct ntb_device *ndev,
107 void (*func)(void *handle, unsigned int event))
108{
109 if (ndev->event_cb)
110 return -EINVAL;
111
112 ndev->event_cb = func;
113
114 return 0;
115}
116
117/**
118 * ntb_unregister_event_callback() - unregisters the event callback
119 * @ndev: pointer to ntb_device instance
120 *
121 * This function unregisters the existing callback from transport
122 */
123void ntb_unregister_event_callback(struct ntb_device *ndev)
124{
125 ndev->event_cb = NULL;
126}
127
128/**
129 * ntb_register_db_callback() - register a callback for doorbell interrupt
130 * @ndev: pointer to ntb_device instance
131 * @idx: doorbell index to register callback, zero based
132 * @func: callback function to register
133 *
134 * This function registers a callback function for the doorbell interrupt
135 * on the primary side. The function will unmask the doorbell as well to
136 * allow interrupt.
137 *
138 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
139 */
140int ntb_register_db_callback(struct ntb_device *ndev, unsigned int idx,
141 void *data, void (*func)(void *data, int db_num))
142{
143 unsigned long mask;
144
145 if (idx >= ndev->max_cbs || ndev->db_cb[idx].callback) {
146 dev_warn(&ndev->pdev->dev, "Invalid Index.\n");
147 return -EINVAL;
148 }
149
150 ndev->db_cb[idx].callback = func;
151 ndev->db_cb[idx].data = data;
152
153 /* unmask interrupt */
154 mask = readw(ndev->reg_ofs.pdb_mask);
155 clear_bit(idx * ndev->bits_per_vector, &mask);
156 writew(mask, ndev->reg_ofs.pdb_mask);
157
158 return 0;
159}
160
161/**
162 * ntb_unregister_db_callback() - unregister a callback for doorbell interrupt
163 * @ndev: pointer to ntb_device instance
164 * @idx: doorbell index to register callback, zero based
165 *
166 * This function unregisters a callback function for the doorbell interrupt
167 * on the primary side. The function will also mask the said doorbell.
168 */
169void ntb_unregister_db_callback(struct ntb_device *ndev, unsigned int idx)
170{
171 unsigned long mask;
172
173 if (idx >= ndev->max_cbs || !ndev->db_cb[idx].callback)
174 return;
175
176 mask = readw(ndev->reg_ofs.pdb_mask);
177 set_bit(idx * ndev->bits_per_vector, &mask);
178 writew(mask, ndev->reg_ofs.pdb_mask);
179
180 ndev->db_cb[idx].callback = NULL;
181}
182
183/**
184 * ntb_find_transport() - find the transport pointer
185 * @transport: pointer to pci device
186 *
187 * Given the pci device pointer, return the transport pointer passed in when
188 * the transport attached when it was inited.
189 *
190 * RETURNS: pointer to transport.
191 */
192void *ntb_find_transport(struct pci_dev *pdev)
193{
194 struct ntb_device *ndev = pci_get_drvdata(pdev);
195 return ndev->ntb_transport;
196}
197
198/**
199 * ntb_register_transport() - Register NTB transport with NTB HW driver
200 * @transport: transport identifier
201 *
202 * This function allows a transport to reserve the hardware driver for
203 * NTB usage.
204 *
205 * RETURNS: pointer to ntb_device, NULL on error.
206 */
207struct ntb_device *ntb_register_transport(struct pci_dev *pdev, void *transport)
208{
209 struct ntb_device *ndev = pci_get_drvdata(pdev);
210
211 if (ndev->ntb_transport)
212 return NULL;
213
214 ndev->ntb_transport = transport;
215 return ndev;
216}
217
218/**
219 * ntb_unregister_transport() - Unregister the transport with the NTB HW driver
220 * @ndev - ntb_device of the transport to be freed
221 *
222 * This function unregisters the transport from the HW driver and performs any
223 * necessary cleanups.
224 */
225void ntb_unregister_transport(struct ntb_device *ndev)
226{
227 int i;
228
229 if (!ndev->ntb_transport)
230 return;
231
232 for (i = 0; i < ndev->max_cbs; i++)
233 ntb_unregister_db_callback(ndev, i);
234
235 ntb_unregister_event_callback(ndev);
236 ndev->ntb_transport = NULL;
237}
238
239/**
240 * ntb_get_max_spads() - get the total scratch regs usable
241 * @ndev: pointer to ntb_device instance
242 *
243 * This function returns the max 32bit scratchpad registers usable by the
244 * upper layer.
245 *
246 * RETURNS: total number of scratch pad registers available
247 */
248int ntb_get_max_spads(struct ntb_device *ndev)
249{
250 return ndev->limits.max_spads;
251}
252
253/**
254 * ntb_write_local_spad() - write to the secondary scratchpad register
255 * @ndev: pointer to ntb_device instance
256 * @idx: index to the scratchpad register, 0 based
257 * @val: the data value to put into the register
258 *
259 * This function allows writing of a 32bit value to the indexed scratchpad
260 * register. This writes over the data mirrored to the local scratchpad register
261 * by the remote system.
262 *
263 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
264 */
265int ntb_write_local_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
266{
267 if (idx >= ndev->limits.max_spads)
268 return -EINVAL;
269
270 dev_dbg(&ndev->pdev->dev, "Writing %x to local scratch pad index %d\n",
271 val, idx);
272 writel(val, ndev->reg_ofs.spad_read + idx * 4);
273
274 return 0;
275}
276
277/**
278 * ntb_read_local_spad() - read from the primary scratchpad register
279 * @ndev: pointer to ntb_device instance
280 * @idx: index to scratchpad register, 0 based
281 * @val: pointer to 32bit integer for storing the register value
282 *
283 * This function allows reading of the 32bit scratchpad register on
284 * the primary (internal) side. This allows the local system to read data
285 * written and mirrored to the scratchpad register by the remote system.
286 *
287 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
288 */
289int ntb_read_local_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
290{
291 if (idx >= ndev->limits.max_spads)
292 return -EINVAL;
293
294 *val = readl(ndev->reg_ofs.spad_write + idx * 4);
295 dev_dbg(&ndev->pdev->dev,
296 "Reading %x from local scratch pad index %d\n", *val, idx);
297
298 return 0;
299}
300
301/**
302 * ntb_write_remote_spad() - write to the secondary scratchpad register
303 * @ndev: pointer to ntb_device instance
304 * @idx: index to the scratchpad register, 0 based
305 * @val: the data value to put into the register
306 *
307 * This function allows writing of a 32bit value to the indexed scratchpad
308 * register. The register resides on the secondary (external) side. This allows
309 * the local system to write data to be mirrored to the remote systems
310 * scratchpad register.
311 *
312 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
313 */
314int ntb_write_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 val)
315{
316 if (idx >= ndev->limits.max_spads)
317 return -EINVAL;
318
319 dev_dbg(&ndev->pdev->dev, "Writing %x to remote scratch pad index %d\n",
320 val, idx);
321 writel(val, ndev->reg_ofs.spad_write + idx * 4);
322
323 return 0;
324}
325
326/**
327 * ntb_read_remote_spad() - read from the primary scratchpad register
328 * @ndev: pointer to ntb_device instance
329 * @idx: index to scratchpad register, 0 based
330 * @val: pointer to 32bit integer for storing the register value
331 *
332 * This function allows reading of the 32bit scratchpad register on
333 * the primary (internal) side. This alloows the local system to read the data
334 * it wrote to be mirrored on the remote system.
335 *
336 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
337 */
338int ntb_read_remote_spad(struct ntb_device *ndev, unsigned int idx, u32 *val)
339{
340 if (idx >= ndev->limits.max_spads)
341 return -EINVAL;
342
343 *val = readl(ndev->reg_ofs.spad_read + idx * 4);
344 dev_dbg(&ndev->pdev->dev,
345 "Reading %x from remote scratch pad index %d\n", *val, idx);
346
347 return 0;
348}
349
350/**
351 * ntb_get_mw_vbase() - get virtual addr for the NTB memory window
352 * @ndev: pointer to ntb_device instance
353 * @mw: memory window number
354 *
355 * This function provides the base virtual address of the memory window
356 * specified.
357 *
358 * RETURNS: pointer to virtual address, or NULL on error.
359 */
360void *ntb_get_mw_vbase(struct ntb_device *ndev, unsigned int mw)
361{
362 if (mw > NTB_NUM_MW)
363 return NULL;
364
365 return ndev->mw[mw].vbase;
366}
367
368/**
369 * ntb_get_mw_size() - return size of NTB memory window
370 * @ndev: pointer to ntb_device instance
371 * @mw: memory window number
372 *
373 * This function provides the physical size of the memory window specified
374 *
375 * RETURNS: the size of the memory window or zero on error
376 */
377resource_size_t ntb_get_mw_size(struct ntb_device *ndev, unsigned int mw)
378{
379 if (mw > NTB_NUM_MW)
380 return 0;
381
382 return ndev->mw[mw].bar_sz;
383}
384
385/**
386 * ntb_set_mw_addr - set the memory window address
387 * @ndev: pointer to ntb_device instance
388 * @mw: memory window number
389 * @addr: base address for data
390 *
391 * This function sets the base physical address of the memory window. This
392 * memory address is where data from the remote system will be transfered into
393 * or out of depending on how the transport is configured.
394 */
395void ntb_set_mw_addr(struct ntb_device *ndev, unsigned int mw, u64 addr)
396{
397 if (mw > NTB_NUM_MW)
398 return;
399
400 dev_dbg(&ndev->pdev->dev, "Writing addr %Lx to BAR %d\n", addr,
401 MW_TO_BAR(mw));
402
403 ndev->mw[mw].phys_addr = addr;
404
405 switch (MW_TO_BAR(mw)) {
406 case NTB_BAR_23:
407 writeq(addr, ndev->reg_ofs.sbar2_xlat);
408 break;
409 case NTB_BAR_45:
410 writeq(addr, ndev->reg_ofs.sbar4_xlat);
411 break;
412 }
413}
414
415/**
416 * ntb_ring_sdb() - Set the doorbell on the secondary/external side
417 * @ndev: pointer to ntb_device instance
418 * @db: doorbell to ring
419 *
420 * This function allows triggering of a doorbell on the secondary/external
421 * side that will initiate an interrupt on the remote host
422 *
423 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
424 */
425void ntb_ring_sdb(struct ntb_device *ndev, unsigned int db)
426{
427 dev_dbg(&ndev->pdev->dev, "%s: ringing doorbell %d\n", __func__, db);
428
429 if (ndev->hw_type == BWD_HW)
430 writeq((u64) 1 << db, ndev->reg_ofs.sdb);
431 else
432 writew(((1 << ndev->bits_per_vector) - 1) <<
433 (db * ndev->bits_per_vector), ndev->reg_ofs.sdb);
434}
435
436static void ntb_link_event(struct ntb_device *ndev, int link_state)
437{
438 unsigned int event;
439
440 if (ndev->link_status == link_state)
441 return;
442
443 if (link_state == NTB_LINK_UP) {
444 u16 status;
445
446 dev_info(&ndev->pdev->dev, "Link Up\n");
447 ndev->link_status = NTB_LINK_UP;
448 event = NTB_EVENT_HW_LINK_UP;
449
450 if (ndev->hw_type == BWD_HW)
451 status = readw(ndev->reg_ofs.lnk_stat);
452 else {
453 int rc = pci_read_config_word(ndev->pdev,
454 SNB_LINK_STATUS_OFFSET,
455 &status);
456 if (rc)
457 return;
458 }
459 dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n",
460 (status & NTB_LINK_WIDTH_MASK) >> 4,
461 (status & NTB_LINK_SPEED_MASK));
462 } else {
463 dev_info(&ndev->pdev->dev, "Link Down\n");
464 ndev->link_status = NTB_LINK_DOWN;
465 event = NTB_EVENT_HW_LINK_DOWN;
466 }
467
468 /* notify the upper layer if we have an event change */
469 if (ndev->event_cb)
470 ndev->event_cb(ndev->ntb_transport, event);
471}
472
473static int ntb_link_status(struct ntb_device *ndev)
474{
475 int link_state;
476
477 if (ndev->hw_type == BWD_HW) {
478 u32 ntb_cntl;
479
480 ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
481 if (ntb_cntl & BWD_CNTL_LINK_DOWN)
482 link_state = NTB_LINK_DOWN;
483 else
484 link_state = NTB_LINK_UP;
485 } else {
486 u16 status;
487 int rc;
488
489 rc = pci_read_config_word(ndev->pdev, SNB_LINK_STATUS_OFFSET,
490 &status);
491 if (rc)
492 return rc;
493
494 if (status & NTB_LINK_STATUS_ACTIVE)
495 link_state = NTB_LINK_UP;
496 else
497 link_state = NTB_LINK_DOWN;
498 }
499
500 ntb_link_event(ndev, link_state);
501
502 return 0;
503}
504
505/* BWD doesn't have link status interrupt, poll on that platform */
506static void bwd_link_poll(struct work_struct *work)
507{
508 struct ntb_device *ndev = container_of(work, struct ntb_device,
509 hb_timer.work);
510 unsigned long ts = jiffies;
511
512 /* If we haven't gotten an interrupt in a while, check the BWD link
513 * status bit
514 */
515 if (ts > ndev->last_ts + NTB_HB_TIMEOUT) {
516 int rc = ntb_link_status(ndev);
517 if (rc)
518 dev_err(&ndev->pdev->dev,
519 "Error determining link status\n");
520 }
521
522 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
523}
524
525static int ntb_xeon_setup(struct ntb_device *ndev)
526{
527 int rc;
528 u8 val;
529
530 ndev->hw_type = SNB_HW;
531
532 rc = pci_read_config_byte(ndev->pdev, NTB_PPD_OFFSET, &val);
533 if (rc)
534 return rc;
535
536 switch (val & SNB_PPD_CONN_TYPE) {
537 case NTB_CONN_B2B:
538 ndev->conn_type = NTB_CONN_B2B;
539 break;
540 case NTB_CONN_CLASSIC:
541 case NTB_CONN_RP:
542 default:
543 dev_err(&ndev->pdev->dev, "Only B2B supported at this time\n");
544 return -EINVAL;
545 }
546
547 if (val & SNB_PPD_DEV_TYPE)
548 ndev->dev_type = NTB_DEV_DSD;
549 else
550 ndev->dev_type = NTB_DEV_USD;
551
552 ndev->reg_ofs.pdb = ndev->reg_base + SNB_PDOORBELL_OFFSET;
553 ndev->reg_ofs.pdb_mask = ndev->reg_base + SNB_PDBMSK_OFFSET;
554 ndev->reg_ofs.sbar2_xlat = ndev->reg_base + SNB_SBAR2XLAT_OFFSET;
555 ndev->reg_ofs.sbar4_xlat = ndev->reg_base + SNB_SBAR4XLAT_OFFSET;
556 ndev->reg_ofs.lnk_cntl = ndev->reg_base + SNB_NTBCNTL_OFFSET;
557 ndev->reg_ofs.lnk_stat = ndev->reg_base + SNB_LINK_STATUS_OFFSET;
558 ndev->reg_ofs.spad_read = ndev->reg_base + SNB_SPAD_OFFSET;
559 ndev->reg_ofs.spci_cmd = ndev->reg_base + SNB_PCICMD_OFFSET;
560
561 if (ndev->conn_type == NTB_CONN_B2B) {
562 ndev->reg_ofs.sdb = ndev->reg_base + SNB_B2B_DOORBELL_OFFSET;
563 ndev->reg_ofs.spad_write = ndev->reg_base + SNB_B2B_SPAD_OFFSET;
564 ndev->limits.max_spads = SNB_MAX_SPADS;
565 } else {
566 ndev->reg_ofs.sdb = ndev->reg_base + SNB_SDOORBELL_OFFSET;
567 ndev->reg_ofs.spad_write = ndev->reg_base + SNB_SPAD_OFFSET;
568 ndev->limits.max_spads = SNB_MAX_COMPAT_SPADS;
569 }
570
571 ndev->limits.max_db_bits = SNB_MAX_DB_BITS;
572 ndev->limits.msix_cnt = SNB_MSIX_CNT;
573 ndev->bits_per_vector = SNB_DB_BITS_PER_VEC;
574
575 return 0;
576}
577
578static int ntb_bwd_setup(struct ntb_device *ndev)
579{
580 int rc;
581 u32 val;
582
583 ndev->hw_type = BWD_HW;
584
585 rc = pci_read_config_dword(ndev->pdev, NTB_PPD_OFFSET, &val);
586 if (rc)
587 return rc;
588
589 switch ((val & BWD_PPD_CONN_TYPE) >> 8) {
590 case NTB_CONN_B2B:
591 ndev->conn_type = NTB_CONN_B2B;
592 break;
593 case NTB_CONN_RP:
594 default:
595 dev_err(&ndev->pdev->dev, "Only B2B supported at this time\n");
596 return -EINVAL;
597 }
598
599 if (val & BWD_PPD_DEV_TYPE)
600 ndev->dev_type = NTB_DEV_DSD;
601 else
602 ndev->dev_type = NTB_DEV_USD;
603
604 /* Initiate PCI-E link training */
605 rc = pci_write_config_dword(ndev->pdev, NTB_PPD_OFFSET,
606 val | BWD_PPD_INIT_LINK);
607 if (rc)
608 return rc;
609
610 ndev->reg_ofs.pdb = ndev->reg_base + BWD_PDOORBELL_OFFSET;
611 ndev->reg_ofs.pdb_mask = ndev->reg_base + BWD_PDBMSK_OFFSET;
612 ndev->reg_ofs.sbar2_xlat = ndev->reg_base + BWD_SBAR2XLAT_OFFSET;
613 ndev->reg_ofs.sbar4_xlat = ndev->reg_base + BWD_SBAR4XLAT_OFFSET;
614 ndev->reg_ofs.lnk_cntl = ndev->reg_base + BWD_NTBCNTL_OFFSET;
615 ndev->reg_ofs.lnk_stat = ndev->reg_base + BWD_LINK_STATUS_OFFSET;
616 ndev->reg_ofs.spad_read = ndev->reg_base + BWD_SPAD_OFFSET;
617 ndev->reg_ofs.spci_cmd = ndev->reg_base + BWD_PCICMD_OFFSET;
618
619 if (ndev->conn_type == NTB_CONN_B2B) {
620 ndev->reg_ofs.sdb = ndev->reg_base + BWD_B2B_DOORBELL_OFFSET;
621 ndev->reg_ofs.spad_write = ndev->reg_base + BWD_B2B_SPAD_OFFSET;
622 ndev->limits.max_spads = BWD_MAX_SPADS;
623 } else {
624 ndev->reg_ofs.sdb = ndev->reg_base + BWD_PDOORBELL_OFFSET;
625 ndev->reg_ofs.spad_write = ndev->reg_base + BWD_SPAD_OFFSET;
626 ndev->limits.max_spads = BWD_MAX_COMPAT_SPADS;
627 }
628
629 ndev->limits.max_db_bits = BWD_MAX_DB_BITS;
630 ndev->limits.msix_cnt = BWD_MSIX_CNT;
631 ndev->bits_per_vector = BWD_DB_BITS_PER_VEC;
632
633 /* Since bwd doesn't have a link interrupt, setup a poll timer */
634 INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll);
635 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
636
637 return 0;
638}
639
640static int __devinit ntb_device_setup(struct ntb_device *ndev)
641{
642 int rc;
643
644 switch (ndev->pdev->device) {
645 case PCI_DEVICE_ID_INTEL_NTB_2ND_SNB:
646 case PCI_DEVICE_ID_INTEL_NTB_RP_JSF:
647 case PCI_DEVICE_ID_INTEL_NTB_RP_SNB:
648 case PCI_DEVICE_ID_INTEL_NTB_CLASSIC_JSF:
649 case PCI_DEVICE_ID_INTEL_NTB_CLASSIC_SNB:
650 case PCI_DEVICE_ID_INTEL_NTB_B2B_JSF:
651 case PCI_DEVICE_ID_INTEL_NTB_B2B_SNB:
652 rc = ntb_xeon_setup(ndev);
653 break;
654 case PCI_DEVICE_ID_INTEL_NTB_B2B_BWD:
655 rc = ntb_bwd_setup(ndev);
656 break;
657 default:
658 rc = -ENODEV;
659 }
660
661 /* Enable Bus Master and Memory Space on the secondary side */
662 writew(PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER, ndev->reg_ofs.spci_cmd);
663
664 return rc;
665}
666
667static void ntb_device_free(struct ntb_device *ndev)
668{
669 if (ndev->hw_type == BWD_HW)
670 cancel_delayed_work_sync(&ndev->hb_timer);
671}
672
673static irqreturn_t bwd_callback_msix_irq(int irq, void *data)
674{
675 struct ntb_db_cb *db_cb = data;
676 struct ntb_device *ndev = db_cb->ndev;
677
678 dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
679 db_cb->db_num);
680
681 if (db_cb->callback)
682 db_cb->callback(db_cb->data, db_cb->db_num);
683
684 /* No need to check for the specific HB irq, any interrupt means
685 * we're connected.
686 */
687 ndev->last_ts = jiffies;
688
689 writeq((u64) 1 << db_cb->db_num, ndev->reg_ofs.pdb);
690
691 return IRQ_HANDLED;
692}
693
694static irqreturn_t xeon_callback_msix_irq(int irq, void *data)
695{
696 struct ntb_db_cb *db_cb = data;
697 struct ntb_device *ndev = db_cb->ndev;
698
699 dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for DB %d\n", irq,
700 db_cb->db_num);
701
702 if (db_cb->callback)
703 db_cb->callback(db_cb->data, db_cb->db_num);
704
705 /* On Sandybridge, there are 16 bits in the interrupt register
706 * but only 4 vectors. So, 5 bits are assigned to the first 3
707 * vectors, with the 4th having a single bit for link
708 * interrupts.
709 */
710 writew(((1 << ndev->bits_per_vector) - 1) <<
711 (db_cb->db_num * ndev->bits_per_vector), ndev->reg_ofs.pdb);
712
713 return IRQ_HANDLED;
714}
715
716/* Since we do not have a HW doorbell in BWD, this is only used in JF/JT */
717static irqreturn_t xeon_event_msix_irq(int irq, void *dev)
718{
719 struct ntb_device *ndev = dev;
720 int rc;
721
722 dev_dbg(&ndev->pdev->dev, "MSI-X irq %d received for Events\n", irq);
723
724 rc = ntb_link_status(ndev);
725 if (rc)
726 dev_err(&ndev->pdev->dev, "Error determining link status\n");
727
728 /* bit 15 is always the link bit */
729 writew(1 << ndev->limits.max_db_bits, ndev->reg_ofs.pdb);
730
731 return IRQ_HANDLED;
732}
733
734static irqreturn_t ntb_interrupt(int irq, void *dev)
735{
736 struct ntb_device *ndev = dev;
737 unsigned int i = 0;
738
739 if (ndev->hw_type == BWD_HW) {
740 u64 pdb = readq(ndev->reg_ofs.pdb);
741
742 dev_dbg(&ndev->pdev->dev, "irq %d - pdb = %Lx\n", irq, pdb);
743
744 while (pdb) {
745 i = __ffs(pdb);
746 pdb &= pdb - 1;
747 bwd_callback_msix_irq(irq, &ndev->db_cb[i]);
748 }
749 } else {
750 u16 pdb = readw(ndev->reg_ofs.pdb);
751
752 dev_dbg(&ndev->pdev->dev, "irq %d - pdb = %x sdb %x\n", irq,
753 pdb, readw(ndev->reg_ofs.sdb));
754
755 if (pdb & SNB_DB_HW_LINK) {
756 xeon_event_msix_irq(irq, dev);
757 pdb &= ~SNB_DB_HW_LINK;
758 }
759
760 while (pdb) {
761 i = __ffs(pdb);
762 pdb &= pdb - 1;
763 xeon_callback_msix_irq(irq, &ndev->db_cb[i]);
764 }
765 }
766
767 return IRQ_HANDLED;
768}
769
770static int ntb_setup_msix(struct ntb_device *ndev)
771{
772 struct pci_dev *pdev = ndev->pdev;
773 struct msix_entry *msix;
774 int msix_entries;
775 int rc, i, pos;
776 u16 val;
777
778 pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
779 if (!pos) {
780 rc = -EIO;
781 goto err;
782 }
783
784 rc = pci_read_config_word(pdev, pos + PCI_MSIX_FLAGS, &val);
785 if (rc)
786 goto err;
787
788 msix_entries = msix_table_size(val);
789 if (msix_entries > ndev->limits.msix_cnt) {
790 rc = -EINVAL;
791 goto err;
792 }
793
794 ndev->msix_entries = kmalloc(sizeof(struct msix_entry) * msix_entries,
795 GFP_KERNEL);
796 if (!ndev->msix_entries) {
797 rc = -ENOMEM;
798 goto err;
799 }
800
801 for (i = 0; i < msix_entries; i++)
802 ndev->msix_entries[i].entry = i;
803
804 rc = pci_enable_msix(pdev, ndev->msix_entries, msix_entries);
805 if (rc < 0)
806 goto err1;
807 if (rc > 0) {
808 /* On SNB, the link interrupt is always tied to 4th vector. If
809 * we can't get all 4, then we can't use MSI-X.
810 */
811 if (ndev->hw_type != BWD_HW) {
812 rc = -EIO;
813 goto err1;
814 }
815
816 dev_warn(&pdev->dev,
817 "Only %d MSI-X vectors. Limiting the number of queues to that number.\n",
818 rc);
819 msix_entries = rc;
820 }
821
822 for (i = 0; i < msix_entries; i++) {
823 msix = &ndev->msix_entries[i];
824 WARN_ON(!msix->vector);
825
826 /* Use the last MSI-X vector for Link status */
827 if (ndev->hw_type == BWD_HW) {
828 rc = request_irq(msix->vector, bwd_callback_msix_irq, 0,
829 "ntb-callback-msix", &ndev->db_cb[i]);
830 if (rc)
831 goto err2;
832 } else {
833 if (i == msix_entries - 1) {
834 rc = request_irq(msix->vector,
835 xeon_event_msix_irq, 0,
836 "ntb-event-msix", ndev);
837 if (rc)
838 goto err2;
839 } else {
840 rc = request_irq(msix->vector,
841 xeon_callback_msix_irq, 0,
842 "ntb-callback-msix",
843 &ndev->db_cb[i]);
844 if (rc)
845 goto err2;
846 }
847 }
848 }
849
850 ndev->num_msix = msix_entries;
851 if (ndev->hw_type == BWD_HW)
852 ndev->max_cbs = msix_entries;
853 else
854 ndev->max_cbs = msix_entries - 1;
855
856 return 0;
857
858err2:
859 while (--i >= 0) {
860 msix = &ndev->msix_entries[i];
861 if (ndev->hw_type != BWD_HW && i == ndev->num_msix - 1)
862 free_irq(msix->vector, ndev);
863 else
864 free_irq(msix->vector, &ndev->db_cb[i]);
865 }
866 pci_disable_msix(pdev);
867err1:
868 kfree(ndev->msix_entries);
869 dev_err(&pdev->dev, "Error allocating MSI-X interrupt\n");
870err:
871 ndev->num_msix = 0;
872 return rc;
873}
874
875static int ntb_setup_msi(struct ntb_device *ndev)
876{
877 struct pci_dev *pdev = ndev->pdev;
878 int rc;
879
880 rc = pci_enable_msi(pdev);
881 if (rc)
882 return rc;
883
884 rc = request_irq(pdev->irq, ntb_interrupt, 0, "ntb-msi", ndev);
885 if (rc) {
886 pci_disable_msi(pdev);
887 dev_err(&pdev->dev, "Error allocating MSI interrupt\n");
888 return rc;
889 }
890
891 return 0;
892}
893
894static int ntb_setup_intx(struct ntb_device *ndev)
895{
896 struct pci_dev *pdev = ndev->pdev;
897 int rc;
898
899 pci_msi_off(pdev);
900
901 /* Verify intx is enabled */
902 pci_intx(pdev, 1);
903
904 rc = request_irq(pdev->irq, ntb_interrupt, IRQF_SHARED, "ntb-intx",
905 ndev);
906 if (rc)
907 return rc;
908
909 return 0;
910}
911
912static int __devinit ntb_setup_interrupts(struct ntb_device *ndev)
913{
914 int rc;
915
916 /* On BWD, disable all interrupts. On SNB, disable all but Link
917 * Interrupt. The rest will be unmasked as callbacks are registered.
918 */
919 if (ndev->hw_type == BWD_HW)
920 writeq(~0, ndev->reg_ofs.pdb_mask);
921 else
922 writew(~(1 << ndev->limits.max_db_bits),
923 ndev->reg_ofs.pdb_mask);
924
925 rc = ntb_setup_msix(ndev);
926 if (!rc)
927 goto done;
928
929 ndev->bits_per_vector = 1;
930 ndev->max_cbs = ndev->limits.max_db_bits;
931
932 rc = ntb_setup_msi(ndev);
933 if (!rc)
934 goto done;
935
936 rc = ntb_setup_intx(ndev);
937 if (rc) {
938 dev_err(&ndev->pdev->dev, "no usable interrupts\n");
939 return rc;
940 }
941
942done:
943 return 0;
944}
945
946static void __devexit ntb_free_interrupts(struct ntb_device *ndev)
947{
948 struct pci_dev *pdev = ndev->pdev;
949
950 /* mask interrupts */
951 if (ndev->hw_type == BWD_HW)
952 writeq(~0, ndev->reg_ofs.pdb_mask);
953 else
954 writew(~0, ndev->reg_ofs.pdb_mask);
955
956 if (ndev->num_msix) {
957 struct msix_entry *msix;
958 u32 i;
959
960 for (i = 0; i < ndev->num_msix; i++) {
961 msix = &ndev->msix_entries[i];
962 if (ndev->hw_type != BWD_HW && i == ndev->num_msix - 1)
963 free_irq(msix->vector, ndev);
964 else
965 free_irq(msix->vector, &ndev->db_cb[i]);
966 }
967 pci_disable_msix(pdev);
968 } else {
969 free_irq(pdev->irq, ndev);
970
971 if (pci_dev_msi_enabled(pdev))
972 pci_disable_msi(pdev);
973 }
974}
975
976static int __devinit ntb_create_callbacks(struct ntb_device *ndev)
977{
978 int i;
979
980 /* Checken-egg issue. We won't know how many callbacks are necessary
981 * until we see how many MSI-X vectors we get, but these pointers need
982 * to be passed into the MSI-X register fucntion. So, we allocate the
983 * max, knowing that they might not all be used, to work around this.
984 */
985 ndev->db_cb = kcalloc(ndev->limits.max_db_bits,
986 sizeof(struct ntb_db_cb),
987 GFP_KERNEL);
988 if (!ndev->db_cb)
989 return -ENOMEM;
990
991 for (i = 0; i < ndev->limits.max_db_bits; i++) {
992 ndev->db_cb[i].db_num = i;
993 ndev->db_cb[i].ndev = ndev;
994 }
995
996 return 0;
997}
998
999static void ntb_free_callbacks(struct ntb_device *ndev)
1000{
1001 int i;
1002
1003 for (i = 0; i < ndev->limits.max_db_bits; i++)
1004 ntb_unregister_db_callback(ndev, i);
1005
1006 kfree(ndev->db_cb);
1007}
1008
1009static int __devinit
1010ntb_pci_probe(struct pci_dev *pdev,
1011 __attribute__((unused)) const struct pci_device_id *id)
1012{
1013 struct ntb_device *ndev;
1014 int rc, i;
1015
1016 ndev = kzalloc(sizeof(struct ntb_device), GFP_KERNEL);
1017 if (!ndev)
1018 return -ENOMEM;
1019
1020 ndev->pdev = pdev;
1021 ndev->link_status = NTB_LINK_DOWN;
1022 pci_set_drvdata(pdev, ndev);
1023
1024 rc = pci_enable_device(pdev);
1025 if (rc)
1026 goto err;
1027
1028 pci_set_master(ndev->pdev);
1029
1030 rc = pci_request_selected_regions(pdev, NTB_BAR_MASK, KBUILD_MODNAME);
1031 if (rc)
1032 goto err1;
1033
1034 ndev->reg_base = pci_ioremap_bar(pdev, NTB_BAR_MMIO);
1035 if (!ndev->reg_base) {
1036 dev_warn(&pdev->dev, "Cannot remap BAR 0\n");
1037 rc = -EIO;
1038 goto err2;
1039 }
1040
1041 for (i = 0; i < NTB_NUM_MW; i++) {
1042 ndev->mw[i].bar_sz = pci_resource_len(pdev, MW_TO_BAR(i));
1043 ndev->mw[i].vbase =
1044 ioremap_wc(pci_resource_start(pdev, MW_TO_BAR(i)),
1045 ndev->mw[i].bar_sz);
1046 dev_info(&pdev->dev, "MW %d size %d\n", i,
1047 (u32) pci_resource_len(pdev, MW_TO_BAR(i)));
1048 if (!ndev->mw[i].vbase) {
1049 dev_warn(&pdev->dev, "Cannot remap BAR %d\n",
1050 MW_TO_BAR(i));
1051 rc = -EIO;
1052 goto err3;
1053 }
1054 }
1055
1056 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1057 if (rc) {
1058 rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1059 if (rc)
1060 goto err3;
1061
1062 dev_warn(&pdev->dev, "Cannot DMA highmem\n");
1063 }
1064
1065 rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1066 if (rc) {
1067 rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1068 if (rc)
1069 goto err3;
1070
1071 dev_warn(&pdev->dev, "Cannot DMA consistent highmem\n");
1072 }
1073
1074 rc = ntb_device_setup(ndev);
1075 if (rc)
1076 goto err3;
1077
1078 rc = ntb_create_callbacks(ndev);
1079 if (rc)
1080 goto err4;
1081
1082 rc = ntb_setup_interrupts(ndev);
1083 if (rc)
1084 goto err5;
1085
1086 /* The scratchpad registers keep the values between rmmod/insmod,
1087 * blast them now
1088 */
1089 for (i = 0; i < ndev->limits.max_spads; i++) {
1090 ntb_write_local_spad(ndev, i, 0);
1091 ntb_write_remote_spad(ndev, i, 0);
1092 }
1093
1094 rc = ntb_transport_init(pdev);
1095 if (rc)
1096 goto err6;
1097
1098 /* Let's bring the NTB link up */
1099 writel(NTB_CNTL_BAR23_SNOOP | NTB_CNTL_BAR45_SNOOP,
1100 ndev->reg_ofs.lnk_cntl);
1101
1102 return 0;
1103
1104err6:
1105 ntb_free_interrupts(ndev);
1106err5:
1107 ntb_free_callbacks(ndev);
1108err4:
1109 ntb_device_free(ndev);
1110err3:
1111 for (i--; i >= 0; i--)
1112 iounmap(ndev->mw[i].vbase);
1113 iounmap(ndev->reg_base);
1114err2:
1115 pci_release_selected_regions(pdev, NTB_BAR_MASK);
1116err1:
1117 pci_disable_device(pdev);
1118err:
1119 kfree(ndev);
1120
1121 dev_err(&pdev->dev, "Error loading %s module\n", KBUILD_MODNAME);
1122 return rc;
1123}
1124
1125static void __devexit ntb_pci_remove(struct pci_dev *pdev)
1126{
1127 struct ntb_device *ndev = pci_get_drvdata(pdev);
1128 int i;
1129 u32 ntb_cntl;
1130
1131 /* Bring NTB link down */
1132 ntb_cntl = readl(ndev->reg_ofs.lnk_cntl);
1133 ntb_cntl |= NTB_LINK_DISABLE;
1134 writel(ntb_cntl, ndev->reg_ofs.lnk_cntl);
1135
1136 ntb_transport_free(ndev->ntb_transport);
1137
1138 ntb_free_interrupts(ndev);
1139 ntb_free_callbacks(ndev);
1140 ntb_device_free(ndev);
1141
1142 for (i = 0; i < NTB_NUM_MW; i++)
1143 iounmap(ndev->mw[i].vbase);
1144
1145 iounmap(ndev->reg_base);
1146 pci_release_selected_regions(pdev, NTB_BAR_MASK);
1147 pci_disable_device(pdev);
1148 kfree(ndev);
1149}
1150
1151static struct pci_driver ntb_pci_driver = {
1152 .name = KBUILD_MODNAME,
1153 .id_table = ntb_pci_tbl,
1154 .probe = ntb_pci_probe,
1155 .remove = __devexit_p(ntb_pci_remove),
1156};
1157module_pci_driver(ntb_pci_driver);