aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJon Mason <jon.mason@intel.com>2012-11-16 20:52:57 -0500
committerJon Mason <jon.mason@intel.com>2013-09-03 17:48:52 -0400
commit113bf1c9f1e035129f8631b032669d6ab36a14dd (patch)
tree839e4b16676e07112941bde5cc739e6fbf786942
parent948d3a65b6d164b34309625f57656cb9e8b1a908 (diff)
NTB: BWD Link Recovery
The BWD NTB device will drop the link if an error is encountered on the point-to-point PCI bridge. The link will stay down until all errors are cleared and the link is re-established. On link down, check whether an error was detected; if so, do the necessary housekeeping to try to recover from the error and re-establish the link. There is a potential race between the two NTB devices recovering at the same time. If the times are synchronized, the link will not recover and the driver will be stuck in this loop forever. Add a random interval to the recovery time to prevent this race. Signed-off-by: Jon Mason <jon.mason@intel.com>
-rw-r--r--drivers/ntb/ntb_hw.c110
-rw-r--r--drivers/ntb/ntb_hw.h5
-rw-r--r--drivers/ntb/ntb_regs.h15
3 files changed, 127 insertions, 3 deletions
diff --git a/drivers/ntb/ntb_hw.c b/drivers/ntb/ntb_hw.c
index 226f82b7da65..3493e9516519 100644
--- a/drivers/ntb/ntb_hw.c
+++ b/drivers/ntb/ntb_hw.c
@@ -46,10 +46,12 @@
46 * Jon Mason <jon.mason@intel.com> 46 * Jon Mason <jon.mason@intel.com>
47 */ 47 */
48#include <linux/debugfs.h> 48#include <linux/debugfs.h>
49#include <linux/delay.h>
49#include <linux/init.h> 50#include <linux/init.h>
50#include <linux/interrupt.h> 51#include <linux/interrupt.h>
51#include <linux/module.h> 52#include <linux/module.h>
52#include <linux/pci.h> 53#include <linux/pci.h>
54#include <linux/random.h>
53#include <linux/slab.h> 55#include <linux/slab.h>
54#include "ntb_hw.h" 56#include "ntb_hw.h"
55#include "ntb_regs.h" 57#include "ntb_regs.h"
@@ -84,6 +86,8 @@ enum {
84 86
85static struct dentry *debugfs_dir; 87static struct dentry *debugfs_dir;
86 88
89#define BWD_LINK_RECOVERY_TIME 500
90
87/* Translate memory window 0,1 to BAR 2,4 */ 91/* Translate memory window 0,1 to BAR 2,4 */
88#define MW_TO_BAR(mw) (mw * NTB_MAX_NUM_MW + 2) 92#define MW_TO_BAR(mw) (mw * NTB_MAX_NUM_MW + 2)
89 93
@@ -425,6 +429,49 @@ void ntb_ring_sdb(struct ntb_device *ndev, unsigned int db)
425 (db * ndev->bits_per_vector), ndev->reg_ofs.sdb); 429 (db * ndev->bits_per_vector), ndev->reg_ofs.sdb);
426} 430}
427 431
432static void bwd_recover_link(struct ntb_device *ndev)
433{
434 u32 status;
435
436 /* Driver resets the NTB ModPhy lanes - magic! */
437 writeb(0xe0, ndev->reg_base + BWD_MODPHY_PCSREG6);
438 writeb(0x40, ndev->reg_base + BWD_MODPHY_PCSREG4);
439 writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG4);
440 writeb(0x60, ndev->reg_base + BWD_MODPHY_PCSREG6);
441
442 /* Driver waits 100ms to allow the NTB ModPhy to settle */
443 msleep(100);
444
445 /* Clear AER Errors, write to clear */
446 status = readl(ndev->reg_base + BWD_ERRCORSTS_OFFSET);
447 dev_dbg(&ndev->pdev->dev, "ERRCORSTS = %x\n", status);
448 status &= PCI_ERR_COR_REP_ROLL;
449 writel(status, ndev->reg_base + BWD_ERRCORSTS_OFFSET);
450
451 /* Clear unexpected electrical idle event in LTSSM, write to clear */
452 status = readl(ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
453 dev_dbg(&ndev->pdev->dev, "LTSSMERRSTS0 = %x\n", status);
454 status |= BWD_LTSSMERRSTS0_UNEXPECTEDEI;
455 writel(status, ndev->reg_base + BWD_LTSSMERRSTS0_OFFSET);
456
457 /* Clear DeSkew Buffer error, write to clear */
458 status = readl(ndev->reg_base + BWD_DESKEWSTS_OFFSET);
459 dev_dbg(&ndev->pdev->dev, "DESKEWSTS = %x\n", status);
460 status |= BWD_DESKEWSTS_DBERR;
461 writel(status, ndev->reg_base + BWD_DESKEWSTS_OFFSET);
462
463 status = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
464 dev_dbg(&ndev->pdev->dev, "IBSTERRRCRVSTS0 = %x\n", status);
465 status &= BWD_IBIST_ERR_OFLOW;
466 writel(status, ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
467
468 /* Releases the NTB state machine to allow the link to retrain */
469 status = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
470 dev_dbg(&ndev->pdev->dev, "LTSSMSTATEJMP = %x\n", status);
471 status &= ~BWD_LTSSMSTATEJMP_FORCEDETECT;
472 writel(status, ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
473}
474
428static void ntb_link_event(struct ntb_device *ndev, int link_state) 475static void ntb_link_event(struct ntb_device *ndev, int link_state)
429{ 476{
430 unsigned int event; 477 unsigned int event;
@@ -448,13 +495,16 @@ static void ntb_link_event(struct ntb_device *ndev, int link_state)
448 if (rc) 495 if (rc)
449 return; 496 return;
450 } 497 }
498
499 ndev->link_width = (status & NTB_LINK_WIDTH_MASK) >> 4;
500 ndev->link_speed = (status & NTB_LINK_SPEED_MASK);
451 dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n", 501 dev_info(&ndev->pdev->dev, "Link Width %d, Link Speed %d\n",
452 (status & NTB_LINK_WIDTH_MASK) >> 4, 502 ndev->link_width, ndev->link_speed);
453 (status & NTB_LINK_SPEED_MASK));
454 } else { 503 } else {
455 dev_info(&ndev->pdev->dev, "Link Down\n"); 504 dev_info(&ndev->pdev->dev, "Link Down\n");
456 ndev->link_status = NTB_LINK_DOWN; 505 ndev->link_status = NTB_LINK_DOWN;
457 event = NTB_EVENT_HW_LINK_DOWN; 506 event = NTB_EVENT_HW_LINK_DOWN;
507 /* Don't modify link width/speed, we need it in link recovery */
458 } 508 }
459 509
460 /* notify the upper layer if we have an event change */ 510 /* notify the upper layer if we have an event change */
@@ -494,6 +544,47 @@ static int ntb_link_status(struct ntb_device *ndev)
494 return 0; 544 return 0;
495} 545}
496 546
547static void bwd_link_recovery(struct work_struct *work)
548{
549 struct ntb_device *ndev = container_of(work, struct ntb_device,
550 lr_timer.work);
551 u32 status32;
552
553 bwd_recover_link(ndev);
554 /* There is a potential race between the 2 NTB devices recovering at the
555 * same time. If the times are the same, the link will not recover and
556 * the driver will be stuck in this loop forever. Add a random interval
557 * to the recovery time to prevent this race.
558 */
559 msleep(BWD_LINK_RECOVERY_TIME + prandom_u32() % BWD_LINK_RECOVERY_TIME);
560
561 status32 = readl(ndev->reg_base + BWD_LTSSMSTATEJMP_OFFSET);
562 if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT)
563 goto retry;
564
565 status32 = readl(ndev->reg_base + BWD_IBSTERRRCRVSTS0_OFFSET);
566 if (status32 & BWD_IBIST_ERR_OFLOW)
567 goto retry;
568
569 status32 = readl(ndev->reg_ofs.lnk_cntl);
570 if (!(status32 & BWD_CNTL_LINK_DOWN)) {
571 unsigned char speed, width;
572 u16 status16;
573
574 status16 = readw(ndev->reg_ofs.lnk_stat);
575 width = (status16 & NTB_LINK_WIDTH_MASK) >> 4;
576 speed = (status16 & NTB_LINK_SPEED_MASK);
577 if (ndev->link_width != width || ndev->link_speed != speed)
578 goto retry;
579 }
580
581 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
582 return;
583
584retry:
585 schedule_delayed_work(&ndev->lr_timer, NTB_HB_TIMEOUT);
586}
587
497/* BWD doesn't have link status interrupt, poll on that platform */ 588/* BWD doesn't have link status interrupt, poll on that platform */
498static void bwd_link_poll(struct work_struct *work) 589static void bwd_link_poll(struct work_struct *work)
499{ 590{
@@ -509,6 +600,16 @@ static void bwd_link_poll(struct work_struct *work)
509 if (rc) 600 if (rc)
510 dev_err(&ndev->pdev->dev, 601 dev_err(&ndev->pdev->dev,
511 "Error determining link status\n"); 602 "Error determining link status\n");
603
604 /* Check to see if a link error is the cause of the link down */
605 if (ndev->link_status == NTB_LINK_DOWN) {
606 u32 status32 = readl(ndev->reg_base +
607 BWD_LTSSMSTATEJMP_OFFSET);
608 if (status32 & BWD_LTSSMSTATEJMP_FORCEDETECT) {
609 schedule_delayed_work(&ndev->lr_timer, 0);
610 return;
611 }
612 }
512 } 613 }
513 614
514 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT); 615 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
@@ -703,6 +804,7 @@ static int ntb_bwd_setup(struct ntb_device *ndev)
703 804
704 /* Since bwd doesn't have a link interrupt, setup a poll timer */ 805 /* Since bwd doesn't have a link interrupt, setup a poll timer */
705 INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll); 806 INIT_DELAYED_WORK(&ndev->hb_timer, bwd_link_poll);
807 INIT_DELAYED_WORK(&ndev->lr_timer, bwd_link_recovery);
706 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT); 808 schedule_delayed_work(&ndev->hb_timer, NTB_HB_TIMEOUT);
707 809
708 return 0; 810 return 0;
@@ -743,8 +845,10 @@ static int ntb_device_setup(struct ntb_device *ndev)
743 845
744static void ntb_device_free(struct ntb_device *ndev) 846static void ntb_device_free(struct ntb_device *ndev)
745{ 847{
746 if (ndev->hw_type == BWD_HW) 848 if (ndev->hw_type == BWD_HW) {
747 cancel_delayed_work_sync(&ndev->hb_timer); 849 cancel_delayed_work_sync(&ndev->hb_timer);
850 cancel_delayed_work_sync(&ndev->lr_timer);
851 }
748} 852}
749 853
750static irqreturn_t bwd_callback_msix_irq(int irq, void *data) 854static irqreturn_t bwd_callback_msix_irq(int irq, void *data)
diff --git a/drivers/ntb/ntb_hw.h b/drivers/ntb/ntb_hw.h
index 72fcb22d7dd8..3a15d492593c 100644
--- a/drivers/ntb/ntb_hw.h
+++ b/drivers/ntb/ntb_hw.h
@@ -125,10 +125,15 @@ struct ntb_device {
125 unsigned char num_msix; 125 unsigned char num_msix;
126 unsigned char bits_per_vector; 126 unsigned char bits_per_vector;
127 unsigned char max_cbs; 127 unsigned char max_cbs;
128 unsigned char link_width;
129 unsigned char link_speed;
128 unsigned char link_status; 130 unsigned char link_status;
131
129 struct delayed_work hb_timer; 132 struct delayed_work hb_timer;
130 unsigned long last_ts; 133 unsigned long last_ts;
131 134
135 struct delayed_work lr_timer;
136
132 struct dentry *debugfs_dir; 137 struct dentry *debugfs_dir;
133}; 138};
134 139
diff --git a/drivers/ntb/ntb_regs.h b/drivers/ntb/ntb_regs.h
index 4ddc590d03a6..03a66ef32a68 100644
--- a/drivers/ntb/ntb_regs.h
+++ b/drivers/ntb/ntb_regs.h
@@ -115,6 +115,7 @@
115#define BWD_MBAR45_OFFSET 0xb020 115#define BWD_MBAR45_OFFSET 0xb020
116#define BWD_DEVCTRL_OFFSET 0xb048 116#define BWD_DEVCTRL_OFFSET 0xb048
117#define BWD_LINK_STATUS_OFFSET 0xb052 117#define BWD_LINK_STATUS_OFFSET 0xb052
118#define BWD_ERRCORSTS_OFFSET 0xb110
118 119
119#define BWD_SBAR2XLAT_OFFSET 0x0008 120#define BWD_SBAR2XLAT_OFFSET 0x0008
120#define BWD_SBAR4XLAT_OFFSET 0x0010 121#define BWD_SBAR4XLAT_OFFSET 0x0010
@@ -132,6 +133,20 @@
132#define BWD_B2B_SPADSEMA_OFFSET 0x80c0 133#define BWD_B2B_SPADSEMA_OFFSET 0x80c0
133#define BWD_B2B_STKYSPAD_OFFSET 0x80c4 134#define BWD_B2B_STKYSPAD_OFFSET 0x80c4
134 135
136#define BWD_MODPHY_PCSREG4 0x1c004
137#define BWD_MODPHY_PCSREG6 0x1c006
138
139#define BWD_IP_BASE 0xC000
140#define BWD_DESKEWSTS_OFFSET (BWD_IP_BASE + 0x3024)
141#define BWD_LTSSMERRSTS0_OFFSET (BWD_IP_BASE + 0x3180)
142#define BWD_LTSSMSTATEJMP_OFFSET (BWD_IP_BASE + 0x3040)
143#define BWD_IBSTERRRCRVSTS0_OFFSET (BWD_IP_BASE + 0x3324)
144
145#define BWD_DESKEWSTS_DBERR (1 << 15)
146#define BWD_LTSSMERRSTS0_UNEXPECTEDEI (1 << 20)
147#define BWD_LTSSMSTATEJMP_FORCEDETECT (1 << 2)
148#define BWD_IBIST_ERR_OFLOW 0x7FFF7FFF
149
135#define NTB_CNTL_BAR23_SNOOP (1 << 2) 150#define NTB_CNTL_BAR23_SNOOP (1 << 2)
136#define NTB_CNTL_BAR45_SNOOP (1 << 6) 151#define NTB_CNTL_BAR45_SNOOP (1 << 6)
137#define BWD_CNTL_LINK_DOWN (1 << 16) 152#define BWD_CNTL_LINK_DOWN (1 << 16)