aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Gregor <john.gregor@qlogic.com>2008-04-17 00:09:24 -0400
committerRoland Dreier <rolandd@cisco.com>2008-04-17 00:09:24 -0400
commit58411d1c012dca53ec9107bd98acb63f648e2435 (patch)
treed48edc5c3c64d91311bb4134b83bfe7b62b10ec4
parent6be979d71a5e8720c8560cc58713407947e5f691 (diff)
IB/ipath: Head of Line blocking vs forward progress of user apps
There's a conflict between our need to quiesce PSM-based applications to avoid HoL blocking when the IB link goes down and the apps' desire to remain running so that their quiescence timeout mechanism can keep running. The compromise is to STOP the processes for a fixed period of time and then alternate between CONT and STOP until the link is again active. If there are poor interactions with subnet manager configuration at a given site, the interval can be adjusted via a module parameter. Signed-off-by: John Gregor <john.gregor@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/hw/ipath/ipath_debug.h1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c18
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c129
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c287
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h31
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h20
7 files changed, 310 insertions, 182 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index d6f69532d83f..7170bd20cfbb 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -66,6 +66,7 @@
66#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */ 66#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
67#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */ 67#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */
68#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */ 68#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */
69#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */
69 70
70#else /* _IPATH_DEBUGGING */ 71#else /* _IPATH_DEBUGGING */
71 72
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 4137c7770f1b..96a1c4172f87 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -330,6 +330,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
330 struct ipath_devdata *dd; 330 struct ipath_devdata *dd;
331 ssize_t ret = 0; 331 ssize_t ret = 0;
332 u64 val; 332 u64 val;
333 u32 l_state, lt_state; /* LinkState, LinkTrainingState */
333 334
334 if (count != sizeof(dp)) { 335 if (count != sizeof(dp)) {
335 ret = -EINVAL; 336 ret = -EINVAL;
@@ -396,10 +397,17 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
396 ret = -ENODEV; 397 ret = -ENODEV;
397 goto bail; 398 goto bail;
398 } 399 }
399 /* Check link state, but not if we have custom PBC */ 400 /*
400 val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK; 401 * Want to skip check for l_state if using custom PBC,
401 if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT && 402 * because we might be trying to force an SM packet out.
402 val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) { 403 * first-cut, skip _all_ state checking in that case.
404 */
405 val = ipath_ib_state(dd, dd->ipath_lastibcstat);
406 lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
407 l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
408 if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
409 (val != dd->ib_init && val != dd->ib_arm &&
410 val != dd->ib_active))) {
403 ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n", 411 ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
404 dd->ipath_unit, (unsigned long long) val); 412 dd->ipath_unit, (unsigned long long) val);
405 ret = -EINVAL; 413 ret = -EINVAL;
@@ -438,6 +446,8 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
438 ret = -EBUSY; 446 ret = -EBUSY;
439 goto bail; 447 goto bail;
440 } 448 }
449 /* disarm it just to be extra sure */
450 ipath_disarm_piobufs(dd, pbufn, 1);
441 451
442 plen >>= 2; /* in dwords */ 452 plen >>= 2; /* in dwords */
443 453
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 7121fe84ff8b..5605f4f27521 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -77,6 +77,11 @@ unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
77module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO); 77module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
78MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported"); 78MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
79 79
80static unsigned ipath_hol_timeout_ms = 13000;
81module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
82MODULE_PARM_DESC(hol_timeout_ms,
83 "duration of user app suspension after link failure");
84
80MODULE_LICENSE("GPL"); 85MODULE_LICENSE("GPL");
81MODULE_AUTHOR("QLogic <support@pathscale.com>"); 86MODULE_AUTHOR("QLogic <support@pathscale.com>");
82MODULE_DESCRIPTION("QLogic InfiniPath driver"); 87MODULE_DESCRIPTION("QLogic InfiniPath driver");
@@ -1670,11 +1675,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1670 ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate " 1675 ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
1671 "is %s\n", dd->ipath_unit, 1676 "is %s\n", dd->ipath_unit,
1672 what[linkcmd], 1677 what[linkcmd],
1673 ipath_ibcstatus_str[ 1678 ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
1674 (ipath_read_kreg64 1679 ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
1675 (dd, dd->ipath_kregs->kr_ibcstatus) >>
1676 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1677 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
1678 /* flush all queued sends when going to DOWN to be sure that 1680 /* flush all queued sends when going to DOWN to be sure that
1679 * they don't block MAD packets */ 1681 * they don't block MAD packets */
1680 if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN) 1682 if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN)
@@ -1925,9 +1927,8 @@ static void ipath_run_led_override(unsigned long opaque)
1925 */ 1927 */
1926 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 1928 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
1927 ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & 1929 ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1928 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK; 1930 dd->ibcs_lts_mask;
1929 lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) & 1931 lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK;
1930 INFINIPATH_IBCS_LINKSTATE_MASK;
1931 1932
1932 dd->ipath_f_setextled(dd, lstate, ltstate); 1933 dd->ipath_f_setextled(dd, lstate, ltstate);
1933 mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); 1934 mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
@@ -1988,6 +1989,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
1988 1989
1989 ipath_dbg("Shutting down the device\n"); 1990 ipath_dbg("Shutting down the device\n");
1990 1991
1992 ipath_hol_up(dd); /* make sure user processes aren't suspended */
1993
1991 dd->ipath_flags |= IPATH_LINKUNK; 1994 dd->ipath_flags |= IPATH_LINKUNK;
1992 dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN | 1995 dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
1993 IPATH_LINKINIT | IPATH_LINKARMED | 1996 IPATH_LINKINIT | IPATH_LINKARMED |
@@ -2037,6 +2040,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
2037 */ 2040 */
2038 dd->ipath_f_quiet_serdes(dd); 2041 dd->ipath_f_quiet_serdes(dd);
2039 2042
2043 /* stop all the timers that might still be running */
2044 del_timer_sync(&dd->ipath_hol_timer);
2040 if (dd->ipath_stats_timer_active) { 2045 if (dd->ipath_stats_timer_active) {
2041 del_timer_sync(&dd->ipath_stats_timer); 2046 del_timer_sync(&dd->ipath_stats_timer);
2042 dd->ipath_stats_timer_active = 0; 2047 dd->ipath_stats_timer_active = 0;
@@ -2252,6 +2257,114 @@ bail:
2252 return ret; 2257 return ret;
2253} 2258}
2254 2259
2260/*
2261 * send a signal to all the processes that have the driver open
2262 * through the normal interfaces (i.e., everything other than diags
2263 * interface). Returns number of signalled processes.
2264 */
2265static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
2266{
2267 int i, sub, any = 0;
2268 pid_t pid;
2269
2270 if (!dd->ipath_pd)
2271 return 0;
2272 for (i = 1; i < dd->ipath_cfgports; i++) {
2273 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
2274 !dd->ipath_pd[i]->port_pid)
2275 continue;
2276 pid = dd->ipath_pd[i]->port_pid;
2277 dev_info(&dd->pcidev->dev, "context %d in use "
2278 "(PID %u), sending signal %d\n",
2279 i, pid, sig);
2280 kill_proc(pid, sig, 1);
2281 any++;
2282 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
2283 pid = dd->ipath_pd[i]->port_subpid[sub];
2284 if (!pid)
2285 continue;
2286 dev_info(&dd->pcidev->dev, "sub-context "
2287 "%d:%d in use (PID %u), sending "
2288 "signal %d\n", i, sub, pid, sig);
2289 kill_proc(pid, sig, 1);
2290 any++;
2291 }
2292 }
2293 return any;
2294}
2295
2296static void ipath_hol_signal_down(struct ipath_devdata *dd)
2297{
2298 if (ipath_signal_procs(dd, SIGSTOP))
2299 ipath_dbg("Stopped some processes\n");
2300 ipath_cancel_sends(dd, 1);
2301}
2302
2303
2304static void ipath_hol_signal_up(struct ipath_devdata *dd)
2305{
2306 if (ipath_signal_procs(dd, SIGCONT))
2307 ipath_dbg("Continued some processes\n");
2308}
2309
2310/*
2311 * link is down, stop any user processes, and flush pending sends
2312 * to prevent HoL blocking, then start the HoL timer that
2313 * periodically continues, then stops procs, so they can detect
2314 * link down if they want, and do something about it.
2315 * Timer may already be running, so use __mod_timer, not add_timer.
2316 */
2317void ipath_hol_down(struct ipath_devdata *dd)
2318{
2319 dd->ipath_hol_state = IPATH_HOL_DOWN;
2320 ipath_hol_signal_down(dd);
2321 dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
2322 dd->ipath_hol_timer.expires = jiffies +
2323 msecs_to_jiffies(ipath_hol_timeout_ms);
2324 __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
2325}
2326
2327/*
2328 * link is up, continue any user processes, and ensure timer
2329 * is a nop, if running. Let timer keep running, if set; it
2330 * will nop when it sees the link is up
2331 */
2332void ipath_hol_up(struct ipath_devdata *dd)
2333{
2334 ipath_hol_signal_up(dd);
2335 dd->ipath_hol_state = IPATH_HOL_UP;
2336}
2337
2338/*
2339 * toggle the running/not running state of user processes
2340 * to prevent HoL blocking on chip resources, but still allow
2341 * user processes to do link down special case handling.
2342 * Should only be called via the timer
2343 */
2344void ipath_hol_event(unsigned long opaque)
2345{
2346 struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
2347
2348 if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
2349 && dd->ipath_hol_state != IPATH_HOL_UP) {
2350 dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
2351 ipath_dbg("Stopping processes\n");
2352 ipath_hol_signal_down(dd);
2353 } else { /* may do "extra" if also in ipath_hol_up() */
2354 dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
2355 ipath_dbg("Continuing processes\n");
2356 ipath_hol_signal_up(dd);
2357 }
2358 if (dd->ipath_hol_state == IPATH_HOL_UP)
2359 ipath_dbg("link's up, don't resched timer\n");
2360 else {
2361 dd->ipath_hol_timer.expires = jiffies +
2362 msecs_to_jiffies(ipath_hol_timeout_ms);
2363 __mod_timer(&dd->ipath_hol_timer,
2364 dd->ipath_hol_timer.expires);
2365 }
2366}
2367
2255int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) 2368int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
2256{ 2369{
2257 u64 val; 2370 u64 val;
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index f0d7848d9bb1..bed0927916cc 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -908,6 +908,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
908 dd->ipath_stats_timer_active = 1; 908 dd->ipath_stats_timer_active = 1;
909 } 909 }
910 910
911 /* Set up HoL state */
912 init_timer(&dd->ipath_hol_timer);
913 dd->ipath_hol_timer.function = ipath_hol_event;
914 dd->ipath_hol_timer.data = (unsigned long)dd;
915 dd->ipath_hol_state = IPATH_HOL_UP;
916
911done: 917done:
912 if (!ret) { 918 if (!ret) {
913 *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT; 919 *dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index ed2a227ceced..dde5dfc9fcf5 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -32,6 +32,7 @@
32 */ 32 */
33 33
34#include <linux/pci.h> 34#include <linux/pci.h>
35#include <linux/delay.h>
35 36
36#include "ipath_kernel.h" 37#include "ipath_kernel.h"
37#include "ipath_verbs.h" 38#include "ipath_verbs.h"
@@ -256,24 +257,20 @@ void ipath_format_hwerrors(u64 hwerrs,
256} 257}
257 258
258/* return the strings for the most common link states */ 259/* return the strings for the most common link states */
259static char *ib_linkstate(u32 linkstate) 260static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
260{ 261{
261 char *ret; 262 char *ret;
263 u32 state;
262 264
263 switch (linkstate) { 265 state = ipath_ib_state(dd, ibcs);
264 case IPATH_IBSTATE_INIT: 266 if (state == dd->ib_init)
265 ret = "Init"; 267 ret = "Init";
266 break; 268 else if (state == dd->ib_arm)
267 case IPATH_IBSTATE_ARM:
268 ret = "Arm"; 269 ret = "Arm";
269 break; 270 else if (state == dd->ib_active)
270 case IPATH_IBSTATE_ACTIVE:
271 ret = "Active"; 271 ret = "Active";
272 break; 272 else
273 default:
274 ret = "Down"; 273 ret = "Down";
275 }
276
277 return ret; 274 return ret;
278} 275}
279 276
@@ -288,103 +285,137 @@ void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
288} 285}
289 286
290static void handle_e_ibstatuschanged(struct ipath_devdata *dd, 287static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
291 ipath_err_t errs, int noprint) 288 ipath_err_t errs)
292{ 289{
293 u64 val; 290 u32 ltstate, lstate, ibstate, lastlstate;
294 u32 ltstate, lstate; 291 u32 init = dd->ib_init;
292 u32 arm = dd->ib_arm;
293 u32 active = dd->ib_active;
294 const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
295
296 lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
297 ibstate = ipath_ib_state(dd, ibcs);
298 /* linkstate at last interrupt */
299 lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
300 ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */
295 301
296 /* 302 /*
297 * even if diags are enabled, we want to notice LINKINIT, etc. 303 * if linkstate transitions into INIT from any of the various down
298 * We just don't want to change the LED state, or 304 * states, or if it transitions from any of the up (INIT or better)
299 * dd->ipath_kregs->kr_ibcctrl 305 * states into any of the down states (except link recovery), then
306 * call the chip-specific code to take appropriate actions.
300 */ 307 */
301 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 308 if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
302 lstate = val & IPATH_IBSTATE_MASK; 309 lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
310 /* transitioned to UP */
311 if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
312 ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
313 goto skip_ibchange; /* chip-code handled */
314 }
315 } else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
316 (dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
317 ltstate <= INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE &&
318 ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
319 int handled;
320 handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
321 dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
322 if (handled) {
323 ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
324 goto skip_ibchange; /* chip-code handled */
325 }
326 }
303 327
304 /* 328 /*
305 * this is confusing enough when it happens that I want to always put it 329 * Significant enough to always print and get into logs, if it was
306 * on the console and in the logs. If it was a requested state change, 330 * unexpected. If it was a requested state change, we'll have
307 * we'll have already cleared the flags, so we won't print this warning 331 * already cleared the flags, so we won't print this warning
308 */ 332 */
309 if ((lstate != IPATH_IBSTATE_ARM && lstate != IPATH_IBSTATE_ACTIVE) 333 if ((ibstate != arm && ibstate != active) &&
310 && (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) { 334 (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
311 dev_info(&dd->pcidev->dev, "Link state changed from %s to %s\n", 335 dev_info(&dd->pcidev->dev, "Link state changed from %s "
312 (dd->ipath_flags & IPATH_LINKARMED) ? "ARM" : "ACTIVE", 336 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
313 ib_linkstate(lstate)); 337 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
314 /*
315 * Flush all queued sends when link went to DOWN or INIT,
316 * to be sure that they don't block SMA and other MAD packets
317 */
318 ipath_cancel_sends(dd, 1);
319 }
320 else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
321 lstate == IPATH_IBSTATE_ACTIVE) {
322 /*
323 * only print at SMA if there is a change, debug if not
324 * (sometimes we want to know that, usually not).
325 */
326 if (lstate == ((unsigned) dd->ipath_lastibcstat
327 & IPATH_IBSTATE_MASK)) {
328 ipath_dbg("Status change intr but no change (%s)\n",
329 ib_linkstate(lstate));
330 }
331 else
332 ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
333 "was %s\n", dd->ipath_unit,
334 ib_linkstate(lstate),
335 ib_linkstate((unsigned)
336 dd->ipath_lastibcstat
337 & IPATH_IBSTATE_MASK));
338 }
339 else {
340 lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
341 if (lstate == IPATH_IBSTATE_INIT ||
342 lstate == IPATH_IBSTATE_ARM ||
343 lstate == IPATH_IBSTATE_ACTIVE)
344 ipath_cdbg(VERBOSE, "Unit %u link state down"
345 " (state 0x%x), from %s\n",
346 dd->ipath_unit,
347 (u32)val & IPATH_IBSTATE_MASK,
348 ib_linkstate(lstate));
349 else
350 ipath_cdbg(VERBOSE, "Unit %u link state changed "
351 "to 0x%x from down (%x)\n",
352 dd->ipath_unit, (u32) val, lstate);
353 } 338 }
354 ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
355 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
356 lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
357 INFINIPATH_IBCS_LINKSTATE_MASK;
358 339
359 if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE || 340 if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
360 ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) { 341 ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
361 u32 last_ltstate; 342 u32 lastlts;
362 343 lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
363 /* 344 /*
364 * Ignore cycling back and forth from Polling.Active 345 * Ignore cycling back and forth from Polling.Active to
365 * to Polling.Quiet while waiting for the other end of 346 * Polling.Quiet while waiting for the other end of the link
366 * the link to come up. We will cycle back and forth 347 * to come up, except to try and decide if we are connected
367 * between them if no cable is plugged in, 348 * to a live IB device or not. We will cycle back and
368 * the other device is powered off or disabled, etc. 349 * forth between them if no cable is plugged in, the other
350 * device is powered off or disabled, etc.
369 */ 351 */
370 last_ltstate = (dd->ipath_lastibcstat >> 352 if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
371 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) 353 lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
372 & INFINIPATH_IBCS_LINKTRAININGSTATE_MASK; 354 if (++dd->ipath_ibpollcnt == 40) {
373 if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE
374 || last_ltstate ==
375 INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
376 if (dd->ipath_ibpollcnt > 40) {
377 dd->ipath_flags |= IPATH_NOCABLE; 355 dd->ipath_flags |= IPATH_NOCABLE;
378 *dd->ipath_statusp |= 356 *dd->ipath_statusp |=
379 IPATH_STATUS_IB_NOCABLE; 357 IPATH_STATUS_IB_NOCABLE;
380 } else 358 ipath_cdbg(LINKVERB, "Set NOCABLE\n");
381 dd->ipath_ibpollcnt++; 359 }
360 ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
361 ipath_ibcstatus_str[ltstate], ibstate);
382 goto skip_ibchange; 362 goto skip_ibchange;
383 } 363 }
384 } 364 }
385 dd->ipath_ibpollcnt = 0; /* some state other than 2 or 3 */ 365
366 dd->ipath_ibpollcnt = 0; /* not poll*, now */
386 ipath_stats.sps_iblink++; 367 ipath_stats.sps_iblink++;
387 if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) { 368
369 if (ibstate == init || ibstate == arm || ibstate == active) {
370 *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
371 if (ibstate == init || ibstate == arm) {
372 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
373 if (dd->ipath_flags & IPATH_LINKACTIVE)
374 signal_ib_event(dd, IB_EVENT_PORT_ERR);
375 }
376 if (ibstate == arm) {
377 dd->ipath_flags |= IPATH_LINKARMED;
378 dd->ipath_flags &= ~(IPATH_LINKUNK |
379 IPATH_LINKINIT | IPATH_LINKDOWN |
380 IPATH_LINKACTIVE | IPATH_NOCABLE);
381 ipath_hol_down(dd);
382 } else if (ibstate == init) {
383 /*
384 * set INIT and DOWN. Down is checked by
385 * most of the other code, but INIT is
386 * useful to know in a few places.
387 */
388 dd->ipath_flags |= IPATH_LINKINIT |
389 IPATH_LINKDOWN;
390 dd->ipath_flags &= ~(IPATH_LINKUNK |
391 IPATH_LINKARMED | IPATH_LINKACTIVE |
392 IPATH_NOCABLE);
393 ipath_hol_down(dd);
394 } else { /* active */
395 *dd->ipath_statusp |=
396 IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
397 dd->ipath_flags |= IPATH_LINKACTIVE;
398 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
399 | IPATH_LINKDOWN | IPATH_LINKARMED |
400 IPATH_NOCABLE);
401 signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
402 /* LED active not handled in chip _f_updown */
403 dd->ipath_f_setextled(dd, lstate, ltstate);
404 ipath_hol_up(dd);
405 }
406
407 /*
408 * print after we've already done the work, so as not to
409 * delay the state changes and notifications, for debugging
410 */
411 if (lstate == lastlstate)
412 ipath_cdbg(LINKVERB, "Unchanged from last: %s "
413 "(%x)\n", ib_linkstate(dd, ibcs), ibstate);
414 else
415 ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
416 dd->ipath_unit, ib_linkstate(dd, ibcs),
417 ipath_ibcstatus_str[ltstate], ibstate);
418 } else { /* down */
388 if (dd->ipath_flags & IPATH_LINKACTIVE) 419 if (dd->ipath_flags & IPATH_LINKACTIVE)
389 signal_ib_event(dd, IB_EVENT_PORT_ERR); 420 signal_ib_event(dd, IB_EVENT_PORT_ERR);
390 dd->ipath_flags |= IPATH_LINKDOWN; 421 dd->ipath_flags |= IPATH_LINKDOWN;
@@ -393,65 +424,22 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
393 IPATH_LINKARMED); 424 IPATH_LINKARMED);
394 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; 425 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
395 dd->ipath_lli_counter = 0; 426 dd->ipath_lli_counter = 0;
396 if (!noprint) {
397 if (((dd->ipath_lastibcstat >>
398 INFINIPATH_IBCS_LINKSTATE_SHIFT) &
399 INFINIPATH_IBCS_LINKSTATE_MASK)
400 == INFINIPATH_IBCS_L_STATE_ACTIVE)
401 /* if from up to down be more vocal */
402 ipath_cdbg(VERBOSE,
403 "Unit %u link now down (%s)\n",
404 dd->ipath_unit,
405 ipath_ibcstatus_str[ltstate]);
406 else
407 ipath_cdbg(VERBOSE, "Unit %u link is "
408 "down (%s)\n", dd->ipath_unit,
409 ipath_ibcstatus_str[ltstate]);
410 }
411 427
412 dd->ipath_f_setextled(dd, lstate, ltstate); 428 if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
413 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) { 429 ipath_cdbg(VERBOSE, "Unit %u link state down "
414 dd->ipath_flags |= IPATH_LINKACTIVE; 430 "(state 0x%x), from %s\n",
415 dd->ipath_flags &= 431 dd->ipath_unit, lstate,
416 ~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN | 432 ib_linkstate(dd, dd->ipath_lastibcstat));
417 IPATH_LINKARMED | IPATH_NOCABLE); 433 else
418 *dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE; 434 ipath_cdbg(LINKVERB, "Unit %u link state changed "
419 *dd->ipath_statusp |= 435 "to %s (0x%x) from down (%x)\n",
420 IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF; 436 dd->ipath_unit,
421 dd->ipath_f_setextled(dd, lstate, ltstate); 437 ipath_ibcstatus_str[ltstate],
422 signal_ib_event(dd, IB_EVENT_PORT_ACTIVE); 438 ibstate, lastlstate);
423 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
424 if (dd->ipath_flags & IPATH_LINKACTIVE)
425 signal_ib_event(dd, IB_EVENT_PORT_ERR);
426 /*
427 * set INIT and DOWN. Down is checked by most of the other
428 * code, but INIT is useful to know in a few places.
429 */
430 dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN;
431 dd->ipath_flags &=
432 ~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED
433 | IPATH_NOCABLE);
434 *dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
435 | IPATH_STATUS_IB_READY);
436 dd->ipath_f_setextled(dd, lstate, ltstate);
437 } else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
438 if (dd->ipath_flags & IPATH_LINKACTIVE)
439 signal_ib_event(dd, IB_EVENT_PORT_ERR);
440 dd->ipath_flags |= IPATH_LINKARMED;
441 dd->ipath_flags &=
442 ~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
443 IPATH_LINKACTIVE | IPATH_NOCABLE);
444 *dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
445 | IPATH_STATUS_IB_READY);
446 dd->ipath_f_setextled(dd, lstate, ltstate);
447 } else {
448 if (!noprint)
449 ipath_dbg("IBstatuschange unit %u: %s (%x)\n",
450 dd->ipath_unit,
451 ipath_ibcstatus_str[ltstate], ltstate);
452 } 439 }
440
453skip_ibchange: 441skip_ibchange:
454 dd->ipath_lastibcstat = val; 442 dd->ipath_lastibcstat = ibcs;
455} 443}
456 444
457static void handle_supp_msgs(struct ipath_devdata *dd, 445static void handle_supp_msgs(struct ipath_devdata *dd,
@@ -743,16 +731,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
743 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT 731 dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
744 | IPATH_LINKARMED | IPATH_LINKACTIVE); 732 | IPATH_LINKARMED | IPATH_LINKACTIVE);
745 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; 733 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
746 if (!noprint) {
747 u64 st = ipath_read_kreg64(
748 dd, dd->ipath_kregs->kr_ibcstatus);
749 734
750 ipath_dbg("Lost link, link now down (%s)\n", 735 ipath_dbg("Lost link, link now down (%s)\n",
751 ipath_ibcstatus_str[st & 0xf]); 736 ipath_ibcstatus_str[ipath_read_kreg64(dd,
752 } 737 dd->ipath_kregs->kr_ibcstatus) & 0xf]);
753 } 738 }
754 if (errs & INFINIPATH_E_IBSTATUSCHANGED) 739 if (errs & INFINIPATH_E_IBSTATUSCHANGED)
755 handle_e_ibstatuschanged(dd, errs, noprint); 740 handle_e_ibstatuschanged(dd, errs);
756 741
757 if (errs & INFINIPATH_E_RESET) { 742 if (errs & INFINIPATH_E_RESET) {
758 if (!noprint) 743 if (!noprint)
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 70c0a0dd6939..caee731b670f 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -427,6 +427,11 @@ struct ipath_devdata {
427 427
428 unsigned long ipath_ureg_align; /* user register alignment */ 428 unsigned long ipath_ureg_align; /* user register alignment */
429 429
430 /* HoL blocking / user app forward-progress state */
431 unsigned ipath_hol_state;
432 unsigned ipath_hol_next;
433 struct timer_list ipath_hol_timer;
434
430 /* 435 /*
431 * Shadow copies of registers; size indicates read access size. 436 * Shadow copies of registers; size indicates read access size.
432 * Most of them are readonly, but some are write-only register, 437 * Most of them are readonly, but some are write-only register,
@@ -706,6 +711,13 @@ struct ipath_devdata {
706 u16 ipath_jint_max_packets; /* max packets across all ports */ 711 u16 ipath_jint_max_packets; /* max packets across all ports */
707}; 712};
708 713
714/* ipath_hol_state values (stopping/starting user proc, send flushing) */
715#define IPATH_HOL_UP 0
716#define IPATH_HOL_DOWN 1
717/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
718#define IPATH_HOL_DOWNSTOP 0
719#define IPATH_HOL_DOWNCONT 1
720
709/* Private data for file operations */ 721/* Private data for file operations */
710struct ipath_filedata { 722struct ipath_filedata {
711 struct ipath_portdata *pd; 723 struct ipath_portdata *pd;
@@ -775,6 +787,9 @@ int ipath_set_lid(struct ipath_devdata *, u32, u8);
775int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv); 787int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
776void ipath_enable_armlaunch(struct ipath_devdata *); 788void ipath_enable_armlaunch(struct ipath_devdata *);
777void ipath_disable_armlaunch(struct ipath_devdata *); 789void ipath_disable_armlaunch(struct ipath_devdata *);
790void ipath_hol_down(struct ipath_devdata *);
791void ipath_hol_up(struct ipath_devdata *);
792void ipath_hol_event(unsigned long);
778 793
779/* for use in system calls, where we want to know device type, etc. */ 794/* for use in system calls, where we want to know device type, etc. */
780#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd 795#define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
@@ -830,6 +845,7 @@ void ipath_disable_armlaunch(struct ipath_devdata *);
830 /* Suppress heartbeat, even if turning off loopback */ 845 /* Suppress heartbeat, even if turning off loopback */
831#define IPATH_NO_HRTBT 0x1000000 846#define IPATH_NO_HRTBT 0x1000000
832#define IPATH_HAS_MULT_IB_SPEED 0x8000000 847#define IPATH_HAS_MULT_IB_SPEED 0x8000000
848#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
833 849
834/* Bits in GPIO for the added interrupts */ 850/* Bits in GPIO for the added interrupts */
835#define IPATH_GPIO_PORT0_BIT 2 851#define IPATH_GPIO_PORT0_BIT 2
@@ -1030,6 +1046,21 @@ static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
1030} 1046}
1031 1047
1032/* 1048/*
1049 * from contents of IBCStatus (or a saved copy), return the logical link
1050 * state: a combination of link state and link training state (down,
1051 * active, init, arm, etc.).
1052 */
1053static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
1054{
1055 u32 ibs;
1056 ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1057 dd->ibcs_lts_mask;
1058 ibs |= (u32)(ibcs &
1059 (INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
1060 return ibs;
1061}
1062
1063/*
1033 * sysfs interface. 1064 * sysfs interface.
1034 */ 1065 */
1035 1066
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index cb19ea260837..16d0d74f1dda 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -200,7 +200,6 @@
200#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL 200#define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
201 201
202/* kr_ibcstatus bits */ 202/* kr_ibcstatus bits */
203#define INFINIPATH_IBCS_LINKTRAININGSTATE_MASK 0xF
204#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0 203#define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
205#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7 204#define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
206#define INFINIPATH_IBCS_LINKSTATE_SHIFT 4 205#define INFINIPATH_IBCS_LINKSTATE_SHIFT 4
@@ -221,30 +220,13 @@
221#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c 220#define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN 0x0c
222#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e 221#define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT 0x0e
223#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f 222#define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE 0x0f
224/* link state machine states (shift by INFINIPATH_IBCS_LINKSTATE_SHIFT) */ 223/* link state machine states (shift by ibcs_ls_shift) */
225#define INFINIPATH_IBCS_L_STATE_DOWN 0x0 224#define INFINIPATH_IBCS_L_STATE_DOWN 0x0
226#define INFINIPATH_IBCS_L_STATE_INIT 0x1 225#define INFINIPATH_IBCS_L_STATE_INIT 0x1
227#define INFINIPATH_IBCS_L_STATE_ARM 0x2 226#define INFINIPATH_IBCS_L_STATE_ARM 0x2
228#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3 227#define INFINIPATH_IBCS_L_STATE_ACTIVE 0x3
229#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4 228#define INFINIPATH_IBCS_L_STATE_ACT_DEFER 0x4
230 229
231/* combination link status states that we use with some frequency */
232#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
233 << INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
234 (INFINIPATH_IBCS_LINKSTATE_MASK \
235 <<INFINIPATH_IBCS_LINKSTATE_SHIFT))
236#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
237 << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
238 (INFINIPATH_IBCS_LT_STATE_LINKUP \
239 <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
240#define IPATH_IBSTATE_ARM ((INFINIPATH_IBCS_L_STATE_ARM \
241 << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
242 (INFINIPATH_IBCS_LT_STATE_LINKUP \
243 <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
244#define IPATH_IBSTATE_ACTIVE ((INFINIPATH_IBCS_L_STATE_ACTIVE \
245 << INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
246 (INFINIPATH_IBCS_LT_STATE_LINKUP \
247 <<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
248 230
249/* kr_extstatus bits */ 231/* kr_extstatus bits */
250#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1 232#define INFINIPATH_EXTS_SERDESPLLLOCK 0x1