author	Sarah Sharp <sarah.a.sharp@linux.intel.com>	2009-10-27 13:57:01 -0400
committer	Greg Kroah-Hartman <gregkh@suse.de>	2009-12-11 14:55:17 -0500
commit	6f5165cf989387e84ef23122330b27cca1cbe831 (patch)
tree	44ff1ea0590b00f2851f50ffa2cf9954eb70a767 /drivers/usb/host/xhci-ring.c
parent	4f0f0baef017dfd5d62b749716ab980a825e1071 (diff)
USB: xhci: Add watchdog timer for URB cancellation.
In order to giveback a canceled URB, we must ensure that the xHCI hardware will not access the buffer in an URB. We can't modify the buffer pointers on endpoint rings without issuing and waiting for a stop endpoint command. Since URBs can be canceled in interrupt context, we can't wait on that command. The old code trusted that the host controller would respond to the command, and would giveback the URBs in the event handler. If the hardware never responds to the stop endpoint command, the URBs will never be completed, and we might hang the USB subsystem.

Implement a watchdog timer that is spawned whenever a stop endpoint command is queued. If a stop endpoint command event is found on the event ring during an interrupt, we need to stop the watchdog timer with del_timer(). Since del_timer() can fail if the timer is running and waiting on the xHCI lock, we need a way to signal to the timer that everything is fine and it should exit. If we simply clear EP_HALT_PENDING, a new stop endpoint command could sneak in and set it before the watchdog timer can grab the lock.

Instead we use a combination of the EP_HALT_PENDING flag and a counter for the number of pending stop endpoint commands (xhci_virt_ep->stop_cmds_pending). If we need to cancel the watchdog timer and del_timer() succeeds, we decrement the number of pending stop endpoint commands. If del_timer() fails, we leave the number of pending stop endpoint commands alone. In either case, we clear the EP_HALT_PENDING flag.

The timer will decrement the number of pending stop endpoint commands once it obtains the lock. If the timer is the tail end of the last stop endpoint command (xhci_virt_ep->stop_cmds_pending == 0), and the endpoint's command is still pending (EP_HALT_PENDING is set), we assume the host is dying. The watchdog timer will set XHCI_STATE_DYING, try to halt the xHCI host, and give back all pending URBs.

Various other places in the driver need to check whether the xHCI host is dying. If the interrupt handler ever notices, it should immediately stop processing events. The URB enqueue function should also return -ESHUTDOWN. The URB dequeue function should simply return the value of usb_hcd_check_unlink_urb() and the watchdog timer will take care of giving the URB back. When a device is disconnected, the xHCI hardware structures should be freed without issuing a disable slot command (since the hardware probably won't respond to it anyway). The debugging polling loop should stop polling if the host is dying.

When a device is disconnected, any pending watchdog timers are killed with del_timer_sync(). It must be synchronous so that the watchdog timer doesn't attempt to access the freed endpoint structures.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
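The arming side of this scheme lives in the URB dequeue path in xhci.c and is not part of this file's diff. A minimal sketch of that step, assuming the stop_cmd_timer/stop_cmds_pending fields and the XHCI_STOP_EP_CMD_TIMEOUT constant added elsewhere in this series, plus the existing xhci_queue_stop_endpoint() and xhci_ring_cmd_db() helpers:

	/* Sketch only: arm the watchdog when queueing a Stop Endpoint command.
	 * Called from the URB dequeue path with xhci->lock held; ep is the
	 * xhci_virt_ep that owns the ring carrying the cancelled URB.
	 */
	ep->ep_state |= EP_HALT_PENDING;
	ep->stop_cmds_pending++;
	ep->stop_cmd_timer.expires = jiffies + XHCI_STOP_EP_CMD_TIMEOUT * HZ;
	add_timer(&ep->stop_cmd_timer);
	xhci_queue_stop_endpoint(xhci, urb->dev->slot_id, ep_index);
	xhci_ring_cmd_db(xhci);

If the Stop Endpoint command completes normally, xhci_stop_watchdog_timer_in_irq() (added below) cancels the timer and decrements the counter; if the host never answers, the timer fires and xhci_stop_endpoint_command_watchdog() marks the host as dying.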
Diffstat (limited to 'drivers/usb/host/xhci-ring.c')
-rw-r--r--	drivers/usb/host/xhci-ring.c	170
1 file changed, 160 insertions(+), 10 deletions(-)
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 184e8b6f30b2..9541e88df68f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -475,6 +475,35 @@ void xhci_queue_new_dequeue_state(struct xhci_hcd *xhci,
 	ep->ep_state |= SET_DEQ_PENDING;
 }
 
+static inline void xhci_stop_watchdog_timer_in_irq(struct xhci_hcd *xhci,
+		struct xhci_virt_ep *ep)
+{
+	ep->ep_state &= ~EP_HALT_PENDING;
+	/* Can't del_timer_sync in interrupt, so we attempt to cancel. If the
+	 * timer is running on another CPU, we don't decrement stop_cmds_pending
+	 * (since we didn't successfully stop the watchdog timer).
+	 */
+	if (del_timer(&ep->stop_cmd_timer))
+		ep->stop_cmds_pending--;
+}
+
+/* Must be called with xhci->lock held in interrupt context */
+static void xhci_giveback_urb_in_irq(struct xhci_hcd *xhci,
+		struct xhci_td *cur_td, int status, char *adjective)
+{
+	struct usb_hcd *hcd = xhci_to_hcd(xhci);
+
+	cur_td->urb->hcpriv = NULL;
+	usb_hcd_unlink_urb_from_ep(hcd, cur_td->urb);
+	xhci_dbg(xhci, "Giveback %s URB %p\n", adjective, cur_td->urb);
+
+	spin_unlock(&xhci->lock);
+	usb_hcd_giveback_urb(hcd, cur_td->urb, status);
+	kfree(cur_td);
+	spin_lock(&xhci->lock);
+	xhci_dbg(xhci, "%s URB given back\n", adjective);
+}
+
 /*
  * When we get a command completion for a Stop Endpoint Command, we need to
  * unlink any cancelled TDs from the ring. There are two ways to do that:
@@ -508,7 +537,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 	ep_ring = ep->ring;
 
 	if (list_empty(&ep->cancelled_td_list)) {
-		ep->ep_state &= ~EP_HALT_PENDING;
+		xhci_stop_watchdog_timer_in_irq(xhci, ep);
 		ring_ep_doorbell(xhci, slot_id, ep_index);
 		return;
 	}
@@ -540,7 +569,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 		list_del(&cur_td->td_list);
 	}
 	last_unlinked_td = cur_td;
-	ep->ep_state &= ~EP_HALT_PENDING;
+	xhci_stop_watchdog_timer_in_irq(xhci, ep);
 
 	/* If necessary, queue a Set Transfer Ring Dequeue Pointer command */
 	if (deq_state.new_deq_ptr && deq_state.new_deq_seg) {
@@ -568,23 +597,136 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci,
 		hcd_stat_update(xhci->tp_stat, cur_td->urb->actual_length,
 				ktime_sub(stop_time, cur_td->start_time));
 #endif
-		cur_td->urb->hcpriv = NULL;
-		usb_hcd_unlink_urb_from_ep(xhci_to_hcd(xhci), cur_td->urb);
-
-		xhci_dbg(xhci, "Giveback cancelled URB %p\n", cur_td->urb);
-		spin_unlock(&xhci->lock);
 		/* Doesn't matter what we pass for status, since the core will
 		 * just overwrite it (because the URB has been unlinked).
 		 */
-		usb_hcd_giveback_urb(xhci_to_hcd(xhci), cur_td->urb, 0);
-		kfree(cur_td);
+		xhci_giveback_urb_in_irq(xhci, cur_td, 0, "cancelled");
 
-		spin_lock(&xhci->lock);
+		/* Stop processing the cancelled list if the watchdog timer is
+		 * running.
+		 */
+		if (xhci->xhc_state & XHCI_STATE_DYING)
+			return;
 	} while (cur_td != last_unlinked_td);
 
 	/* Return to the event handler with xhci->lock re-acquired */
 }
 
+/* Watchdog timer function for when a stop endpoint command fails to complete.
+ * In this case, we assume the host controller is broken or dying or dead. The
+ * host may still be completing some other events, so we have to be careful to
+ * let the event ring handler and the URB dequeueing/enqueueing functions know
+ * through xhci->state.
+ *
+ * The timer may also fire if the host takes a very long time to respond to the
+ * command, and the stop endpoint command completion handler cannot delete the
+ * timer before the timer function is called. Another endpoint cancellation may
+ * sneak in before the timer function can grab the lock, and that may queue
+ * another stop endpoint command and add the timer back. So we cannot use a
+ * simple flag to say whether there is a pending stop endpoint command for a
+ * particular endpoint.
+ *
+ * Instead we use a combination of that flag and a counter for the number of
+ * pending stop endpoint commands. If the timer is the tail end of the last
+ * stop endpoint command, and the endpoint's command is still pending, we assume
+ * the host is dying.
+ */
+void xhci_stop_endpoint_command_watchdog(unsigned long arg)
+{
+	struct xhci_hcd *xhci;
+	struct xhci_virt_ep *ep;
+	struct xhci_virt_ep *temp_ep;
+	struct xhci_ring *ring;
+	struct xhci_td *cur_td;
+	int ret, i, j;
+
+	ep = (struct xhci_virt_ep *) arg;
+	xhci = ep->xhci;
+
+	spin_lock(&xhci->lock);
+
+	ep->stop_cmds_pending--;
+	if (xhci->xhc_state & XHCI_STATE_DYING) {
+		xhci_dbg(xhci, "Stop EP timer ran, but another timer marked "
+				"xHCI as DYING, exiting.\n");
+		spin_unlock(&xhci->lock);
+		return;
+	}
+	if (!(ep->stop_cmds_pending == 0 && (ep->ep_state & EP_HALT_PENDING))) {
+		xhci_dbg(xhci, "Stop EP timer ran, but no command pending, "
+				"exiting.\n");
+		spin_unlock(&xhci->lock);
+		return;
+	}
+
+	xhci_warn(xhci, "xHCI host not responding to stop endpoint command.\n");
+	xhci_warn(xhci, "Assuming host is dying, halting host.\n");
+	/* Oops, HC is dead or dying or at least not responding to the stop
+	 * endpoint command.
+	 */
+	xhci->xhc_state |= XHCI_STATE_DYING;
+	/* Disable interrupts from the host controller and start halting it */
+	xhci_quiesce(xhci);
+	spin_unlock(&xhci->lock);
+
+	ret = xhci_halt(xhci);
+
+	spin_lock(&xhci->lock);
+	if (ret < 0) {
+		/* This is bad; the host is not responding to commands and it's
+		 * not allowing itself to be halted. At least interrupts are
+		 * disabled, so we can set HC_STATE_HALT and notify the
+		 * USB core. But if we call usb_hc_died(), it will attempt to
+		 * disconnect all device drivers under this host. Those
+		 * disconnect() methods will wait for all URBs to be unlinked,
+		 * so we must complete them.
+		 */
+		xhci_warn(xhci, "Non-responsive xHCI host is not halting.\n");
+		xhci_warn(xhci, "Completing active URBs anyway.\n");
+		/* We could turn all TDs on the rings to no-ops. This won't
+		 * help if the host has cached part of the ring, and is slow if
+		 * we want to preserve the cycle bit. Skip it and hope the host
+		 * doesn't touch the memory.
+		 */
+	}
+	for (i = 0; i < MAX_HC_SLOTS; i++) {
+		if (!xhci->devs[i])
+			continue;
+		for (j = 0; j < 31; j++) {
+			temp_ep = &xhci->devs[i]->eps[j];
+			ring = temp_ep->ring;
+			if (!ring)
+				continue;
+			xhci_dbg(xhci, "Killing URBs for slot ID %u, "
+					"ep index %u\n", i, j);
+			while (!list_empty(&ring->td_list)) {
+				cur_td = list_first_entry(&ring->td_list,
+						struct xhci_td,
+						td_list);
+				list_del(&cur_td->td_list);
+				if (!list_empty(&cur_td->cancelled_td_list))
+					list_del(&cur_td->cancelled_td_list);
+				xhci_giveback_urb_in_irq(xhci, cur_td,
+						-ESHUTDOWN, "killed");
+			}
+			while (!list_empty(&temp_ep->cancelled_td_list)) {
+				cur_td = list_first_entry(
+						&temp_ep->cancelled_td_list,
+						struct xhci_td,
+						cancelled_td_list);
+				list_del(&cur_td->cancelled_td_list);
+				xhci_giveback_urb_in_irq(xhci, cur_td,
+						-ESHUTDOWN, "killed");
+			}
+		}
+	}
+	spin_unlock(&xhci->lock);
+	xhci_to_hcd(xhci)->state = HC_STATE_HALT;
+	xhci_dbg(xhci, "Calling usb_hc_died()\n");
+	usb_hc_died(xhci_to_hcd(xhci));
+	xhci_dbg(xhci, "xHCI host controller is dead.\n");
+}
+
 /*
  * When we get a completion for a Set Transfer Ring Dequeue Pointer command,
  * we need to clear the set deq pending flag in the endpoint ring state, so that
@@ -1333,6 +1475,14 @@ void xhci_handle_event(struct xhci_hcd *xhci)
 	default:
 		xhci->error_bitmask |= 1 << 3;
 	}
+	/* Any of the above functions may drop and re-acquire the lock, so check
+	 * to make sure a watchdog timer didn't mark the host as non-responsive.
+	 */
+	if (xhci->xhc_state & XHCI_STATE_DYING) {
+		xhci_dbg(xhci, "xHCI host dying, returning from "
+				"event handler.\n");
+		return;
+	}
 
 	if (update_ptrs) {
 		/* Update SW and HC event ring dequeue pointer */