author		Chris Wilson <chris@chris-wilson.co.uk>	2018-03-02 09:32:45 -0500
committer	Chris Wilson <chris@chris-wilson.co.uk>	2018-03-02 18:11:12 -0500
commit		a3e3883646c05828f96369f7809436f1b3247234 (patch)
tree		c2d2431dcea1baed12750012fb07caa1ec95976d
parent		aebbc2d7b3887202c9b02e69e2a470901d0eda71 (diff)
drm/i915/execlists: Split spinlock from its irq disabling side-effect
During reset/wedging, we have to clean up the requests on the timeline and flush the pending interrupt state. Currently, we are abusing the irq-disabling side-effect of the timeline spinlock to protect the irq state in conjunction with the engine's timeline requests, but this is accidental and conflates the spinlock with the irq state, a baffling state of affairs for the reader. Instead, explicitly disable irqs over the critical section, and separate modifying the irq state from the timeline's requests.

Suggested-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180302143246.2579-4-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
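For readers less familiar with the kernel locking API, the transformation in the diff below follows this pattern (a minimal schematic in kernel-style C, not code from the driver; the lock and function names are illustrative): spin_lock_irqsave() conflates two protections in one call, whereas local_irq_save() plus a plain spin_lock() keep the irq disabling and the lock each scoped to its own state.

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

/* Before: the lock's irq-disabling side-effect also covers irq-only state. */
static void cleanup_before(void)
{
	unsigned long flags;

	spin_lock_irqsave(&example_lock, flags);
	/* ... modifies both the lock-protected list and unrelated irq state ... */
	spin_unlock_irqrestore(&example_lock, flags);
}

/* After: irq disabling and the spinlock are taken explicitly and separately. */
static void cleanup_after(void)
{
	unsigned long flags;

	local_irq_save(flags);		/* covers the irq-only state */

	/* ... flush pending irq state here, outside the lock ... */

	spin_lock(&example_lock);	/* covers only the lock-protected list */
	/* ... walk and modify the protected requests ... */
	spin_unlock(&example_lock);

	local_irq_restore(flags);
}

Note that because local_irq_save() is already in effect, the plain spin_lock() is as safe against hardirq recursion as the original spin_lock_irqsave(), and lockdep still sees the lock taken with irqs disabled.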
-rw-r--r--	drivers/gpu/drm/i915/intel_lrc.c	35
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 0482e54c94f0..36b376e4b105 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -689,11 +689,27 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 
 	GEM_TRACE("%s\n", engine->name);
 
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	/*
+	 * Before we call engine->cancel_requests(), we should have exclusive
+	 * access to the submission state. This is arranged for us by the
+	 * caller disabling the interrupt generation, the tasklet and other
+	 * threads that may then access the same state, giving us a free hand
+	 * to reset state. However, we still need to let lockdep be aware that
+	 * we know this state may be accessed in hardirq context, so we
+	 * disable the irq around this manipulation and we want to keep
+	 * the spinlock focused on its duties and not accidentally conflate
+	 * coverage to the submission's irq state. (Similarly, although we
+	 * shouldn't need to disable irq around the manipulation of the
+	 * submission's irq state, we also wish to remind ourselves that
+	 * it is irq state.)
+	 */
+	local_irq_save(flags);
 
 	/* Cancel the requests on the HW and clear the ELSP tracker. */
 	execlists_cancel_port_requests(execlists);
 
+	spin_lock(&engine->timeline->lock);
+
 	/* Mark all executing requests as skipped. */
 	list_for_each_entry(rq, &engine->timeline->requests, link) {
 		GEM_BUG_ON(!rq->global_seqno);
@@ -727,6 +743,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	execlists->first = NULL;
 	GEM_BUG_ON(port_isset(execlists->port));
 
+	spin_unlock(&engine->timeline->lock);
+
 	/*
 	 * The port is checked prior to scheduling a tasklet, but
 	 * just in case we have suspended the tasklet to do the
@@ -738,7 +756,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
 	/* Mark all CS interrupts as complete */
 	execlists->active = 0;
 
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	local_irq_restore(flags);
 }
 
 /*
@@ -1618,7 +1636,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	GEM_TRACE("%s seqno=%x\n",
 		  engine->name, request ? request->global_seqno : 0);
 
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	/* See execlists_cancel_requests() for the irq/spinlock split. */
+	local_irq_save(flags);
 
 	reset_irq(engine);
 
@@ -1634,14 +1653,17 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	execlists_cancel_port_requests(execlists);
 
 	/* Push back any incomplete requests for replay after the reset. */
+	spin_lock(&engine->timeline->lock);
 	__unwind_incomplete_requests(engine);
+	spin_unlock(&engine->timeline->lock);
 
 	/* Mark all CS interrupts as complete */
 	execlists->active = 0;
 
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	local_irq_restore(flags);
 
-	/* If the request was innocent, we leave the request in the ELSP
+	/*
+	 * If the request was innocent, we leave the request in the ELSP
 	 * and will try to replay it on restarting. The context image may
 	 * have been corrupted by the reset, in which case we may have
 	 * to service a new GPU hang, but more likely we can continue on
@@ -1654,7 +1676,8 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	if (!request || request->fence.error != -EIO)
 		return;
 
-	/* We want a simple context + ring to execute the breadcrumb update.
+	/*
+	 * We want a simple context + ring to execute the breadcrumb update.
 	 * We cannot rely on the context being intact across the GPU hang,
 	 * so clear it and rebuild just what we need for the breadcrumb.
 	 * All pending requests for this context will be zapped, and any