author     Chris Wilson <chris@chris-wilson.co.uk>   2018-08-30 12:10:42 -0400
committer  Chris Wilson <chris@chris-wilson.co.uk>   2018-08-30 13:26:48 -0400
commit     70b73f9ac113983f9c7db9887447f1344ac5b69b (patch)
tree       133323ecd9f200a55403d8d562e2039ea6bd81b1 /drivers/gpu/drm/i915/intel_ringbuffer.c
parent     096055487115883dc82fdebb5d16444585e4fc24 (diff)
drm/i915/ringbuffer: Delay after invalidating gen6+ xcs
During stress testing of full-ppgtt (on Baytrail at least), we found
that the invalidation around a context/mm switch was insufficient (writes
would go astray). Adding a second MI_FLUSH_DW barrier prevents this, but
it is unclear as to whether this is merely a delaying tactic or if it is
truly serialising with the TLB invalidation. Either way, it is
empirically required.
v2: Avoid the loop for readability;
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107715
References: https://bugs.freedesktop.org/show_bug.cgi?id=107759
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Matthew Auld <matthew.william.auld@gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180830161042.29193-1-chris@chris-wilson.co.uk
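
For reference, the change boils down to emitting MI_FLUSH_DW twice around an invalidate: once carrying the invalidate flags, then once more as a plain barrier before the mm switch. A condensed sketch of the new helper, taken from the diff below (the MI_FLUSH_DW emission itself lives in emit_mi_flush_dw(), shown in full in the patch):

	static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
	{
		int err;

		if (mode & EMIT_INVALIDATE) {
			/* first MI_FLUSH_DW: post-sync write + TLB invalidate */
			err = emit_mi_flush_dw(rq, invflags);
			if (err)
				return err;
		}

		/* second MI_FLUSH_DW: plain barrier, empirically required before a mm switch */
		return emit_mi_flush_dw(rq, 0);
	}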
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c | 69
1 file changed, 34 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d40f55a8dc34..44432677160c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1944,7 +1944,7 @@ static void gen6_bsd_submit_request(struct i915_request *request)
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
+static int emit_mi_flush_dw(struct i915_request *rq, u32 flags)
 {
 	u32 cmd, *cs;
 
@@ -1954,7 +1954,8 @@ static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
 
 	cmd = MI_FLUSH_DW;
 
-	/* We always require a command barrier so that subsequent
+	/*
+	 * We always require a command barrier so that subsequent
 	 * commands, such as breadcrumb interrupts, are strictly ordered
 	 * wrt the contents of the write cache being flushed to memory
 	 * (and thus being coherent from the CPU).
@@ -1962,22 +1963,49 @@ static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
 	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 
 	/*
-	 * Bspec vol 1c.5 - video engine command streamer:
+	 * Bspec vol 1c.3 - blitter engine command streamer:
 	 * "If ENABLED, all TLBs will be invalidated once the flush
 	 * operation is complete. This bit is only valid when the
 	 * Post-Sync Operation field is a value of 1h or 3h."
 	 */
-	if (mode & EMIT_INVALIDATE)
-		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+	cmd |= flags;
 
 	*cs++ = cmd;
 	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
 	*cs++ = 0;
 	*cs++ = MI_NOOP;
+
 	intel_ring_advance(rq, cs);
+
 	return 0;
 }
 
+static int gen6_flush_dw(struct i915_request *rq, u32 mode, u32 invflags)
+{
+	int err;
+
+	/*
+	 * Not only do we need a full barrier (post-sync write) after
+	 * invalidating the TLBs, but we need to wait a little bit
+	 * longer. Whether this is merely delaying us, or the
+	 * subsequent flush is a key part of serialising with the
+	 * post-sync op, this extra pass appears vital before a
+	 * mm switch!
+	 */
+	if (mode & EMIT_INVALIDATE) {
+		err = emit_mi_flush_dw(rq, invflags);
+		if (err)
+			return err;
+	}
+
+	return emit_mi_flush_dw(rq, 0);
+}
+
+static int gen6_bsd_ring_flush(struct i915_request *rq, u32 mode)
+{
+	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB | MI_INVALIDATE_BSD);
+}
+
 static int
 hsw_emit_bb_start(struct i915_request *rq,
 		  u64 offset, u32 len,
@@ -2022,36 +2050,7 @@ gen6_emit_bb_start(struct i915_request *rq,
 
 static int gen6_ring_flush(struct i915_request *rq, u32 mode)
 {
-	u32 cmd, *cs;
-
-	cs = intel_ring_begin(rq, 4);
-	if (IS_ERR(cs))
-		return PTR_ERR(cs);
-
-	cmd = MI_FLUSH_DW;
-
-	/* We always require a command barrier so that subsequent
-	 * commands, such as breadcrumb interrupts, are strictly ordered
-	 * wrt the contents of the write cache being flushed to memory
-	 * (and thus being coherent from the CPU).
-	 */
-	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
-
-	/*
-	 * Bspec vol 1c.3 - blitter engine command streamer:
-	 * "If ENABLED, all TLBs will be invalidated once the flush
-	 * operation is complete. This bit is only valid when the
-	 * Post-Sync Operation field is a value of 1h or 3h."
-	 */
-	if (mode & EMIT_INVALIDATE)
-		cmd |= MI_INVALIDATE_TLB;
-	*cs++ = cmd;
-	*cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT;
-	*cs++ = 0;
-	*cs++ = MI_NOOP;
-	intel_ring_advance(rq, cs);
-
-	return 0;
+	return gen6_flush_dw(rq, mode, MI_INVALIDATE_TLB);
 }
 
 static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,