about summary refs log tree commit diff stats
path: root/drivers/gpu/drm/i915/intel_ringbuffer.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2017-04-27 06:46:51 -0400
committerChris Wilson <chris@chris-wilson.co.uk>2017-04-27 07:22:13 -0400
commit3204c343bb691c42a5e568cbd2a9ec9b2b5703c0 (patch)
treea1e3c562ab38e3b6a6a3b1bebfde791d0f20c9be /drivers/gpu/drm/i915/intel_ringbuffer.c
parente3895af8bb12612972efb1a07f0bb02b2853afda (diff)
drm/i915: Defer context state allocation for legacy ring submission
Almost from the outset for execlists, we used deferred allocation of the logical context and rings. Then we ported the infrastructure for pinning contexts back to legacy, and so now we are able to also implement deferred allocation for context objects prior to first use on the legacy submission. v2: We still need to differentiate between legacy engines, Joonas is fixing that but I want this first ;) (Joonas) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20170427104651.22394-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/intel_ringbuffer.c50
1 file changed, 50 insertions(+), 0 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6836efb7e3d2..61f612454ce7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1437,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx)
1437 PIN_GLOBAL | PIN_HIGH); 1437 PIN_GLOBAL | PIN_HIGH);
1438} 1438}
1439 1439
1440static struct i915_vma *
1441alloc_context_vma(struct intel_engine_cs *engine)
1442{
1443 struct drm_i915_private *i915 = engine->i915;
1444 struct drm_i915_gem_object *obj;
1445 struct i915_vma *vma;
1446
1447 obj = i915_gem_object_create(i915, i915->hw_context_size);
1448 if (IS_ERR(obj))
1449 return ERR_CAST(obj);
1450
1451 /*
1452 * Try to make the context utilize L3 as well as LLC.
1453 *
1454 * On VLV we don't have L3 controls in the PTEs so we
1455 * shouldn't touch the cache level, especially as that
1456 * would make the object snooped which might have a
1457 * negative performance impact.
1458 *
1459 * Snooping is required on non-llc platforms in execlist
1460 * mode, but since all GGTT accesses use PAT entry 0 we
1461 * get snooping anyway regardless of cache_level.
1462 *
1463 * This is only applicable for Ivy Bridge devices since
1464 * later platforms don't have L3 control bits in the PTE.
1465 */
1466 if (IS_IVYBRIDGE(i915)) {
1467 /* Ignore any error, regard it as a simple optimisation */
1468 i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
1469 }
1470
1471 vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
1472 if (IS_ERR(vma))
1473 i915_gem_object_put(obj);
1474
1475 return vma;
1476}
1477
1440static int intel_ring_context_pin(struct intel_engine_cs *engine, 1478static int intel_ring_context_pin(struct intel_engine_cs *engine,
1441 struct i915_gem_context *ctx) 1479 struct i915_gem_context *ctx)
1442{ 1480{
@@ -1449,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
1449 return 0; 1487 return 0;
1450 GEM_BUG_ON(!ce->pin_count); /* no overflow please! */ 1488 GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
1451 1489
1490 if (engine->id == RCS && !ce->state && engine->i915->hw_context_size) {
1491 struct i915_vma *vma;
1492
1493 vma = alloc_context_vma(engine);
1494 if (IS_ERR(vma)) {
1495 ret = PTR_ERR(vma);
1496 goto error;
1497 }
1498
1499 ce->state = vma;
1500 }
1501
1452 if (ce->state) { 1502 if (ce->state) {
1453 ret = context_pin(ctx); 1503 ret = context_pin(ctx);
1454 if (ret) 1504 if (ret)