author     Chris Wilson <chris@chris-wilson.co.uk>    2017-04-27 06:46:51 -0400
committer  Chris Wilson <chris@chris-wilson.co.uk>    2017-04-27 07:22:13 -0400
commit     3204c343bb691c42a5e568cbd2a9ec9b2b5703c0 (patch)
tree       a1e3c562ab38e3b6a6a3b1bebfde791d0f20c9be
parent     e3895af8bb12612972efb1a07f0bb02b2853afda (diff)
drm/i915: Defer context state allocation for legacy ring submission
Almost from the outset for execlists, we used deferred allocation of
the logical context and rings. Then we ported the infrastructure for
pinning contexts back to legacy, and so now we are able to also
implement deferred allocation for context objects prior to first use
on the legacy submission.

v2: We still need to differentiate between legacy engines; Joonas is
fixing that, but I want this first ;) (Joonas)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170427104651.22394-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
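In outline, the patch moves allocation of the render-engine context object
out of __create_hw_context() and into the first pin for legacy ring
submission, so a context that never runs on the render ring never allocates
backing storage. Below is a minimal, self-contained sketch of that
allocate-on-first-pin pattern; the struct and function names here are
illustrative stand-ins, not the driver's actual symbols:

#include <errno.h>
#include <stdlib.h>

/*
 * Sketch of the deferred-allocation pattern described above: the
 * expensive per-context state is created on the first pin rather than
 * at context creation. All names in this sketch are hypothetical.
 */
struct sketch_context {
        void *state;            /* backing storage, NULL until first pin */
        unsigned int pin_count; /* number of active pins */
};

static int sketch_context_pin(struct sketch_context *ce, size_t state_size)
{
        if (ce->pin_count++)    /* already pinned: state already exists */
                return 0;

        if (!ce->state) {       /* first use: allocate on demand */
                ce->state = malloc(state_size);
                if (!ce->state) {
                        ce->pin_count = 0; /* undo the failed first pin */
                        return -ENOMEM;
                }
        }

        return 0;
}

static void sketch_context_unpin(struct sketch_context *ce)
{
        if (ce->pin_count)
                ce->pin_count--;
        /* state stays allocated for reuse until the context is freed */
}

The same shape appears in intel_ring_context_pin() in the diff below:
alloc_context_vma() runs only when ce->state is still NULL on the first
pin of an RCS context.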
-rw-r--r--  drivers/gpu/drm/i915/i915_gem_context.c  59
-rw-r--r--  drivers/gpu/drm/i915/intel_ringbuffer.c  50
2 files changed, 50 insertions(+), 59 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8bd0c4966913..d46a69d3d390 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -151,45 +151,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	kfree(ctx);
 }
 
-static struct drm_i915_gem_object *
-alloc_context_obj(struct drm_i915_private *dev_priv, u64 size)
-{
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	obj = i915_gem_object_create(dev_priv, size);
-	if (IS_ERR(obj))
-		return obj;
-
-	/*
-	 * Try to make the context utilize L3 as well as LLC.
-	 *
-	 * On VLV we don't have L3 controls in the PTEs so we
-	 * shouldn't touch the cache level, especially as that
-	 * would make the object snooped which might have a
-	 * negative performance impact.
-	 *
-	 * Snooping is required on non-llc platforms in execlist
-	 * mode, but since all GGTT accesses use PAT entry 0 we
-	 * get snooping anyway regardless of cache_level.
-	 *
-	 * This is only applicable for Ivy Bridge devices since
-	 * later platforms don't have L3 control bits in the PTE.
-	 */
-	if (IS_IVYBRIDGE(dev_priv)) {
-		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
-		/* Failure shouldn't ever happen this early */
-		if (WARN_ON(ret)) {
-			i915_gem_object_put(obj);
-			return ERR_PTR(ret);
-		}
-	}
-
-	return obj;
-}
-
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
@@ -266,26 +227,6 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 	ctx->i915 = dev_priv;
 
-	if (dev_priv->hw_context_size) {
-		struct drm_i915_gem_object *obj;
-		struct i915_vma *vma;
-
-		obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
-		if (IS_ERR(obj)) {
-			ret = PTR_ERR(obj);
-			goto err_out;
-		}
-
-		vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
-		if (IS_ERR(vma)) {
-			i915_gem_object_put(obj);
-			ret = PTR_ERR(vma);
-			goto err_out;
-		}
-
-		ctx->engine[RCS].state = vma;
-	}
-
 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
 	if (file_priv) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 6836efb7e3d2..61f612454ce7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1437,6 +1437,44 @@ static int context_pin(struct i915_gem_context *ctx)
 			    PIN_GLOBAL | PIN_HIGH);
 }
 
+static struct i915_vma *
+alloc_context_vma(struct intel_engine_cs *engine)
+{
+	struct drm_i915_private *i915 = engine->i915;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+
+	obj = i915_gem_object_create(i915, i915->hw_context_size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+
+	/*
+	 * Try to make the context utilize L3 as well as LLC.
+	 *
+	 * On VLV we don't have L3 controls in the PTEs so we
+	 * shouldn't touch the cache level, especially as that
+	 * would make the object snooped which might have a
+	 * negative performance impact.
+	 *
+	 * Snooping is required on non-llc platforms in execlist
+	 * mode, but since all GGTT accesses use PAT entry 0 we
+	 * get snooping anyway regardless of cache_level.
+	 *
+	 * This is only applicable for Ivy Bridge devices since
+	 * later platforms don't have L3 control bits in the PTE.
+	 */
+	if (IS_IVYBRIDGE(i915)) {
+		/* Ignore any error, regard it as a simple optimisation */
+		i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
+	}
+
+	vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
+	if (IS_ERR(vma))
+		i915_gem_object_put(obj);
+
+	return vma;
+}
+
 static int intel_ring_context_pin(struct intel_engine_cs *engine,
 				  struct i915_gem_context *ctx)
 {
@@ -1449,6 +1487,18 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
 		return 0;
 	GEM_BUG_ON(!ce->pin_count); /* no overflow please! */
 
+	if (engine->id == RCS && !ce->state && engine->i915->hw_context_size) {
+		struct i915_vma *vma;
+
+		vma = alloc_context_vma(engine);
+		if (IS_ERR(vma)) {
+			ret = PTR_ERR(vma);
+			goto error;
+		}
+
+		ce->state = vma;
+	}
+
 	if (ce->state) {
 		ret = context_pin(ctx);
 		if (ret)