author		Oscar Mateo <oscar.mateo@intel.com>	2014-11-13 05:28:10 -0500
committer	Daniel Vetter <daniel.vetter@ffwll.ch>	2014-11-19 13:32:58 -0500
commit		dcb4c12a687710ab745c2cdee8298c3e97f6f707 (patch)
tree		6423d5ef1b946f6e1a27c21bb44be7f44a6e38f3
parent		c86ee3a9f8cddcf2e637da19d6e7c05bdea11a96 (diff)
drm/i915/bdw: Pin the context backing objects to GGTT on-demand
Up until now, we have pinned every logical ring context backing object
during creation, and left it pinned until destruction. This made my life
easier, but it's a harmful thing to do, because we cause fragmentation
of the GGTT (and, eventually, we would run out of space).

This patch makes the pinning on-demand: the backing objects of the two
contexts that are written to the ELSP are pinned right before submission
and unpinned once the hardware is done with them. The only context that
is still pinned regardless is the global default one, so that the HWS can
still be accessed in the same way (ring->status_page).

v2: In the early version of this patch, we were pinning the context as
we put it into the ELSP: on the one hand, this is very efficient because
only a maximum of two contexts are pinned at any given time, but on the
other hand, we cannot really pin in interrupt time :(

v3: Use a mutex rather than atomic_t to protect the pin count, to avoid
races. Do not unpin the default context in free_request.

v4: Break out pin and unpin into functions. Fix style problems reported
by checkpatch.

v5: Remove unpin_lock, as all pinning and unpinning is done with the
struct mutex already locked. Add WARN_ONs to make sure this is the case
in future.

Issue: VIZ-4277
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Thomas Daniel <thomas.daniel@intel.com>
Reviewed-by: Akash Goel <akash.goels@gmail.com>
Reviewed-by: Deepak S <deepak.s@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
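For illustration only, below is a minimal, self-contained sketch of the
reference-counted pin scheme the message describes. The names used here
(ctx_state, ggtt_pin, ggtt_unpin, ctx_pin, ctx_unpin, mutex_held) are
stand-ins invented for this sketch, not driver API; the real code
serializes everything under struct_mutex, which the sketch models with a
simple flag and assert().

/*
 * Sketch of on-demand, reference-counted pinning: only the first user
 * actually binds the object, only the last user releases it.
 */
#include <assert.h>
#include <stdio.h>

struct ctx_state {
	int unpin_count;	/* outstanding pin requests */
	int pinned;		/* stand-in for the GGTT binding */
};

static int mutex_held = 1;	/* stand-in for struct_mutex being locked */

static int ggtt_pin(struct ctx_state *s)    { s->pinned = 1; return 0; }
static void ggtt_unpin(struct ctx_state *s) { s->pinned = 0; }

static int ctx_pin(struct ctx_state *s)
{
	int ret = 0;

	assert(mutex_held);		/* mirrors the WARN_ON in the patch */
	if (s->unpin_count++ == 0) {	/* first user really pins */
		ret = ggtt_pin(s);
		if (ret)
			s->unpin_count = 0;
	}
	return ret;
}

static void ctx_unpin(struct ctx_state *s)
{
	assert(mutex_held);
	if (--s->unpin_count == 0)	/* last user really unpins */
		ggtt_unpin(s);
}

int main(void)
{
	struct ctx_state s = { 0, 0 };

	ctx_pin(&s);	/* request submission pins the context */
	ctx_pin(&s);	/* a second request: no extra GGTT binding */
	ctx_unpin(&s);
	ctx_unpin(&s);	/* hardware done: object actually unpinned */
	printf("pinned=%d count=%d\n", s.pinned, s.unpin_count);
	return 0;
}

The counter is what keeps nested submissions of the same context from
churning the GGTT: intermediate pin/unpin pairs only adjust the count.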
 drivers/gpu/drm/i915/i915_debugfs.c | 12
 drivers/gpu/drm/i915/i915_drv.h     |  1
 drivers/gpu/drm/i915/i915_gem.c     | 39
 drivers/gpu/drm/i915/intel_lrc.c    | 69
 drivers/gpu/drm/i915/intel_lrc.h    |  4
 5 files changed, 98 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 319da61354b0..bb1458381dc4 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1799,10 +1799,16 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 			continue;
 
 		if (ctx_obj) {
-			struct page *page = i915_gem_object_get_page(ctx_obj, 1);
-			uint32_t *reg_state = kmap_atomic(page);
+			struct page *page;
+			uint32_t *reg_state;
 			int j;
 
+			i915_gem_obj_ggtt_pin(ctx_obj,
+					      GEN8_LR_CONTEXT_ALIGN, 0);
+
+			page = i915_gem_object_get_page(ctx_obj, 1);
+			reg_state = kmap_atomic(page);
+
 			seq_printf(m, "CONTEXT: %s %u\n", ring->name,
 				   intel_execlists_ctx_id(ctx_obj));
 
@@ -1814,6 +1820,8 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
 			}
 			kunmap_atomic(reg_state);
 
+			i915_gem_object_ggtt_unpin(ctx_obj);
+
 			seq_putc(m, '\n');
 		}
 	}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4f08160d6192..1097d20a4f35 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -685,6 +685,7 @@ struct intel_context {
 	struct {
 		struct drm_i915_gem_object *state;
 		struct intel_ringbuffer *ringbuf;
+		int unpin_count;
 	} engine[I915_NUM_RINGS];
 
 	struct list_head link;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2e85ef1650bf..7f95d8ff28af 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2576,12 +2576,18 @@ static void i915_set_reset_status(struct drm_i915_private *dev_priv,
 
 static void i915_gem_free_request(struct drm_i915_gem_request *request)
 {
+	struct intel_context *ctx = request->ctx;
+
 	list_del(&request->list);
 	i915_gem_request_remove_from_client(request);
 
-	if (request->ctx)
-		i915_gem_context_unreference(request->ctx);
+	if (i915.enable_execlists && ctx) {
+		struct intel_engine_cs *ring = request->ring;
 
+		if (ctx != ring->default_context)
+			intel_lr_context_unpin(ring, ctx);
+		i915_gem_context_unreference(ctx);
+	}
 	kfree(request);
 }
 
@@ -2636,6 +2642,23 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 	}
 
 	/*
+	 * Clear the execlists queue up before freeing the requests, as those
+	 * are the ones that keep the context and ringbuffer backing objects
+	 * pinned in place.
+	 */
+	while (!list_empty(&ring->execlist_queue)) {
+		struct intel_ctx_submit_request *submit_req;
+
+		submit_req = list_first_entry(&ring->execlist_queue,
+				struct intel_ctx_submit_request,
+				execlist_link);
+		list_del(&submit_req->execlist_link);
+		intel_runtime_pm_put(dev_priv);
+		i915_gem_context_unreference(submit_req->ctx);
+		kfree(submit_req);
+	}
+
+	/*
 	 * We must free the requests after all the corresponding objects have
 	 * been moved off active lists. Which is the same order as the normal
 	 * retire_requests function does. This is important if object hold
@@ -2652,18 +2675,6 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 		i915_gem_free_request(request);
 	}
 
-	while (!list_empty(&ring->execlist_queue)) {
-		struct intel_ctx_submit_request *submit_req;
-
-		submit_req = list_first_entry(&ring->execlist_queue,
-				struct intel_ctx_submit_request,
-				execlist_link);
-		list_del(&submit_req->execlist_link);
-		intel_runtime_pm_put(dev_priv);
-		i915_gem_context_unreference(submit_req->ctx);
-		kfree(submit_req);
-	}
-
 	/* These may not have been flush before the reset, do so now */
 	kfree(ring->preallocated_lazy_request);
 	ring->preallocated_lazy_request = NULL;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c855051ba18d..9560e634c9b8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -140,8 +140,6 @@
 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
 #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
 
-#define GEN8_LR_CONTEXT_ALIGN 4096
-
 #define RING_EXECLIST_QFULL		(1 << 0x2)
 #define RING_EXECLIST1_VALID		(1 << 0x3)
 #define RING_EXECLIST0_VALID		(1 << 0x4)
@@ -814,9 +812,40 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
 	execlists_context_queue(ring, ctx, ringbuf->tail);
 }
 
+static int intel_lr_context_pin(struct intel_engine_cs *ring,
+		struct intel_context *ctx)
+{
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+	int ret = 0;
+
+	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+	if (ctx->engine[ring->id].unpin_count++ == 0) {
+		ret = i915_gem_obj_ggtt_pin(ctx_obj,
+				GEN8_LR_CONTEXT_ALIGN, 0);
+		if (ret)
+			ctx->engine[ring->id].unpin_count = 0;
+	}
+
+	return ret;
+}
+
+void intel_lr_context_unpin(struct intel_engine_cs *ring,
+		struct intel_context *ctx)
+{
+	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
+
+	if (ctx_obj) {
+		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
+		if (--ctx->engine[ring->id].unpin_count == 0)
+			i915_gem_object_ggtt_unpin(ctx_obj);
+	}
+}
+
 static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
 				    struct intel_context *ctx)
 {
+	int ret;
+
 	if (ring->outstanding_lazy_seqno)
 		return 0;
 
@@ -827,6 +856,14 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
 	if (request == NULL)
 		return -ENOMEM;
 
+	if (ctx != ring->default_context) {
+		ret = intel_lr_context_pin(ring, ctx);
+		if (ret) {
+			kfree(request);
+			return ret;
+		}
+	}
+
 	/* Hold a reference to the context this request belongs to
 	 * (we will need it when the time comes to emit/retire the
 	 * request).
@@ -1680,12 +1717,16 @@ void intel_lr_context_free(struct intel_context *ctx)
 
 	for (i = 0; i < I915_NUM_RINGS; i++) {
 		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
-		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
 
 		if (ctx_obj) {
+			struct intel_ringbuffer *ringbuf =
+					ctx->engine[i].ringbuf;
+			struct intel_engine_cs *ring = ringbuf->ring;
+
 			intel_destroy_ringbuffer_obj(ringbuf);
 			kfree(ringbuf);
-			i915_gem_object_ggtt_unpin(ctx_obj);
+			if (ctx == ring->default_context)
+				i915_gem_object_ggtt_unpin(ctx_obj);
 			drm_gem_object_unreference(&ctx_obj->base);
 		}
 	}
@@ -1748,6 +1789,7 @@ static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring)
 {
+	const bool is_global_default_ctx = (ctx == ring->default_context);
 	struct drm_device *dev = ring->dev;
 	struct drm_i915_gem_object *ctx_obj;
 	uint32_t context_size;
@@ -1767,18 +1809,22 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 		return ret;
 	}
 
-	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
-		drm_gem_object_unreference(&ctx_obj->base);
-		return ret;
+	if (is_global_default_ctx) {
+		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+		if (ret) {
+			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
+					ret);
+			drm_gem_object_unreference(&ctx_obj->base);
+			return ret;
+		}
 	}
 
 	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
 	if (!ringbuf) {
 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
 				ring->name);
-		i915_gem_object_ggtt_unpin(ctx_obj);
+		if (is_global_default_ctx)
+			i915_gem_object_ggtt_unpin(ctx_obj);
 		drm_gem_object_unreference(&ctx_obj->base);
 		ret = -ENOMEM;
 		return ret;
@@ -1841,7 +1887,8 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
 
 error:
 	kfree(ringbuf);
-	i915_gem_object_ggtt_unpin(ctx_obj);
+	if (is_global_default_ctx)
+		i915_gem_object_ggtt_unpin(ctx_obj);
 	drm_gem_object_unreference(&ctx_obj->base);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 84bbf1916324..14b216b9be7f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -24,6 +24,8 @@
 #ifndef _INTEL_LRC_H_
 #define _INTEL_LRC_H_
 
+#define GEN8_LR_CONTEXT_ALIGN 4096
+
 /* Execlists regs */
 #define RING_ELSP(ring)			((ring)->mmio_base+0x230)
 #define RING_EXECLIST_STATUS(ring)	((ring)->mmio_base+0x234)
@@ -67,6 +69,8 @@ int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
 void intel_lr_context_free(struct intel_context *ctx);
 int intel_lr_context_deferred_create(struct intel_context *ctx,
 				     struct intel_engine_cs *ring);
+void intel_lr_context_unpin(struct intel_engine_cs *ring,
+			    struct intel_context *ctx);
 
 /* Execlists */
 int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);