aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMonk Liu <Monk.Liu@amd.com>2016-08-26 01:28:28 -0400
committerAlex Deucher <alexander.deucher@amd.com>2016-09-14 15:10:29 -0400
commit753ad49c9fdfc732972b0d03f2889f473ed35e59 (patch)
tree5b56188b3eb258fdcb167503979d657a0e9699be
parent1db422de72c31fcf81f31311468a747aad389fa5 (diff)
drm/amdgpu:implement CONTEXT_CONTROL (v5)
v1: for gfx8, use CONTEXT_CONTROL package to dynamically skip preamble CEIB and other load_xxx command in sequence. v2: support GFX7 as well. remove cntxcntl in compute ring funcs because CPC doesn't support this packet. v3: fix redundant judgement in cntxcntl. v4: some cleanups, don't change cs_submit() v5: keep old MESA supported & bump up KMS version. Signed-off-by: Monk Liu <Monk.Liu@amd.com> Acked-by: Chunming Zhou <David1.Zhou@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c30
6 files changed, 79 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 10ec29c50077..717c3b4e1d54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -320,6 +320,7 @@ struct amdgpu_ring_funcs {
320 void (*begin_use)(struct amdgpu_ring *ring); 320 void (*begin_use)(struct amdgpu_ring *ring);
321 void (*end_use)(struct amdgpu_ring *ring); 321 void (*end_use)(struct amdgpu_ring *ring);
322 void (*emit_switch_buffer) (struct amdgpu_ring *ring); 322 void (*emit_switch_buffer) (struct amdgpu_ring *ring);
323 void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
323}; 324};
324 325
325/* 326/*
@@ -966,6 +967,7 @@ struct amdgpu_ctx {
966 spinlock_t ring_lock; 967 spinlock_t ring_lock;
967 struct fence **fences; 968 struct fence **fences;
968 struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; 969 struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
970 bool preamble_presented;
969}; 971};
970 972
971struct amdgpu_ctx_mgr { 973struct amdgpu_ctx_mgr {
@@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
1231 struct amdgpu_bo_list_entry uf_entry; 1233 struct amdgpu_bo_list_entry uf_entry;
1232}; 1234};
1233 1235
1236#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
1237#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
1238#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
1239
1234struct amdgpu_job { 1240struct amdgpu_job {
1235 struct amd_sched_job base; 1241 struct amd_sched_job base;
1236 struct amdgpu_device *adev; 1242 struct amdgpu_device *adev;
@@ -1239,6 +1245,7 @@ struct amdgpu_job {
1239 struct amdgpu_sync sync; 1245 struct amdgpu_sync sync;
1240 struct amdgpu_ib *ibs; 1246 struct amdgpu_ib *ibs;
1241 struct fence *fence; /* the hw fence */ 1247 struct fence *fence; /* the hw fence */
1248 uint32_t preamble_status;
1242 uint32_t num_ibs; 1249 uint32_t num_ibs;
1243 void *owner; 1250 void *owner;
1244 uint64_t fence_ctx; /* the fence_context this job uses */ 1251 uint64_t fence_ctx; /* the fence_context this job uses */
@@ -2276,6 +2283,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
2276#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) 2283#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
2277#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) 2284#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
2278#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) 2285#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
2286#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
2279#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) 2287#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
2280#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) 2288#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
2281#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) 2289#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 56bde6436a1f..61b7e25535bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -850,6 +850,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
850 if (r) 850 if (r)
851 return r; 851 return r;
852 852
853 if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
854 parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
855 if (!parser->ctx->preamble_presented) {
856 parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
857 parser->ctx->preamble_presented = true;
858 }
859 }
860
853 if (parser->job->ring && parser->job->ring != ring) 861 if (parser->job->ring && parser->job->ring != ring)
854 return -EINVAL; 862 return -EINVAL;
855 863
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3bbc0faf48c8..ca3d87aac7fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -55,9 +55,10 @@
55 * - 3.3.0 - Add VM support for UVD on supported hardware. 55 * - 3.3.0 - Add VM support for UVD on supported hardware.
56 * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS. 56 * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
57 * - 3.5.0 - Add support for new UVD_NO_OP register. 57 * - 3.5.0 - Add support for new UVD_NO_OP register.
58 * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer.
58 */ 59 */
59#define KMS_DRIVER_MAJOR 3 60#define KMS_DRIVER_MAJOR 3
60#define KMS_DRIVER_MINOR 5 61#define KMS_DRIVER_MINOR 6
61#define KMS_DRIVER_PATCHLEVEL 0 62#define KMS_DRIVER_PATCHLEVEL 0
62 63
63int amdgpu_vram_limit = 0; 64int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 04263f0fd1af..2aa741c2a64c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
125 unsigned patch_offset = ~0; 125 unsigned patch_offset = ~0;
126 struct amdgpu_vm *vm; 126 struct amdgpu_vm *vm;
127 uint64_t fence_ctx; 127 uint64_t fence_ctx;
128 uint32_t status = 0;
128 129
129 unsigned i; 130 unsigned i;
130 int r = 0; 131 int r = 0;
@@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
176 177
177 skip_preamble = ring->current_ctx == fence_ctx; 178 skip_preamble = ring->current_ctx == fence_ctx;
178 need_ctx_switch = ring->current_ctx != fence_ctx; 179 need_ctx_switch = ring->current_ctx != fence_ctx;
180 if (job && ring->funcs->emit_cntxcntl) {
181 if (need_ctx_switch)
182 status |= AMDGPU_HAVE_CTX_SWITCH;
183 status |= job->preamble_status;
184 amdgpu_ring_emit_cntxcntl(ring, status);
185 }
186
179 for (i = 0; i < num_ibs; ++i) { 187 for (i = 0; i < num_ibs; ++i) {
180 ib = &ibs[i]; 188 ib = &ibs[i];
181 189
182 /* drop preamble IBs if we don't have a context switch */ 190 /* drop preamble IBs if we don't have a context switch */
183 if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) 191 if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
192 skip_preamble &&
193 !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
184 continue; 194 continue;
185 195
186 amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, 196 amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a93a803b659e..8c780f6c1276 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
2096 amdgpu_ring_write(ring, control); 2096 amdgpu_ring_write(ring, control);
2097} 2097}
2098 2098
2099static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
2100{
2101 uint32_t dw2 = 0;
2102
2103 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
2104 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
2105 /* set load_global_config & load_global_uconfig */
2106 dw2 |= 0x8001;
2107 /* set load_cs_sh_regs */
2108 dw2 |= 0x01000000;
2109 /* set load_per_context_state & load_gfx_sh_regs */
2110 dw2 |= 0x10002;
2111 }
2112
2113 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2114 amdgpu_ring_write(ring, dw2);
2115 amdgpu_ring_write(ring, 0);
2116}
2117
2099/** 2118/**
2100 * gfx_v7_0_ring_test_ib - basic ring IB test 2119 * gfx_v7_0_ring_test_ib - basic ring IB test
2101 * 2120 *
@@ -4938,6 +4957,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
4938 .test_ib = gfx_v7_0_ring_test_ib, 4957 .test_ib = gfx_v7_0_ring_test_ib,
4939 .insert_nop = amdgpu_ring_insert_nop, 4958 .insert_nop = amdgpu_ring_insert_nop,
4940 .pad_ib = amdgpu_ring_generic_pad_ib, 4959 .pad_ib = amdgpu_ring_generic_pad_ib,
4960 .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
4941}; 4961};
4942 4962
4943static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { 4963static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 44915056297b..dca8b368728c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6076,6 +6076,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6076 amdgpu_ring_write(ring, 0); 6076 amdgpu_ring_write(ring, 0);
6077} 6077}
6078 6078
6079static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6080{
6081 uint32_t dw2 = 0;
6082
6083 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6084 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6085 /* set load_global_config & load_global_uconfig */
6086 dw2 |= 0x8001;
6087 /* set load_cs_sh_regs */
6088 dw2 |= 0x01000000;
6089 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6090 dw2 |= 0x10002;
6091
6092 /* set load_ce_ram if preamble presented */
6093 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6094 dw2 |= 0x10000000;
6095 } else {
6096 /* still load_ce_ram if this is the first time preamble presented
6097 * although there is no context switch happens.
6098 */
6099 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6100 dw2 |= 0x10000000;
6101 }
6102
6103 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6104 amdgpu_ring_write(ring, dw2);
6105 amdgpu_ring_write(ring, 0);
6106}
6107
6079static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6108static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6080 enum amdgpu_interrupt_state state) 6109 enum amdgpu_interrupt_state state)
6081{ 6110{
@@ -6258,6 +6287,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6258 .insert_nop = amdgpu_ring_insert_nop, 6287 .insert_nop = amdgpu_ring_insert_nop,
6259 .pad_ib = amdgpu_ring_generic_pad_ib, 6288 .pad_ib = amdgpu_ring_generic_pad_ib,
6260 .emit_switch_buffer = gfx_v8_ring_emit_sb, 6289 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6290 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
6261}; 6291};
6262 6292
6263static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 6293static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {