diff options
author | Monk Liu <Monk.Liu@amd.com> | 2016-08-26 01:28:28 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2016-09-14 15:10:29 -0400 |
commit | 753ad49c9fdfc732972b0d03f2889f473ed35e59 (patch) | |
tree | 5b56188b3eb258fdcb167503979d657a0e9699be | |
parent | 1db422de72c31fcf81f31311468a747aad389fa5 (diff) |
drm/amdgpu:implement CONTEXT_CONTROL (v5)
v1:
for gfx8, use the CONTEXT_CONTROL packet to dynamically
skip the preamble CEIB and other load_xxx commands in sequence.
v2:
support GFX7 as well.
remove cntxcntl in compute ring funcs because CPC doesn't
support this packet.
v3: fix redundant judgement in cntxcntl.
v4: some cleanups, don't change cs_submit()
v5: keep old Mesa versions supported & bump up the KMS version.
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Acked-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 |
6 files changed, 79 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 10ec29c50077..717c3b4e1d54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h | |||
@@ -320,6 +320,7 @@ struct amdgpu_ring_funcs { | |||
320 | void (*begin_use)(struct amdgpu_ring *ring); | 320 | void (*begin_use)(struct amdgpu_ring *ring); |
321 | void (*end_use)(struct amdgpu_ring *ring); | 321 | void (*end_use)(struct amdgpu_ring *ring); |
322 | void (*emit_switch_buffer) (struct amdgpu_ring *ring); | 322 | void (*emit_switch_buffer) (struct amdgpu_ring *ring); |
323 | void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); | ||
323 | }; | 324 | }; |
324 | 325 | ||
325 | /* | 326 | /* |
@@ -966,6 +967,7 @@ struct amdgpu_ctx { | |||
966 | spinlock_t ring_lock; | 967 | spinlock_t ring_lock; |
967 | struct fence **fences; | 968 | struct fence **fences; |
968 | struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; | 969 | struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; |
970 | bool preamble_presented; | ||
969 | }; | 971 | }; |
970 | 972 | ||
971 | struct amdgpu_ctx_mgr { | 973 | struct amdgpu_ctx_mgr { |
@@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser { | |||
1231 | struct amdgpu_bo_list_entry uf_entry; | 1233 | struct amdgpu_bo_list_entry uf_entry; |
1232 | }; | 1234 | }; |
1233 | 1235 | ||
1236 | #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ | ||
1237 | #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ | ||
1238 | #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ | ||
1239 | |||
1234 | struct amdgpu_job { | 1240 | struct amdgpu_job { |
1235 | struct amd_sched_job base; | 1241 | struct amd_sched_job base; |
1236 | struct amdgpu_device *adev; | 1242 | struct amdgpu_device *adev; |
@@ -1239,6 +1245,7 @@ struct amdgpu_job { | |||
1239 | struct amdgpu_sync sync; | 1245 | struct amdgpu_sync sync; |
1240 | struct amdgpu_ib *ibs; | 1246 | struct amdgpu_ib *ibs; |
1241 | struct fence *fence; /* the hw fence */ | 1247 | struct fence *fence; /* the hw fence */ |
1248 | uint32_t preamble_status; | ||
1242 | uint32_t num_ibs; | 1249 | uint32_t num_ibs; |
1243 | void *owner; | 1250 | void *owner; |
1244 | uint64_t fence_ctx; /* the fence_context this job uses */ | 1251 | uint64_t fence_ctx; /* the fence_context this job uses */ |
@@ -2276,6 +2283,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) | |||
2276 | #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) | 2283 | #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) |
2277 | #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) | 2284 | #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) |
2278 | #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) | 2285 | #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) |
2286 | #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) | ||
2279 | #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) | 2287 | #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) |
2280 | #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) | 2288 | #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) |
2281 | #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) | 2289 | #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 56bde6436a1f..61b7e25535bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | |||
@@ -850,6 +850,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, | |||
850 | if (r) | 850 | if (r) |
851 | return r; | 851 | return r; |
852 | 852 | ||
853 | if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) { | ||
854 | parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; | ||
855 | if (!parser->ctx->preamble_presented) { | ||
856 | parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; | ||
857 | parser->ctx->preamble_presented = true; | ||
858 | } | ||
859 | } | ||
860 | |||
853 | if (parser->job->ring && parser->job->ring != ring) | 861 | if (parser->job->ring && parser->job->ring != ring) |
854 | return -EINVAL; | 862 | return -EINVAL; |
855 | 863 | ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 3bbc0faf48c8..ca3d87aac7fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | |||
@@ -55,9 +55,10 @@ | |||
55 | * - 3.3.0 - Add VM support for UVD on supported hardware. | 55 | * - 3.3.0 - Add VM support for UVD on supported hardware. |
56 | * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS. | 56 | * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS. |
57 | * - 3.5.0 - Add support for new UVD_NO_OP register. | 57 | * - 3.5.0 - Add support for new UVD_NO_OP register. |
58 | * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer. | ||
58 | */ | 59 | */ |
59 | #define KMS_DRIVER_MAJOR 3 | 60 | #define KMS_DRIVER_MAJOR 3 |
60 | #define KMS_DRIVER_MINOR 5 | 61 | #define KMS_DRIVER_MINOR 6 |
61 | #define KMS_DRIVER_PATCHLEVEL 0 | 62 | #define KMS_DRIVER_PATCHLEVEL 0 |
62 | 63 | ||
63 | int amdgpu_vram_limit = 0; | 64 | int amdgpu_vram_limit = 0; |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 04263f0fd1af..2aa741c2a64c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | |||
@@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
125 | unsigned patch_offset = ~0; | 125 | unsigned patch_offset = ~0; |
126 | struct amdgpu_vm *vm; | 126 | struct amdgpu_vm *vm; |
127 | uint64_t fence_ctx; | 127 | uint64_t fence_ctx; |
128 | uint32_t status = 0; | ||
128 | 129 | ||
129 | unsigned i; | 130 | unsigned i; |
130 | int r = 0; | 131 | int r = 0; |
@@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, | |||
176 | 177 | ||
177 | skip_preamble = ring->current_ctx == fence_ctx; | 178 | skip_preamble = ring->current_ctx == fence_ctx; |
178 | need_ctx_switch = ring->current_ctx != fence_ctx; | 179 | need_ctx_switch = ring->current_ctx != fence_ctx; |
180 | if (job && ring->funcs->emit_cntxcntl) { | ||
181 | if (need_ctx_switch) | ||
182 | status |= AMDGPU_HAVE_CTX_SWITCH; | ||
183 | status |= job->preamble_status; | ||
184 | amdgpu_ring_emit_cntxcntl(ring, status); | ||
185 | } | ||
186 | |||
179 | for (i = 0; i < num_ibs; ++i) { | 187 | for (i = 0; i < num_ibs; ++i) { |
180 | ib = &ibs[i]; | 188 | ib = &ibs[i]; |
181 | 189 | ||
182 | /* drop preamble IBs if we don't have a context switch */ | 190 | /* drop preamble IBs if we don't have a context switch */ |
183 | if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) | 191 | if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && |
192 | skip_preamble && | ||
193 | !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST)) | ||
184 | continue; | 194 | continue; |
185 | 195 | ||
186 | amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, | 196 | amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a93a803b659e..8c780f6c1276 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | |||
@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, | |||
2096 | amdgpu_ring_write(ring, control); | 2096 | amdgpu_ring_write(ring, control); |
2097 | } | 2097 | } |
2098 | 2098 | ||
2099 | static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) | ||
2100 | { | ||
2101 | uint32_t dw2 = 0; | ||
2102 | |||
2103 | dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ | ||
2104 | if (flags & AMDGPU_HAVE_CTX_SWITCH) { | ||
2105 | /* set load_global_config & load_global_uconfig */ | ||
2106 | dw2 |= 0x8001; | ||
2107 | /* set load_cs_sh_regs */ | ||
2108 | dw2 |= 0x01000000; | ||
2109 | /* set load_per_context_state & load_gfx_sh_regs */ | ||
2110 | dw2 |= 0x10002; | ||
2111 | } | ||
2112 | |||
2113 | amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); | ||
2114 | amdgpu_ring_write(ring, dw2); | ||
2115 | amdgpu_ring_write(ring, 0); | ||
2116 | } | ||
2117 | |||
2099 | /** | 2118 | /** |
2100 | * gfx_v7_0_ring_test_ib - basic ring IB test | 2119 | * gfx_v7_0_ring_test_ib - basic ring IB test |
2101 | * | 2120 | * |
@@ -4938,6 +4957,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { | |||
4938 | .test_ib = gfx_v7_0_ring_test_ib, | 4957 | .test_ib = gfx_v7_0_ring_test_ib, |
4939 | .insert_nop = amdgpu_ring_insert_nop, | 4958 | .insert_nop = amdgpu_ring_insert_nop, |
4940 | .pad_ib = amdgpu_ring_generic_pad_ib, | 4959 | .pad_ib = amdgpu_ring_generic_pad_ib, |
4960 | .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, | ||
4941 | }; | 4961 | }; |
4942 | 4962 | ||
4943 | static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { | 4963 | static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { |
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 44915056297b..dca8b368728c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -6076,6 +6076,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) | |||
6076 | amdgpu_ring_write(ring, 0); | 6076 | amdgpu_ring_write(ring, 0); |
6077 | } | 6077 | } |
6078 | 6078 | ||
6079 | static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) | ||
6080 | { | ||
6081 | uint32_t dw2 = 0; | ||
6082 | |||
6083 | dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ | ||
6084 | if (flags & AMDGPU_HAVE_CTX_SWITCH) { | ||
6085 | /* set load_global_config & load_global_uconfig */ | ||
6086 | dw2 |= 0x8001; | ||
6087 | /* set load_cs_sh_regs */ | ||
6088 | dw2 |= 0x01000000; | ||
6089 | /* set load_per_context_state & load_gfx_sh_regs for GFX */ | ||
6090 | dw2 |= 0x10002; | ||
6091 | |||
6092 | /* set load_ce_ram if preamble presented */ | ||
6093 | if (AMDGPU_PREAMBLE_IB_PRESENT & flags) | ||
6094 | dw2 |= 0x10000000; | ||
6095 | } else { | ||
6096 | /* still load_ce_ram if this is the first time preamble presented | ||
6097 | * although there is no context switch happens. | ||
6098 | */ | ||
6099 | if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) | ||
6100 | dw2 |= 0x10000000; | ||
6101 | } | ||
6102 | |||
6103 | amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); | ||
6104 | amdgpu_ring_write(ring, dw2); | ||
6105 | amdgpu_ring_write(ring, 0); | ||
6106 | } | ||
6107 | |||
6079 | static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, | 6108 | static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, |
6080 | enum amdgpu_interrupt_state state) | 6109 | enum amdgpu_interrupt_state state) |
6081 | { | 6110 | { |
@@ -6258,6 +6287,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { | |||
6258 | .insert_nop = amdgpu_ring_insert_nop, | 6287 | .insert_nop = amdgpu_ring_insert_nop, |
6259 | .pad_ib = amdgpu_ring_generic_pad_ib, | 6288 | .pad_ib = amdgpu_ring_generic_pad_ib, |
6260 | .emit_switch_buffer = gfx_v8_ring_emit_sb, | 6289 | .emit_switch_buffer = gfx_v8_ring_emit_sb, |
6290 | .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, | ||
6261 | }; | 6291 | }; |
6262 | 6292 | ||
6263 | static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { | 6293 | static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { |