diff options
author | Jay Cornwall <Jay.Cornwall@amd.com> | 2018-05-01 17:56:02 -0400 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2018-05-01 17:56:02 -0400 |
commit | 87e6d4e0777daf774ed9aa59ed25b6ebaaad7052 (patch) | |
tree | 1062fa04a237eef7021afd3401bea92ddb5015f3 | |
parent | 24f48a42038f5baaae49b181b64782ecfb703a9c (diff) |
drm/amdkfd: Reduce priority of context-saving waves before spin-wait
Synchronization between context-saving wavefronts is achieved by
sending a SAVEWAVE message to the SPI and then spin-waiting for a
response. These spin-waiting wavefronts may inhibit the progress
of other wavefronts in the context save handler, leading to the
synchronization condition never being achieved.
Before spin-waiting, reduce the priority of each wavefront to
guarantee forward progress in the others.
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 8 |
2 files changed, 15 insertions, 3 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index 997a383dcb8b..34eabcdd27a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | |||
@@ -98,6 +98,7 @@ var SWIZZLE_EN = 0 //whether we use swi | |||
98 | /**************************************************************************/ | 98 | /**************************************************************************/ |
99 | var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 | 99 | var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 |
100 | var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 | 100 | var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 |
101 | var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 | ||
101 | var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 | 102 | var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 |
102 | 103 | ||
103 | var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 | 104 | var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 |
@@ -319,6 +320,10 @@ end | |||
319 | s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC | 320 | s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC |
320 | end | 321 | end |
321 | 322 | ||
323 | // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. | ||
324 | s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) | ||
325 | s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp | ||
326 | |||
322 | L_SLEEP: | 327 | L_SLEEP: |
323 | s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 | 328 | s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 |
324 | 329 | ||
@@ -1132,7 +1137,7 @@ end | |||
1132 | #endif | 1137 | #endif |
1133 | 1138 | ||
1134 | static const uint32_t cwsr_trap_gfx8_hex[] = { | 1139 | static const uint32_t cwsr_trap_gfx8_hex[] = { |
1135 | 0xbf820001, 0xbf820123, | 1140 | 0xbf820001, 0xbf820125, |
1136 | 0xb8f4f802, 0x89748674, | 1141 | 0xb8f4f802, 0x89748674, |
1137 | 0xb8f5f803, 0x8675ff75, | 1142 | 0xb8f5f803, 0x8675ff75, |
1138 | 0x00000400, 0xbf850011, | 1143 | 0x00000400, 0xbf850011, |
@@ -1158,7 +1163,8 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { | |||
1158 | 0x867aff7a, 0x00007fff, | 1163 | 0x867aff7a, 0x00007fff, |
1159 | 0xb97af807, 0xbef2007e, | 1164 | 0xb97af807, 0xbef2007e, |
1160 | 0xbef3007f, 0xbefe0180, | 1165 | 0xbef3007f, 0xbefe0180, |
1161 | 0xbf900004, 0xbf8e0002, | 1166 | 0xbf900004, 0x877a8474, |
1167 | 0xb97af802, 0xbf8e0002, | ||
1162 | 0xbf88fffe, 0xbef8007e, | 1168 | 0xbf88fffe, 0xbef8007e, |
1163 | 0x8679ff7f, 0x0000ffff, | 1169 | 0x8679ff7f, 0x0000ffff, |
1164 | 0x8779ff79, 0x00040000, | 1170 | 0x8779ff79, 0x00040000, |
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 033580c997ea..cac8d4992e04 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | |||
@@ -97,6 +97,7 @@ var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing | |||
97 | /**************************************************************************/ | 97 | /**************************************************************************/ |
98 | var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 | 98 | var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 |
99 | var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 | 99 | var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 |
100 | var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 | ||
100 | var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 | 101 | var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 |
101 | var SQ_WAVE_STATUS_HALT_MASK = 0x2000 | 102 | var SQ_WAVE_STATUS_HALT_MASK = 0x2000 |
102 | 103 | ||
@@ -362,6 +363,10 @@ end | |||
362 | s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC | 363 | s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC |
363 | end | 364 | end |
364 | 365 | ||
366 | // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. | ||
367 | s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) | ||
368 | s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp | ||
369 | |||
365 | L_SLEEP: | 370 | L_SLEEP: |
366 | s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 | 371 | s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 |
367 | 372 | ||
@@ -1210,7 +1215,7 @@ end | |||
1210 | #endif | 1215 | #endif |
1211 | 1216 | ||
1212 | static const uint32_t cwsr_trap_gfx9_hex[] = { | 1217 | static const uint32_t cwsr_trap_gfx9_hex[] = { |
1213 | 0xbf820001, 0xbf820158, | 1218 | 0xbf820001, 0xbf82015a, |
1214 | 0xb8f8f802, 0x89788678, | 1219 | 0xb8f8f802, 0x89788678, |
1215 | 0xb8f1f803, 0x866eff71, | 1220 | 0xb8f1f803, 0x866eff71, |
1216 | 0x00000400, 0xbf850034, | 1221 | 0x00000400, 0xbf850034, |
@@ -1249,6 +1254,7 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { | |||
1249 | 0x00007fff, 0xb970f807, | 1254 | 0x00007fff, 0xb970f807, |
1250 | 0xbeee007e, 0xbeef007f, | 1255 | 0xbeee007e, 0xbeef007f, |
1251 | 0xbefe0180, 0xbf900004, | 1256 | 0xbefe0180, 0xbf900004, |
1257 | 0x87708478, 0xb970f802, | ||
1252 | 0xbf8e0002, 0xbf88fffe, | 1258 | 0xbf8e0002, 0xbf88fffe, |
1253 | 0xb8f02a05, 0x80708170, | 1259 | 0xb8f02a05, 0x80708170, |
1254 | 0x8e708a70, 0xb8f11605, | 1260 | 0x8e708a70, 0xb8f11605, |