diff options
author | Tony Luck <tony.luck@intel.com> | 2009-09-25 11:42:16 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2009-09-25 11:42:16 -0400 |
commit | 2c86963b093c1a0887dfc6b32c6e5ea3a80f2922 (patch) | |
tree | d1ed58dd0a644dd670e3724a575112fedd4b4250 /arch/ia64/kernel/head.S | |
parent | 53cddfcc0e760d2b364878b6dadbd0c6d087cfae (diff) |
[IA64] implement ticket locks for Itanium
Back in January 2008, Nick Piggin implemented "ticket" spinlocks
for x86 (see commit 314cdbefd1fd0a7acf3780e9628465b77ea6a836).
The IA64 implementation has a couple of differences because of the
available atomic operations: e.g., we have no fetchadd2 instruction
that operates on a 16-bit quantity, so we make ticket locks use
a 32-bit word for each of the current ticket and now-serving values.
Performance on uncontended locks is about 8% worse than the previous
implementation, but this seems a good trade for determinism in the
contended case. Performance impact on macro-level benchmarks is in
the noise.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/head.S')
-rw-r--r-- | arch/ia64/kernel/head.S | 89 |
1 files changed, 0 insertions, 89 deletions
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 1a6e44515eb4..696eff28a0c4 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1130,95 +1130,6 @@ SET_REG(b5); | |||
1130 | #endif /* CONFIG_IA64_BRL_EMU */ | 1130 | #endif /* CONFIG_IA64_BRL_EMU */ |
1131 | 1131 | ||
1132 | #ifdef CONFIG_SMP | 1132 | #ifdef CONFIG_SMP |
1133 | /* | ||
1134 | * This routine handles spinlock contention. It uses a non-standard calling | ||
1135 | * convention to avoid converting leaf routines into interior routines. Because | ||
1136 | * of this special convention, there are several restrictions: | ||
1137 | * | ||
1138 | * - do not use gp relative variables, this code is called from the kernel | ||
1139 | * and from modules, r1 is undefined. | ||
1140 | * - do not use stacked registers, the caller owns them. | ||
1141 | * - do not use the scratch stack space, the caller owns it. | ||
1142 | * - do not use any registers other than the ones listed below | ||
1143 | * | ||
1144 | * Inputs: | ||
1145 | * ar.pfs - saved CFM of caller | ||
1146 | * ar.ccv - 0 (and available for use) | ||
1147 | * r27 - flags from spin_lock_irqsave or 0. Must be preserved. | ||
1148 | * r28 - available for use. | ||
1149 | * r29 - available for use. | ||
1150 | * r30 - available for use. | ||
1151 | * r31 - address of lock, available for use. | ||
1152 | * b6 - return address | ||
1153 | * p14 - available for use. | ||
1154 | * p15 - used to track flag status. | ||
1155 | * | ||
1156 | * If you patch this code to use more registers, do not forget to update | ||
1157 | * the clobber lists for spin_lock() in arch/ia64/include/asm/spinlock.h. | ||
1158 | */ | ||
1159 | |||
1160 | #if (__GNUC__ == 3 && __GNUC_MINOR__ < 3) | ||
1161 | |||
1162 | GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4) | ||
1163 | .prologue | ||
1164 | .save ar.pfs, r0 // this code effectively has a zero frame size | ||
1165 | .save rp, r28 | ||
1166 | .body | ||
1167 | nop 0 | ||
1168 | tbit.nz p15,p0=r27,IA64_PSR_I_BIT | ||
1169 | .restore sp // pop existing prologue after next insn | ||
1170 | mov b6 = r28 | ||
1171 | .prologue | ||
1172 | .save ar.pfs, r0 | ||
1173 | .altrp b6 | ||
1174 | .body | ||
1175 | ;; | ||
1176 | (p15) ssm psr.i // reenable interrupts if they were on | ||
1177 | // DavidM says that srlz.d is slow and is not required in this case | ||
1178 | .wait: | ||
1179 | // exponential backoff, kdb, lockmeter etc. go in here | ||
1180 | hint @pause | ||
1181 | ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word | ||
1182 | nop 0 | ||
1183 | ;; | ||
1184 | cmp4.ne p14,p0=r30,r0 | ||
1185 | (p14) br.cond.sptk.few .wait | ||
1186 | (p15) rsm psr.i // disable interrupts if we reenabled them | ||
1187 | br.cond.sptk.few b6 // lock is now free, try to acquire | ||
1188 | .global ia64_spinlock_contention_pre3_4_end // for kernprof | ||
1189 | ia64_spinlock_contention_pre3_4_end: | ||
1190 | END(ia64_spinlock_contention_pre3_4) | ||
1191 | |||
1192 | #else | ||
1193 | |||
1194 | GLOBAL_ENTRY(ia64_spinlock_contention) | ||
1195 | .prologue | ||
1196 | .altrp b6 | ||
1197 | .body | ||
1198 | tbit.nz p15,p0=r27,IA64_PSR_I_BIT | ||
1199 | ;; | ||
1200 | .wait: | ||
1201 | (p15) ssm psr.i // reenable interrupts if they were on | ||
1202 | // DavidM says that srlz.d is slow and is not required in this case | ||
1203 | .wait2: | ||
1204 | // exponential backoff, kdb, lockmeter etc. go in here | ||
1205 | hint @pause | ||
1206 | ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word | ||
1207 | ;; | ||
1208 | cmp4.ne p14,p0=r30,r0 | ||
1209 | mov r30 = 1 | ||
1210 | (p14) br.cond.sptk.few .wait2 | ||
1211 | (p15) rsm psr.i // disable interrupts if we reenabled them | ||
1212 | ;; | ||
1213 | cmpxchg4.acq r30=[r31], r30, ar.ccv | ||
1214 | ;; | ||
1215 | cmp4.ne p14,p0=r0,r30 | ||
1216 | (p14) br.cond.sptk.few .wait | ||
1217 | |||
1218 | br.ret.sptk.many b6 // lock is now taken | ||
1219 | END(ia64_spinlock_contention) | ||
1220 | |||
1221 | #endif | ||
1222 | 1133 | ||
1223 | #ifdef CONFIG_HOTPLUG_CPU | 1134 | #ifdef CONFIG_HOTPLUG_CPU |
1224 | GLOBAL_ENTRY(ia64_jump_to_sal) | 1135 | GLOBAL_ENTRY(ia64_jump_to_sal) |