Diffstat (limited to 'arch/tile/lib/atomic_asm_32.S')
-rw-r--r--  arch/tile/lib/atomic_asm_32.S  196
1 files changed, 196 insertions, 0 deletions
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
new file mode 100644
index 00000000000..5a5514b77e7
--- /dev/null
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -0,0 +1,196 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * Support routines for atomic operations. Each function takes:
+ *
+ * r0: address to manipulate
+ * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG)
+ * r2: new value to write, or for cmpxchg/add_unless, value to compare against
+ * r3: (cmpxchg/xchg_add_unless) new value to write or add;
+ *     (atomic64 ops) high word of value to write
+ * r4/r5: (cmpxchg64/add_unless64) new value to write or add
+ *
+ * The 32-bit routines return a "struct __get_user" so that the futex code
+ * has an opportunity to return -EFAULT to the user if needed.
+ * The 64-bit routines just return a "long long" with the value,
+ * since they are only used from kernel space and don't expect to fault.
+ * Support for 16-bit ops is included in the framework but we don't provide
+ * any (x86_64 has an atomic_inc_short(), so we might want to some day).
+ *
+ * Note that the caller is advised to issue a suitable L1 or L2
+ * prefetch on the address being manipulated to avoid extra stalls.
+ * In addition, the hot path is on two icache lines, and we start with
+ * a jump to the second line to make sure they are both in cache so
+ * that we never stall waiting on icache fill while holding the lock.
+ * (This doesn't work out with most 64-bit ops, since they consume
+ * too many bundles, so may take an extra i-cache stall.)
+ *
+ * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
+ * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
+ * the code, just page faults.
+ *
+ * If the load or store faults in a way that can be directly fixed in
+ * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
+ * directly, return to the instruction that faulted, and retry it.
+ *
+ * If the load or store faults in a way that potentially requires us
+ * to release the atomic lock, then retry (e.g. a migrating PTE), we
+ * reset the PC in do_page_fault_ics() to the "tns" instruction so
+ * that on return we will reacquire the lock and restart the op. We
+ * are somewhat overloading the exception_table_entry notion by doing
+ * this, since those entries are not normally used for migrating PTEs.
+ *
+ * If the main page fault handler discovers a bad address, it will see
+ * the PC pointing to the "tns" instruction (due to the earlier
+ * exception_table_entry processing in do_page_fault_ics), and
+ * re-reset the PC to the fault handler, atomic_bad_address(), which
+ * effectively takes over from the atomic op and can either return a
+ * bad "struct __get_user" (for user addresses) or can just panic (for
+ * bad kernel addresses).
+ *
+ * Note that if the value we would store is the same as what we
+ * loaded, we bypass the store. Other platforms with true atomics can
+ * make the guarantee that a non-atomic __clear_bit(), for example,
+ * can safely race with an atomic test_and_set_bit(); this example is
+ * from bit_spinlock.h in slub_lock() / slub_unlock(). We can't do
+ * that on Tile since the "atomic" op is really just a
+ * read/modify/write, and can race with the non-atomic
+ * read/modify/write. However, if we can short-circuit the write when
+ * it is not needed, in the atomic case, we avoid the race.
+ */
+
+#include <linux/linkage.h>
+#include <asm/atomic.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+	.section .text.atomic,"ax"
+ENTRY(__start_atomic_asm_code)
+
+	.macro atomic_op, name, bitwidth, body
+	.align 64
+STD_ENTRY_SECTION(__atomic\name, .text.atomic)
+	{
+	movei	r24, 1
+	j	4f		/* branch to second cache line */
+	}
+1:	{
+	.ifc \bitwidth,16
+	lh	r22, r0
+	.else
+	lw	r22, r0
+	addi	r28, r0, 4
+	.endif
+	}
+	.ifc \bitwidth,64
+	lw	r23, r28
+	.endif
+	\body /* set r24, and r25 if 64-bit */
+	{
+	seq	r26, r22, r24
+	seq	r27, r23, r25
+	}
+	.ifc \bitwidth,64
+	bbnst	r27, 2f
+	.endif
+	bbs	r26, 3f		/* skip write-back if it's the same value */
+2:	{
+	.ifc \bitwidth,16
+	sh	r0, r24
+	.else
+	sw	r0, r24
+	.endif
+	}
+	.ifc \bitwidth,64
+	sw	r28, r25
+	.endif
+	mf
+3:	{
+	move	r0, r22
+	.ifc \bitwidth,64
+	move	r1, r23
+	.else
+	move	r1, zero
+	.endif
+	sw	ATOMIC_LOCK_REG_NAME, zero
+	}
+	mtspr	INTERRUPT_CRITICAL_SECTION, zero
+	jrp	lr
+4:	{
+	move	ATOMIC_LOCK_REG_NAME, r1
+	mtspr	INTERRUPT_CRITICAL_SECTION, r24
+	}
+#ifndef CONFIG_SMP
+	j	1b		/* no atomic locks */
+#else
+	{
+	tns	r21, ATOMIC_LOCK_REG_NAME
+	moveli	r23, 2048	/* maximum backoff time in cycles */
+	}
+	{
+	bzt	r21, 1b		/* branch if lock acquired */
+	moveli	r25, 32		/* starting backoff time in cycles */
+	}
+5:	mtspr	INTERRUPT_CRITICAL_SECTION, zero
+	mfspr	r26, CYCLE_LOW	/* get start point for this backoff */
+6:	mfspr	r22, CYCLE_LOW	/* test to see if we've backed off enough */
+	sub	r22, r22, r26
+	slt	r22, r22, r25
+	bbst	r22, 6b
+	{
+	mtspr	INTERRUPT_CRITICAL_SECTION, r24
+	shli	r25, r25, 1	/* double the backoff; retry the tns */
+	}
+	{
+	tns	r21, ATOMIC_LOCK_REG_NAME
+	slt	r26, r23, r25	/* is the proposed backoff too big? */
+	}
+	{
+	bzt	r21, 1b		/* branch if lock acquired */
+	mvnz	r25, r26, r23
+	}
+	j	5b
+#endif
+	STD_ENDPROC(__atomic\name)
+	.ifc \bitwidth,32
+	.pushsection __ex_table,"a"
+	.word	1b, __atomic\name
+	.word	2b, __atomic\name
+	.word	__atomic\name, __atomic_bad_address
+	.popsection
+	.endif
+	.endm
+
+atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
+atomic_op _xchg, 32, "move r24, r2"
+atomic_op _xchg_add, 32, "add r24, r22, r2"
+atomic_op _xchg_add_unless, 32, \
+	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
+atomic_op _or, 32, "or r24, r22, r2"
+atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op _xor, 32, "xor r24, r22, r2"
+
+atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
+	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
+atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
+atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
+	slt_u r26, r24, r22; add r25, r25, r26"
+atomic_op 64_xchg_add_unless, 64, \
+	"{ sne r26, r22, r2; sne r27, r23, r3 }; \
+	{ bbns r26, 3f; add r24, r22, r4 }; \
+	{ bbns r27, 3f; add r25, r23, r5 }; \
+	slt_u r26, r24, r22; add r25, r25, r26"
+
+	jrp	lr		/* happy backtracer */
+
+ENTRY(__end_atomic_asm_code)
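
A note on the calling convention described in the header comment: each 32-bit routine takes the target address in r0, a pointer to the guarding lock in r1, and its operands in r2/r3, and returns a "struct __get_user" so the futex path can report -EFAULT. A rough C-level view of a caller is sketched below; the struct layout, the __atomic_xchg_add() prototype, and the lock-lookup helper are assumptions made for illustration, not text copied from the Tile headers.

/*
 * Illustrative sketch only: the field names, the C prototype of
 * __atomic_xchg_add() (the symbol defined above by "atomic_op _xchg_add"),
 * and the lock-lookup helper are assumed from the register convention in
 * the header comment, not taken verbatim from arch/tile headers.
 */
struct __get_user {
	unsigned long val;	/* value loaded from the target word */
	long err;		/* 0, or -EFAULT if the access faulted */
};

/* Assembly routine generated by the atomic_op macro above. */
extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);

/* Hypothetical hash from the target address to its guard lock. */
extern int *example_atomic_lock_for(volatile int *p);

static inline int example_atomic_add_return(int i, volatile int *p)
{
	/* r0 = p, r1 = guard lock, r2 = addend; old value comes back in .val */
	return __atomic_xchg_add(p, example_atomic_lock_for(p), i).val + i;
}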
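
The write short-circuit mentioned at the end of the header comment is the "bbs r26, 3f" bundle: when the computed value equals the loaded value, the store and the following memory fence are skipped, so the op cannot clobber a racing non-atomic write. A minimal pseudo-C sketch of the locked read-modify-write core, with invented names, is:

/*
 * Pseudo-C shape of the per-op body run under the lock; "compute" stands
 * in for whatever the atomic_op macro pastes in (add, or, xor, ...).
 * Lock handling and fault fixup are omitted.
 */
static int locked_rmw(volatile int *p, int (*compute)(int old))
{
	int old = *p;			/* lw r22, r0 */
	int newval = compute(old);	/* \body sets r24 */

	if (newval != old)		/* bbs r26, 3f: skip write-back */
		*p = newval;		/* sw r0, r24, then mf */
	return old;			/* old value returned in r0 */
}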
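
The CONFIG_SMP branch acquires the lock with "tns" and, on contention, backs off exponentially: it drops INTERRUPT_CRITICAL_SECTION, spins on CYCLE_LOW for the current interval, doubles the interval, and clamps it at 2048 cycles before retrying. The following C sketch mirrors that control flow; the helpers try_test_and_set(), read_cycle_counter(), and set_ics() are invented stand-ins for the tns instruction, the CYCLE_LOW SPR, and the INTERRUPT_CRITICAL_SECTION SPR.

/* Backoff parameters taken from the moveli immediates above. */
#define BACKOFF_START	32	/* cycles */
#define BACKOFF_MAX	2048	/* cycles */

/* Hypothetical helpers standing in for tns, CYCLE_LOW and the ICS SPR. */
extern int try_test_and_set(int *lock);	/* returns the old value; 0 = acquired */
extern unsigned int read_cycle_counter(void);
extern void set_ics(int on);

static void acquire_atomic_lock(int *lock)
{
	unsigned int backoff = BACKOFF_START;

	set_ics(1);
	while (try_test_and_set(lock) != 0) {
		unsigned int start;

		/* Back off with the critical section released, as at label 5: above. */
		set_ics(0);
		start = read_cycle_counter();
		while (read_cycle_counter() - start < backoff)
			;		/* spin for "backoff" cycles */
		set_ics(1);

		/* Double the backoff, but never beyond the 2048-cycle cap. */
		backoff *= 2;
		if (backoff > BACKOFF_MAX)
			backoff = BACKOFF_MAX;
	}
}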