author     Chris Metcalf <cmetcalf@tilera.com>   2010-05-28 23:09:12 -0400
committer  Chris Metcalf <cmetcalf@tilera.com>   2010-06-04 17:11:18 -0400
commit     867e359b97c970a60626d5d76bbe2a8fadbf38fb
tree       c5ccbb7f5172e8555977119608ecb1eee3cc37e3 /arch/tile/lib/atomic_asm_32.S
parent     5360bd776f73d0a7da571d72a09a03f237e99900
arch/tile: core support for Tilera 32-bit chips.
This change is the core kernel support for TILEPro and TILE64 chips. No driver support (except the console driver) is included yet.

This includes the relevant Linux headers in asm/; the low-level "Tile architecture" headers in arch/, which are shared with the hypervisor, etc., and are build-system agnostic; and the relevant hypervisor headers in hv/.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Reviewed-by: Paul Mundt <lethal@linux-sh.org>
Diffstat (limited to 'arch/tile/lib/atomic_asm_32.S')
-rw-r--r--   arch/tile/lib/atomic_asm_32.S   197
1 file changed, 197 insertions, 0 deletions
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
new file mode 100644
index 000000000000..c0d058578192
--- /dev/null
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -0,0 +1,197 @@
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for
 * more details.
 *
 * Support routines for atomic operations.  Each function takes:
 *
 * r0: address to manipulate
 * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG)
 * r2: new value to write, or for cmpxchg/add_unless, value to compare against
 * r3: (cmpxchg/xchg_add_unless) new value to write or add;
 *     (atomic64 ops) high word of value to write
 * r4/r5: (cmpxchg64/add_unless64) new value to write or add
 *
 * The 32-bit routines return a "struct __get_user" so that the futex code
 * has an opportunity to return -EFAULT to the user if needed.
 * The 64-bit routines just return a "long long" with the value,
 * since they are only used from kernel space and don't expect to fault.
 * Support for 16-bit ops is included in the framework but we don't provide
 * any (x86_64 has an atomic_inc_short(), so we might want to add one
 * some day).
 *
 * Note that the caller is advised to issue a suitable L1 or L2
 * prefetch on the address being manipulated to avoid extra stalls.
 * In addition, the hot path is on two icache lines, and we start with
 * a jump to the second line to make sure they are both in cache so
 * that we never stall waiting on icache fill while holding the lock.
 * (This doesn't work out with most 64-bit ops, since they consume
 * too many bundles, so they may take an extra i-cache stall.)
 *
 * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
 * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
 * the code, just page faults.
 *
 * If the load or store faults in a way that can be directly fixed in
 * the do_page_fault_ics() handler (e.g. a vmalloc reference), we fix it
 * directly, return to the instruction that faulted, and retry it.
 *
 * If the load or store faults in a way that potentially requires us
 * to release the atomic lock and then retry (e.g. a migrating PTE), we
 * reset the PC in do_page_fault_ics() to the "tns" instruction so
 * that on return we will reacquire the lock and restart the op.  We
 * are somewhat overloading the exception_table_entry notion by doing
 * this, since those entries are not normally used for migrating PTEs.
 *
 * If the main page fault handler discovers a bad address, it will see
 * the PC pointing to the "tns" instruction (due to the earlier
 * exception_table_entry processing in do_page_fault_ics), and
 * re-reset the PC to the fault handler, atomic_bad_address(), which
 * effectively takes over from the atomic op and can either return a
 * bad "struct __get_user" (for user addresses) or can just panic (for
 * bad kernel addresses).
 *
 * Note that if the value we would store is the same as what we
 * loaded, we bypass the store.  Other platforms with true atomics can
 * make the guarantee that a non-atomic __clear_bit(), for example,
 * can safely race with an atomic test_and_set_bit(); this example is
 * from bit_spinlock.h in slub_lock() / slub_unlock().  We can't do
 * that on Tile since the "atomic" op is really just a
 * read/modify/write, and can race with the non-atomic
 * read/modify/write.  However, if we can short-circuit the write when
 * it is not needed, in the atomic case, we avoid the race.
 */

#include <linux/linkage.h>
#include <asm/atomic.h>
#include <asm/page.h>
#include <asm/processor.h>

        .section .text.atomic,"ax"
ENTRY(__start_atomic_asm_code)

        .macro  atomic_op, name, bitwidth, body
        .align  64
STD_ENTRY_SECTION(__atomic\name, .text.atomic)
        {
         movei  r24, 1
         j      4f              /* branch to second cache line */
        }
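        /*
         * 1: body of the operation, reached from label 4 below with
         * INTERRUPT_CRITICAL_SECTION set (and, on SMP, the atomic lock
         * passed in r1 held): load the old value for \body to use.
         */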
1:      {
         .ifc \bitwidth,16
         lh     r22, r0
         .else
         lw     r22, r0
         addi   r23, r0, 4
         .endif
        }
        .ifc \bitwidth,64
         lw     r23, r23
        .endif
        \body /* set r24, and r25 if 64-bit */
        {
         seq    r26, r22, r24
         seq    r27, r23, r25
        }
        .ifc \bitwidth,64
         bbnst  r27, 2f
        .endif
        bbs     r26, 3f         /* skip write-back if it's the same value */
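        /*
         * 2: write back the new value (low word, then high word for
         * 64-bit ops); the "mf" below orders these stores before the
         * lock release at label 3.
         */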
2:      {
         .ifc \bitwidth,16
         sh     r0, r24
         .else
         sw     r0, r24
         addi   r23, r0, 4
         .endif
        }
        .ifc \bitwidth,64
         sw     r23, r25
        .endif
        mf
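        /*
         * 3: return the old value (r0 = low word; r1 = high word for
         * 64-bit ops, else zero so the futex code sees no fault in the
         * "struct __get_user" return), release the lock, clear
         * INTERRUPT_CRITICAL_SECTION, and return to the caller.
         */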
3:      {
         move   r0, r22
         .ifc \bitwidth,64
         move   r1, r23
         .else
         move   r1, zero
         .endif
         sw     ATOMIC_LOCK_REG_NAME, zero
        }
        mtspr   INTERRUPT_CRITICAL_SECTION, zero
        jrp     lr
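        /*
         * 4: second cache line, reached via the jump at the top of the
         * routine: set INTERRUPT_CRITICAL_SECTION and then (on SMP)
         * acquire the atomic lock with "tns", spinning with bounded
         * exponential backoff on contention.
         */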
4:      {
         move   ATOMIC_LOCK_REG_NAME, r1
         mtspr  INTERRUPT_CRITICAL_SECTION, r24
        }
#ifndef CONFIG_SMP
        j       1b              /* no atomic locks */
#else
        {
         tns    r21, ATOMIC_LOCK_REG_NAME
         moveli r23, 2048       /* maximum backoff time in cycles */
        }
        {
         bzt    r21, 1b         /* branch if lock acquired */
         moveli r25, 32         /* starting backoff time in cycles */
        }
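        /*
         * 5/6: backoff loop: drop INTERRUPT_CRITICAL_SECTION while
         * waiting, busy-wait on CYCLE_LOW for the current backoff
         * interval, then re-set ICS and retry the "tns", doubling the
         * backoff (capped at 2048 cycles) each time around.
         */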
5:      mtspr   INTERRUPT_CRITICAL_SECTION, zero
        mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
6:      mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
        sub     r22, r22, r26
        slt     r22, r22, r25
        bbst    r22, 6b
        {
         mtspr  INTERRUPT_CRITICAL_SECTION, r24
         shli   r25, r25, 1     /* double the backoff; retry the tns */
        }
        {
         tns    r21, ATOMIC_LOCK_REG_NAME
         slt    r26, r23, r25   /* is the proposed backoff too big? */
        }
        {
         bzt    r21, 1b         /* branch if lock acquired */
         mvnz   r25, r26, r23
        }
        j       5b
#endif
        STD_ENDPROC(__atomic\name)
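        /*
         * For 32-bit ops, register the faulting load (1b) and store (2b)
         * in the exception table with a fixup that restarts the routine
         * (reacquiring the lock), and map the restart address itself to
         * __atomic_bad_address for genuinely bad addresses, as described
         * in the header comment above.
         */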
        .ifc \bitwidth,32
        .pushsection __ex_table,"a"
        .word 1b, __atomic\name
        .word 2b, __atomic\name
        .word __atomic\name, __atomic_bad_address
        .popsection
        .endif
        .endm

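/*
 * Each \body below computes the new value in r24 (and the high word in
 * r25 for 64-bit ops) from the old value in r22/r23 and the operands in
 * r2-r5.  The cmpxchg and add_unless bodies branch to 3f to skip the
 * write-back when their comparison fails, so the old value is returned
 * unchanged.
 */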
atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
atomic_op _xchg, 32, "move r24, r2"
atomic_op _xchg_add, 32, "add r24, r22, r2"
atomic_op _xchg_add_unless, 32, \
        "sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
atomic_op _or, 32, "or r24, r22, r2"
atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
atomic_op _xor, 32, "xor r24, r22, r2"

atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
        { bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
        slt_u r26, r24, r22; add r25, r25, r26"
atomic_op 64_xchg_add_unless, 64, \
        "{ sne r26, r22, r2; sne r27, r23, r3 }; \
        { bbns r26, 3f; add r24, r22, r4 }; \
        { bbns r27, 3f; add r25, r23, r5 }; \
        slt_u r26, r24, r22; add r25, r25, r26"

        jrp     lr              /* happy backtracer */

ENTRY(__end_atomic_asm_code)
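
To make the register convention in the header comment concrete, the sketch below shows roughly how a C-level kernel wrapper might drive the 32-bit __atomic_xchg_add routine defined above. It is illustrative only: the struct __get_user layout, the prototype of __atomic_xchg_add(), and the helper __atomic_hashed_lock() are assumptions chosen for the example, not definitions taken from this patch.

    /*
     * Illustrative sketch, not part of this patch: the names and
     * prototypes below are assumed, and only mirror the r0-r2 calling
     * convention documented in atomic_asm_32.S.
     */
    struct __get_user {
            unsigned long val;      /* old value read under the lock (r0) */
            int err;                /* 0 on success, or -EFAULT (r1) */
    };

    /* asm routine above: r0 = address, r1 = atomic lock, r2 = addend */
    extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock,
                                               int n);

    /* hypothetical helper mapping an address to its guarding atomic lock */
    extern int *__atomic_hashed_lock(volatile void *p);

    static inline int example_atomic_add_return(int i, volatile int *p)
    {
            struct __get_user g;

            g = __atomic_xchg_add(p, __atomic_hashed_lock(p), i);
            return g.val + i;       /* routine returns the pre-add value */
    }

Since the routine hands back the value it loaded before the update, a "return new value" wrapper re-applies the operation to the returned old value, while xchg- and cmpxchg-style wrappers can return g.val directly.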