aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorTrent Piepho <tpiepho@freescale.com>2008-05-27 19:48:32 -0400
committerPaul Mackerras <paulus@samba.org>2008-06-08 23:54:36 -0400
commit0f3d6bcd391b058c619fc30e8022e8a29fbf4bef (patch)
treeab5f0b8fb95afc01c7b1fd4c5254045ad30bbe4c /include
parent19fc65b5251dfd90312ae0142cc8650cd273e6a6 (diff)
powerpc: Improve (in|out)_[bl]eXX() asm code
Since commit 4cb3cee03d558fd457cb58f56c80a2a09a66110c the code generated for the in_beXX() and out_beXX() mmio functions has been sub-optimal. The out_leXX() family of functions are created with the macro DEF_MMIO_OUT_LE() while the out_beXX() family are created with DEF_MMIO_OUT_BE(). In what was perhaps a bit too much macro use, both of these macros are in turn created via the macro DEF_MMIO_OUT(). For the LE versions, eventually they boil down to an asm that will look something like this: asm("sync; stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); The issue is that the "stwbrx" instruction only comes in an indexed, or 'x', version, in which the address is represented by the sum of two registers (the "0,%2"). Unfortunately, gcc doesn't have a constraint for an indexed memory reference. The "m" constraint allows both indexed and offset, i.e. register plus constant, memory references and there is no "stwbr" version for offset references. "m" also allows updating addresses and there is no 'u' version of "stwbrx" like there is with "stwux". The unused first operand to the asm is just to tell gcc that *addr is an output of the asm. The address used is passed in a single register via the third asm operand, and the index register is just hard coded as 0. This means gcc is forced to put the address in a single register and can't use index addressing, e.g. if one has the data in register 9, a base address in register 3 and an index in register 4, gcc must emit code like "add 11,4,3; stwbrx 9,0,11" instead of just "stwbrx 9,4,3". This costs an extra add instruction and another register. For gcc 4.0 and older, there doesn't appear to be anything that can be done. But for 4.1 and newer, there is a 'Z' constraint. It does not allow "updating" addresses, but does allow both indexed and offset addresses. However, the only allowed constant offset is 0. We can then use the undocumented 'y' operand modifier, which causes gcc to convert "0(reg)" into the equivilient "0,reg" format that can be used with stwbrx. This brings us the to problem with the BE version. In this case, the "stw" instruction does have both indexed and non-indexed versions. The final asm ends up looking like this: asm("sync; stw%U0%X0 %1,%0" : "=m" (*addr) : "r" (val), "r" (addr)); The undocumented codes "%U0" and "%0X" will generate a 'u' if the memory reference should be an auto-updating one, and an 'x' if the memory reference is indexed, respectively. The third operand is unused, it's just there because asm the code is reused from the LE version. However, gcc does not know this, and generates unnecessary code to stick addr in a register! To use the example from the LE version, gcc will generate "add 11,4,3; stwx 9,4,3". It is able to use the indexed address "4,3" for the "stwx", but still thinks it needs to put 4+3 into register 11, which will never be used. This also ends up happening a lot for the offset addressing mode, where common code like this: out_be32(&device_registers->some_register, data); uses an instruction like "stw 9, 42(3)", where register 3 has the pointer device_registers and 42 is the offset of some_register in that structure. gcc will be forced to generate the unnecessary instruction "addi 11, 3, 42" to put the address into a single (unused) register. The in_* versions end up having these exact same problems as well. Signed-off-by: Trent Piepho <tpiepho@freescale.com> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org> CC: Andreas Schwab <schwab@suse.de> Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'include')
-rw-r--r--include/asm-powerpc/io.h57
1 files changed, 42 insertions, 15 deletions
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 89189488e286..6db422d8e2a0 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -95,33 +95,60 @@ extern resource_size_t isa_mem_base;
95#define IO_SET_SYNC_FLAG() 95#define IO_SET_SYNC_FLAG()
96#endif 96#endif
97 97
98#define DEF_MMIO_IN(name, type, insn) \ 98/* gcc 4.0 and older doesn't have 'Z' constraint */
99static inline type name(const volatile type __iomem *addr) \ 99#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
100#define DEF_MMIO_IN_LE(name, size, insn) \
101static inline u##size name(const volatile u##size __iomem *addr) \
100{ \ 102{ \
101 type ret; \ 103 u##size ret; \
102 __asm__ __volatile__("sync;" insn ";twi 0,%0,0;isync" \ 104 __asm__ __volatile__("sync;"#insn" %0,0,%1;twi 0,%0,0;isync" \
103 : "=r" (ret) : "r" (addr), "m" (*addr) : "memory"); \ 105 : "=r" (ret) : "r" (addr), "m" (*addr) : "memory"); \
104 return ret; \ 106 return ret; \
105} 107}
106 108
107#define DEF_MMIO_OUT(name, type, insn) \ 109#define DEF_MMIO_OUT_LE(name, size, insn) \
108static inline void name(volatile type __iomem *addr, type val) \ 110static inline void name(volatile u##size __iomem *addr, u##size val) \
109{ \ 111{ \
110 __asm__ __volatile__("sync;" insn \ 112 __asm__ __volatile__("sync;"#insn" %1,0,%2" \
111 : "=m" (*addr) : "r" (val), "r" (addr) : "memory"); \ 113 : "=m" (*addr) : "r" (val), "r" (addr) : "memory"); \
112 IO_SET_SYNC_FLAG(); \ 114 IO_SET_SYNC_FLAG(); \
113} 115}
116#else /* newer gcc */
117#define DEF_MMIO_IN_LE(name, size, insn) \
118static inline u##size name(const volatile u##size __iomem *addr) \
119{ \
120 u##size ret; \
121 __asm__ __volatile__("sync;"#insn" %0,%y1;twi 0,%0,0;isync" \
122 : "=r" (ret) : "Z" (*addr) : "memory"); \
123 return ret; \
124}
125
126#define DEF_MMIO_OUT_LE(name, size, insn) \
127static inline void name(volatile u##size __iomem *addr, u##size val) \
128{ \
129 __asm__ __volatile__("sync;"#insn" %1,%y0" \
130 : "=Z" (*addr) : "r" (val) : "memory"); \
131 IO_SET_SYNC_FLAG(); \
132}
133#endif
114 134
135#define DEF_MMIO_IN_BE(name, size, insn) \
136static inline u##size name(const volatile u##size __iomem *addr) \
137{ \
138 u##size ret; \
139 __asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\
140 : "=r" (ret) : "m" (*addr) : "memory"); \
141 return ret; \
142}
115 143
116#define DEF_MMIO_IN_BE(name, size, insn) \ 144#define DEF_MMIO_OUT_BE(name, size, insn) \
117 DEF_MMIO_IN(name, u##size, __stringify(insn)"%U2%X2 %0,%2") 145static inline void name(volatile u##size __iomem *addr, u##size val) \
118#define DEF_MMIO_IN_LE(name, size, insn) \ 146{ \
119 DEF_MMIO_IN(name, u##size, __stringify(insn)" %0,0,%1") 147 __asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \
148 : "=m" (*addr) : "r" (val) : "memory"); \
149 IO_SET_SYNC_FLAG(); \
150}
120 151
121#define DEF_MMIO_OUT_BE(name, size, insn) \
122 DEF_MMIO_OUT(name, u##size, __stringify(insn)"%U0%X0 %1,%0")
123#define DEF_MMIO_OUT_LE(name, size, insn) \
124 DEF_MMIO_OUT(name, u##size, __stringify(insn)" %1,0,%2")
125 152
126DEF_MMIO_IN_BE(in_8, 8, lbz); 153DEF_MMIO_IN_BE(in_8, 8, lbz);
127DEF_MMIO_IN_BE(in_be16, 16, lhz); 154DEF_MMIO_IN_BE(in_be16, 16, lhz);