58 files changed, 1648 insertions, 1007 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 6e696bd37cf9..7d6e12dbdffa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6127,6 +6127,7 @@ S:	Supported
 F:	arch/tile/
 F:	drivers/tty/hvc/hvc_tile.c
 F:	drivers/net/tile/
+F:	drivers/edac/tile_edac.c
 
 TLAN NETWORK DRIVER
 M:	Samuel Chessman <chessman@tux.org>
diff --git a/README b/README
--- a/README
+++ b/README
@@ -24,7 +24,7 @@ ON WHAT HARDWARE DOES IT RUN?
 today Linux also runs on (at least) the Compaq Alpha AXP, Sun SPARC and
 UltraSPARC, Motorola 68000, PowerPC, PowerPC64, ARM, Hitachi SuperH, Cell,
 IBM S/390, MIPS, HP PA-RISC, Intel IA-64, DEC VAX, AMD x86-64, AXIS CRIS,
-Xtensa, AVR32 and Renesas M32R architectures.
+Xtensa, Tilera TILE, AVR32 and Renesas M32R architectures.
 
 Linux is easily portable to most general-purpose 32- or 64-bit architectures
 as long as they have a paged memory management unit (PMMU) and a port of the
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 08948e4e1503..f3b78701c219 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -1,5 +1,5 @@
 # For a description of the syntax of this configuration file,
-# see Documentation/kbuild/config-language.txt.
+# see Documentation/kbuild/kconfig-language.txt.
 
 config TILE
 	def_bool y
@@ -11,17 +11,18 @@ config TILE
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
+	select GENERIC_HARDIRQS_NO_DEPRECATED
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
 #	select HAVE_OPTPROBES
 #	select HAVE_REGS_AND_STACK_ACCESS_API
 #	select HAVE_HW_BREAKPOINT
 #	select PERF_EVENTS
 #	select HAVE_USER_RETURN_NOTIFIER
 #	config NO_BOOTMEM
 #	config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 #	config HUGETLB_PAGE_SIZE_VARIABLE
 
 config MMU
 	def_bool y
@@ -39,7 +40,7 @@ config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
 config NEED_PER_CPU_PAGE_FIRST_CHUNK
 	def_bool y
 
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
@@ -201,12 +202,6 @@ config NODES_SHIFT
 	  By default, 2, i.e. 2^2 == 4 DDR2 controllers.
 	  In a system with more controllers, this value should be raised.
 
-# Need 16MB areas to enable hugetlb
-# See build-time check in arch/tile/mm/init.c.
-config FORCE_MAX_ZONEORDER
-	int
-	default 9
-
 choice
 	depends on !TILEGX
 	prompt "Memory split" if EXPERT
@@ -233,8 +228,12 @@ choice
 		bool "3.5G/0.5G user/kernel split"
 	config VMSPLIT_3G
 		bool "3G/1G user/kernel split"
-	config VMSPLIT_3G_OPT
-		bool "3G/1G user/kernel split (for full 1G low memory)"
+	config VMSPLIT_2_75G
+		bool "2.75G/1.25G user/kernel split (for full 1G low memory)"
+	config VMSPLIT_2_5G
+		bool "2.5G/1.5G user/kernel split"
+	config VMSPLIT_2_25G
+		bool "2.25G/1.75G user/kernel split"
 	config VMSPLIT_2G
 		bool "2G/2G user/kernel split"
 	config VMSPLIT_1G
@@ -245,7 +244,9 @@ config PAGE_OFFSET
 	hex
 	default 0xF0000000 if VMSPLIT_3_75G
 	default 0xE0000000 if VMSPLIT_3_5G
-	default 0xB0000000 if VMSPLIT_3G_OPT
+	default 0xB0000000 if VMSPLIT_2_75G
+	default 0xA0000000 if VMSPLIT_2_5G
+	default 0x90000000 if VMSPLIT_2_25G
 	default 0x80000000 if VMSPLIT_2G
 	default 0x40000000 if VMSPLIT_1G
 	default 0xC0000000
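The renamed split options map directly onto the PAGE_OFFSET defaults above: PAGE_OFFSET is the bottom of the kernel's part of the address space, so a 2.75G/1.25G split puts it at 2.75 * 2^30 = 0xB0000000. A quick check of that arithmetic, as an illustration only (not part of the patch):

    #include <stdio.h>

    /* Illustration only: the new VMSPLIT_* options vs. the PAGE_OFFSET defaults. */
    int main(void)
    {
            unsigned long gb = 1UL << 30;
            printf("2.75G split: %#llx\n", (unsigned long long)(2.75 * gb)); /* 0xb0000000 */
            printf("2.5G  split: %#llx\n", (unsigned long long)(2.50 * gb)); /* 0xa0000000 */
            printf("2.25G split: %#llx\n", (unsigned long long)(2.25 * gb)); /* 0x90000000 */
            return 0;
    }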
diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h
index 9d0bfa7e59be..96b5710505b6 100644
--- a/arch/tile/include/arch/interrupts_32.h
+++ b/arch/tile/include/arch/interrupts_32.h
@@ -16,10 +16,11 @@
 #define __ARCH_INTERRUPTS_H__
 
 /** Mask for an interrupt. */
-#ifdef __ASSEMBLER__
 /* Note: must handle breaking interrupts into high and low words manually. */
-#define INT_MASK(intno) (1 << (intno))
-#else
+#define INT_MASK_LO(intno) (1 << (intno))
+#define INT_MASK_HI(intno) (1 << ((intno) - 32))
+
+#ifndef __ASSEMBLER__
 #define INT_MASK(intno) (1ULL << (intno))
 #endif
 
@@ -89,6 +90,7 @@
 
 #define NUM_INTERRUPTS 49
 
+#ifndef __ASSEMBLER__
 #define QUEUED_INTERRUPTS ( \
     INT_MASK(INT_MEM_ERROR) | \
     INT_MASK(INT_DMATLB_MISS) | \
@@ -301,4 +303,5 @@
     INT_MASK(INT_DOUBLE_FAULT) | \
     INT_MASK(INT_AUX_PERF_COUNT) | \
     0)
+#endif /* !__ASSEMBLER__ */
 #endif /* !__ARCH_INTERRUPTS_H__ */
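With 49 interrupts and only 32-bit registers visible to assembly code, a full interrupt mask has to be handled as two words; INT_MASK_LO covers interrupt numbers 0 through 31 and INT_MASK_HI covers 32 and up. A hypothetical illustration of the intended split (the interrupt number 35 is made up for the example):

    /* Hypothetical: interrupt 35 lives in the high word of the 64-bit mask. */
    #define INT_MASK_LO(intno) (1 << (intno))
    #define INT_MASK_HI(intno) (1 << ((intno) - 32))

    static unsigned int mask_lo;                    /* bits for interrupts 0-31 */
    static unsigned int mask_hi = INT_MASK_HI(35);  /* bit 3: interrupt 35 */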
diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/arch/sim.h
index 74b7c1624d34..e54b7b0527f3 100644
--- a/arch/tile/include/arch/sim.h
+++ b/arch/tile/include/arch/sim.h
@@ -152,16 +152,33 @@ sim_dump(unsigned int mask)
 /**
  * Print a string to the simulator stdout.
  *
- * @param str The string to be written; a newline is automatically added.
+ * @param str The string to be written.
+ */
+static __inline void
+sim_print(const char* str)
+{
+  for ( ; *str != '\0'; str++)
+  {
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+                 (*str << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+               (SIM_PUTC_FLUSH_BINARY << _SIM_CONTROL_OPERATOR_BITS));
+}
+
+
+/**
+ * Print a string to the simulator stdout.
+ *
+ * @param str The string to be written (a newline is automatically added).
  */
 static __inline void
 sim_print_string(const char* str)
 {
-  int i;
-  for (i = 0; str[i] != 0; i++)
+  for ( ; *str != '\0'; str++)
   {
     __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
-                 (str[i] << _SIM_CONTROL_OPERATOR_BITS));
+                 (*str << _SIM_CONTROL_OPERATOR_BITS));
   }
   __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
                (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS));
@@ -203,7 +220,7 @@ sim_command(const char* str)
  * we are passing to the simulator are actually valid in the registers
  * (i.e. returned from memory) prior to the SIM_CONTROL spr.
  */
-static __inline int _sim_syscall0(int val)
+static __inline long _sim_syscall0(int val)
 {
   long result;
   __asm__ __volatile__ ("mtspr SIM_CONTROL, r0"
@@ -211,7 +228,7 @@ static __inline int _sim_syscall0(int val)
   return result;
 }
 
-static __inline int _sim_syscall1(int val, long arg1)
+static __inline long _sim_syscall1(int val, long arg1)
 {
   long result;
   __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }"
@@ -219,7 +236,7 @@ static __inline int _sim_syscall1(int val, long arg1)
   return result;
 }
 
-static __inline int _sim_syscall2(int val, long arg1, long arg2)
+static __inline long _sim_syscall2(int val, long arg1, long arg2)
 {
   long result;
   __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
@@ -233,7 +250,7 @@ static __inline int _sim_syscall2(int val, long arg1, long arg2)
    the register values for arguments 3 and up may still be in flight
    to the core from a stack frame reload. */
 
-static __inline int _sim_syscall3(int val, long arg1, long arg2, long arg3)
+static __inline long _sim_syscall3(int val, long arg1, long arg2, long arg3)
 {
   long result;
   __asm__ __volatile__ ("{ and zero, r3, r3 };"
@@ -244,7 +261,7 @@ static __inline int _sim_syscall3(int val, long arg1, long arg2, long arg3)
   return result;
 }
 
-static __inline int _sim_syscall4(int val, long arg1, long arg2, long arg3,
+static __inline long _sim_syscall4(int val, long arg1, long arg2, long arg3,
                                   long arg4)
 {
   long result;
@@ -256,7 +273,7 @@ static __inline int _sim_syscall4(int val, long arg1, long arg2, long arg3,
   return result;
 }
 
-static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3,
+static __inline long _sim_syscall5(int val, long arg1, long arg2, long arg3,
                                   long arg4, long arg5)
 {
   long result;
@@ -268,7 +285,6 @@ static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3,
   return result;
 }
 
-
 /**
  * Make a special syscall to the simulator itself, if running under
  * simulation. This is used as the implementation of other functions
@@ -281,7 +297,8 @@ static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3,
  */
 #define _sim_syscall(syscall_num, nr, args...) \
   _sim_syscall##nr( \
-    ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, args)
+    ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, \
+    ##args)
 
 
 /* Values for the "access_mask" parameters below. */
@@ -365,6 +382,13 @@ sim_validate_lines_evicted(unsigned long long pa, unsigned long length)
 }
 
 
+/* Return the current CPU speed in cycles per second. */
+static __inline long
+sim_query_cpu_speed(void)
+{
+  return _sim_syscall(SIM_SYSCALL_QUERY_CPU_SPEED, 0);
+}
+
 #endif /* !__DOXYGEN__ */
 
 
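The switch to `##args` in _sim_syscall() matters for the new zero-argument user: with GNU C named variadic macros, writing `, ## args` deletes the preceding comma when no extra arguments are supplied, which is what lets sim_query_cpu_speed() pass only the syscall number. A small standalone illustration (not taken from the patch):

    #include <stdio.h>

    /* GNU extension: ", ## args" drops the comma when args is empty. */
    #define trace(fmt, args...) printf(fmt, ## args)

    int main(void)
    {
            trace("no extra arguments\n");             /* no trailing comma left behind */
            trace("cpu speed: %ld Hz\n", 750000000L);
            return 0;
    }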
diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h
index 7a17082c3773..4b44a2b6a09a 100644
--- a/arch/tile/include/arch/sim_def.h
+++ b/arch/tile/include/arch/sim_def.h
@@ -243,6 +243,9 @@
  */
 #define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5
 
+/** Syscall number for sim_query_cpu_speed(). */
+#define SIM_SYSCALL_QUERY_CPU_SPEED 6
+
 
 /*
  * Bit masks which can be shifted by 8, combined with
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 3b8f55b82dee..849ab2fa1f5c 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
 header-y += ucontext.h
+header-y += hardwall.h
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index b8c49f98a44c..75a16028a952 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -32,7 +32,7 @@
  */
 static inline int atomic_read(const atomic_t *v)
 {
-	return v->counter;
+	return ACCESS_ONCE(v->counter);
 }
 
 /**
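Wrapping the read in ACCESS_ONCE() forces a fresh load of v->counter on every call, so the compiler cannot cache the value in a register across a polling loop. A minimal sketch of the same idiom outside the kernel (the volatile cast shown is the usual definition of the kernel's ACCESS_ONCE macro):

    /* Sketch: a volatile cast defeats compiler load caching. */
    #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

    static int counter;

    static int read_counter(void)
    {
            /* Without ACCESS_ONCE, a caller spinning on read_counter() could,
             * after inlining, legally keep re-testing a stale register copy. */
            return ACCESS_ONCE(counter);
    }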
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index 7a93c001ac19..2638be51a164 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -122,7 +122,7 @@ static inline int test_and_change_bit(unsigned nr,
 	return (_atomic_xor(addr, mask) & mask) != 0;
 }
 
-/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic.h>. */
+/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */
 #define smp_mb__before_clear_bit()	smp_mb()
 #define smp_mb__after_clear_bit()	do {} while (0)
 
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 08a2815b5e4e..392e5333dd8b 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -40,7 +40,7 @@
 #define INTERNODE_CACHE_BYTES L2_CACHE_BYTES
 
 /* Group together read-mostly things to avoid cache false sharing */
-#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 /*
  * Attribute for data that is kept read/write coherent until the end of
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index 14a3f8556ace..12fb0fb330ee 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -138,55 +138,12 @@ static inline void finv_buffer(void *buffer, size_t size)
 }
 
 /*
- * Flush & invalidate a VA range that is homed remotely on a single core,
- * waiting until the memory controller holds the flushed values.
+ * Flush and invalidate a VA range that is homed remotely, waiting
+ * until the memory controller holds the flushed values.  If "hfh" is
+ * true, we will do a more expensive flush involving additional loads
+ * to make sure we have touched all the possible home cpus of a buffer
+ * that is homed with "hash for home".
  */
-static inline void finv_buffer_remote(void *buffer, size_t size)
-{
-	char *p;
-	int i;
-
-	/*
-	 * Flush and invalidate the buffer out of the local L1/L2
-	 * and request the home cache to flush and invalidate as well.
-	 */
-	__finv_buffer(buffer, size);
-
-	/*
-	 * Wait for the home cache to acknowledge that it has processed
-	 * all the flush-and-invalidate requests.  This does not mean
-	 * that the flushed data has reached the memory controller yet,
-	 * but it does mean the home cache is processing the flushes.
-	 */
-	__insn_mf();
-
-	/*
-	 * Issue a load to the last cache line, which can't complete
-	 * until all the previously-issued flushes to the same memory
-	 * controller have also completed.  If we weren't striping
-	 * memory, that one load would be sufficient, but since we may
-	 * be, we also need to back up to the last load issued to
-	 * another memory controller, which would be the point where
-	 * we crossed an 8KB boundary (the granularity of striping
-	 * across memory controllers).  Keep backing up and doing this
-	 * until we are before the beginning of the buffer, or have
-	 * hit all the controllers.
-	 */
-	for (i = 0, p = (char *)buffer + size - 1;
-	     i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer;
-	     ++i) {
-		const unsigned long STRIPE_WIDTH = 8192;
-
-		/* Force a load instruction to issue. */
-		*(volatile char *)p;
-
-		/* Jump to end of previous stripe. */
-		p -= STRIPE_WIDTH;
-		p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1));
-	}
-
-	/* Wait for the loads (and thus flushes) to have completed. */
-	__insn_mf();
-}
+void finv_buffer_remote(void *buffer, size_t size, int hfh);
 
 #endif /* _ASM_TILE_CACHEFLUSH_H */
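finv_buffer_remote() moves out of line and grows an "hfh" flag for buffers homed with hash-for-home, which need additional loads to reach every possible home cpu. A hypothetical caller under the new signature (all names other than finv_buffer_remote() are made up for illustration):

    /* Sketch: flush a buffer back to memory before handing it to a device. */
    static void hand_off_to_device(void *buf, size_t len, int is_hash_for_home)
    {
            /* The third argument selects the more expensive hash-for-home flush. */
            finv_buffer_remote(buf, len, is_hash_for_home);
    }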
diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h
new file mode 100644
index 000000000000..87fc83eeaffd
--- /dev/null
+++ b/arch/tile/include/asm/edac.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#ifndef _ASM_TILE_EDAC_H
+#define _ASM_TILE_EDAC_H
+
+/* ECC atomic, DMA, SMP and interrupt safe scrub function */
+
+static inline void atomic_scrub(void *va, u32 size)
+{
+	/*
+	 * These is nothing to be done here because CE is
+	 * corrected by the mshim.
+	 */
+	return;
+}
+
+#endif /* _ASM_TILE_EDAC_H */
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index 0521c277bbde..d396d1805163 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -54,7 +54,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 				   pte_t *ptep, pte_t pte)
 {
-	set_pte_order(ptep, pte, HUGETLB_PAGE_ORDER);
+	set_pte(ptep, pte);
 }
 
 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index 641e4ff3d805..5db0ce54284d 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -18,6 +18,8 @@
 #include <arch/interrupts.h>
 #include <arch/chip.h>
 
+#if !defined(__tilegx__) && defined(__ASSEMBLY__)
+
 /*
  * The set of interrupts we want to allow when interrupts are nominally
  * disabled.  The remainder are effectively "NMI" interrupts from
@@ -25,6 +27,16 @@
  * interrupts (aka "non-queued") are not blocked by the mask in any case.
  */
 #if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+	(~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
+#else
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+	(~(INT_MASK_HI(INT_PERF_COUNT)))
+#endif
+
+#else
+
+#if CHIP_HAS_AUX_PERF_COUNTERS()
 #define LINUX_MASKABLE_INTERRUPTS \
 	(~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
 #else
@@ -32,6 +44,8 @@
 	(~(INT_MASK(INT_PERF_COUNT)))
 #endif
 
+#endif
+
 #ifndef __ASSEMBLY__
 
 /* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */
@@ -224,11 +238,11 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 #define IRQ_DISABLE(tmp0, tmp1) \
 	{ \
 	 movei  tmp0, -1; \
-	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS) \
+	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \
 	}; \
 	{ \
 	 mtspr  SPR_INTERRUPT_MASK_SET_K_0, tmp0; \
-	 auli   tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS) \
+	 auli   tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS_HI) \
 	}; \
 	mtspr  SPR_INTERRUPT_MASK_SET_K_1, tmp1
 
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 7979a45430d3..3eb53525bf9d 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -16,10 +16,11 @@
 #define _ASM_TILE_PAGE_H
 
 #include <linux/const.h>
+#include <hv/pagesize.h>
 
 /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#define PAGE_SHIFT	16
-#define HPAGE_SHIFT	24
+#define PAGE_SHIFT	HV_LOG2_PAGE_SIZE_SMALL
+#define HPAGE_SHIFT	HV_LOG2_PAGE_SIZE_LARGE
 
 #define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
 #define HPAGE_SIZE	(_AC(1, UL) << HPAGE_SHIFT)
@@ -29,25 +30,18 @@
 
 #ifdef __KERNEL__
 
-#include <hv/hypervisor.h>
-#include <arch/chip.h>
-
 /*
- * The {,H}PAGE_SHIFT values must match the HV_LOG2_PAGE_SIZE_xxx
- * definitions in <hv/hypervisor.h>.  We validate this at build time
- * here, and again at runtime during early boot.  We provide a
- * separate definition since userspace doesn't have <hv/hypervisor.h>.
- *
- * Be careful to distinguish PAGE_SHIFT from HV_PTE_INDEX_PFN, since
- * they are the same on i386 but not TILE.
+ * If the Kconfig doesn't specify, set a maximum zone order that
+ * is enough so that we can create huge pages from small pages given
+ * the respective sizes of the two page types.  See <linux/mmzone.h>.
  */
-#if HV_LOG2_PAGE_SIZE_SMALL != PAGE_SHIFT
-# error Small page size mismatch in Linux
-#endif
-#if HV_LOG2_PAGE_SIZE_LARGE != HPAGE_SHIFT
-# error Huge page size mismatch in Linux
+#ifndef CONFIG_FORCE_MAX_ZONEORDER
+#define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1)
 #endif
 
+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
@@ -81,12 +75,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
  * Hypervisor page tables are made of the same basic structure.
  */
 
-typedef __u64 pteval_t;
-typedef __u64 pmdval_t;
-typedef __u64 pudval_t;
-typedef __u64 pgdval_t;
-typedef __u64 pgprotval_t;
-
 typedef HV_PTE pte_t;
 typedef HV_PTE pgd_t;
 typedef HV_PTE pgprot_t;
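The CONFIG_FORCE_MAX_ZONEORDER fallback reproduces the value the removed Kconfig entry hard-coded: with 64KB small pages and 16MB huge pages, HPAGE_SHIFT - PAGE_SHIFT + 1 = 24 - 16 + 1 = 9, matching the old "default 9" in arch/tile/Kconfig. A one-line check of that arithmetic (illustration only, using the page-shift values this patch replaces):

    /* Illustration: a 16MB huge page is 2^(24-16) = 256 small pages, i.e. an
     * order-8 allocation, so the maximum zone order must be at least 9. */
    _Static_assert(24 - 16 + 1 == 9, "tile huge-page zone order fallback");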
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
index cf52791a5501..e919c0bdc22d 100644
--- a/arch/tile/include/asm/pgalloc.h
+++ b/arch/tile/include/asm/pgalloc.h
@@ -41,9 +41,9 @@
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_64BIT
-	set_pte_order(pmdp, pmd, L2_USER_PGTABLE_ORDER);
+	set_pte(pmdp, pmd);
 #else
-	set_pte_order(&pmdp->pud.pgd, pmd.pud.pgd, L2_USER_PGTABLE_ORDER);
+	set_pte(&pmdp->pud.pgd, pmd.pud.pgd);
 #endif
 }
 
@@ -100,6 +100,9 @@ pte_t *get_prealloc_pte(unsigned long pfn);
 /* During init, we can shatter kernel huge pages if needed. */
 void shatter_pmd(pmd_t *pmd);
 
+/* After init, a more complex technique is required. */
+void shatter_huge_page(unsigned long addr);
+
 #ifdef __tilegx__
 /* We share a single page allocator for both L1 and L2 page tables. */
 #if HV_L1_SIZE != HV_L2_SIZE
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index a6604e9485da..1a20b7ef8ea2 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -233,15 +233,23 @@ static inline void __pte_clear(pte_t *ptep)
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e))
 
+/* Return PA and protection info for a given kernel VA. */
+int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte);
+
+/*
+ * __set_pte() ensures we write the 64-bit PTE with 32-bit words in
+ * the right order on 32-bit platforms and also allows us to write
+ * hooks to check valid PTEs, etc., if we want.
+ */
+void __set_pte(pte_t *ptep, pte_t pte);
+
 /*
- * set_pte_order() sets the given PTE and also sanity-checks the
+ * set_pte() sets the given PTE and also sanity-checks the
  * requested PTE against the page homecaching.  Unspecified parts
  * of the PTE are filled in when it is written to memory, i.e. all
  * caching attributes if "!forcecache", or the home cpu if "anyhome".
  */
-extern void set_pte_order(pte_t *ptep, pte_t pte, int order);
-
-#define set_pte(ptep, pteval) set_pte_order(ptep, pteval, 0)
+extern void set_pte(pte_t *ptep, pte_t pte);
 #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)
 
@@ -293,21 +301,6 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
 #define __swp_entry_to_pte(swp)	((pte_t) { (((long long) ((swp).val)) << 32) })
 
 /*
- * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
- *
- *  dst - pointer to pgd range anwhere on a pgd page
- *  src - ""
- *  count - the number of pgds to copy.
- *
- * dst and src can be on the same page, but the range must not overlap,
- * and must not cross a page boundary.
- */
-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
-{
-	memcpy(dst, src, count * sizeof(pgd_t));
-}
-
-/*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  */
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 53ec34884744..9f98529761fd 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -24,6 +24,7 @@
 #define PGDIR_SIZE	HV_PAGE_SIZE_LARGE
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+#define SIZEOF_PGD	(PTRS_PER_PGD * sizeof(pgd_t))
 
 /*
  * The level-2 index is defined by the difference between the huge
@@ -33,6 +34,7 @@
  * this nomenclature is somewhat confusing.
  */
 #define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
 
 #ifndef __ASSEMBLY__
 
@@ -94,7 +96,6 @@ static inline int pgd_addr_invalid(unsigned long addr)
  */
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 
 extern int ptep_test_and_clear_young(struct vm_area_struct *,
 				     unsigned long addr, pte_t *);
@@ -110,6 +111,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	return pte;
 }
 
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	set_pte(&pmdp->pud.pgd, pmdval.pud.pgd);
+}
+
 /* Create a pmd from a PTFN. */
 static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
 {
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index a9e7c8760334..e6889474038a 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -269,7 +269,6 @@ extern char chip_model[64];
 /* Data on which physical memory controller corresponds to which NUMA node. */
 extern int node_controller[];
 
-
 /* Do we dump information to the console when a user application crashes? */
 extern int show_crashinfo;
 
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index ac6d343129d3..6be2246e015c 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -141,6 +141,9 @@ struct single_step_state {
 /* Single-step the instruction at regs->pc */
 extern void single_step_once(struct pt_regs *regs);
 
+/* Clean up after execve(). */
+extern void single_step_execve(void);
+
 struct task_struct;
 
 extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index 88efdde8dd2b..a8f2c6e31a87 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -78,13 +78,6 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #define _RD_COUNT_SHIFT 24
 #define _RD_COUNT_WIDTH 8
 
-/* Internal functions; do not use. */
-void arch_read_lock_slow(arch_rwlock_t *, u32);
-int arch_read_trylock_slow(arch_rwlock_t *);
-void arch_read_unlock_slow(arch_rwlock_t *);
-void arch_write_lock_slow(arch_rwlock_t *, u32);
-void arch_write_unlock_slow(arch_rwlock_t *, u32);
-
 /**
  * arch_read_can_lock() - would read_trylock() succeed?
  */
@@ -104,94 +97,32 @@ static inline int arch_write_can_lock(arch_rwlock_t *rwlock)
 /**
  * arch_read_lock() - acquire a read lock.
  */
-static inline void arch_read_lock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val << _RD_COUNT_WIDTH)) {
-		arch_read_lock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = val + (1 << _RD_COUNT_SHIFT);
-}
+void arch_read_lock(arch_rwlock_t *rwlock);
 
 /**
- * arch_read_lock() - acquire a write lock.
+ * arch_write_lock() - acquire a write lock.
  */
-static inline void arch_write_lock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val != 0)) {
-		arch_write_lock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = 1 << _WR_NEXT_SHIFT;
-}
+void arch_write_lock(arch_rwlock_t *rwlock);
 
 /**
  * arch_read_trylock() - try to acquire a read lock.
  */
-static inline int arch_read_trylock(arch_rwlock_t *rwlock)
-{
-	int locked;
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val & 1))
-		return arch_read_trylock_slow(rwlock);
-	locked = (val << _RD_COUNT_WIDTH) == 0;
-	rwlock->lock = val + (locked << _RD_COUNT_SHIFT);
-	return locked;
-}
+int arch_read_trylock(arch_rwlock_t *rwlock);
 
 /**
  * arch_write_trylock() - try to acquire a write lock.
  */
-static inline int arch_write_trylock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-
-	/*
-	 * If a tns is in progress, or there's a waiting or active locker,
-	 * or active readers, we can't take the lock, so give up.
-	 */
-	if (unlikely(val != 0)) {
-		if (!(val & 1))
-			rwlock->lock = val;
-		return 0;
-	}
-
-	/* Set the "next" field to mark it locked. */
-	rwlock->lock = 1 << _WR_NEXT_SHIFT;
-	return 1;
-}
+int arch_write_trylock(arch_rwlock_t *rwlock);
 
 /**
  * arch_read_unlock() - release a read lock.
  */
-static inline void arch_read_unlock(arch_rwlock_t *rwlock)
-{
-	u32 val;
-	mb();  /* guarantee anything modified under the lock is visible */
-	val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val & 1)) {
-		arch_read_unlock_slow(rwlock);
-		return;
-	}
-	rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
-}
+void arch_read_unlock(arch_rwlock_t *rwlock);
 
 /**
  * arch_write_unlock() - release a write lock.
  */
-static inline void arch_write_unlock(arch_rwlock_t *rwlock)
-{
-	u32 val;
-	mb();  /* guarantee anything modified under the lock is visible */
-	val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val != (1 << _WR_NEXT_SHIFT))) {
-		arch_write_unlock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = 0;
-}
+void arch_write_unlock(arch_rwlock_t *rwlock);
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index f908473c322d..4d97a2db932e 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -18,13 +18,14 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <asm/backtrace.h>
+#include <asm/page.h>
 #include <hv/hypervisor.h>
 
 /* Everything we need to keep track of a backtrace iteration */
 struct KBacktraceIterator {
 	BacktraceIterator it;
 	struct task_struct *task;     /* task we are backtracing */
-	HV_PTE *pgtable;	/* page table for user space access */
+	pte_t *pgtable;		/* page table for user space access */
 	int end;		/* iteration complete. */
 	int new_context;        /* new context is starting */
 	int profile;            /* profiling, so stop on async intrpt */
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
index 5388850deeb2..23d1842f4839 100644
--- a/arch/tile/include/asm/system.h
+++ b/arch/tile/include/asm/system.h
@@ -90,7 +90,24 @@
 #endif
 
 #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
-int __mb_incoherent(void);  /* Helper routine for mb_incoherent(). */
+#include <hv/syscall_public.h>
+/*
+ * Issue an uncacheable load to each memory controller, then
+ * wait until those loads have completed.
+ */
+static inline void __mb_incoherent(void)
+{
+	long clobber_r10;
+	asm volatile("swint2"
+		     : "=R10" (clobber_r10)
+		     : "R10" (HV_SYS_fence_incoherent)
+		     : "r0", "r1", "r2", "r3", "r4",
+		       "r5", "r6", "r7", "r8", "r9",
+		       "r11", "r12", "r13", "r14",
+		       "r15", "r16", "r17", "r18", "r19",
+		       "r20", "r21", "r22", "r23", "r24",
+		       "r25", "r26", "r27", "r28", "r29");
+}
 #endif
 
 /* Fence to guarantee visibility of stores to incoherent memory. */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 3872f2b345d2..9e8e9c4dfa2a 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -68,6 +68,7 @@ struct thread_info {
 #else
 #define THREAD_SIZE_ORDER (0)
 #endif
+#define THREAD_SIZE_PAGES (1 << THREAD_SIZE_ORDER)
 
 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER)
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h
index 3baf5fc4c0a1..29921f0b86da 100644
--- a/arch/tile/include/asm/timex.h
+++ b/arch/tile/include/asm/timex.h
@@ -38,6 +38,9 @@ static inline cycles_t get_cycles(void)
 
 cycles_t get_clock_rate(void);
 
+/* Convert nanoseconds to core clock cycles. */
+cycles_t ns2cycles(unsigned long nsecs);
+
 /* Called at cpu initialization to set some low-level constants. */
 void setup_clock(void);
 
diff --git a/arch/tile/include/hv/drv_mshim_intf.h b/arch/tile/include/hv/drv_mshim_intf.h
new file mode 100644
index 000000000000..c6ef3bdc55cf
--- /dev/null
+++ b/arch/tile/include/hv/drv_mshim_intf.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+/**
+ * @file drv_mshim_intf.h
+ * Interface definitions for the Linux EDAC memory controller driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
+
+/** Number of memory controllers in the public API. */
+#define TILE_MAX_MSHIMS 4
+
+/** Memory info under each memory controller. */
+struct mshim_mem_info
+{
+  uint64_t mem_size;     /**< Total memory size in bytes. */
+  uint8_t mem_type;      /**< Memory type, DDR2 or DDR3. */
+  uint8_t mem_ecc;       /**< Memory supports ECC. */
+};
+
+/**
+ * DIMM error structure.
+ * For now, only correctable errors are counted and the mshim doesn't record
+ * the error PA. HV takes panic upon uncorrectable errors.
+ */
+struct mshim_mem_error
+{
+  uint32_t sbe_count;     /**< Number of single-bit errors. */
+};
+
+/** Read this offset to get the memory info per mshim. */
+#define MSHIM_MEM_INFO_OFF 0x100
+
+/** Read this offset to check DIMM error. */
+#define MSHIM_MEM_ERROR_OFF 0x200
+
+#endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */
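The two offsets define a small read protocol against the hypervisor's mshim device: a read at MSHIM_MEM_INFO_OFF fills a struct mshim_mem_info, and one at MSHIM_MEM_ERROR_OFF fills a struct mshim_mem_error. A hedged sketch of how a driver might use it, assuming the usual hv_dev_pread() device call and an already-opened device handle (neither is defined by this header):

    /* Sketch: query one controller's memory info; assumes hv_dev_pread(). */
    static int read_mshim_info(int hv_devhdl, struct mshim_mem_info *info)
    {
            int ret = hv_dev_pread(hv_devhdl, 0, (HV_VirtAddr)info,
                                   sizeof(*info), MSHIM_MEM_INFO_OFF);
            /* Anything other than a full-sized read means the query failed. */
            return (ret == sizeof(*info)) ? 0 : -EIO;
    }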
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index f672544cd4f9..1b8bf03d62a0 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -338,9 +338,10 @@ typedef int HV_Errno;
 #define HV_ENOTREADY   -812  /**< Device not ready */
 #define HV_EIO         -813  /**< I/O error */
 #define HV_ENOMEM      -814  /**< Out of memory */
+#define HV_EAGAIN      -815  /**< Try again */
 
 #define HV_ERR_MAX     -801  /**< Largest HV error code */
-#define HV_ERR_MIN     -814  /**< Smallest HV error code */
+#define HV_ERR_MIN     -815  /**< Smallest HV error code */
 
 #ifndef __ASSEMBLER__
 
@@ -867,6 +868,43 @@ typedef struct
  */
 HV_PhysAddrRange hv_inquire_physical(int idx);
 
+/** Possible DIMM types. */
+typedef enum
+{
+  NO_DIMM = 0,            /**< No DIMM */
+  DDR2 = 1,               /**< DDR2 */
+  DDR3 = 2                /**< DDR3 */
+} HV_DIMM_Type;
+
+#ifdef __tilegx__
+
+/** Log2 of minimum DIMM bytes supported by the memory controller. */
+#define HV_MSH_MIN_DIMM_SIZE_SHIFT 29
+
+/** Max number of DIMMs contained by one memory controller. */
+#define HV_MSH_MAX_DIMMS 8
+
+#else
+
+/** Log2 of minimum DIMM bytes supported by the memory controller. */
+#define HV_MSH_MIN_DIMM_SIZE_SHIFT 26
+
+/** Max number of DIMMs contained by one memory controller. */
+#define HV_MSH_MAX_DIMMS 2
+
+#endif
+
+/** Number of bits to right-shift to get the DIMM type. */
+#define HV_DIMM_TYPE_SHIFT 0
+
+/** Bits to mask to get the DIMM type. */
+#define HV_DIMM_TYPE_MASK 0xf
+
+/** Number of bits to right-shift to get the DIMM size. */
+#define HV_DIMM_SIZE_SHIFT 4
+
+/** Bits to mask to get the DIMM size. */
+#define HV_DIMM_SIZE_MASK 0xf
+
 
 /** Memory controller information. */
 typedef struct
@@ -964,6 +1002,11 @@ HV_ASIDRange hv_inquire_asid(int idx);
 
 /** Waits for at least the specified number of nanoseconds then returns.
  *
+ * NOTE: this deprecated function currently assumes a 750 MHz clock,
+ * and is thus not generally suitable for use.  New code should call
+ * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for,
+ * and delay by looping while checking the cycle counter SPR.
+ *
  * @param nanosecs The number of nanoseconds to sleep.
  */
 void hv_nanosleep(int nanosecs);
@@ -1038,6 +1081,7 @@ int hv_console_write(HV_VirtAddr bytes, int len);
 *  downcall:
 *
 *  INT_MESSAGE_RCV_DWNCL   (hypervisor message available)
+*  INT_DEV_INTR_DWNCL      (device interrupt)
 *  INT_DMATLB_MISS_DWNCL   (DMA TLB miss)
 *  INT_SNITLB_MISS_DWNCL   (SNI TLB miss)
 *  INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation)
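The new DIMM shift/mask constants describe a packed descriptor: the low nibble carries the HV_DIMM_Type and the next nibble a size field relative to HV_MSH_MIN_DIMM_SIZE_SHIFT. A decoding sketch under that reading (the packed "dimm" value and the exact size encoding are assumptions for illustration, not spelled out by the header):

    /* Sketch: pull the type and size out of a packed DIMM descriptor. */
    static unsigned int dimm_type(unsigned int dimm)
    {
            return (dimm >> HV_DIMM_TYPE_SHIFT) & HV_DIMM_TYPE_MASK;
    }

    static unsigned long long dimm_bytes(unsigned int dimm)
    {
            unsigned int sz = (dimm >> HV_DIMM_SIZE_SHIFT) & HV_DIMM_SIZE_MASK;
            return 1ULL << (HV_MSH_MIN_DIMM_SIZE_SHIFT + sz);
    }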
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index fd8dc42abdcb..431e9ae60488 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -38,12 +38,6 @@ STD_ENTRY(kernel_execve)
 	jrp lr
 	STD_ENDPROC(kernel_execve)
 
-/* Delay a fixed number of cycles. */
-STD_ENTRY(__delay)
-	{ addi r0, r0, -1; bnzt r0, . }
-	jrp lr
-	STD_ENDPROC(__delay)
-
 /*
  * We don't run this function directly, but instead copy it to a page
  * we map into every user process.  See vdso_setup().
@@ -97,23 +91,17 @@ STD_ENTRY(smp_nap)
 
 /*
  * Enable interrupts racelessly and then nap until interrupted.
+ * Architecturally, we are guaranteed that enabling interrupts via
+ * mtspr to INTERRUPT_CRITICAL_SECTION only interrupts at the next PC.
  * This function's _cpu_idle_nap address is special; see intvec.S.
  * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and
  * as a result return to the function that called _cpu_idle().
  */
 STD_ENTRY(_cpu_idle)
-	{
-	 lnk r0
-	 movei r1, KERNEL_PL
-	}
-	{
-	 addli r0, r0, _cpu_idle_nap - .
-	 mtspr INTERRUPT_CRITICAL_SECTION, r1
-	}
+	movei r1, 1
+	mtspr INTERRUPT_CRITICAL_SECTION, r1
 	IRQ_ENABLE(r2, r3)             /* unmask, but still with ICS set */
-	mtspr SPR_EX_CONTEXT_K_1, r1   /* Kernel PL, ICS clear */
-	mtspr SPR_EX_CONTEXT_K_0, r0
-	iret
+	mtspr INTERRUPT_CRITICAL_SECTION, zero
 	.global _cpu_idle_nap
 _cpu_idle_nap:
 	nap
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 90e7c4435693..1a39b7c1c87e 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S | |||
@@ -133,7 +133,7 @@ ENTRY(_start) | |||
133 | } | 133 | } |
134 | ENDPROC(_start) | 134 | ENDPROC(_start) |
135 | 135 | ||
136 | .section ".bss.page_aligned","w" | 136 | __PAGE_ALIGNED_BSS |
137 | .align PAGE_SIZE | 137 | .align PAGE_SIZE |
138 | ENTRY(empty_zero_page) | 138 | ENTRY(empty_zero_page) |
139 | .fill PAGE_SIZE,1,0 | 139 | .fill PAGE_SIZE,1,0 |
@@ -145,10 +145,10 @@ ENTRY(empty_zero_page) | |||
145 | .endif | 145 | .endif |
146 | .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ | 146 | .word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \ |
147 | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) | 147 | (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE) |
148 | .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN) | 148 | .word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32)) |
149 | .endm | 149 | .endm |
150 | 150 | ||
151 | .section ".data.page_aligned","wa" | 151 | __PAGE_ALIGNED_DATA |
152 | .align PAGE_SIZE | 152 | .align PAGE_SIZE |
153 | ENTRY(swapper_pg_dir) | 153 | ENTRY(swapper_pg_dir) |
154 | /* | 154 | /* |
@@ -158,12 +158,14 @@ ENTRY(swapper_pg_dir) | |||
158 | */ | 158 | */ |
159 | .set addr, 0 | 159 | .set addr, 0 |
160 | .rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT | 160 | .rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT |
161 | PTE addr + PAGE_OFFSET, addr, HV_PTE_READABLE | HV_PTE_WRITABLE | 161 | PTE addr + PAGE_OFFSET, addr, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ |
162 | (1 << (HV_PTE_INDEX_WRITABLE - 32)) | ||
162 | .set addr, addr + PGDIR_SIZE | 163 | .set addr, addr + PGDIR_SIZE |
163 | .endr | 164 | .endr |
164 | 165 | ||
165 | /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ | 166 | /* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */ |
166 | PTE MEM_SV_INTRPT, 0, HV_PTE_READABLE | HV_PTE_EXECUTABLE | 167 | PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ |
168 | (1 << (HV_PTE_INDEX_EXECUTABLE - 32)) | ||
167 | .org swapper_pg_dir + HV_L1_SIZE | 169 | .org swapper_pg_dir + HV_L1_SIZE |
168 | END(swapper_pg_dir) | 170 | END(swapper_pg_dir) |
169 | 171 | ||
@@ -176,6 +178,7 @@ ENTRY(swapper_pg_dir) | |||
176 | __INITDATA | 178 | __INITDATA |
177 | .align CHIP_L2_LINE_SIZE() | 179 | .align CHIP_L2_LINE_SIZE() |
178 | ENTRY(swapper_pgprot) | 180 | ENTRY(swapper_pgprot) |
179 | PTE 0, 0, HV_PTE_READABLE | HV_PTE_WRITABLE, 1 | 181 | PTE 0, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \ |
182 | (1 << (HV_PTE_INDEX_WRITABLE - 32)), 1 | ||
180 | .align CHIP_L2_LINE_SIZE() | 183 | .align CHIP_L2_LINE_SIZE() |
181 | END(swapper_pgprot) | 184 | END(swapper_pgprot) |
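The head_32.S hunks above emit each 64-bit hypervisor PTE as two 32-bit .word directives, which is why attribute bits such as HV_PTE_INDEX_READABLE now appear shifted down by 32: they live in the high word. A minimal user-space sketch of that split (the numeric bit indices below are placeholders, not the real hypervisor values):

#include <stdint.h>
#include <stdio.h>

/* Placeholder indices standing in for HV_PTE_INDEX_READABLE/WRITABLE;
 * the only assumption that matters is that they are >= 32. */
#define IDX_READABLE 35
#define IDX_WRITABLE 36

int main(void)
{
        uint64_t pte = ((uint64_t)1 << IDX_READABLE) |
                       ((uint64_t)1 << IDX_WRITABLE);
        uint32_t lo = (uint32_t)pte;          /* what the first .word emits */
        uint32_t hi = (uint32_t)(pte >> 32);  /* what the second .word emits */

        /* The high word holds the same bits, shifted down by 32. */
        printf("hi=%#x expected=%#x lo=%#x\n", hi,
               (1u << (IDX_READABLE - 32)) | (1u << (IDX_WRITABLE - 32)), lo);
        return 0;
}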
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 5eed4a02bf62..fffcfa6b3a62 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S | |||
@@ -32,10 +32,6 @@ | |||
32 | # error "No support for kernel preemption currently" | 32 | # error "No support for kernel preemption currently" |
33 | #endif | 33 | #endif |
34 | 34 | ||
35 | #if INT_INTCTRL_K < 32 || INT_INTCTRL_K >= 48 | ||
36 | # error INT_INTCTRL_K coded to set high interrupt mask | ||
37 | #endif | ||
38 | |||
39 | #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) | 35 | #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) |
40 | 36 | ||
41 | #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) | 37 | #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) |
@@ -1199,46 +1195,6 @@ STD_ENTRY(interrupt_return) | |||
1199 | STD_ENDPROC(interrupt_return) | 1195 | STD_ENDPROC(interrupt_return) |
1200 | 1196 | ||
1201 | /* | 1197 | /* |
1202 | * This interrupt variant clears the INT_INTCTRL_K interrupt mask bit | ||
1203 | * before returning, so we can properly get more downcalls. | ||
1204 | */ | ||
1205 | .pushsection .text.handle_interrupt_downcall,"ax" | ||
1206 | handle_interrupt_downcall: | ||
1207 | finish_interrupt_save handle_interrupt_downcall | ||
1208 | check_single_stepping normal, .Ldispatch_downcall | ||
1209 | .Ldispatch_downcall: | ||
1210 | |||
1211 | /* Clear INTCTRL_K from the set of interrupts we ever enable. */ | ||
1212 | GET_INTERRUPTS_ENABLED_MASK_PTR(r30) | ||
1213 | { | ||
1214 | addi r30, r30, 4 | ||
1215 | movei r31, INT_MASK(INT_INTCTRL_K) | ||
1216 | } | ||
1217 | { | ||
1218 | lw r20, r30 | ||
1219 | nor r21, r31, zero | ||
1220 | } | ||
1221 | and r20, r20, r21 | ||
1222 | sw r30, r20 | ||
1223 | |||
1224 | { | ||
1225 | jalr r0 | ||
1226 | PTREGS_PTR(r0, PTREGS_OFFSET_BASE) | ||
1227 | } | ||
1228 | FEEDBACK_REENTER(handle_interrupt_downcall) | ||
1229 | |||
1230 | /* Allow INTCTRL_K to be enabled next time we enable interrupts. */ | ||
1231 | lw r20, r30 | ||
1232 | or r20, r20, r31 | ||
1233 | sw r30, r20 | ||
1234 | |||
1235 | { | ||
1236 | movei r30, 0 /* not an NMI */ | ||
1237 | j interrupt_return | ||
1238 | } | ||
1239 | STD_ENDPROC(handle_interrupt_downcall) | ||
1240 | |||
1241 | /* | ||
1242 | * Some interrupts don't check for single stepping | 1198 | * Some interrupts don't check for single stepping |
1243 | */ | 1199 | */ |
1244 | .pushsection .text.handle_interrupt_no_single_step,"ax" | 1200 | .pushsection .text.handle_interrupt_no_single_step,"ax" |
@@ -1600,7 +1556,10 @@ STD_ENTRY(_sys_clone) | |||
1600 | .align 64 | 1556 | .align 64 |
1601 | /* Align much later jump on the start of a cache line. */ | 1557 | /* Align much later jump on the start of a cache line. */ |
1602 | #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() | 1558 | #if !ATOMIC_LOCKS_FOUND_VIA_TABLE() |
1603 | nop; nop | 1559 | nop |
1560 | #if PAGE_SIZE >= 0x10000 | ||
1561 | nop | ||
1562 | #endif | ||
1604 | #endif | 1563 | #endif |
1605 | ENTRY(sys_cmpxchg) | 1564 | ENTRY(sys_cmpxchg) |
1606 | 1565 | ||
@@ -1628,9 +1587,13 @@ ENTRY(sys_cmpxchg) | |||
1628 | * about aliasing among multiple mappings of the same physical page, | 1587 | * about aliasing among multiple mappings of the same physical page, |
1629 | * and we ignore the low 3 bits so we have one lock that covers | 1588 | * and we ignore the low 3 bits so we have one lock that covers |
1630 | * both a cmpxchg64() and a cmpxchg() on either its low or high word. | 1589 | * both a cmpxchg64() and a cmpxchg() on either its low or high word. |
1631 | * NOTE: this code must match __atomic_hashed_lock() in lib/atomic.c. | 1590 | * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c. |
1632 | */ | 1591 | */ |
1633 | 1592 | ||
1593 | #if (PAGE_OFFSET & 0xffff) != 0 | ||
1594 | # error Code here assumes PAGE_OFFSET can be loaded with just hi16() | ||
1595 | #endif | ||
1596 | |||
1634 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() | 1597 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() |
1635 | { | 1598 | { |
1636 | /* Check for unaligned input. */ | 1599 | /* Check for unaligned input. */ |
@@ -1723,11 +1686,14 @@ ENTRY(sys_cmpxchg) | |||
1723 | lw r26, r0 | 1686 | lw r26, r0 |
1724 | } | 1687 | } |
1725 | { | 1688 | { |
1726 | /* atomic_locks is page aligned so this suffices to get its addr. */ | 1689 | auli r21, zero, ha16(atomic_locks) |
1727 | auli r21, zero, hi16(atomic_locks) | ||
1728 | 1690 | ||
1729 | bbns r23, .Lcmpxchg_badaddr | 1691 | bbns r23, .Lcmpxchg_badaddr |
1730 | } | 1692 | } |
1693 | #if PAGE_SIZE < 0x10000 | ||
1694 | /* atomic_locks is page-aligned so for big pages we don't need this. */ | ||
1695 | addli r21, r21, lo16(atomic_locks) | ||
1696 | #endif | ||
1731 | { | 1697 | { |
1732 | /* | 1698 | /* |
1733 | * Insert the hash bits into the page-aligned pointer. | 1699 | * Insert the hash bits into the page-aligned pointer. |
@@ -1762,7 +1728,7 @@ ENTRY(sys_cmpxchg) | |||
1762 | 1728 | ||
1763 | /* | 1729 | /* |
1764 | * Perform the actual cmpxchg or atomic_update. | 1730 | * Perform the actual cmpxchg or atomic_update. |
1765 | * Note that __futex_mark_unlocked() in uClibc relies on | 1731 | * Note that the system <arch/atomic.h> header relies on |
1766 | * atomic_update() to always perform an "mf", so don't make | 1732 | * atomic_update() to always perform an "mf", so don't make |
1767 | * it optional or conditional without modifying that code. | 1733 | * it optional or conditional without modifying that code. |
1768 | */ | 1734 | */ |
@@ -2014,17 +1980,17 @@ int_unalign: | |||
2014 | #endif | 1980 | #endif |
2015 | int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr | 1981 | int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr |
2016 | int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ | 1982 | int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ |
2017 | hv_message_intr, handle_interrupt_downcall | 1983 | hv_message_intr |
2018 | int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \ | 1984 | int_hand INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \ |
2019 | tile_dev_intr, handle_interrupt_downcall | 1985 | tile_dev_intr |
2020 | int_hand INT_I_ASID, I_ASID, bad_intr | 1986 | int_hand INT_I_ASID, I_ASID, bad_intr |
2021 | int_hand INT_D_ASID, D_ASID, bad_intr | 1987 | int_hand INT_D_ASID, D_ASID, bad_intr |
2022 | int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \ | 1988 | int_hand INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \ |
2023 | do_page_fault, handle_interrupt_downcall | 1989 | do_page_fault |
2024 | int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \ | 1990 | int_hand INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \ |
2025 | do_page_fault, handle_interrupt_downcall | 1991 | do_page_fault |
2026 | int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \ | 1992 | int_hand INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \ |
2027 | do_page_fault, handle_interrupt_downcall | 1993 | do_page_fault |
2028 | int_hand INT_SN_CPL, SN_CPL, bad_intr | 1994 | int_hand INT_SN_CPL, SN_CPL, bad_intr |
2029 | int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap | 1995 | int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap |
2030 | #if CHIP_HAS_AUX_PERF_COUNTERS() | 1996 | #if CHIP_HAS_AUX_PERF_COUNTERS() |
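In the sys_cmpxchg hunk above, the page-aligned atomic_locks address is now formed with auli/ha16() and, only when PAGE_SIZE is below 64 KB, a trailing addli with lo16(). The sketch below uses assumed ha16/lo16 semantics: because addli sign-extends its 16-bit immediate, the high half has to be pre-adjusted so the pair reconstructs the full address, and a 64 KB-aligned symbol has lo16() == 0 so the addli can be dropped.

#include <stdint.h>
#include <stdio.h>

/* Assumed assembler semantics, shown for illustration only. */
static uint32_t ha16(uint32_t x) { return (x + 0x8000) >> 16; }
static int32_t  lo16(uint32_t x) { return (int16_t)(x & 0xffff); }

int main(void)
{
        uint32_t sym = 0xfd9f8000;  /* made-up link address of atomic_locks */

        /* auli (high half) followed by addli (signed low half). */
        uint32_t addr = (ha16(sym) << 16) + lo16(sym);
        printf("%#x == %#x\n", addr, sym);

        /* With 64 KB pages the page-aligned symbol needs no addli at all. */
        printf("lo16 of a 64 KB-aligned address: %d\n", lo16(0xfd9f0000));
        return 0;
}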
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 128805ef8f2c..0baa7580121f 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c | |||
@@ -176,43 +176,43 @@ void disable_percpu_irq(unsigned int irq) | |||
176 | EXPORT_SYMBOL(disable_percpu_irq); | 176 | EXPORT_SYMBOL(disable_percpu_irq); |
177 | 177 | ||
178 | /* Mask an interrupt. */ | 178 | /* Mask an interrupt. */ |
179 | static void tile_irq_chip_mask(unsigned int irq) | 179 | static void tile_irq_chip_mask(struct irq_data *d) |
180 | { | 180 | { |
181 | mask_irqs(1UL << irq); | 181 | mask_irqs(1UL << d->irq); |
182 | } | 182 | } |
183 | 183 | ||
184 | /* Unmask an interrupt. */ | 184 | /* Unmask an interrupt. */ |
185 | static void tile_irq_chip_unmask(unsigned int irq) | 185 | static void tile_irq_chip_unmask(struct irq_data *d) |
186 | { | 186 | { |
187 | unmask_irqs(1UL << irq); | 187 | unmask_irqs(1UL << d->irq); |
188 | } | 188 | } |
189 | 189 | ||
190 | /* | 190 | /* |
191 | * Clear an interrupt before processing it so that any new assertions | 191 | * Clear an interrupt before processing it so that any new assertions |
192 | * will trigger another irq. | 192 | * will trigger another irq. |
193 | */ | 193 | */ |
194 | static void tile_irq_chip_ack(unsigned int irq) | 194 | static void tile_irq_chip_ack(struct irq_data *d) |
195 | { | 195 | { |
196 | if ((unsigned long)get_irq_chip_data(irq) != IS_HW_CLEARED) | 196 | if ((unsigned long)irq_data_get_irq_chip_data(d) != IS_HW_CLEARED) |
197 | clear_irqs(1UL << irq); | 197 | clear_irqs(1UL << d->irq); |
198 | } | 198 | } |
199 | 199 | ||
200 | /* | 200 | /* |
201 | * For per-cpu interrupts, we need to avoid unmasking any interrupts | 201 | * For per-cpu interrupts, we need to avoid unmasking any interrupts |
202 | * that we disabled via disable_percpu_irq(). | 202 | * that we disabled via disable_percpu_irq(). |
203 | */ | 203 | */ |
204 | static void tile_irq_chip_eoi(unsigned int irq) | 204 | static void tile_irq_chip_eoi(struct irq_data *d) |
205 | { | 205 | { |
206 | if (!(__get_cpu_var(irq_disable_mask) & (1UL << irq))) | 206 | if (!(__get_cpu_var(irq_disable_mask) & (1UL << d->irq))) |
207 | unmask_irqs(1UL << irq); | 207 | unmask_irqs(1UL << d->irq); |
208 | } | 208 | } |
209 | 209 | ||
210 | static struct irq_chip tile_irq_chip = { | 210 | static struct irq_chip tile_irq_chip = { |
211 | .name = "tile_irq_chip", | 211 | .name = "tile_irq_chip", |
212 | .ack = tile_irq_chip_ack, | 212 | .irq_ack = tile_irq_chip_ack, |
213 | .eoi = tile_irq_chip_eoi, | 213 | .irq_eoi = tile_irq_chip_eoi, |
214 | .mask = tile_irq_chip_mask, | 214 | .irq_mask = tile_irq_chip_mask, |
215 | .unmask = tile_irq_chip_unmask, | 215 | .irq_unmask = tile_irq_chip_unmask, |
216 | }; | 216 | }; |
217 | 217 | ||
218 | void __init init_IRQ(void) | 218 | void __init init_IRQ(void) |
@@ -277,8 +277,10 @@ int show_interrupts(struct seq_file *p, void *v) | |||
277 | } | 277 | } |
278 | 278 | ||
279 | if (i < NR_IRQS) { | 279 | if (i < NR_IRQS) { |
280 | raw_spin_lock_irqsave(&irq_desc[i].lock, flags); | 280 | struct irq_desc *desc = irq_to_desc(i); |
281 | action = irq_desc[i].action; | 281 | |
282 | raw_spin_lock_irqsave(&desc->lock, flags); | ||
283 | action = desc->action; | ||
282 | if (!action) | 284 | if (!action) |
283 | goto skip; | 285 | goto skip; |
284 | seq_printf(p, "%3d: ", i); | 286 | seq_printf(p, "%3d: ", i); |
@@ -288,7 +290,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
288 | for_each_online_cpu(j) | 290 | for_each_online_cpu(j) |
289 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); | 291 | seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); |
290 | #endif | 292 | #endif |
291 | seq_printf(p, " %14s", irq_desc[i].chip->name); | 293 | seq_printf(p, " %14s", get_irq_desc_chip(desc)->name); |
292 | seq_printf(p, " %s", action->name); | 294 | seq_printf(p, " %s", action->name); |
293 | 295 | ||
294 | for (action = action->next; action; action = action->next) | 296 | for (action = action->next; action; action = action->next) |
@@ -296,7 +298,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
296 | 298 | ||
297 | seq_putc(p, '\n'); | 299 | seq_putc(p, '\n'); |
298 | skip: | 300 | skip: |
299 | raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); | 301 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
300 | } | 302 | } |
301 | return 0; | 303 | return 0; |
302 | } | 304 | } |
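The irq.c conversion above keeps the same mask/unmask/ack/eoi behavior but takes a struct irq_data pointer, reading the irq number and chip data back through it. A hypothetical wiring sketch in the style of this kernel generation (the helper name example_irq_setup and the choice of handle_level_irq are assumptions, not part of the hunks above):

#include <linux/irq.h>

static void example_irq_setup(unsigned int irq, int hw_cleared)
{
        set_irq_chip_and_handler(irq, &tile_irq_chip, handle_level_irq);

        /* Interrupts the hardware clears itself are tagged via chip data,
         * which tile_irq_chip_ack() reads back with
         * irq_data_get_irq_chip_data() to skip the explicit clear_irqs(). */
        if (hw_cleared)
                set_irq_chip_data(irq, (void *)IS_HW_CLEARED);
}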
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index 0d8b9e933487..e00d7179989e 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c | |||
@@ -240,8 +240,11 @@ static void setup_quasi_va_is_pa(void) | |||
240 | pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); | 240 | pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); |
241 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); | 241 | pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); |
242 | 242 | ||
243 | for (i = 0; i < pgd_index(PAGE_OFFSET); i++) | 243 | for (i = 0; i < pgd_index(PAGE_OFFSET); i++) { |
244 | pgtable[i] = pfn_pte(i << (HPAGE_SHIFT - PAGE_SHIFT), pte); | 244 | unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT); |
245 | if (pfn_valid(pfn)) | ||
246 | __set_pte(&pgtable[i], pfn_pte(pfn, pte)); | ||
247 | } | ||
245 | } | 248 | } |
246 | 249 | ||
247 | 250 | ||
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index 5ad5e13b0fa6..658752b2835e 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c | |||
@@ -86,6 +86,21 @@ EXPORT_SYMBOL(dma_free_coherent); | |||
86 | * can count on nothing having been touched. | 86 | * can count on nothing having been touched. |
87 | */ | 87 | */ |
88 | 88 | ||
89 | /* Flush a PA range from cache page by page. */ | ||
90 | static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size) | ||
91 | { | ||
92 | struct page *page = pfn_to_page(PFN_DOWN(dma_addr)); | ||
93 | size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1)); | ||
94 | |||
95 | while ((ssize_t)size > 0) { | ||
96 | /* Flush the page. */ | ||
97 | homecache_flush_cache(page++, 0); | ||
98 | |||
99 | /* Figure out if we need to continue on the next page. */ | ||
100 | size -= bytesleft; | ||
101 | bytesleft = PAGE_SIZE; | ||
102 | } | ||
103 | } | ||
89 | 104 | ||
90 | /* | 105 | /* |
91 | * dma_map_single can be passed any memory address, and there appear | 106 | * dma_map_single can be passed any memory address, and there appear |
@@ -97,26 +112,12 @@ EXPORT_SYMBOL(dma_free_coherent); | |||
97 | dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, | 112 | dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, |
98 | enum dma_data_direction direction) | 113 | enum dma_data_direction direction) |
99 | { | 114 | { |
100 | struct page *page; | 115 | dma_addr_t dma_addr = __pa(ptr); |
101 | dma_addr_t dma_addr; | ||
102 | int thispage; | ||
103 | 116 | ||
104 | BUG_ON(!valid_dma_direction(direction)); | 117 | BUG_ON(!valid_dma_direction(direction)); |
105 | WARN_ON(size == 0); | 118 | WARN_ON(size == 0); |
106 | 119 | ||
107 | dma_addr = __pa(ptr); | 120 | __dma_map_pa_range(dma_addr, size); |
108 | |||
109 | /* We might have been handed a buffer that wraps a page boundary */ | ||
110 | while ((int)size > 0) { | ||
111 | /* The amount to flush that's on this page */ | ||
112 | thispage = PAGE_SIZE - ((unsigned long)ptr & (PAGE_SIZE - 1)); | ||
113 | thispage = min((int)thispage, (int)size); | ||
114 | /* Is this valid for any page we could be handed? */ | ||
115 | page = pfn_to_page(kaddr_to_pfn(ptr)); | ||
116 | homecache_flush_cache(page, 0); | ||
117 | ptr += thispage; | ||
118 | size -= thispage; | ||
119 | } | ||
120 | 121 | ||
121 | return dma_addr; | 122 | return dma_addr; |
122 | } | 123 | } |
@@ -140,10 +141,8 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, | |||
140 | WARN_ON(nents == 0 || sglist->length == 0); | 141 | WARN_ON(nents == 0 || sglist->length == 0); |
141 | 142 | ||
142 | for_each_sg(sglist, sg, nents, i) { | 143 | for_each_sg(sglist, sg, nents, i) { |
143 | struct page *page; | ||
144 | sg->dma_address = sg_phys(sg); | 144 | sg->dma_address = sg_phys(sg); |
145 | page = pfn_to_page(sg->dma_address >> PAGE_SHIFT); | 145 | __dma_map_pa_range(sg->dma_address, sg->length); |
146 | homecache_flush_cache(page, 0); | ||
147 | } | 146 | } |
148 | 147 | ||
149 | return nents; | 148 | return nents; |
@@ -163,6 +162,7 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, | |||
163 | { | 162 | { |
164 | BUG_ON(!valid_dma_direction(direction)); | 163 | BUG_ON(!valid_dma_direction(direction)); |
165 | 164 | ||
165 | BUG_ON(offset + size > PAGE_SIZE); | ||
166 | homecache_flush_cache(page, 0); | 166 | homecache_flush_cache(page, 0); |
167 | 167 | ||
168 | return page_to_pa(page) + offset; | 168 | return page_to_pa(page) + offset; |
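The new __dma_map_pa_range() walks a physical range page by page, with the first step shortened to the distance from dma_addr to the next page boundary. A stand-alone sketch of just that arithmetic, with an assumed 4 KB page size:

#include <stdio.h>

#define PAGE_SIZE 0x1000UL   /* assumed for the example */

/* Mirrors the bytesleft logic: count how many pages get flushed. */
static int pages_flushed(unsigned long dma_addr, long size)
{
        long bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));
        int n = 0;

        while (size > 0) {
                n++;                  /* homecache_flush_cache(page++, 0) */
                size -= bytesleft;
                bytesleft = PAGE_SIZE;
        }
        return n;
}

int main(void)
{
        printf("%d\n", pages_flushed(0x1ff0, 0x40));   /* 2: crosses a page */
        printf("%d\n", pages_flushed(0x2000, 0x1000)); /* 1: exactly one page */
        return 0;
}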
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index e90eb53173b0..b9cd962e1d30 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c | |||
@@ -165,7 +165,7 @@ void free_thread_info(struct thread_info *info) | |||
165 | kfree(step_state); | 165 | kfree(step_state); |
166 | } | 166 | } |
167 | 167 | ||
168 | free_page((unsigned long)info); | 168 | free_pages((unsigned long)info, THREAD_SIZE_ORDER); |
169 | } | 169 | } |
170 | 170 | ||
171 | static void save_arch_state(struct thread_struct *t); | 171 | static void save_arch_state(struct thread_struct *t); |
@@ -574,6 +574,8 @@ SYSCALL_DEFINE4(execve, const char __user *, path, | |||
574 | goto out; | 574 | goto out; |
575 | error = do_execve(filename, argv, envp, regs); | 575 | error = do_execve(filename, argv, envp, regs); |
576 | putname(filename); | 576 | putname(filename); |
577 | if (error == 0) | ||
578 | single_step_execve(); | ||
577 | out: | 579 | out: |
578 | return error; | 580 | return error; |
579 | } | 581 | } |
@@ -593,6 +595,8 @@ long compat_sys_execve(const char __user *path, | |||
593 | goto out; | 595 | goto out; |
594 | error = compat_do_execve(filename, argv, envp, regs); | 596 | error = compat_do_execve(filename, argv, envp, regs); |
595 | putname(filename); | 597 | putname(filename); |
598 | if (error == 0) | ||
599 | single_step_execve(); | ||
596 | out: | 600 | out: |
597 | return error; | 601 | return error; |
598 | } | 602 | } |
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index f18573643ed1..3696b1832566 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c | |||
@@ -59,6 +59,8 @@ unsigned long __initdata node_memmap_pfn[MAX_NUMNODES]; | |||
59 | unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; | 59 | unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; |
60 | unsigned long __initdata node_free_pfn[MAX_NUMNODES]; | 60 | unsigned long __initdata node_free_pfn[MAX_NUMNODES]; |
61 | 61 | ||
62 | static unsigned long __initdata node_percpu[MAX_NUMNODES]; | ||
63 | |||
62 | #ifdef CONFIG_HIGHMEM | 64 | #ifdef CONFIG_HIGHMEM |
63 | /* Page frame index of end of lowmem on each controller. */ | 65 | /* Page frame index of end of lowmem on each controller. */ |
64 | unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; | 66 | unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; |
@@ -554,7 +556,6 @@ static void __init setup_bootmem_allocator(void) | |||
554 | reserve_bootmem(crashk_res.start, | 556 | reserve_bootmem(crashk_res.start, |
555 | crashk_res.end - crashk_res.start + 1, 0); | 557 | crashk_res.end - crashk_res.start + 1, 0); |
556 | #endif | 558 | #endif |
557 | |||
558 | } | 559 | } |
559 | 560 | ||
560 | void *__init alloc_remap(int nid, unsigned long size) | 561 | void *__init alloc_remap(int nid, unsigned long size) |
@@ -568,11 +569,13 @@ void *__init alloc_remap(int nid, unsigned long size) | |||
568 | 569 | ||
569 | static int __init percpu_size(void) | 570 | static int __init percpu_size(void) |
570 | { | 571 | { |
571 | int size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); | 572 | int size = __per_cpu_end - __per_cpu_start; |
572 | #ifdef CONFIG_MODULES | 573 | size += PERCPU_MODULE_RESERVE; |
573 | if (size < PERCPU_ENOUGH_ROOM) | 574 | size += PERCPU_DYNAMIC_EARLY_SIZE; |
574 | size = PERCPU_ENOUGH_ROOM; | 575 | if (size < PCPU_MIN_UNIT_SIZE) |
575 | #endif | 576 | size = PCPU_MIN_UNIT_SIZE; |
577 | size = roundup(size, PAGE_SIZE); | ||
578 | |||
576 | /* In several places we assume the per-cpu data fits on a huge page. */ | 579 | /* In several places we assume the per-cpu data fits on a huge page. */ |
577 | BUG_ON(kdata_huge && size > HPAGE_SIZE); | 580 | BUG_ON(kdata_huge && size > HPAGE_SIZE); |
578 | return size; | 581 | return size; |
@@ -589,7 +592,6 @@ static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) | |||
589 | static void __init zone_sizes_init(void) | 592 | static void __init zone_sizes_init(void) |
590 | { | 593 | { |
591 | unsigned long zones_size[MAX_NR_ZONES] = { 0 }; | 594 | unsigned long zones_size[MAX_NR_ZONES] = { 0 }; |
592 | unsigned long node_percpu[MAX_NUMNODES] = { 0 }; | ||
593 | int size = percpu_size(); | 595 | int size = percpu_size(); |
594 | int num_cpus = smp_height * smp_width; | 596 | int num_cpus = smp_height * smp_width; |
595 | int i; | 597 | int i; |
@@ -674,7 +676,7 @@ static void __init zone_sizes_init(void) | |||
674 | NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; | 676 | NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; |
675 | 677 | ||
676 | free_area_init_node(i, zones_size, start, NULL); | 678 | free_area_init_node(i, zones_size, start, NULL); |
677 | printk(KERN_DEBUG " DMA zone: %ld per-cpu pages\n", | 679 | printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", |
678 | PFN_UP(node_percpu[i])); | 680 | PFN_UP(node_percpu[i])); |
679 | 681 | ||
680 | /* Track the type of memory on each node */ | 682 | /* Track the type of memory on each node */ |
@@ -1312,6 +1314,8 @@ static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) | |||
1312 | 1314 | ||
1313 | BUG_ON(size % PAGE_SIZE != 0); | 1315 | BUG_ON(size % PAGE_SIZE != 0); |
1314 | pfn_offset[nid] += size / PAGE_SIZE; | 1316 | pfn_offset[nid] += size / PAGE_SIZE; |
1317 | BUG_ON(node_percpu[nid] < size); | ||
1318 | node_percpu[nid] -= size; | ||
1315 | if (percpu_pfn[cpu] == 0) | 1319 | if (percpu_pfn[cpu] == 0) |
1316 | percpu_pfn[cpu] = pfn; | 1320 | percpu_pfn[cpu] = pfn; |
1317 | return pfn_to_kaddr(pfn); | 1321 | return pfn_to_kaddr(pfn); |
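The reworked percpu_size() above sizes each per-cpu unit as the static area plus the module reserve and the early dynamic region, clamps it to the minimum chunk size, and rounds up to a page. Worked through with made-up numbers (the real constants depend on the config and the link):

#include <stdio.h>

#define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        long statics   = 60 * 1024;  /* __per_cpu_end - __per_cpu_start (assumed) */
        long reserve   =  8 * 1024;  /* PERCPU_MODULE_RESERVE (assumed) */
        long dyn_early = 12 * 1024;  /* PERCPU_DYNAMIC_EARLY_SIZE (assumed) */
        long min_unit  = 32 * 1024;  /* PCPU_MIN_UNIT_SIZE (assumed) */
        long page      = 64 * 1024;  /* 64 KB tile pages (assumed) */

        long size = statics + reserve + dyn_early;
        if (size < min_unit)
                size = min_unit;
        printf("%ld\n", ROUNDUP(size, page));  /* 131072: two 64 KB pages */
        return 0;
}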
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 1eb3b39e36c7..84a729e06ec4 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c | |||
@@ -56,7 +56,7 @@ enum mem_op { | |||
56 | MEMOP_STORE_POSTINCR | 56 | MEMOP_STORE_POSTINCR |
57 | }; | 57 | }; |
58 | 58 | ||
59 | static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset) | 59 | static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) |
60 | { | 60 | { |
61 | tile_bundle_bits result; | 61 | tile_bundle_bits result; |
62 | 62 | ||
@@ -254,6 +254,18 @@ P("\n"); | |||
254 | return bundle; | 254 | return bundle; |
255 | } | 255 | } |
256 | 256 | ||
257 | /* | ||
258 | * Called after execve() has started the new image. This allows us | ||
259 | * to reset the info state. Note that the mmap'ed memory, if there | ||
260 | * was any, has already been unmapped by the exec. | ||
261 | */ | ||
262 | void single_step_execve(void) | ||
263 | { | ||
264 | struct thread_info *ti = current_thread_info(); | ||
265 | kfree(ti->step_state); | ||
266 | ti->step_state = NULL; | ||
267 | } | ||
268 | |||
257 | /** | 269 | /** |
258 | * single_step_once() - entry point when single stepping has been triggered. | 270 | * single_step_once() - entry point when single stepping has been triggered. |
259 | * @regs: The machine register state | 271 | * @regs: The machine register state |
@@ -373,7 +385,7 @@ void single_step_once(struct pt_regs *regs) | |||
373 | /* branches */ | 385 | /* branches */ |
374 | case BRANCH_OPCODE_X1: | 386 | case BRANCH_OPCODE_X1: |
375 | { | 387 | { |
376 | int32_t offset = signExtend17(get_BrOff_X1(bundle)); | 388 | s32 offset = signExtend17(get_BrOff_X1(bundle)); |
377 | 389 | ||
378 | /* | 390 | /* |
379 | * For branches, we use a rewriting trick to let the | 391 | * For branches, we use a rewriting trick to let the |
@@ -731,4 +743,9 @@ void single_step_once(struct pt_regs *regs) | |||
731 | __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); | 743 | __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); |
732 | } | 744 | } |
733 | 745 | ||
746 | void single_step_execve(void) | ||
747 | { | ||
748 | /* Nothing */ | ||
749 | } | ||
750 | |||
734 | #endif /* !__tilegx__ */ | 751 | #endif /* !__tilegx__ */ |
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 9575b37a8b75..a4293102ef81 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c | |||
@@ -36,6 +36,22 @@ static unsigned long __iomem *ipi_mappings[NR_CPUS]; | |||
36 | /* Set by smp_send_stop() to avoid recursive panics. */ | 36 | /* Set by smp_send_stop() to avoid recursive panics. */ |
37 | static int stopping_cpus; | 37 | static int stopping_cpus; |
38 | 38 | ||
39 | static void __send_IPI_many(HV_Recipient *recip, int nrecip, int tag) | ||
40 | { | ||
41 | int sent = 0; | ||
42 | while (sent < nrecip) { | ||
43 | int rc = hv_send_message(recip, nrecip, | ||
44 | (HV_VirtAddr)&tag, sizeof(tag)); | ||
45 | if (rc < 0) { | ||
46 | if (!stopping_cpus) /* avoid recursive panic */ | ||
47 | panic("hv_send_message returned %d", rc); | ||
48 | break; | ||
49 | } | ||
50 | WARN_ONCE(rc == 0, "hv_send_message() returned zero\n"); | ||
51 | sent += rc; | ||
52 | } | ||
53 | } | ||
54 | |||
39 | void send_IPI_single(int cpu, int tag) | 55 | void send_IPI_single(int cpu, int tag) |
40 | { | 56 | { |
41 | HV_Recipient recip = { | 57 | HV_Recipient recip = { |
@@ -43,14 +59,13 @@ void send_IPI_single(int cpu, int tag) | |||
43 | .x = cpu % smp_width, | 59 | .x = cpu % smp_width, |
44 | .state = HV_TO_BE_SENT | 60 | .state = HV_TO_BE_SENT |
45 | }; | 61 | }; |
46 | int rc = hv_send_message(&recip, 1, (HV_VirtAddr)&tag, sizeof(tag)); | 62 | __send_IPI_many(&recip, 1, tag); |
47 | BUG_ON(rc <= 0); | ||
48 | } | 63 | } |
49 | 64 | ||
50 | void send_IPI_many(const struct cpumask *mask, int tag) | 65 | void send_IPI_many(const struct cpumask *mask, int tag) |
51 | { | 66 | { |
52 | HV_Recipient recip[NR_CPUS]; | 67 | HV_Recipient recip[NR_CPUS]; |
53 | int cpu, sent; | 68 | int cpu; |
54 | int nrecip = 0; | 69 | int nrecip = 0; |
55 | int my_cpu = smp_processor_id(); | 70 | int my_cpu = smp_processor_id(); |
56 | for_each_cpu(cpu, mask) { | 71 | for_each_cpu(cpu, mask) { |
@@ -61,17 +76,7 @@ void send_IPI_many(const struct cpumask *mask, int tag) | |||
61 | r->x = cpu % smp_width; | 76 | r->x = cpu % smp_width; |
62 | r->state = HV_TO_BE_SENT; | 77 | r->state = HV_TO_BE_SENT; |
63 | } | 78 | } |
64 | sent = 0; | 79 | __send_IPI_many(recip, nrecip, tag); |
65 | while (sent < nrecip) { | ||
66 | int rc = hv_send_message(recip, nrecip, | ||
67 | (HV_VirtAddr)&tag, sizeof(tag)); | ||
68 | if (rc <= 0) { | ||
69 | if (!stopping_cpus) /* avoid recursive panic */ | ||
70 | panic("hv_send_message returned %d", rc); | ||
71 | break; | ||
72 | } | ||
73 | sent += rc; | ||
74 | } | ||
75 | } | 80 | } |
76 | 81 | ||
77 | void send_IPI_allbutself(int tag) | 82 | void send_IPI_allbutself(int tag) |
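The new __send_IPI_many() loops because one hv_send_message() call may accept only some of the recipients; the hypervisor records per-recipient state, so the caller simply re-passes the same array until the running total reaches nrecip. A toy user-space model of that contract (the two-recipients-per-call limit is invented for the example):

#include <stdio.h>

enum { TO_BE_SENT, SENT };   /* stand-ins for the HV recipient states */

/* Pretend transport: deliver at most two pending recipients per call
 * and report how many were accepted this time. */
static int fake_send(int *state, int n)
{
        int i, done = 0;

        for (i = 0; i < n && done < 2; i++)
                if (state[i] == TO_BE_SENT) {
                        state[i] = SENT;
                        done++;
                }
        return done;
}

int main(void)
{
        int state[5] = { TO_BE_SENT, TO_BE_SENT, TO_BE_SENT,
                         TO_BE_SENT, TO_BE_SENT };
        int sent = 0, calls = 0;

        while (sent < 5) {
                sent += fake_send(state, 5);
                calls++;
        }
        printf("delivered after %d calls\n", calls);   /* 3 */
        return 0;
}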
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 0d54106be3d6..dd81713a90dc 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c | |||
@@ -44,13 +44,6 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp) | |||
44 | return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; | 44 | return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; |
45 | } | 45 | } |
46 | 46 | ||
47 | /* Is address in the specified kernel code? */ | ||
48 | static int in_kernel_text(VirtualAddress address) | ||
49 | { | ||
50 | return (address >= MEM_SV_INTRPT && | ||
51 | address < MEM_SV_INTRPT + HPAGE_SIZE); | ||
52 | } | ||
53 | |||
54 | /* Is address valid for reading? */ | 47 | /* Is address valid for reading? */ |
55 | static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) | 48 | static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) |
56 | { | 49 | { |
@@ -63,6 +56,23 @@ static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) | |||
63 | if (l1_pgtable == NULL) | 56 | if (l1_pgtable == NULL) |
64 | return 0; /* can't read user space in other tasks */ | 57 | return 0; /* can't read user space in other tasks */ |
65 | 58 | ||
59 | #ifdef CONFIG_64BIT | ||
60 | /* Find the real l1_pgtable by looking in the l0_pgtable. */ | ||
61 | pte = l1_pgtable[HV_L0_INDEX(address)]; | ||
62 | if (!hv_pte_get_present(pte)) | ||
63 | return 0; | ||
64 | pfn = hv_pte_get_pfn(pte); | ||
65 | if (pte_huge(pte)) { | ||
66 | if (!pfn_valid(pfn)) { | ||
67 | pr_err("L0 huge page has bad pfn %#lx\n", pfn); | ||
68 | return 0; | ||
69 | } | ||
70 | return hv_pte_get_present(pte) && hv_pte_get_readable(pte); | ||
71 | } | ||
72 | page = pfn_to_page(pfn); | ||
73 | BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */ | ||
74 | l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn); | ||
75 | #endif | ||
66 | pte = l1_pgtable[HV_L1_INDEX(address)]; | 76 | pte = l1_pgtable[HV_L1_INDEX(address)]; |
67 | if (!hv_pte_get_present(pte)) | 77 | if (!hv_pte_get_present(pte)) |
68 | return 0; | 78 | return 0; |
@@ -92,7 +102,7 @@ static bool read_memory_func(void *result, VirtualAddress address, | |||
92 | { | 102 | { |
93 | int retval; | 103 | int retval; |
94 | struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; | 104 | struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; |
95 | if (in_kernel_text(address)) { | 105 | if (__kernel_text_address(address)) { |
96 | /* OK to read kernel code. */ | 106 | /* OK to read kernel code. */ |
97 | } else if (address >= PAGE_OFFSET) { | 107 | } else if (address >= PAGE_OFFSET) { |
98 | /* We only tolerate kernel-space reads of this task's stack */ | 108 | /* We only tolerate kernel-space reads of this task's stack */ |
@@ -132,7 +142,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt) | |||
132 | } | 142 | } |
133 | } | 143 | } |
134 | if (EX1_PL(p->ex1) == KERNEL_PL && | 144 | if (EX1_PL(p->ex1) == KERNEL_PL && |
135 | in_kernel_text(p->pc) && | 145 | __kernel_text_address(p->pc) && |
136 | in_kernel_stack(kbt, p->sp) && | 146 | in_kernel_stack(kbt, p->sp) && |
137 | p->sp >= sp) { | 147 | p->sp >= sp) { |
138 | if (kbt->verbose) | 148 | if (kbt->verbose) |
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index f2e156e44692..49a605be94c5 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c | |||
@@ -224,3 +224,13 @@ int setup_profiling_timer(unsigned int multiplier) | |||
224 | { | 224 | { |
225 | return -EINVAL; | 225 | return -EINVAL; |
226 | } | 226 | } |
227 | |||
228 | /* | ||
229 | * Use the tile timer to convert nsecs to core clock cycles, relying | ||
230 | * on it having the same frequency as SPR_CYCLE. | ||
231 | */ | ||
232 | cycles_t ns2cycles(unsigned long nsecs) | ||
233 | { | ||
234 | struct clock_event_device *dev = &__get_cpu_var(tile_timer); | ||
235 | return ((u64)nsecs * dev->mult) >> dev->shift; | ||
236 | } | ||
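ns2cycles() leans on the clockevent device's mult/shift pair, which the timer setup code chooses so that cycles = (nsecs * mult) >> shift at the core clock frequency. A rough numeric check under an assumed 700 MHz clock (the real frequency, mult and shift come from the per-cpu tile_timer setup):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t freq  = 700000000ULL;   /* assumed core clock, Hz */
        unsigned shift = 32;             /* assumed; the kernel picks its own */
        uint64_t mult  = (freq << shift) / 1000000000ULL;

        uint64_t nsecs  = 1000;          /* 1 us */
        uint64_t cycles = (nsecs * mult) >> shift;
        printf("%llu cycles\n", (unsigned long long)cycles);  /* ~700 */
        return 0;
}

The __ndelay() rewrite in lib/delay.c below then just spins on get_cycles() until that many cycles have elapsed.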
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index c6ce378e0678..38f64fafdc10 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S | |||
@@ -59,10 +59,7 @@ SECTIONS | |||
59 | 59 | ||
60 | . = ALIGN(PAGE_SIZE); | 60 | . = ALIGN(PAGE_SIZE); |
61 | VMLINUX_SYMBOL(_sinitdata) = .; | 61 | VMLINUX_SYMBOL(_sinitdata) = .; |
62 | .init.page : AT (ADDR(.init.page) - LOAD_OFFSET) { | 62 | INIT_DATA_SECTION(16) :data =0 |
63 | *(.init.page) | ||
64 | } :data =0 | ||
65 | INIT_DATA_SECTION(16) | ||
66 | PERCPU(L2_CACHE_BYTES, PAGE_SIZE) | 63 | PERCPU(L2_CACHE_BYTES, PAGE_SIZE) |
67 | . = ALIGN(PAGE_SIZE); | 64 | . = ALIGN(PAGE_SIZE); |
68 | VMLINUX_SYMBOL(_einitdata) = .; | 65 | VMLINUX_SYMBOL(_einitdata) = .; |
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 93122d5b1558..0c26086ecbef 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile | |||
@@ -2,9 +2,8 @@ | |||
2 | # Makefile for TILE-specific library files.. | 2 | # Makefile for TILE-specific library files.. |
3 | # | 3 | # |
4 | 4 | ||
5 | lib-y = cacheflush.o checksum.o cpumask.o delay.o \ | 5 | lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \ |
6 | mb_incoherent.o uaccess.o memmove.o \ | 6 | memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ |
7 | memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ | ||
8 | strchr_$(BITS).o strlen_$(BITS).o | 7 | strchr_$(BITS).o strlen_$(BITS).o |
9 | 8 | ||
10 | ifeq ($(CONFIG_TILEGX),y) | 9 | ifeq ($(CONFIG_TILEGX),y) |
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 7a5cc706ab62..f02040d3614e 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c | |||
@@ -46,14 +46,13 @@ struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE] | |||
46 | #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ | 46 | #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ |
47 | 47 | ||
48 | /* This page is remapped on startup to be hash-for-home. */ | 48 | /* This page is remapped on startup to be hash-for-home. */ |
49 | int atomic_locks[PAGE_SIZE / sizeof(int) /* Only ATOMIC_HASH_SIZE is used */] | 49 | int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss; |
50 | __attribute__((aligned(PAGE_SIZE), section(".bss.page_aligned"))); | ||
51 | 50 | ||
52 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ | 51 | #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ |
53 | 52 | ||
54 | static inline int *__atomic_hashed_lock(volatile void *v) | 53 | static inline int *__atomic_hashed_lock(volatile void *v) |
55 | { | 54 | { |
56 | /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec.S */ | 55 | /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ |
57 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() | 56 | #if ATOMIC_LOCKS_FOUND_VIA_TABLE() |
58 | unsigned long i = | 57 | unsigned long i = |
59 | (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); | 58 | (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); |
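The __atomic_hashed_lock() context above (and the matching comment in sys_cmpxchg) hashes a user address into a page-sized array of lock words, ignoring the low 3 bits so that a 64-bit value and both of its 32-bit halves land on the same lock. A simplified sketch of that idea, not the exact kernel hash:

#include <stdio.h>

#define NLOCKS 1024   /* assumed: a page worth of int-sized locks */
static int atomic_locks[NLOCKS];
static long long word __attribute__((aligned(8)));

static int *hashed_lock(volatile void *v)
{
        unsigned long idx = ((unsigned long)v >> 3) % NLOCKS;
        return &atomic_locks[idx];
}

int main(void)
{
        int *lo = (int *)&word, *hi = lo + 1;

        /* Both 32-bit halves and the full 64-bit word share one lock. */
        printf("%d\n", hashed_lock(lo) == hashed_lock(hi));     /* 1 */
        printf("%d\n", hashed_lock(&word) == hashed_lock(lo));  /* 1 */
        return 0;
}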
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 5a5514b77e78..82f64cc63658 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S | |||
@@ -14,7 +14,7 @@ | |||
14 | * Support routines for atomic operations. Each function takes: | 14 | * Support routines for atomic operations. Each function takes: |
15 | * | 15 | * |
16 | * r0: address to manipulate | 16 | * r0: address to manipulate |
17 | * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG) | 17 | * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG) |
18 | * r2: new value to write, or for cmpxchg/add_unless, value to compare against | 18 | * r2: new value to write, or for cmpxchg/add_unless, value to compare against |
19 | * r3: (cmpxchg/xchg_add_unless) new value to write or add; | 19 | * r3: (cmpxchg/xchg_add_unless) new value to write or add; |
20 | * (atomic64 ops) high word of value to write | 20 | * (atomic64 ops) high word of value to write |
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c index 11b6164c2097..35c1d8ca5f38 100644 --- a/arch/tile/lib/cacheflush.c +++ b/arch/tile/lib/cacheflush.c | |||
@@ -21,3 +21,105 @@ void __flush_icache_range(unsigned long start, unsigned long end) | |||
21 | { | 21 | { |
22 | invalidate_icache((const void *)start, end - start, PAGE_SIZE); | 22 | invalidate_icache((const void *)start, end - start, PAGE_SIZE); |
23 | } | 23 | } |
24 | |||
25 | |||
26 | /* Force a load instruction to issue. */ | ||
27 | static inline void force_load(char *p) | ||
28 | { | ||
29 | *(volatile char *)p; | ||
30 | } | ||
31 | |||
32 | /* | ||
33 | * Flush and invalidate a VA range that is homed remotely on a single | ||
34 | * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting | ||
35 | * until the memory controller holds the flushed values. | ||
36 | */ | ||
37 | void finv_buffer_remote(void *buffer, size_t size, int hfh) | ||
38 | { | ||
39 | char *p, *base; | ||
40 | size_t step_size, load_count; | ||
41 | const unsigned long STRIPE_WIDTH = 8192; | ||
42 | |||
43 | /* | ||
44 | * Flush and invalidate the buffer out of the local L1/L2 | ||
45 | * and request the home cache to flush and invalidate as well. | ||
46 | */ | ||
47 | __finv_buffer(buffer, size); | ||
48 | |||
49 | /* | ||
50 | * Wait for the home cache to acknowledge that it has processed | ||
51 | * all the flush-and-invalidate requests. This does not mean | ||
52 | * that the flushed data has reached the memory controller yet, | ||
53 | * but it does mean the home cache is processing the flushes. | ||
54 | */ | ||
55 | __insn_mf(); | ||
56 | |||
57 | /* | ||
58 | * Issue a load to the last cache line, which can't complete | ||
59 | * until all the previously-issued flushes to the same memory | ||
60 | * controller have also completed. If we weren't striping | ||
61 | * memory, that one load would be sufficient, but since we may | ||
62 | * be, we also need to back up to the last load issued to | ||
63 | * another memory controller, which would be the point where | ||
64 | * we crossed an 8KB boundary (the granularity of striping | ||
65 | * across memory controllers). Keep backing up and doing this | ||
66 | * until we are before the beginning of the buffer, or have | ||
67 | * hit all the controllers. | ||
68 | * | ||
69 | * If we are flushing a hash-for-home buffer, it's even worse. | ||
70 | * Each line may be homed on a different tile, and each tile | ||
71 | * may have up to four lines that are on different | ||
72 | * controllers. So as we walk backwards, we have to touch | ||
73 | * enough cache lines to satisfy these constraints. In | ||
74 | * practice this ends up being close enough to "load from | ||
75 | * every cache line on a full memory stripe on each | ||
76 | * controller" that we simply do that, to simplify the logic. | ||
77 | * | ||
78 | * FIXME: See bug 9535 for some issues with this code. | ||
79 | */ | ||
80 | if (hfh) { | ||
81 | step_size = L2_CACHE_BYTES; | ||
82 | load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) * | ||
83 | (1 << CHIP_LOG_NUM_MSHIMS()); | ||
84 | } else { | ||
85 | step_size = STRIPE_WIDTH; | ||
86 | load_count = (1 << CHIP_LOG_NUM_MSHIMS()); | ||
87 | } | ||
88 | |||
89 | /* Load the last byte of the buffer. */ | ||
90 | p = (char *)buffer + size - 1; | ||
91 | force_load(p); | ||
92 | |||
93 | /* Bump down to the end of the previous stripe or cache line. */ | ||
94 | p -= step_size; | ||
95 | p = (char *)((unsigned long)p | (step_size - 1)); | ||
96 | |||
97 | /* Figure out how far back we need to go. */ | ||
98 | base = p - (step_size * (load_count - 2)); | ||
99 | if ((long)base < (long)buffer) | ||
100 | base = buffer; | ||
101 | |||
102 | /* | ||
103 | * Fire all the loads we need. The MAF only has eight entries | ||
104 | * so we can have at most eight outstanding loads, so we | ||
105 | * unroll by that amount. | ||
106 | */ | ||
107 | #pragma unroll 8 | ||
108 | for (; p >= base; p -= step_size) | ||
109 | force_load(p); | ||
110 | |||
111 | /* | ||
112 | * Repeat, but with inv's instead of loads, to get rid of the | ||
113 | * data we just loaded into our own cache and the old home L3. | ||
114 | * No need to unroll since inv's don't target a register. | ||
115 | */ | ||
116 | p = (char *)buffer + size - 1; | ||
117 | __insn_inv(p); | ||
118 | p -= step_size; | ||
119 | p = (char *)((unsigned long)p | (step_size - 1)); | ||
120 | for (; p >= base; p -= step_size) | ||
121 | __insn_inv(p); | ||
122 | |||
123 | /* Wait for the load+inv's (and thus finvs) to have completed. */ | ||
124 | __insn_mf(); | ||
125 | } | ||
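The long comment in finv_buffer_remote() boils down to: after the memory fence, issue one ordering load per memory controller (or per cache line across a full stripe in the hash-for-home case), walking backwards from the last byte of the buffer in stripe-sized steps. A small model of the non-hash-for-home walk with assumed parameters (8 KB stripes, four controllers, buffer starting at offset 0):

#include <stdio.h>

int main(void)
{
        const long STRIPE_WIDTH = 8192;  /* striping granularity (assumed) */
        const long controllers  = 4;     /* 1 << CHIP_LOG_NUM_MSHIMS() (assumed) */
        const long size = 6 * STRIPE_WIDTH;

        long step_size  = STRIPE_WIDTH;
        long load_count = controllers;
        long p = size - 1;               /* "load the last byte" */
        long base, loads = 1;

        printf("load offset %#lx\n", p);

        /* Back up to the end of the previous stripe, then step stripe by
         * stripe until each controller has seen one ordering load. */
        p -= step_size;
        p |= step_size - 1;
        base = p - step_size * (load_count - 2);
        if (base < 0)
                base = 0;
        for (; p >= base; p -= step_size, loads++)
                printf("load offset %#lx\n", p);

        printf("%ld loads for %ld controllers\n", loads, controllers);
        return 0;
}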
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c index 5801b03c13ef..cdacdd11d360 100644 --- a/arch/tile/lib/delay.c +++ b/arch/tile/lib/delay.c | |||
@@ -15,20 +15,31 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/delay.h> | 16 | #include <linux/delay.h> |
17 | #include <linux/thread_info.h> | 17 | #include <linux/thread_info.h> |
18 | #include <asm/fixmap.h> | 18 | #include <asm/timex.h> |
19 | #include <hv/hypervisor.h> | ||
20 | 19 | ||
21 | void __udelay(unsigned long usecs) | 20 | void __udelay(unsigned long usecs) |
22 | { | 21 | { |
23 | hv_nanosleep(usecs * 1000); | 22 | if (usecs > ULONG_MAX / 1000) { |
23 | WARN_ON_ONCE(usecs > ULONG_MAX / 1000); | ||
24 | usecs = ULONG_MAX / 1000; | ||
25 | } | ||
26 | __ndelay(usecs * 1000); | ||
24 | } | 27 | } |
25 | EXPORT_SYMBOL(__udelay); | 28 | EXPORT_SYMBOL(__udelay); |
26 | 29 | ||
27 | void __ndelay(unsigned long nsecs) | 30 | void __ndelay(unsigned long nsecs) |
28 | { | 31 | { |
29 | hv_nanosleep(nsecs); | 32 | cycles_t target = get_cycles(); |
33 | target += ns2cycles(nsecs); | ||
34 | while (get_cycles() < target) | ||
35 | cpu_relax(); | ||
30 | } | 36 | } |
31 | EXPORT_SYMBOL(__ndelay); | 37 | EXPORT_SYMBOL(__ndelay); |
32 | 38 | ||
33 | /* FIXME: should be declared in a header somewhere. */ | 39 | void __delay(unsigned long cycles) |
40 | { | ||
41 | cycles_t target = get_cycles() + cycles; | ||
42 | while (get_cycles() < target) | ||
43 | cpu_relax(); | ||
44 | } | ||
34 | EXPORT_SYMBOL(__delay); | 45 | EXPORT_SYMBOL(__delay); |
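The __udelay() clamp exists because usecs * 1000 is done in unsigned long arithmetic: on a 32-bit kernel it silently wraps once usecs exceeds ULONG_MAX / 1000 (a bit over four seconds). A quick user-space illustration:

#include <limits.h>
#include <stdio.h>

int main(void)
{
        unsigned long usecs = 5000000UL;             /* ask for 5 s */

        printf("limit: %lu us\n", ULONG_MAX / 1000);
        if (sizeof(unsigned long) == 4)              /* 32-bit only */
                printf("wrapped: %lu ns\n", usecs * 1000);  /* ~705 ms worth */
        return 0;
}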
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 1509c5597653..49284fae9d09 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c | |||
@@ -29,6 +29,9 @@ EXPORT_SYMBOL(__put_user_8); | |||
29 | EXPORT_SYMBOL(strnlen_user_asm); | 29 | EXPORT_SYMBOL(strnlen_user_asm); |
30 | EXPORT_SYMBOL(strncpy_from_user_asm); | 30 | EXPORT_SYMBOL(strncpy_from_user_asm); |
31 | EXPORT_SYMBOL(clear_user_asm); | 31 | EXPORT_SYMBOL(clear_user_asm); |
32 | EXPORT_SYMBOL(flush_user_asm); | ||
33 | EXPORT_SYMBOL(inv_user_asm); | ||
34 | EXPORT_SYMBOL(finv_user_asm); | ||
32 | 35 | ||
33 | /* arch/tile/kernel/entry.S */ | 36 | /* arch/tile/kernel/entry.S */ |
34 | #include <linux/kernel.h> | 37 | #include <linux/kernel.h> |
@@ -45,9 +48,6 @@ EXPORT_SYMBOL(__copy_from_user_zeroing); | |||
45 | EXPORT_SYMBOL(__copy_in_user_inatomic); | 48 | EXPORT_SYMBOL(__copy_in_user_inatomic); |
46 | #endif | 49 | #endif |
47 | 50 | ||
48 | /* arch/tile/lib/mb_incoherent.S */ | ||
49 | EXPORT_SYMBOL(__mb_incoherent); | ||
50 | |||
51 | /* hypervisor glue */ | 51 | /* hypervisor glue */ |
52 | #include <hv/hypervisor.h> | 52 | #include <hv/hypervisor.h> |
53 | EXPORT_SYMBOL(hv_dev_open); | 53 | EXPORT_SYMBOL(hv_dev_open); |
@@ -85,4 +85,8 @@ int64_t __muldi3(int64_t, int64_t); | |||
85 | EXPORT_SYMBOL(__muldi3); | 85 | EXPORT_SYMBOL(__muldi3); |
86 | uint64_t __lshrdi3(uint64_t, unsigned int); | 86 | uint64_t __lshrdi3(uint64_t, unsigned int); |
87 | EXPORT_SYMBOL(__lshrdi3); | 87 | EXPORT_SYMBOL(__lshrdi3); |
88 | uint64_t __ashrdi3(uint64_t, unsigned int); | ||
89 | EXPORT_SYMBOL(__ashrdi3); | ||
90 | uint64_t __ashldi3(uint64_t, unsigned int); | ||
91 | EXPORT_SYMBOL(__ashldi3); | ||
88 | #endif | 92 | #endif |
diff --git a/arch/tile/lib/mb_incoherent.S b/arch/tile/lib/mb_incoherent.S deleted file mode 100644 index 989ad7b68d5a..000000000000 --- a/arch/tile/lib/mb_incoherent.S +++ /dev/null | |||
@@ -1,34 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * Assembly code for invoking the HV's fence_incoherent syscall. | ||
15 | */ | ||
16 | |||
17 | #include <linux/linkage.h> | ||
18 | #include <hv/syscall_public.h> | ||
19 | #include <arch/abi.h> | ||
20 | #include <arch/chip.h> | ||
21 | |||
22 | #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() | ||
23 | |||
24 | /* | ||
25 | * Invoke the hypervisor's fence_incoherent syscall, which guarantees | ||
26 | * that all victims for cachelines homed on this tile have reached memory. | ||
27 | */ | ||
28 | STD_ENTRY(__mb_incoherent) | ||
29 | moveli TREG_SYSCALL_NR_NAME, HV_SYS_fence_incoherent | ||
30 | swint2 | ||
31 | jrp lr | ||
32 | STD_ENDPROC(__mb_incoherent) | ||
33 | |||
34 | #endif | ||
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c index f7d4a6ad61e8..b2fe15e01075 100644 --- a/arch/tile/lib/memcpy_tile64.c +++ b/arch/tile/lib/memcpy_tile64.c | |||
@@ -96,7 +96,7 @@ static void memcpy_multicache(void *dest, const void *source, | |||
96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); | 96 | newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); |
97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); | 97 | pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); |
98 | ptep = pte_offset_kernel(pmdp, newsrc); | 98 | ptep = pte_offset_kernel(pmdp, newsrc); |
99 | *ptep = src_pte; /* set_pte() would be confused by this */ | 99 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ |
100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | 100 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
101 | 101 | ||
102 | /* Actually move the data. */ | 102 | /* Actually move the data. */ |
@@ -109,7 +109,7 @@ static void memcpy_multicache(void *dest, const void *source, | |||
109 | */ | 109 | */ |
110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); | 110 | src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); |
111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ | 111 | src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ |
112 | *ptep = src_pte; /* set_pte() would be confused by this */ | 112 | __set_pte(ptep, src_pte); /* set_pte() would be confused by this */ |
113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); | 113 | local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); |
114 | 114 | ||
115 | /* | 115 | /* |
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 5cd1c4004eca..cb0999fb64b4 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
18 | #include <arch/spr_def.h> | ||
18 | 19 | ||
19 | #include "spinlock_common.h" | 20 | #include "spinlock_common.h" |
20 | 21 | ||
@@ -91,75 +92,75 @@ EXPORT_SYMBOL(arch_spin_unlock_wait); | |||
91 | #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) | 92 | #define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) |
92 | 93 | ||
93 | 94 | ||
94 | /* Lock the word, spinning until there are no tns-ers. */ | 95 | /* |
95 | static inline u32 get_rwlock(arch_rwlock_t *rwlock) | 96 | * We can get the read lock if everything but the reader bits (which |
96 | { | 97 | * are in the high part of the word) is zero, i.e. no active or |
97 | u32 iterations = 0; | 98 | * waiting writers, no tns. |
98 | for (;;) { | 99 | * |
99 | u32 val = __insn_tns((int *)&rwlock->lock); | 100 | * We guard the tns/store-back with an interrupt critical section to |
100 | if (unlikely(val & 1)) { | 101 | * preserve the semantic that the same read lock can be acquired in an |
101 | delay_backoff(iterations++); | 102 | * interrupt context. |
102 | continue; | 103 | */ |
103 | } | 104 | inline int arch_read_trylock(arch_rwlock_t *rwlock) |
104 | return val; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | int arch_read_trylock_slow(arch_rwlock_t *rwlock) | ||
109 | { | ||
110 | u32 val = get_rwlock(rwlock); | ||
111 | int locked = (val << RD_COUNT_WIDTH) == 0; | ||
112 | rwlock->lock = val + (locked << RD_COUNT_SHIFT); | ||
113 | return locked; | ||
114 | } | ||
115 | EXPORT_SYMBOL(arch_read_trylock_slow); | ||
116 | |||
117 | void arch_read_unlock_slow(arch_rwlock_t *rwlock) | ||
118 | { | ||
119 | u32 val = get_rwlock(rwlock); | ||
120 | rwlock->lock = val - (1 << RD_COUNT_SHIFT); | ||
121 | } | ||
122 | EXPORT_SYMBOL(arch_read_unlock_slow); | ||
123 | |||
124 | void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val) | ||
125 | { | 105 | { |
126 | u32 eq, mask = 1 << WR_CURR_SHIFT; | 106 | u32 val; |
127 | while (unlikely(val & 1)) { | 107 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); |
128 | /* Limited backoff since we are the highest-priority task. */ | 108 | val = __insn_tns((int *)&rwlock->lock); |
129 | relax(4); | 109 | if (likely((val << _RD_COUNT_WIDTH) == 0)) { |
130 | val = __insn_tns((int *)&rwlock->lock); | 110 | val += 1 << RD_COUNT_SHIFT; |
111 | rwlock->lock = val; | ||
112 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
113 | BUG_ON(val == 0); /* we don't expect wraparound */ | ||
114 | return 1; | ||
131 | } | 115 | } |
132 | val = __insn_addb(val, mask); | 116 | if ((val & 1) == 0) |
133 | eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); | 117 | rwlock->lock = val; |
134 | val = __insn_mz(eq & mask, val); | 118 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); |
135 | rwlock->lock = val; | 119 | return 0; |
136 | } | 120 | } |
137 | EXPORT_SYMBOL(arch_write_unlock_slow); | 121 | EXPORT_SYMBOL(arch_read_trylock); |
138 | 122 | ||
139 | /* | 123 | /* |
140 | * We spin until everything but the reader bits (which are in the high | 124 | * Spin doing arch_read_trylock() until we acquire the lock. |
141 | * part of the word) are zero, i.e. no active or waiting writers, no tns. | ||
142 | * | ||
143 | * ISSUE: This approach can permanently starve readers. A reader who sees | 125 | * ISSUE: This approach can permanently starve readers. A reader who sees |
144 | * a writer could instead take a ticket lock (just like a writer would), | 126 | * a writer could instead take a ticket lock (just like a writer would), |
145 | * and atomically enter read mode (with 1 reader) when it gets the ticket. | 127 | * and atomically enter read mode (with 1 reader) when it gets the ticket. |
146 | * This way both readers and writers will always make forward progress | 128 | * This way both readers and writers would always make forward progress |
147 | * in a finite time. | 129 | * in a finite time. |
148 | */ | 130 | */ |
149 | void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val) | 131 | void arch_read_lock(arch_rwlock_t *rwlock) |
150 | { | 132 | { |
151 | u32 iterations = 0; | 133 | u32 iterations = 0; |
152 | do { | 134 | while (unlikely(!arch_read_trylock(rwlock))) |
153 | if (!(val & 1)) | ||
154 | rwlock->lock = val; | ||
155 | delay_backoff(iterations++); | 135 | delay_backoff(iterations++); |
136 | } | ||
137 | EXPORT_SYMBOL(arch_read_lock); | ||
138 | |||
139 | void arch_read_unlock(arch_rwlock_t *rwlock) | ||
140 | { | ||
141 | u32 val, iterations = 0; | ||
142 | |||
143 | mb(); /* guarantee anything modified under the lock is visible */ | ||
144 | for (;;) { | ||
145 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | ||
156 | val = __insn_tns((int *)&rwlock->lock); | 146 | val = __insn_tns((int *)&rwlock->lock); |
157 | } while ((val << RD_COUNT_WIDTH) != 0); | 147 | if (likely((val & 1) == 0)) { |
158 | rwlock->lock = val + (1 << RD_COUNT_SHIFT); | 148 | rwlock->lock = val - (1 << _RD_COUNT_SHIFT); |
149 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
150 | break; | ||
151 | } | ||
152 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | ||
153 | delay_backoff(iterations++); | ||
154 | } | ||
159 | } | 155 | } |
160 | EXPORT_SYMBOL(arch_read_lock_slow); | 156 | EXPORT_SYMBOL(arch_read_unlock); |
161 | 157 | ||
162 | void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | 158 | /* |
159 | * We don't need an interrupt critical section here (unlike for | ||
160 | * arch_read_lock) since we should never use a bare write lock where | ||
161 | * it could be interrupted by code that could try to re-acquire it. | ||
162 | */ | ||
163 | void arch_write_lock(arch_rwlock_t *rwlock) | ||
163 | { | 164 | { |
164 | /* | 165 | /* |
165 | * The trailing underscore on this variable (and curr_ below) | 166 | * The trailing underscore on this variable (and curr_ below) |
@@ -168,6 +169,12 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
168 | */ | 169 | */ |
169 | u32 my_ticket_; | 170 | u32 my_ticket_; |
170 | u32 iterations = 0; | 171 | u32 iterations = 0; |
172 | u32 val = __insn_tns((int *)&rwlock->lock); | ||
173 | |||
174 | if (likely(val == 0)) { | ||
175 | rwlock->lock = 1 << _WR_NEXT_SHIFT; | ||
176 | return; | ||
177 | } | ||
171 | 178 | ||
172 | /* | 179 | /* |
173 | * Wait until there are no readers, then bump up the next | 180 | * Wait until there are no readers, then bump up the next |
@@ -206,23 +213,47 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) | |||
206 | relax(4); | 213 | relax(4); |
207 | } | 214 | } |
208 | } | 215 | } |
209 | EXPORT_SYMBOL(arch_write_lock_slow); | 216 | EXPORT_SYMBOL(arch_write_lock); |
210 | 217 | ||
211 | int __tns_atomic_acquire(atomic_t *lock) | 218 | int arch_write_trylock(arch_rwlock_t *rwlock) |
212 | { | 219 | { |
213 | int ret; | 220 | u32 val = __insn_tns((int *)&rwlock->lock); |
214 | u32 iterations = 0; | ||
215 | 221 | ||
216 | BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION)); | 222 | /* |
217 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); | 223 | * If a tns is in progress, or there's a waiting or active locker, |
224 | * or active readers, we can't take the lock, so give up. | ||
225 | */ | ||
226 | if (unlikely(val != 0)) { | ||
227 | if (!(val & 1)) | ||
228 | rwlock->lock = val; | ||
229 | return 0; | ||
230 | } | ||
218 | 231 | ||
219 | while ((ret = __insn_tns((void *)&lock->counter)) == 1) | 232 | /* Set the "next" field to mark it locked. */ |
220 | delay_backoff(iterations++); | 233 | rwlock->lock = 1 << _WR_NEXT_SHIFT; |
221 | return ret; | 234 | return 1; |
222 | } | 235 | } |
236 | EXPORT_SYMBOL(arch_write_trylock); | ||
223 | 237 | ||
224 | void __tns_atomic_release(atomic_t *p, int v) | 238 | void arch_write_unlock(arch_rwlock_t *rwlock) |
225 | { | 239 | { |
226 | p->counter = v; | 240 | u32 val, eq, mask; |
227 | __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); | 241 | |
242 | mb(); /* guarantee anything modified under the lock is visible */ | ||
243 | val = __insn_tns((int *)&rwlock->lock); | ||
244 | if (likely(val == (1 << _WR_NEXT_SHIFT))) { | ||
245 | rwlock->lock = 0; | ||
246 | return; | ||
247 | } | ||
248 | while (unlikely(val & 1)) { | ||
249 | /* Limited backoff since we are the highest-priority task. */ | ||
250 | relax(4); | ||
251 | val = __insn_tns((int *)&rwlock->lock); | ||
252 | } | ||
253 | mask = 1 << WR_CURR_SHIFT; | ||
254 | val = __insn_addb(val, mask); | ||
255 | eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); | ||
256 | val = __insn_mz(eq & mask, val); | ||
257 | rwlock->lock = val; | ||
228 | } | 258 | } |
259 | EXPORT_SYMBOL(arch_write_unlock); | ||
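The rewritten rwlock fast paths all key off the layout of the 32-bit lock word: bit 0 is the tns busy flag, the writer ticket fields sit above it, and the reader count occupies the top bits, so shifting left by the reader-count width tests "no writers, no tns in progress" in one step. A toy model with assumed field widths and positions (the real ones live in spinlock_32.h):

#include <stdint.h>
#include <stdio.h>

/* Assumed layout, mirroring the tests in the code above: bit 0 is the
 * tns busy flag, writer ticket fields occupy the middle bits, and the
 * top RD_COUNT_WIDTH bits count readers. */
#define RD_COUNT_SHIFT 24
#define RD_COUNT_WIDTH 8

static int read_trylock_would_succeed(uint32_t val)
{
        /* Everything below the reader count must be zero. */
        return (uint32_t)(val << RD_COUNT_WIDTH) == 0;
}

int main(void)
{
        printf("%d\n", read_trylock_would_succeed(0));                    /* 1: unlocked */
        printf("%d\n", read_trylock_would_succeed(3u << RD_COUNT_SHIFT)); /* 1: readers only */
        printf("%d\n", read_trylock_would_succeed(1));                    /* 0: tns in flight */
        printf("%d\n", read_trylock_would_succeed(1u << 1));              /* 0: writer/ticket bits set */
        return 0;
}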
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index dcebfc831cd6..758f597f488c 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c | |||
@@ -655,14 +655,6 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, | |||
655 | } | 655 | } |
656 | 656 | ||
657 | /* | 657 | /* |
658 | * NOTE: the one other type of access that might bring us here | ||
659 | * are the memory ops in __tns_atomic_acquire/__tns_atomic_release, | ||
660 | * but we don't have to check specially for them since we can | ||
661 | * always safely return to the address of the fault and retry, | ||
662 | * since no separate atomic locks are involved. | ||
663 | */ | ||
664 | |||
665 | /* | ||
666 | * Now that we have released the atomic lock (if necessary), | 658 | * Now that we have released the atomic lock (if necessary), |
667 | * it's safe to spin if the PTE that caused the fault was migrating. | 659 | * it's safe to spin if the PTE that caused the fault was migrating. |
668 | */ | 660 | */ |
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index d78df3a6ee15..cbe6f4f9eca3 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c | |||
@@ -179,23 +179,46 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, | |||
179 | panic("Unsafe to continue."); | 179 | panic("Unsafe to continue."); |
180 | } | 180 | } |
181 | 181 | ||
182 | void flush_remote_page(struct page *page, int order) | ||
183 | { | ||
184 | int i, pages = (1 << order); | ||
185 | for (i = 0; i < pages; ++i, ++page) { | ||
186 | void *p = kmap_atomic(page); | ||
187 | int hfh = 0; | ||
188 | int home = page_home(page); | ||
189 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
190 | if (home == PAGE_HOME_HASH) | ||
191 | hfh = 1; | ||
192 | else | ||
193 | #endif | ||
194 | BUG_ON(home < 0 || home >= NR_CPUS); | ||
195 | finv_buffer_remote(p, PAGE_SIZE, hfh); | ||
196 | kunmap_atomic(p); | ||
197 | } | ||
198 | } | ||
199 | |||
182 | void homecache_evict(const struct cpumask *mask) | 200 | void homecache_evict(const struct cpumask *mask) |
183 | { | 201 | { |
184 | flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); | 202 | flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); |
185 | } | 203 | } |
186 | 204 | ||
187 | /* Return a mask of the cpus whose caches currently own these pages. */ | 205 | /* |
188 | static void homecache_mask(struct page *page, int pages, | 206 | * Return a mask of the cpus whose caches currently own these pages. |
189 | struct cpumask *home_mask) | 207 | * The return value is whether the pages are all coherently cached |
208 | * (i.e. none are immutable, incoherent, or uncached). | ||
209 | */ | ||
210 | static int homecache_mask(struct page *page, int pages, | ||
211 | struct cpumask *home_mask) | ||
190 | { | 212 | { |
191 | int i; | 213 | int i; |
214 | int cached_coherently = 1; | ||
192 | cpumask_clear(home_mask); | 215 | cpumask_clear(home_mask); |
193 | for (i = 0; i < pages; ++i) { | 216 | for (i = 0; i < pages; ++i) { |
194 | int home = page_home(&page[i]); | 217 | int home = page_home(&page[i]); |
195 | if (home == PAGE_HOME_IMMUTABLE || | 218 | if (home == PAGE_HOME_IMMUTABLE || |
196 | home == PAGE_HOME_INCOHERENT) { | 219 | home == PAGE_HOME_INCOHERENT) { |
197 | cpumask_copy(home_mask, cpu_possible_mask); | 220 | cpumask_copy(home_mask, cpu_possible_mask); |
198 | return; | 221 | return 0; |
199 | } | 222 | } |
200 | #if CHIP_HAS_CBOX_HOME_MAP() | 223 | #if CHIP_HAS_CBOX_HOME_MAP() |
201 | if (home == PAGE_HOME_HASH) { | 224 | if (home == PAGE_HOME_HASH) { |
@@ -203,11 +226,14 @@ static void homecache_mask(struct page *page, int pages, | |||
203 | continue; | 226 | continue; |
204 | } | 227 | } |
205 | #endif | 228 | #endif |
206 | if (home == PAGE_HOME_UNCACHED) | 229 | if (home == PAGE_HOME_UNCACHED) { |
230 | cached_coherently = 0; | ||
207 | continue; | 231 | continue; |
232 | } | ||
208 | BUG_ON(home < 0 || home >= NR_CPUS); | 233 | BUG_ON(home < 0 || home >= NR_CPUS); |
209 | cpumask_set_cpu(home, home_mask); | 234 | cpumask_set_cpu(home, home_mask); |
210 | } | 235 | } |
236 | return cached_coherently; | ||
211 | } | 237 | } |
212 | 238 | ||
213 | /* | 239 | /* |
@@ -386,7 +412,7 @@ void homecache_change_page_home(struct page *page, int order, int home) | |||
386 | pte_t *ptep = virt_to_pte(NULL, kva); | 412 | pte_t *ptep = virt_to_pte(NULL, kva); |
387 | pte_t pteval = *ptep; | 413 | pte_t pteval = *ptep; |
388 | BUG_ON(!pte_present(pteval) || pte_huge(pteval)); | 414 | BUG_ON(!pte_present(pteval) || pte_huge(pteval)); |
389 | *ptep = pte_set_home(pteval, home); | 415 | __set_pte(ptep, pte_set_home(pteval, home)); |
390 | } | 416 | } |
391 | } | 417 | } |
392 | 418 | ||
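With this change homecache_mask() reports whether every page in the range is coherently cached, so a caller can decide between a cheap TLB-only flush and a full eviction. The sketch below is a hypothetical caller, not code from this patch, and only illustrates how the new return value might be consumed.

        /* Hypothetical caller: only force a cache eviction when
         * homecache_mask() reports an incoherent or uncached page.
         */
        static void flush_pages_example(struct page *page, int npages)
        {
                struct cpumask home_mask;
                int coherent = homecache_mask(page, npages, &home_mask);

                if (!coherent) {
                        /* Some page is immutable, incoherent, or uncached:
                         * be conservative and evict from the whole mask. */
                        homecache_evict(&home_mask);
                }
                /* Otherwise a TLB shootdown of the owning cpus suffices. */
        }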
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index 0b9ce69b0ee5..d6e87fda2fb2 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c | |||
@@ -53,22 +53,11 @@ | |||
53 | 53 | ||
54 | #include "migrate.h" | 54 | #include "migrate.h" |
55 | 55 | ||
56 | /* | ||
57 | * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)" | ||
58 | * in the Tile Kconfig, but this generates configure warnings. | ||
59 | * Do it here and force people to get it right to compile this file. | ||
60 | * The problem is that with 4KB small pages and 16MB huge pages, | ||
61 | * the default value doesn't allow us to group enough small pages | ||
62 | * together to make up a huge page. | ||
63 | */ | ||
64 | #if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1 | ||
65 | # error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size" | ||
66 | #endif | ||
67 | |||
68 | #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) | 56 | #define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) |
69 | 57 | ||
70 | #ifndef __tilegx__ | 58 | #ifndef __tilegx__ |
71 | unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; | 59 | unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; |
60 | EXPORT_SYMBOL(VMALLOC_RESERVE); | ||
72 | #endif | 61 | #endif |
73 | 62 | ||
74 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 63 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
@@ -445,7 +434,7 @@ static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va) | |||
445 | 434 | ||
446 | /* Temporary page table we use for staging. */ | 435 | /* Temporary page table we use for staging. */ |
447 | static pgd_t pgtables[PTRS_PER_PGD] | 436 | static pgd_t pgtables[PTRS_PER_PGD] |
448 | __attribute__((section(".init.page"))); | 437 | __attribute__((aligned(HV_PAGE_TABLE_ALIGN))); |
449 | 438 | ||
450 | /* | 439 | /* |
451 | * This maps the physical memory to kernel virtual address space, a total | 440 | * This maps the physical memory to kernel virtual address space, a total |
@@ -653,6 +642,17 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
653 | memcpy(pgd_base, pgtables, sizeof(pgtables)); | 642 | memcpy(pgd_base, pgtables, sizeof(pgtables)); |
654 | __install_page_table(pgd_base, __get_cpu_var(current_asid), | 643 | __install_page_table(pgd_base, __get_cpu_var(current_asid), |
655 | swapper_pgprot); | 644 | swapper_pgprot); |
645 | |||
646 | /* | ||
647 | * We just read swapper_pgprot and thus brought it into the cache, | ||
648 | * with its new home & caching mode. When we start the other CPUs, | ||
649 | * they're going to reference swapper_pgprot via their initial fake | ||
650 | * VA-is-PA mappings, which cache everything locally. At that | ||
651 | * time, if it's in our cache with a conflicting home, the | ||
652 | * simulator's coherence checker will complain. So, flush it out | ||
653 | * of our cache; we're not going to ever use it again anyway. | ||
654 | */ | ||
655 | __insn_finv(&swapper_pgprot); | ||
656 | } | 656 | } |
657 | 657 | ||
658 | /* | 658 | /* |
@@ -950,11 +950,7 @@ struct kmem_cache *pgd_cache; | |||
950 | 950 | ||
951 | void __init pgtable_cache_init(void) | 951 | void __init pgtable_cache_init(void) |
952 | { | 952 | { |
953 | pgd_cache = kmem_cache_create("pgd", | 953 | pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL); |
954 | PTRS_PER_PGD*sizeof(pgd_t), | ||
955 | PTRS_PER_PGD*sizeof(pgd_t), | ||
956 | 0, | ||
957 | NULL); | ||
958 | if (!pgd_cache) | 954 | if (!pgd_cache) |
959 | panic("pgtable_cache_init(): Cannot create pgd cache"); | 955 | panic("pgtable_cache_init(): Cannot create pgd cache"); |
960 | } | 956 | } |
@@ -989,7 +985,7 @@ static long __write_once initfree = 1; | |||
989 | static int __init set_initfree(char *str) | 985 | static int __init set_initfree(char *str) |
990 | { | 986 | { |
991 | long val; | 987 | long val; |
992 | if (strict_strtol(str, 0, &val)) { | 988 | if (strict_strtol(str, 0, &val) == 0) { |
993 | initfree = val; | 989 | initfree = val; |
994 | pr_info("initfree: %s free init pages\n", | 990 | pr_info("initfree: %s free init pages\n", |
995 | initfree ? "will" : "won't"); | 991 | initfree ? "will" : "won't"); |
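The last hunk fixes an inverted test: strict_strtol() returns 0 on success, so the old code only honored "initfree=" when parsing failed. A minimal sketch of the corrected pattern follows; set_example_param() and the "example=" option are invented names used purely for illustration.

        /* Boot-parameter parsing sketch: strict_strtol() (later kstrtol())
         * returns 0 on success, so only act on that case.
         */
        static int __init set_example_param(char *str)
        {
                long val;

                if (strict_strtol(str, 0, &val) == 0)
                        pr_info("example parameter set to %ld\n", val);
                else
                        pr_warning("could not parse \"%s\"\n", str);
                return 1;
        }
        __setup("example=", set_example_param);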
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S index f738765cd1e6..ac01a7cdf77f 100644 --- a/arch/tile/mm/migrate_32.S +++ b/arch/tile/mm/migrate_32.S | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/linkage.h> | 18 | #include <linux/linkage.h> |
19 | #include <linux/threads.h> | 19 | #include <linux/threads.h> |
20 | #include <asm/page.h> | 20 | #include <asm/page.h> |
21 | #include <asm/thread_info.h> | ||
21 | #include <asm/types.h> | 22 | #include <asm/types.h> |
22 | #include <asm/asm-offsets.h> | 23 | #include <asm/asm-offsets.h> |
23 | #include <hv/hypervisor.h> | 24 | #include <hv/hypervisor.h> |
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 1f5430c53d0d..1a2b36f8866d 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c | |||
@@ -142,6 +142,76 @@ pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) | |||
142 | } | 142 | } |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | /** | ||
146 | * shatter_huge_page() - ensure a given address is mapped by a small page. | ||
147 | * | ||
148 | * This function converts a huge PTE mapping kernel LOWMEM into a bunch | ||
149 | * of small PTEs with the same caching. No cache flush required, but we | ||
150 | * must do a global TLB flush. | ||
151 | * | ||
152 | * Any caller that wishes to modify a kernel mapping that might | ||
153 | * have been made with a huge page should call this function, | ||
154 | * since doing so properly avoids race conditions with installing the | ||
155 | * newly-shattered page and then flushing all the TLB entries. | ||
156 | * | ||
157 | * @addr: Address at which to shatter any existing huge page. | ||
158 | */ | ||
159 | void shatter_huge_page(unsigned long addr) | ||
160 | { | ||
161 | pgd_t *pgd; | ||
162 | pud_t *pud; | ||
163 | pmd_t *pmd; | ||
164 | unsigned long flags = 0; /* happy compiler */ | ||
165 | #ifdef __PAGETABLE_PMD_FOLDED | ||
166 | struct list_head *pos; | ||
167 | #endif | ||
168 | |||
169 | /* Get a pointer to the pmd entry that we need to change. */ | ||
170 | addr &= HPAGE_MASK; | ||
171 | BUG_ON(pgd_addr_invalid(addr)); | ||
172 | BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */ | ||
173 | pgd = swapper_pg_dir + pgd_index(addr); | ||
174 | pud = pud_offset(pgd, addr); | ||
175 | BUG_ON(!pud_present(*pud)); | ||
176 | pmd = pmd_offset(pud, addr); | ||
177 | BUG_ON(!pmd_present(*pmd)); | ||
178 | if (!pmd_huge_page(*pmd)) | ||
179 | return; | ||
180 | |||
181 | /* | ||
182 | * Grab the pgd_lock, since we may need it to walk the pgd_list, | ||
183 | * and since we need some kind of lock here to avoid races. | ||
184 | */ | ||
185 | spin_lock_irqsave(&pgd_lock, flags); | ||
186 | if (!pmd_huge_page(*pmd)) { | ||
187 | /* Lost the race to convert the huge page. */ | ||
188 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
189 | return; | ||
190 | } | ||
191 | |||
192 | /* Shatter the huge page into the preallocated L2 page table. */ | ||
193 | pmd_populate_kernel(&init_mm, pmd, | ||
194 | get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); | ||
195 | |||
196 | #ifdef __PAGETABLE_PMD_FOLDED | ||
197 | /* Walk every pgd on the system and update the pmd there. */ | ||
198 | list_for_each(pos, &pgd_list) { | ||
199 | pmd_t *copy_pmd; | ||
200 | pgd = list_to_pgd(pos) + pgd_index(addr); | ||
201 | pud = pud_offset(pgd, addr); | ||
202 | copy_pmd = pmd_offset(pud, addr); | ||
203 | __set_pmd(copy_pmd, *pmd); | ||
204 | } | ||
205 | #endif | ||
206 | |||
207 | /* Tell every cpu to notice the change. */ | ||
208 | flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE, | ||
209 | cpu_possible_mask, NULL, 0); | ||
210 | |||
211 | /* Hold the lock until the TLB flush is finished to avoid races. */ | ||
212 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
213 | } | ||
214 | |||
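The kerneldoc above says any code that is about to modify a kernel LOWMEM mapping possibly covered by a huge PTE should shatter it first. A hypothetical caller, with invented names and an illustrative TLB flush, might look like this (the helpers it uses are the ones visible elsewhere in this patch):

        /* Hypothetical caller: rehome one small kernel page that may
         * currently sit under a huge mapping. shatter_huge_page()
         * guarantees a small-page PTE exists before we edit it.
         */
        static void set_kernel_page_home_example(unsigned long kva, int home)
        {
                pte_t *ptep;

                shatter_huge_page(kva);         /* no-op if already small-mapped */
                ptep = virt_to_pte(NULL, kva);
                __set_pte(ptep, pte_set_home(*ptep, home));
                /* then shoot down stale TLB entries as needed */
                flush_tlb_kernel_range(kva, kva + PAGE_SIZE);
        }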
145 | /* | 215 | /* |
146 | * List of all pgd's needed so it can invalidate entries in both cached | 216 | * List of all pgd's needed so it can invalidate entries in both cached |
147 | * and uncached pgd's. This is essentially codepath-based locking | 217 | * and uncached pgd's. This is essentially codepath-based locking |
@@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd) | |||
184 | BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); | 254 | BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); |
185 | #endif | 255 | #endif |
186 | 256 | ||
187 | clone_pgd_range(pgd + KERNEL_PGD_INDEX_START, | 257 | memcpy(pgd + KERNEL_PGD_INDEX_START, |
188 | swapper_pg_dir + KERNEL_PGD_INDEX_START, | 258 | swapper_pg_dir + KERNEL_PGD_INDEX_START, |
189 | KERNEL_PGD_PTRS); | 259 | KERNEL_PGD_PTRS * sizeof(pgd_t)); |
190 | 260 | ||
191 | pgd_list_add(pgd); | 261 | pgd_list_add(pgd); |
192 | spin_unlock_irqrestore(&pgd_lock, flags); | 262 | spin_unlock_irqrestore(&pgd_lock, flags); |
@@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
220 | 290 | ||
221 | struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | 291 | struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) |
222 | { | 292 | { |
223 | gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP; | 293 | gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; |
224 | struct page *p; | 294 | struct page *p; |
295 | #if L2_USER_PGTABLE_ORDER > 0 | ||
296 | int i; | ||
297 | #endif | ||
225 | 298 | ||
226 | #ifdef CONFIG_HIGHPTE | 299 | #ifdef CONFIG_HIGHPTE |
227 | flags |= __GFP_HIGHMEM; | 300 | flags |= __GFP_HIGHMEM; |
@@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
231 | if (p == NULL) | 304 | if (p == NULL) |
232 | return NULL; | 305 | return NULL; |
233 | 306 | ||
307 | #if L2_USER_PGTABLE_ORDER > 0 | ||
308 | /* | ||
309 | * Make every page have a page_count() of one, not just the first. | ||
310 | * We don't use __GFP_COMP since it doesn't look like it works | ||
311 | * correctly with tlb_remove_page(). | ||
312 | */ | ||
313 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
314 | init_page_count(p+i); | ||
315 | inc_zone_page_state(p+i, NR_PAGETABLE); | ||
316 | } | ||
317 | #endif | ||
318 | |||
234 | pgtable_page_ctor(p); | 319 | pgtable_page_ctor(p); |
235 | return p; | 320 | return p; |
236 | } | 321 | } |
@@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
242 | */ | 327 | */ |
243 | void pte_free(struct mm_struct *mm, struct page *p) | 328 | void pte_free(struct mm_struct *mm, struct page *p) |
244 | { | 329 | { |
330 | int i; | ||
331 | |||
245 | pgtable_page_dtor(p); | 332 | pgtable_page_dtor(p); |
246 | __free_pages(p, L2_USER_PGTABLE_ORDER); | 333 | __free_page(p); |
334 | |||
335 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
336 | __free_page(p+i); | ||
337 | dec_zone_page_state(p+i, NR_PAGETABLE); | ||
338 | } | ||
247 | } | 339 | } |
248 | 340 | ||
249 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, | 341 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, |
@@ -252,18 +344,11 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, | |||
252 | int i; | 344 | int i; |
253 | 345 | ||
254 | pgtable_page_dtor(pte); | 346 | pgtable_page_dtor(pte); |
255 | tlb->need_flush = 1; | 347 | tlb_remove_page(tlb, pte); |
256 | if (tlb_fast_mode(tlb)) { | 348 | |
257 | struct page *pte_pages[L2_USER_PGTABLE_PAGES]; | 349 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { |
258 | for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) | 350 | tlb_remove_page(tlb, pte + i); |
259 | pte_pages[i] = pte + i; | 351 | dec_zone_page_state(pte + i, NR_PAGETABLE); |
260 | free_pages_and_swap_cache(pte_pages, L2_USER_PGTABLE_PAGES); | ||
261 | return; | ||
262 | } | ||
263 | for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
264 | tlb->pages[tlb->nr++] = pte + i; | ||
265 | if (tlb->nr >= FREE_PTE_NR) | ||
266 | tlb_flush_mmu(tlb, 0, 0); | ||
267 | } | 352 | } |
268 | } | 353 | } |
269 | 354 | ||
@@ -346,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot) | |||
346 | return x + y * smp_width; | 431 | return x + y * smp_width; |
347 | } | 432 | } |
348 | 433 | ||
349 | void set_pte_order(pte_t *ptep, pte_t pte, int order) | 434 | /* |
435 | * Convert a kernel VA to a PA and homing information. | ||
436 | */ | ||
437 | int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte) | ||
350 | { | 438 | { |
351 | unsigned long pfn = pte_pfn(pte); | 439 | struct page *page = virt_to_page(va); |
352 | struct page *page = pfn_to_page(pfn); | 440 | pte_t null_pte = { 0 }; |
353 | 441 | ||
354 | /* Update the home of a PTE if necessary */ | 442 | *cpa = __pa(va); |
355 | pte = pte_set_home(pte, page_home(page)); | ||
356 | 443 | ||
444 | /* Note that this is not writing a page table, just returning a pte. */ | ||
445 | *pte = pte_set_home(null_pte, page_home(page)); | ||
446 | |||
447 | return 0; /* return non-zero if not hfh? */ | ||
448 | } | ||
449 | EXPORT_SYMBOL(va_to_cpa_and_pte); | ||
450 | |||
451 | void __set_pte(pte_t *ptep, pte_t pte) | ||
452 | { | ||
357 | #ifdef __tilegx__ | 453 | #ifdef __tilegx__ |
358 | *ptep = pte; | 454 | *ptep = pte; |
359 | #else | 455 | #else |
360 | /* | 456 | # if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 |
361 | * When setting a PTE, write the high bits first, then write | 457 | # error Must write the present and migrating bits last |
362 | * the low bits. This sets the "present" bit only after the | 458 | # endif |
363 | * other bits are in place. If a particular PTE update | 459 | if (pte_present(pte)) { |
364 | * involves transitioning from one valid PTE to another, it | 460 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); |
365 | * may be necessary to call set_pte_order() more than once, | 461 | barrier(); |
366 | * transitioning via a suitable intermediate state. | 462 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); |
367 | * Note that this sequence also means that if we are transitioning | 463 | } else { |
368 | * from any migrating PTE to a non-migrating one, we will not | 464 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); |
369 | * see a half-updated PTE with the migrating bit off. | 465 | barrier(); |
370 | */ | 466 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); |
371 | #if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 | 467 | } |
372 | # error Must write the present and migrating bits last | 468 | #endif /* __tilegx__ */ |
373 | #endif | 469 | } |
374 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); | 470 | |
375 | barrier(); | 471 | void set_pte(pte_t *ptep, pte_t pte) |
376 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); | 472 | { |
377 | #endif | 473 | struct page *page = pfn_to_page(pte_pfn(pte)); |
474 | |||
475 | /* Update the home of a PTE if necessary */ | ||
476 | pte = pte_set_home(pte, page_home(page)); | ||
477 | |||
478 | __set_pte(ptep, pte); | ||
378 | } | 479 | } |
379 | 480 | ||
380 | /* Can this mm load a PTE with cached_priority set? */ | 481 | /* Can this mm load a PTE with cached_priority set? */ |
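__set_pte() above encodes the ordering rule for 32-bit tile: the half of the 64-bit PTE holding the "present" and "migrating" bits (the low word, per the #error check) must never be observable alongside a stale other half, so the low word is written last when a PTE becomes valid and first when it becomes invalid. The fragment below merely restates that invariant in generic terms; it assumes the kernel's barrier() compiler barrier and is not the patch's code.

        /* Illustrative restatement of the split 64-bit PTE update,
         * where the low 32-bit word holds the "present" bit.
         */
        static void set_pte64_split(volatile u32 *lo, volatile u32 *hi,
                                    u64 newval, int becomes_present)
        {
                if (becomes_present) {
                        *hi = (u32)(newval >> 32);  /* harmless while still invalid */
                        barrier();                  /* keep the store order */
                        *lo = (u32)newval;          /* present bit appears last */
                } else {
                        *lo = (u32)newval;          /* present bit disappears first */
                        barrier();
                        *hi = (u32)(newval >> 32);
                }
        }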
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index fe70a341bd8b..fac1a2002e67 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig | |||
@@ -7,7 +7,7 @@ | |||
7 | menuconfig EDAC | 7 | menuconfig EDAC |
8 | bool "EDAC (Error Detection And Correction) reporting" | 8 | bool "EDAC (Error Detection And Correction) reporting" |
9 | depends on HAS_IOMEM | 9 | depends on HAS_IOMEM |
10 | depends on X86 || PPC | 10 | depends on X86 || PPC || TILE |
11 | help | 11 | help |
12 | EDAC is designed to report errors in the core system. | 12 | EDAC is designed to report errors in the core system. |
13 | These are low-level errors that are reported in the CPU or | 13 | These are low-level errors that are reported in the CPU or |
@@ -282,4 +282,12 @@ config EDAC_CPC925 | |||
282 | a companion chip to the PowerPC 970 family of | 282 | a companion chip to the PowerPC 970 family of |
283 | processors. | 283 | processors. |
284 | 284 | ||
285 | config EDAC_TILE | ||
286 | tristate "Tilera Memory Controller" | ||
287 | depends on EDAC_MM_EDAC && TILE | ||
288 | default y | ||
289 | help | ||
290 | Support for error detection and correction on the | ||
291 | Tilera memory controller. | ||
292 | |||
285 | endif # EDAC | 293 | endif # EDAC |
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index ba2898b3639b..3e239133e29e 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile | |||
@@ -54,3 +54,4 @@ obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o | |||
54 | obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o | 54 | obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o |
55 | obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o | 55 | obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o |
56 | 56 | ||
57 | obj-$(CONFIG_EDAC_TILE) += tile_edac.o | ||
diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c new file mode 100644 index 000000000000..1d5cf06f6c6b --- /dev/null +++ b/drivers/edac/tile_edac.c | |||
@@ -0,0 +1,254 @@ | |||
1 | /* | ||
2 | * Copyright 2011 Tilera Corporation. All Rights Reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation, version 2. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, but | ||
9 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
11 | * NON INFRINGEMENT. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * Tilera-specific EDAC driver. | ||
14 | * | ||
15 | * This source code is derived from the following driver: | ||
16 | * | ||
17 | * Cell MIC driver for ECC counting | ||
18 | * | ||
19 | * Copyright 2007 Benjamin Herrenschmidt, IBM Corp. | ||
20 | * <benh@kernel.crashing.org> | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/platform_device.h> | ||
27 | #include <linux/io.h> | ||
28 | #include <linux/uaccess.h> | ||
29 | #include <linux/edac.h> | ||
30 | #include <hv/hypervisor.h> | ||
31 | #include <hv/drv_mshim_intf.h> | ||
32 | |||
33 | #include "edac_core.h" | ||
34 | |||
35 | #define DRV_NAME "tile-edac" | ||
36 | |||
37 | /* Number of cs_rows needed per memory controller on TILEPro. */ | ||
38 | #define TILE_EDAC_NR_CSROWS 1 | ||
39 | |||
40 | /* Number of channels per memory controller on TILEPro. */ | ||
41 | #define TILE_EDAC_NR_CHANS 1 | ||
42 | |||
43 | /* Granularity of reported error in bytes on TILEPro. */ | ||
44 | #define TILE_EDAC_ERROR_GRAIN 8 | ||
45 | |||
46 | /* TILE processor has multiple independent memory controllers. */ | ||
47 | struct platform_device *mshim_pdev[TILE_MAX_MSHIMS]; | ||
48 | |||
49 | struct tile_edac_priv { | ||
50 | int hv_devhdl; /* Hypervisor device handle. */ | ||
51 | int node; /* Memory controller instance #. */ | ||
52 | unsigned int ce_count; /* | ||
53 | * Correctable-error counter | ||
54 | * kept by the driver. | ||
55 | */ | ||
56 | }; | ||
57 | |||
58 | static void tile_edac_check(struct mem_ctl_info *mci) | ||
59 | { | ||
60 | struct tile_edac_priv *priv = mci->pvt_info; | ||
61 | struct mshim_mem_error mem_error; | ||
62 | |||
63 | if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_error, | ||
64 | sizeof(struct mshim_mem_error), MSHIM_MEM_ERROR_OFF) != | ||
65 | sizeof(struct mshim_mem_error)) { | ||
66 | pr_err(DRV_NAME ": MSHIM_MEM_ERROR_OFF pread failure.\n"); | ||
67 | return; | ||
68 | } | ||
69 | |||
70 | /* Check if the current error count is different from the saved one. */ | ||
71 | if (mem_error.sbe_count != priv->ce_count) { | ||
72 | dev_dbg(mci->dev, "ECC CE err on node %d\n", priv->node); | ||
73 | priv->ce_count = mem_error.sbe_count; | ||
74 | edac_mc_handle_ce(mci, 0, 0, 0, 0, 0, mci->ctl_name); | ||
75 | } | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * Initialize the 'csrows' table within the mci control structure with the | ||
80 | * addressing of memory. | ||
81 | */ | ||
82 | static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci) | ||
83 | { | ||
84 | struct csrow_info *csrow = &mci->csrows[0]; | ||
85 | struct tile_edac_priv *priv = mci->pvt_info; | ||
86 | struct mshim_mem_info mem_info; | ||
87 | |||
88 | if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info, | ||
89 | sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) != | ||
90 | sizeof(struct mshim_mem_info)) { | ||
91 | pr_err(DRV_NAME ": MSHIM_MEM_INFO_OFF pread failure.\n"); | ||
92 | return -1; | ||
93 | } | ||
94 | |||
95 | if (mem_info.mem_ecc) | ||
96 | csrow->edac_mode = EDAC_SECDED; | ||
97 | else | ||
98 | csrow->edac_mode = EDAC_NONE; | ||
99 | switch (mem_info.mem_type) { | ||
100 | case DDR2: | ||
101 | csrow->mtype = MEM_DDR2; | ||
102 | break; | ||
103 | |||
104 | case DDR3: | ||
105 | csrow->mtype = MEM_DDR3; | ||
106 | break; | ||
107 | |||
108 | default: | ||
109 | return -1; | ||
110 | } | ||
111 | |||
112 | csrow->first_page = 0; | ||
113 | csrow->nr_pages = mem_info.mem_size >> PAGE_SHIFT; | ||
114 | csrow->last_page = csrow->first_page + csrow->nr_pages - 1; | ||
115 | csrow->grain = TILE_EDAC_ERROR_GRAIN; | ||
116 | csrow->dtype = DEV_UNKNOWN; | ||
117 | |||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | static int __devinit tile_edac_mc_probe(struct platform_device *pdev) | ||
122 | { | ||
123 | char hv_file[32]; | ||
124 | int hv_devhdl; | ||
125 | struct mem_ctl_info *mci; | ||
126 | struct tile_edac_priv *priv; | ||
127 | int rc; | ||
128 | |||
129 | sprintf(hv_file, "mshim/%d", pdev->id); | ||
130 | hv_devhdl = hv_dev_open((HV_VirtAddr)hv_file, 0); | ||
131 | if (hv_devhdl < 0) | ||
132 | return -EINVAL; | ||
133 | |||
134 | /* A TILE MC has a single channel and one chip-select row. */ | ||
135 | mci = edac_mc_alloc(sizeof(struct tile_edac_priv), | ||
136 | TILE_EDAC_NR_CSROWS, TILE_EDAC_NR_CHANS, pdev->id); | ||
137 | if (mci == NULL) | ||
138 | return -ENOMEM; | ||
139 | priv = mci->pvt_info; | ||
140 | priv->node = pdev->id; | ||
141 | priv->hv_devhdl = hv_devhdl; | ||
142 | |||
143 | mci->dev = &pdev->dev; | ||
144 | mci->mtype_cap = MEM_FLAG_DDR2; | ||
145 | mci->edac_ctl_cap = EDAC_FLAG_SECDED; | ||
146 | |||
147 | mci->mod_name = DRV_NAME; | ||
148 | mci->ctl_name = "TILEPro_Memory_Controller"; | ||
149 | mci->dev_name = dev_name(&pdev->dev); | ||
150 | mci->edac_check = tile_edac_check; | ||
151 | |||
152 | /* | ||
153 | * Initialize the MC control structure 'csrows' table | ||
154 | * with the mapping and control information. | ||
155 | */ | ||
156 | if (tile_edac_init_csrows(mci)) { | ||
157 | /* No csrows found. */ | ||
158 | mci->edac_cap = EDAC_FLAG_NONE; | ||
159 | } else { | ||
160 | mci->edac_cap = EDAC_FLAG_SECDED; | ||
161 | } | ||
162 | |||
163 | platform_set_drvdata(pdev, mci); | ||
164 | |||
165 | /* Register with EDAC core */ | ||
166 | rc = edac_mc_add_mc(mci); | ||
167 | if (rc) { | ||
168 | dev_err(&pdev->dev, "failed to register with EDAC core\n"); | ||
169 | edac_mc_free(mci); | ||
170 | return rc; | ||
171 | } | ||
172 | |||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | static int __devexit tile_edac_mc_remove(struct platform_device *pdev) | ||
177 | { | ||
178 | struct mem_ctl_info *mci = platform_get_drvdata(pdev); | ||
179 | |||
180 | edac_mc_del_mc(&pdev->dev); | ||
181 | if (mci) | ||
182 | edac_mc_free(mci); | ||
183 | return 0; | ||
184 | } | ||
185 | |||
186 | static struct platform_driver tile_edac_mc_driver = { | ||
187 | .driver = { | ||
188 | .name = DRV_NAME, | ||
189 | .owner = THIS_MODULE, | ||
190 | }, | ||
191 | .probe = tile_edac_mc_probe, | ||
192 | .remove = __devexit_p(tile_edac_mc_remove), | ||
193 | }; | ||
194 | |||
195 | /* | ||
196 | * Driver init routine. | ||
197 | */ | ||
198 | static int __init tile_edac_init(void) | ||
199 | { | ||
200 | char hv_file[32]; | ||
201 | struct platform_device *pdev; | ||
202 | int i, err, num = 0; | ||
203 | |||
204 | /* Only support POLL mode. */ | ||
205 | edac_op_state = EDAC_OPSTATE_POLL; | ||
206 | |||
207 | err = platform_driver_register(&tile_edac_mc_driver); | ||
208 | if (err) | ||
209 | return err; | ||
210 | |||
211 | for (i = 0; i < TILE_MAX_MSHIMS; i++) { | ||
212 | /* | ||
213 | * Not all memory controllers are configured (e.g. on a ||
214 | * simulator), so we register only those mshims ||
215 | * that are configured by the hypervisor. | ||
216 | */ | ||
217 | sprintf(hv_file, "mshim/%d", i); | ||
218 | if (hv_dev_open((HV_VirtAddr)hv_file, 0) < 0) | ||
219 | continue; | ||
220 | |||
221 | pdev = platform_device_register_simple(DRV_NAME, i, NULL, 0); | ||
222 | if (IS_ERR(pdev)) | ||
223 | continue; | ||
224 | mshim_pdev[i] = pdev; | ||
225 | num++; | ||
226 | } | ||
227 | |||
228 | if (num == 0) { | ||
229 | platform_driver_unregister(&tile_edac_mc_driver); | ||
230 | return -ENODEV; | ||
231 | } | ||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * Driver cleanup routine. | ||
237 | */ | ||
238 | static void __exit tile_edac_exit(void) | ||
239 | { | ||
240 | int i; | ||
241 | |||
242 | for (i = 0; i < TILE_MAX_MSHIMS; i++) { | ||
243 | struct platform_device *pdev = mshim_pdev[i]; | ||
244 | if (!pdev) | ||
245 | continue; | ||
246 | |||
247 | platform_set_drvdata(pdev, NULL); | ||
248 | platform_device_unregister(pdev); | ||
249 | } | ||
250 | platform_driver_unregister(&tile_edac_mc_driver); | ||
251 | } | ||
252 | |||
253 | module_init(tile_edac_init); | ||
254 | module_exit(tile_edac_exit); | ||
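Because the driver runs the EDAC core in poll mode, corrected-error counts surface through the core's standard sysfs files rather than a driver-specific interface. A userspace check could look like the sketch below; the /sys/devices/system/edac/mc/mc0/ce_count path assumes the usual EDAC sysfs layout of this kernel era, with mc0 standing in for whichever controller is of interest.

        /* Userspace sketch: read the corrected-error count for mc0. */
        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/sys/devices/system/edac/mc/mc0/ce_count", "r");
                unsigned long ce;

                if (!f) {
                        perror("ce_count");
                        return 1;
                }
                if (fscanf(f, "%lu", &ce) != 1) {
                        fclose(f);
                        return 1;
                }
                printf("mc0 corrected errors: %lu\n", ce);
                fclose(f);
                return 0;
        }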
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c index 7cb301da7474..0825db6d883f 100644 --- a/drivers/net/tile/tilepro.c +++ b/drivers/net/tile/tilepro.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright 2010 Tilera Corporation. All Rights Reserved. | 2 | * Copyright 2011 Tilera Corporation. All Rights Reserved. |
3 | * | 3 | * |
4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public License | 5 | * modify it under the terms of the GNU General Public License |
@@ -44,10 +44,6 @@ | |||
44 | #include <linux/tcp.h> | 44 | #include <linux/tcp.h> |
45 | 45 | ||
46 | 46 | ||
47 | /* There is no singlethread_cpu, so schedule work on the current cpu. */ | ||
48 | #define singlethread_cpu -1 | ||
49 | |||
50 | |||
51 | /* | 47 | /* |
52 | * First, "tile_net_init_module()" initializes all four "devices" which | 48 | * First, "tile_net_init_module()" initializes all four "devices" which |
53 | * can be used by linux. | 49 | * can be used by linux. |
@@ -73,15 +69,16 @@ | |||
73 | * return, knowing we will be called again later. Otherwise, we | 69 | * return, knowing we will be called again later. Otherwise, we |
74 | * reenable the ingress interrupt, and call "napi_complete()". | 70 | * reenable the ingress interrupt, and call "napi_complete()". |
75 | * | 71 | * |
72 | * HACK: Since disabling the ingress interrupt is not reliable, we | ||
73 | * ignore the interrupt if the global "active" flag is false. | ||
74 | * | ||
76 | * | 75 | * |
77 | * NOTE: The use of "native_driver" ensures that EPP exists, and that | 76 | * NOTE: The use of "native_driver" ensures that EPP exists, and that |
78 | * "epp_sendv" is legal, and that "LIPP" is being used. | 77 | * we are using "LIPP" and "LEPP". |
79 | * | 78 | * |
80 | * NOTE: Failing to free completions for an arbitrarily long time | 79 | * NOTE: Failing to free completions for an arbitrarily long time |
81 | * (which is defined to be illegal) does in fact cause bizarre | 80 | * (which is defined to be illegal) does in fact cause bizarre |
82 | * problems. The "egress_timer" helps prevent this from happening. | 81 | * problems. The "egress_timer" helps prevent this from happening. |
83 | * | ||
84 | * NOTE: The egress code can be interrupted by the interrupt handler. | ||
85 | */ | 82 | */ |
86 | 83 | ||
87 | 84 | ||
@@ -142,6 +139,7 @@ | |||
142 | MODULE_AUTHOR("Tilera"); | 139 | MODULE_AUTHOR("Tilera"); |
143 | MODULE_LICENSE("GPL"); | 140 | MODULE_LICENSE("GPL"); |
144 | 141 | ||
142 | |||
145 | /* | 143 | /* |
146 | * Queue of incoming packets for a specific cpu and device. | 144 | * Queue of incoming packets for a specific cpu and device. |
147 | * | 145 | * |
@@ -177,7 +175,7 @@ struct tile_net_cpu { | |||
177 | struct tile_netio_queue queue; | 175 | struct tile_netio_queue queue; |
178 | /* Statistics. */ | 176 | /* Statistics. */ |
179 | struct tile_net_stats_t stats; | 177 | struct tile_net_stats_t stats; |
180 | /* ISSUE: Is this needed? */ | 178 | /* True iff NAPI is enabled. */ |
181 | bool napi_enabled; | 179 | bool napi_enabled; |
182 | /* True if this tile has successfully registered with the IPP. */ | 180 | /* True if this tile has successfully registered with the IPP. */ |
183 | bool registered; | 181 | bool registered; |
@@ -200,20 +198,20 @@ struct tile_net_cpu { | |||
200 | struct tile_net_priv { | 198 | struct tile_net_priv { |
201 | /* Our network device. */ | 199 | /* Our network device. */ |
202 | struct net_device *dev; | 200 | struct net_device *dev; |
203 | /* The actual egress queue. */ | 201 | /* Pages making up the egress queue. */ |
204 | lepp_queue_t *epp_queue; | 202 | struct page *eq_pages; |
205 | /* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */ | 203 | /* Address of the actual egress queue. */ |
206 | spinlock_t cmd_lock; | 204 | lepp_queue_t *eq; |
207 | /* Protects "epp_queue->comp_head". */ | 205 | /* Protects "eq". */ |
208 | spinlock_t comp_lock; | 206 | spinlock_t eq_lock; |
209 | /* The hypervisor handle for this interface. */ | 207 | /* The hypervisor handle for this interface. */ |
210 | int hv_devhdl; | 208 | int hv_devhdl; |
211 | /* The intr bit mask that IDs this device. */ | 209 | /* The intr bit mask that IDs this device. */ |
212 | u32 intr_id; | 210 | u32 intr_id; |
213 | /* True iff "tile_net_open_aux()" has succeeded. */ | 211 | /* True iff "tile_net_open_aux()" has succeeded. */ |
214 | int partly_opened; | 212 | bool partly_opened; |
215 | /* True iff "tile_net_open_inner()" has succeeded. */ | 213 | /* True iff the device is "active". */ |
216 | int fully_opened; | 214 | bool active; |
217 | /* Effective network cpus. */ | 215 | /* Effective network cpus. */ |
218 | struct cpumask network_cpus_map; | 216 | struct cpumask network_cpus_map; |
219 | /* Number of network cpus. */ | 217 | /* Number of network cpus. */ |
@@ -228,6 +226,10 @@ struct tile_net_priv { | |||
228 | struct tile_net_cpu *cpu[NR_CPUS]; | 226 | struct tile_net_cpu *cpu[NR_CPUS]; |
229 | }; | 227 | }; |
230 | 228 | ||
229 | /* Log2 of the number of small pages needed for the egress queue. */ | ||
230 | #define EQ_ORDER get_order(sizeof(lepp_queue_t)) | ||
231 | /* Size of the egress queue's pages. */ | ||
232 | #define EQ_SIZE (1 << (PAGE_SHIFT + EQ_ORDER)) | ||
231 | 233 | ||
232 | /* | 234 | /* |
233 | * The actual devices (xgbe0, xgbe1, gbe0, gbe1). | 235 | * The actual devices (xgbe0, xgbe1, gbe0, gbe1). |
@@ -284,7 +286,11 @@ static void net_printk(char *fmt, ...) | |||
284 | */ | 286 | */ |
285 | static void dump_packet(unsigned char *data, unsigned long length, char *s) | 287 | static void dump_packet(unsigned char *data, unsigned long length, char *s) |
286 | { | 288 | { |
289 | int my_cpu = smp_processor_id(); | ||
290 | |||
287 | unsigned long i; | 291 | unsigned long i; |
292 | char buf[128]; | ||
293 | |||
288 | static unsigned int count; | 294 | static unsigned int count; |
289 | 295 | ||
290 | pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n", | 296 | pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n", |
@@ -294,10 +300,12 @@ static void dump_packet(unsigned char *data, unsigned long length, char *s) | |||
294 | 300 | ||
295 | for (i = 0; i < length; i++) { | 301 | for (i = 0; i < length; i++) { |
296 | if ((i & 0xf) == 0) | 302 | if ((i & 0xf) == 0) |
297 | sprintf(buf, "%8.8lx:", i); | 303 | sprintf(buf, "[%02d] %8.8lx:", my_cpu, i); |
298 | sprintf(buf + strlen(buf), " %2.2x", data[i]); | 304 | sprintf(buf + strlen(buf), " %2.2x", data[i]); |
299 | if ((i & 0xf) == 0xf || i == length - 1) | 305 | if ((i & 0xf) == 0xf || i == length - 1) { |
300 | pr_info("%s\n", buf); | 306 | strcat(buf, "\n"); |
307 | pr_info("%s", buf); | ||
308 | } | ||
301 | } | 309 | } |
302 | } | 310 | } |
303 | #endif | 311 | #endif |
@@ -351,60 +359,109 @@ static void tile_net_provide_linux_buffer(struct tile_net_cpu *info, | |||
351 | 359 | ||
352 | /* | 360 | /* |
353 | * Provide a linux buffer for LIPP. | 361 | * Provide a linux buffer for LIPP. |
362 | * | ||
363 | * Note that the ACTUAL allocation for each buffer is a "struct sk_buff", | ||
364 | * plus a chunk of memory that includes not only the requested bytes, but | ||
365 | * also NET_SKB_PAD bytes of initial padding, and a "struct skb_shared_info". | ||
366 | * | ||
367 | * Note that "struct skb_shared_info" is 88 bytes with 64K pages and | ||
368 | * 268 bytes with 4K pages (since the frags[] array needs 18 entries). | ||
369 | * | ||
370 | * Without jumbo packets, the maximum packet size will be 1536 bytes, | ||
371 | * and we use 2 bytes (NET_IP_ALIGN) of padding. ISSUE: If we told | ||
372 | * the hardware to clip at 1518 bytes instead of 1536 bytes, then we | ||
373 | * could save an entire cache line, but in practice, we don't need it. | ||
374 | * | ||
375 | * Since CPAs are 38 bits, and we can only encode the high 31 bits in | ||
376 | * a "linux_buffer_t", the low 7 bits must be zero, and thus, we must | ||
377 | * align the actual "va" mod 128. | ||
378 | * | ||
379 | * We assume that the underlying "head" will be aligned mod 64. Note | ||
380 | * that in practice, we have seen "head" NOT aligned mod 128 even when | ||
381 | * using 2048 byte allocations, which is surprising. | ||
382 | * | ||
383 | * If "head" WAS always aligned mod 128, we could change LIPP to | ||
384 | * assume that the low SIX bits are zero, and the 7th bit is one, that | ||
385 | * is, align the actual "va" mod 128 plus 64, which would be "free". | ||
386 | * | ||
387 | * For now, the actual "head" pointer points at NET_SKB_PAD bytes of | ||
388 | * padding, plus 28 or 92 bytes of extra padding, plus the sk_buff | ||
389 | * pointer, plus the NET_IP_ALIGN padding, plus 126 or 1536 bytes for | ||
390 | * the actual packet, plus 62 bytes of empty padding, plus some | ||
391 | * padding and the "struct skb_shared_info". | ||
392 | * | ||
393 | * With 64K pages, a large buffer thus needs 32+92+4+2+1536+62+88 | ||
394 | * bytes, or 1816 bytes, which fits comfortably into 2048 bytes. | ||
395 | * | ||
396 | * With 64K pages, a small buffer thus needs 32+92+4+2+126+88 | ||
397 | * bytes, or 344 bytes, which means we are wasting 64+ bytes, and | ||
398 | * could presumably increase the size of small buffers. | ||
399 | * | ||
400 | * With 4K pages, a large buffer thus needs 32+92+4+2+1536+62+268 | ||
401 | * bytes, or 1996 bytes, which fits comfortably into 2048 bytes. | ||
402 | * | ||
403 | * With 4K pages, a small buffer thus needs 32+92+4+2+126+268 | ||
404 | * bytes, or 524 bytes, which is annoyingly wasteful. | ||
405 | * | ||
406 | * Maybe we should increase LIPP_SMALL_PACKET_SIZE to 192? | ||
407 | * | ||
408 | * ISSUE: Maybe we should increase "NET_SKB_PAD" to 64? | ||
354 | */ | 409 | */ |
355 | static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, | 410 | static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, |
356 | bool small) | 411 | bool small) |
357 | { | 412 | { |
358 | /* ISSUE: What should we use here? */ | 413 | #if TILE_NET_MTU <= 1536 |
414 | /* Without "jumbo", 2 + 1536 should be sufficient. */ | ||
415 | unsigned int large_size = NET_IP_ALIGN + 1536; | ||
416 | #else | ||
417 | /* ISSUE: This has not been tested. */ | ||
359 | unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100; | 418 | unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100; |
419 | #endif | ||
360 | 420 | ||
361 | /* Round up to ensure to avoid "false sharing" with last cache line. */ | 421 | /* Avoid "false sharing" with last cache line. */ |
362 | unsigned int buffer_size = | 422 | /* ISSUE: This is already done by "dev_alloc_skb()". */ |
423 | unsigned int len = | ||
363 | (((small ? LIPP_SMALL_PACKET_SIZE : large_size) + | 424 | (((small ? LIPP_SMALL_PACKET_SIZE : large_size) + |
364 | CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE()); | 425 | CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE()); |
365 | 426 | ||
366 | /* | 427 | unsigned int padding = 128 - NET_SKB_PAD; |
367 | * ISSUE: Since CPAs are 38 bits, and we can only encode the | 428 | unsigned int align; |
368 | * high 31 bits in a "linux_buffer_t", the low 7 bits must be | ||
369 | * zero, and thus, we must align the actual "va" mod 128. | ||
370 | */ | ||
371 | const unsigned long align = 128; | ||
372 | 429 | ||
373 | struct sk_buff *skb; | 430 | struct sk_buff *skb; |
374 | void *va; | 431 | void *va; |
375 | 432 | ||
376 | struct sk_buff **skb_ptr; | 433 | struct sk_buff **skb_ptr; |
377 | 434 | ||
378 | /* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */ | 435 | /* Request 96 extra bytes for alignment purposes. */ |
379 | /* and also "reserves" that many bytes. */ | 436 | skb = dev_alloc_skb(len + padding); |
380 | /* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */ | 437 | if (skb == NULL) |
381 | int len = sizeof(*skb_ptr) + align + buffer_size; | 438 | return false; |
382 | |||
383 | while (1) { | ||
384 | |||
385 | /* Allocate (or fail). */ | ||
386 | skb = dev_alloc_skb(len); | ||
387 | if (skb == NULL) | ||
388 | return false; | ||
389 | |||
390 | /* Make room for a back-pointer to 'skb'. */ | ||
391 | skb_reserve(skb, sizeof(*skb_ptr)); | ||
392 | 439 | ||
393 | /* Make sure we are aligned. */ | 440 | /* Skip 32 or 96 bytes to align "data" mod 128. */ |
394 | skb_reserve(skb, -(long)skb->data & (align - 1)); | 441 | align = -(long)skb->data & (128 - 1); |
442 | BUG_ON(align > padding); | ||
443 | skb_reserve(skb, align); | ||
395 | 444 | ||
396 | /* This address is given to IPP. */ | 445 | /* This address is given to IPP. */ |
397 | va = skb->data; | 446 | va = skb->data; |
398 | 447 | ||
399 | if (small) | 448 | /* Buffers must not span a huge page. */ |
400 | break; | 449 | BUG_ON(((((long)va & ~HPAGE_MASK) + len) & HPAGE_MASK) != 0); |
401 | 450 | ||
402 | /* ISSUE: This has never been observed! */ | 451 | #ifdef TILE_NET_PARANOIA |
403 | /* Large buffers must not span a huge page. */ | 452 | #if CHIP_HAS_CBOX_HOME_MAP() |
404 | if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0) | 453 | if (hash_default) { |
405 | break; | 454 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va); |
406 | pr_err("Leaking unaligned linux buffer at %p.\n", va); | 455 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) |
456 | panic("Non-HFH ingress buffer! VA=%p Mode=%d PTE=%llx", | ||
457 | va, hv_pte_get_mode(pte), hv_pte_val(pte)); | ||
407 | } | 458 | } |
459 | #endif | ||
460 | #endif | ||
461 | |||
462 | /* Invalidate the packet buffer. */ | ||
463 | if (!hash_default) | ||
464 | __inv_buffer(va, len); | ||
408 | 465 | ||
409 | /* Skip two bytes to satisfy LIPP assumptions. */ | 466 | /* Skip two bytes to satisfy LIPP assumptions. */ |
410 | /* Note that this aligns IP on a 16 byte boundary. */ | 467 | /* Note that this aligns IP on a 16 byte boundary. */ |
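The comment block above explains why the buffer VA must be aligned mod 128: a CPA is 38 bits but a "linux_buffer_t" only carries 31 of them, so the low 7 bits must be zero, and the receive path later reverses the encoding with __va((phys_addr_t)(buffer >> 1) << 7). The round trip is sketched below; treating bit 0 as a flag is my reading of that decode and should be taken as an assumption, not a statement of the LIPP ABI.

        /* Sketch of the linux_buffer_t round trip used by this driver:
         * drop the 7 zero low bits of the CPA, keep bit 0 as a flag
         * (flag meaning assumed for illustration only).
         */
        static inline unsigned int encode_linux_buffer(void *va, int flag)
        {
                unsigned long long cpa = __pa(va);  /* 128-byte aligned by construction */
                return (unsigned int)((cpa >> 7) << 1) | (flag & 1);
        }

        static inline void *decode_linux_buffer(unsigned int buffer)
        {
                return __va((phys_addr_t)(buffer >> 1) << 7);  /* as in the rx path */
        }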
@@ -415,23 +472,9 @@ static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, | |||
415 | skb_ptr = va - sizeof(*skb_ptr); | 472 | skb_ptr = va - sizeof(*skb_ptr); |
416 | *skb_ptr = skb; | 473 | *skb_ptr = skb; |
417 | 474 | ||
418 | /* Invalidate the packet buffer. */ | ||
419 | if (!hash_default) | ||
420 | __inv_buffer(skb->data, buffer_size); | ||
421 | |||
422 | /* Make sure "skb_ptr" has been flushed. */ | 475 | /* Make sure "skb_ptr" has been flushed. */ |
423 | __insn_mf(); | 476 | __insn_mf(); |
424 | 477 | ||
425 | #ifdef TILE_NET_PARANOIA | ||
426 | #if CHIP_HAS_CBOX_HOME_MAP() | ||
427 | if (hash_default) { | ||
428 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va); | ||
429 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) | ||
430 | panic("Non-coherent ingress buffer!"); | ||
431 | } | ||
432 | #endif | ||
433 | #endif | ||
434 | |||
435 | /* Provide the new buffer. */ | 478 | /* Provide the new buffer. */ |
436 | tile_net_provide_linux_buffer(info, va, small); | 479 | tile_net_provide_linux_buffer(info, va, small); |
437 | 480 | ||
@@ -469,48 +512,64 @@ oops: | |||
469 | * Grab some LEPP completions, and store them in "comps", of size | 512 | * Grab some LEPP completions, and store them in "comps", of size |
470 | * "comps_size", and return the number of completions which were | 513 | * "comps_size", and return the number of completions which were |
471 | * stored, so the caller can free them. | 514 | * stored, so the caller can free them. |
472 | * | ||
473 | * If "pending" is not NULL, it will be set to true if there might | ||
474 | * still be some pending completions caused by this tile, else false. | ||
475 | */ | 515 | */ |
476 | static unsigned int tile_net_lepp_grab_comps(struct net_device *dev, | 516 | static unsigned int tile_net_lepp_grab_comps(lepp_queue_t *eq, |
477 | struct sk_buff *comps[], | 517 | struct sk_buff *comps[], |
478 | unsigned int comps_size, | 518 | unsigned int comps_size, |
479 | bool *pending) | 519 | unsigned int min_size) |
480 | { | 520 | { |
481 | struct tile_net_priv *priv = netdev_priv(dev); | ||
482 | |||
483 | lepp_queue_t *eq = priv->epp_queue; | ||
484 | |||
485 | unsigned int n = 0; | 521 | unsigned int n = 0; |
486 | 522 | ||
487 | unsigned int comp_head; | 523 | unsigned int comp_head = eq->comp_head; |
488 | unsigned int comp_busy; | 524 | unsigned int comp_busy = eq->comp_busy; |
489 | unsigned int comp_tail; | ||
490 | |||
491 | spin_lock(&priv->comp_lock); | ||
492 | |||
493 | comp_head = eq->comp_head; | ||
494 | comp_busy = eq->comp_busy; | ||
495 | comp_tail = eq->comp_tail; | ||
496 | 525 | ||
497 | while (comp_head != comp_busy && n < comps_size) { | 526 | while (comp_head != comp_busy && n < comps_size) { |
498 | comps[n++] = eq->comps[comp_head]; | 527 | comps[n++] = eq->comps[comp_head]; |
499 | LEPP_QINC(comp_head); | 528 | LEPP_QINC(comp_head); |
500 | } | 529 | } |
501 | 530 | ||
502 | if (pending != NULL) | 531 | if (n < min_size) |
503 | *pending = (comp_head != comp_tail); | 532 | return 0; |
504 | 533 | ||
505 | eq->comp_head = comp_head; | 534 | eq->comp_head = comp_head; |
506 | 535 | ||
507 | spin_unlock(&priv->comp_lock); | ||
508 | |||
509 | return n; | 536 | return n; |
510 | } | 537 | } |
511 | 538 | ||
512 | 539 | ||
513 | /* | 540 | /* |
541 | * Free some comps, and return true iff there are still some pending. | ||
542 | */ | ||
543 | static bool tile_net_lepp_free_comps(struct net_device *dev, bool all) | ||
544 | { | ||
545 | struct tile_net_priv *priv = netdev_priv(dev); | ||
546 | |||
547 | lepp_queue_t *eq = priv->eq; | ||
548 | |||
549 | struct sk_buff *olds[64]; | ||
550 | unsigned int wanted = 64; | ||
551 | unsigned int i, n; | ||
552 | bool pending; | ||
553 | |||
554 | spin_lock(&priv->eq_lock); | ||
555 | |||
556 | if (all) | ||
557 | eq->comp_busy = eq->comp_tail; | ||
558 | |||
559 | n = tile_net_lepp_grab_comps(eq, olds, wanted, 0); | ||
560 | |||
561 | pending = (eq->comp_head != eq->comp_tail); | ||
562 | |||
563 | spin_unlock(&priv->eq_lock); | ||
564 | |||
565 | for (i = 0; i < n; i++) | ||
566 | kfree_skb(olds[i]); | ||
567 | |||
568 | return pending; | ||
569 | } | ||
570 | |||
571 | |||
572 | /* | ||
514 | * Make sure the egress timer is scheduled. | 573 | * Make sure the egress timer is scheduled. |
515 | * | 574 | * |
516 | * Note that we use "schedule if not scheduled" logic instead of the more | 575 | * Note that we use "schedule if not scheduled" logic instead of the more |
@@ -544,21 +603,11 @@ static void tile_net_handle_egress_timer(unsigned long arg) | |||
544 | struct tile_net_cpu *info = (struct tile_net_cpu *)arg; | 603 | struct tile_net_cpu *info = (struct tile_net_cpu *)arg; |
545 | struct net_device *dev = info->napi.dev; | 604 | struct net_device *dev = info->napi.dev; |
546 | 605 | ||
547 | struct sk_buff *olds[32]; | ||
548 | unsigned int wanted = 32; | ||
549 | unsigned int i, nolds = 0; | ||
550 | bool pending; | ||
551 | |||
552 | /* The timer is no longer scheduled. */ | 606 | /* The timer is no longer scheduled. */ |
553 | info->egress_timer_scheduled = false; | 607 | info->egress_timer_scheduled = false; |
554 | 608 | ||
555 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending); | 609 | /* Free comps, and reschedule timer if more are pending. */ |
556 | 610 | if (tile_net_lepp_free_comps(dev, false)) | |
557 | for (i = 0; i < nolds; i++) | ||
558 | kfree_skb(olds[i]); | ||
559 | |||
560 | /* Reschedule timer if needed. */ | ||
561 | if (pending) | ||
562 | tile_net_schedule_egress_timer(info); | 611 | tile_net_schedule_egress_timer(info); |
563 | } | 612 | } |
564 | 613 | ||
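The "schedule if not scheduled" logic mentioned above keeps the egress timer armed at most once: a per-cpu flag guards mod_timer(), and the handler clears the flag and re-arms only while completions are still pending. The sketch below follows the field names visible in these hunks but is only an approximation; the real helper in the driver likely also guards the check against interrupts.

        /* Illustrative "schedule if not scheduled" idiom for the
         * per-cpu egress timer; example_* names are not the driver's.
         */
        static void example_schedule_egress_timer(struct tile_net_cpu *info)
        {
                if (!info->egress_timer_scheduled) {
                        mod_timer(&info->egress_timer, jiffies + 1);
                        info->egress_timer_scheduled = true;
                }
        }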
@@ -636,8 +685,39 @@ static bool is_dup_ack(char *s1, char *s2, unsigned int len) | |||
636 | 685 | ||
637 | 686 | ||
638 | 687 | ||
688 | static void tile_net_discard_aux(struct tile_net_cpu *info, int index) | ||
689 | { | ||
690 | struct tile_netio_queue *queue = &info->queue; | ||
691 | netio_queue_impl_t *qsp = queue->__system_part; | ||
692 | netio_queue_user_impl_t *qup = &queue->__user_part; | ||
693 | |||
694 | int index2_aux = index + sizeof(netio_pkt_t); | ||
695 | int index2 = | ||
696 | ((index2_aux == | ||
697 | qsp->__packet_receive_queue.__last_packet_plus_one) ? | ||
698 | 0 : index2_aux); | ||
699 | |||
700 | netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index); | ||
701 | |||
702 | /* Extract the "linux_buffer_t". */ | ||
703 | unsigned int buffer = pkt->__packet.word; | ||
704 | |||
705 | /* Convert "linux_buffer_t" to "va". */ | ||
706 | void *va = __va((phys_addr_t)(buffer >> 1) << 7); | ||
707 | |||
708 | /* Acquire the associated "skb". */ | ||
709 | struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); | ||
710 | struct sk_buff *skb = *skb_ptr; | ||
711 | |||
712 | kfree_skb(skb); | ||
713 | |||
714 | /* Consume this packet. */ | ||
715 | qup->__packet_receive_read = index2; | ||
716 | } | ||
717 | |||
718 | |||
639 | /* | 719 | /* |
640 | * Like "tile_net_handle_packets()", but just discard packets. | 720 | * Like "tile_net_poll()", but just discard packets. |
641 | */ | 721 | */ |
642 | static void tile_net_discard_packets(struct net_device *dev) | 722 | static void tile_net_discard_packets(struct net_device *dev) |
643 | { | 723 | { |
@@ -650,32 +730,8 @@ static void tile_net_discard_packets(struct net_device *dev) | |||
650 | 730 | ||
651 | while (qup->__packet_receive_read != | 731 | while (qup->__packet_receive_read != |
652 | qsp->__packet_receive_queue.__packet_write) { | 732 | qsp->__packet_receive_queue.__packet_write) { |
653 | |||
654 | int index = qup->__packet_receive_read; | 733 | int index = qup->__packet_receive_read; |
655 | 734 | tile_net_discard_aux(info, index); | |
656 | int index2_aux = index + sizeof(netio_pkt_t); | ||
657 | int index2 = | ||
658 | ((index2_aux == | ||
659 | qsp->__packet_receive_queue.__last_packet_plus_one) ? | ||
660 | 0 : index2_aux); | ||
661 | |||
662 | netio_pkt_t *pkt = (netio_pkt_t *) | ||
663 | ((unsigned long) &qsp[1] + index); | ||
664 | |||
665 | /* Extract the "linux_buffer_t". */ | ||
666 | unsigned int buffer = pkt->__packet.word; | ||
667 | |||
668 | /* Convert "linux_buffer_t" to "va". */ | ||
669 | void *va = __va((phys_addr_t)(buffer >> 1) << 7); | ||
670 | |||
671 | /* Acquire the associated "skb". */ | ||
672 | struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); | ||
673 | struct sk_buff *skb = *skb_ptr; | ||
674 | |||
675 | kfree_skb(skb); | ||
676 | |||
677 | /* Consume this packet. */ | ||
678 | qup->__packet_receive_read = index2; | ||
679 | } | 735 | } |
680 | } | 736 | } |
681 | 737 | ||
@@ -704,7 +760,8 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
704 | 760 | ||
705 | netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt); | 761 | netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt); |
706 | 762 | ||
707 | /* Extract the packet size. */ | 763 | /* Extract the packet size. FIXME: Shouldn't the second line */ |
764 | /* get subtracted? Mostly moot, since it should be "zero". */ | ||
708 | unsigned long len = | 765 | unsigned long len = |
709 | (NETIO_PKT_CUSTOM_LENGTH(pkt) + | 766 | (NETIO_PKT_CUSTOM_LENGTH(pkt) + |
710 | NET_IP_ALIGN - NETIO_PACKET_PADDING); | 767 | NET_IP_ALIGN - NETIO_PACKET_PADDING); |
@@ -722,15 +779,6 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
722 | /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ | 779 | /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ |
723 | unsigned char *buf = va + NET_IP_ALIGN; | 780 | unsigned char *buf = va + NET_IP_ALIGN; |
724 | 781 | ||
725 | #ifdef IGNORE_DUP_ACKS | ||
726 | |||
727 | static int other; | ||
728 | static int final; | ||
729 | static int keep; | ||
730 | static int skip; | ||
731 | |||
732 | #endif | ||
733 | |||
734 | /* Invalidate the packet buffer. */ | 782 | /* Invalidate the packet buffer. */ |
735 | if (!hash_default) | 783 | if (!hash_default) |
736 | __inv_buffer(buf, len); | 784 | __inv_buffer(buf, len); |
@@ -745,16 +793,8 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
745 | #ifdef TILE_NET_VERIFY_INGRESS | 793 | #ifdef TILE_NET_VERIFY_INGRESS |
746 | if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) && | 794 | if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) && |
747 | NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) { | 795 | NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) { |
748 | /* | 796 | /* Bug 6624: Includes UDP packets with a "zero" checksum. */ |
749 | * FIXME: This complains about UDP packets | ||
750 | * with a "zero" checksum (bug 6624). | ||
751 | */ | ||
752 | #ifdef TILE_NET_PANIC_ON_BAD | ||
753 | dump_packet(buf, len, "rx"); | ||
754 | panic("Bad L4 checksum."); | ||
755 | #else | ||
756 | pr_warning("Bad L4 checksum on %d byte packet.\n", len); | 797 | pr_warning("Bad L4 checksum on %d byte packet.\n", len); |
757 | #endif | ||
758 | } | 798 | } |
759 | if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) && | 799 | if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) && |
760 | NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) { | 800 | NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) { |
@@ -769,90 +809,29 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
769 | } | 809 | } |
770 | break; | 810 | break; |
771 | case NETIO_PKT_STATUS_BAD: | 811 | case NETIO_PKT_STATUS_BAD: |
772 | #ifdef TILE_NET_PANIC_ON_BAD | 812 | pr_warning("Unexpected BAD %ld byte packet.\n", len); |
773 | dump_packet(buf, len, "rx"); | ||
774 | panic("Unexpected BAD packet."); | ||
775 | #else | ||
776 | pr_warning("Unexpected BAD %d byte packet.\n", len); | ||
777 | #endif | ||
778 | } | 813 | } |
779 | #endif | 814 | #endif |
780 | 815 | ||
781 | filter = 0; | 816 | filter = 0; |
782 | 817 | ||
818 | /* ISSUE: Filter TCP packets with "bad" checksums? */ | ||
819 | |||
783 | if (!(dev->flags & IFF_UP)) { | 820 | if (!(dev->flags & IFF_UP)) { |
784 | /* Filter packets received before we're up. */ | 821 | /* Filter packets received before we're up. */ |
785 | filter = 1; | 822 | filter = 1; |
823 | } else if (NETIO_PKT_STATUS_M(metadata, pkt) == NETIO_PKT_STATUS_BAD) { | ||
824 | /* Filter "truncated" packets. */ | ||
825 | filter = 1; | ||
786 | } else if (!(dev->flags & IFF_PROMISC)) { | 826 | } else if (!(dev->flags & IFF_PROMISC)) { |
787 | /* | 827 | /* FIXME: Implement HW multicast filter. */ |
788 | * FIXME: Implement HW multicast filter. | 828 | if (!is_multicast_ether_addr(buf)) { |
789 | */ | ||
790 | if (is_unicast_ether_addr(buf)) { | ||
791 | /* Filter packets not for our address. */ | 829 | /* Filter packets not for our address. */ |
792 | const u8 *mine = dev->dev_addr; | 830 | const u8 *mine = dev->dev_addr; |
793 | filter = compare_ether_addr(mine, buf); | 831 | filter = compare_ether_addr(mine, buf); |
794 | } | 832 | } |
795 | } | 833 | } |
796 | 834 | ||
797 | #ifdef IGNORE_DUP_ACKS | ||
798 | |||
799 | if (len != 66) { | ||
800 | /* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */ | ||
801 | |||
802 | other++; | ||
803 | |||
804 | } else if (index2 == | ||
805 | qsp->__packet_receive_queue.__packet_write) { | ||
806 | |||
807 | final++; | ||
808 | |||
809 | } else { | ||
810 | |||
811 | netio_pkt_t *pkt2 = (netio_pkt_t *) | ||
812 | ((unsigned long) &qsp[1] + index2); | ||
813 | |||
814 | netio_pkt_metadata_t *metadata2 = | ||
815 | NETIO_PKT_METADATA(pkt2); | ||
816 | |||
817 | /* Extract the packet size. */ | ||
818 | unsigned long len2 = | ||
819 | (NETIO_PKT_CUSTOM_LENGTH(pkt2) + | ||
820 | NET_IP_ALIGN - NETIO_PACKET_PADDING); | ||
821 | |||
822 | if (len2 == 66 && | ||
823 | NETIO_PKT_FLOW_HASH_M(metadata, pkt) == | ||
824 | NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) { | ||
825 | |||
826 | /* Extract the "linux_buffer_t". */ | ||
827 | unsigned int buffer2 = pkt2->__packet.word; | ||
828 | |||
829 | /* Convert "linux_buffer_t" to "va". */ | ||
830 | void *va2 = | ||
831 | __va((phys_addr_t)(buffer2 >> 1) << 7); | ||
832 | |||
833 | /* Extract the packet data pointer. */ | ||
834 | /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ | ||
835 | unsigned char *buf2 = va2 + NET_IP_ALIGN; | ||
836 | |||
837 | /* Invalidate the packet buffer. */ | ||
838 | if (!hash_default) | ||
839 | __inv_buffer(buf2, len2); | ||
840 | |||
841 | if (is_dup_ack(buf, buf2, len)) { | ||
842 | skip++; | ||
843 | filter = 1; | ||
844 | } else { | ||
845 | keep++; | ||
846 | } | ||
847 | } | ||
848 | } | ||
849 | |||
850 | if (net_ratelimit()) | ||
851 | pr_info("Other %d Final %d Keep %d Skip %d.\n", | ||
852 | other, final, keep, skip); | ||
853 | |||
854 | #endif | ||
855 | |||
856 | if (filter) { | 835 | if (filter) { |
857 | 836 | ||
858 | /* ISSUE: Update "drop" statistics? */ | 837 | /* ISSUE: Update "drop" statistics? */ |
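The hunk above collapses the old panic-on-bad and IGNORE_DUP_ACKS experiments into a single accept/drop decision. As a minimal sketch of that decision (the helper name is hypothetical, and "status_bad" stands in for the NETIO_PKT_STATUS_M() == NETIO_PKT_STATUS_BAD test made above):

#include <linux/netdevice.h>
#include <linux/etherdevice.h>

/* Return true if the packet should be dropped before an skb is built.
 * "buf" points at the Ethernet destination address. */
static bool example_rx_should_filter(struct net_device *dev,
                                     const u8 *buf, bool status_bad)
{
        if (!(dev->flags & IFF_UP))
                return true;                    /* not up yet */
        if (status_bad)
                return true;                    /* truncated packet */
        if (dev->flags & IFF_PROMISC)
                return false;                   /* keep everything */
        if (is_multicast_ether_addr(buf))
                return false;                   /* no HW mcast filter yet */
        /* compare_ether_addr() is nonzero when the addresses differ. */
        return compare_ether_addr(dev->dev_addr, buf) != 0;
}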
@@ -877,10 +856,7 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
877 | /* NOTE: This call also sets "skb->dev = dev". */ | 856 | /* NOTE: This call also sets "skb->dev = dev". */ |
878 | skb->protocol = eth_type_trans(skb, dev); | 857 | skb->protocol = eth_type_trans(skb, dev); |
879 | 858 | ||
880 | /* ISSUE: Discard corrupt packets? */ | 859 | /* Avoid recomputing "good" TCP/UDP checksums. */ |
881 | /* ISSUE: Discard packets with bad checksums? */ | ||
882 | |||
883 | /* Avoid recomputing TCP/UDP checksums. */ | ||
884 | if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) | 860 | if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) |
885 | skb->ip_summed = CHECKSUM_UNNECESSARY; | 861 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
886 | 862 | ||
@@ -912,9 +888,14 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) | |||
912 | /* | 888 | /* |
913 | * Handle some packets for the given device on the current CPU. | 889 | * Handle some packets for the given device on the current CPU. |
914 | * | 890 | * |
915 | * ISSUE: The "rotting packet" race condition occurs if a packet | 891 | * If "tile_net_stop()" is called on some other tile while this |
916 | * arrives after the queue appears to be empty, and before the | 892 | * function is running, we will return, hopefully before that |
917 | * hypervisor interrupt is re-enabled. | 893 | * other tile asks us to call "napi_disable()". |
894 | * | ||
895 | * The "rotting packet" race condition occurs if a packet arrives | ||
896 | * during the extremely narrow window between the queue appearing to | ||
897 | * be empty, and the ingress interrupt being re-enabled. This happens | ||
898 | * a LOT under heavy network load. | ||
918 | */ | 899 | */ |
919 | static int tile_net_poll(struct napi_struct *napi, int budget) | 900 | static int tile_net_poll(struct napi_struct *napi, int budget) |
920 | { | 901 | { |
@@ -928,7 +909,7 @@ static int tile_net_poll(struct napi_struct *napi, int budget) | |||
928 | 909 | ||
929 | unsigned int work = 0; | 910 | unsigned int work = 0; |
930 | 911 | ||
931 | while (1) { | 912 | while (priv->active) { |
932 | int index = qup->__packet_receive_read; | 913 | int index = qup->__packet_receive_read; |
933 | if (index == qsp->__packet_receive_queue.__packet_write) | 914 | if (index == qsp->__packet_receive_queue.__packet_write) |
934 | break; | 915 | break; |
@@ -941,19 +922,24 @@ static int tile_net_poll(struct napi_struct *napi, int budget) | |||
941 | 922 | ||
942 | napi_complete(&info->napi); | 923 | napi_complete(&info->napi); |
943 | 924 | ||
944 | /* Re-enable hypervisor interrupts. */ | 925 | if (!priv->active) |
926 | goto done; | ||
927 | |||
928 | /* Re-enable the ingress interrupt. */ | ||
945 | enable_percpu_irq(priv->intr_id); | 929 | enable_percpu_irq(priv->intr_id); |
946 | 930 | ||
947 | /* HACK: Avoid the "rotting packet" problem. */ | 931 | /* HACK: Avoid the "rotting packet" problem (see above). */ |
948 | if (qup->__packet_receive_read != | 932 | if (qup->__packet_receive_read != |
949 | qsp->__packet_receive_queue.__packet_write) | 933 | qsp->__packet_receive_queue.__packet_write) { |
950 | napi_schedule(&info->napi); | 934 | /* ISSUE: Sometimes this returns zero, presumably */ |
951 | 935 | /* because an interrupt was handled for this tile. */ | |
952 | /* ISSUE: Handle completions? */ | 936 | (void)napi_reschedule(&info->napi); |
937 | } | ||
953 | 938 | ||
954 | done: | 939 | done: |
955 | 940 | ||
956 | tile_net_provide_needed_buffers(info); | 941 | if (priv->active) |
942 | tile_net_provide_needed_buffers(info); | ||
957 | 943 | ||
958 | return work; | 944 | return work; |
959 | } | 945 | } |
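The tile_net_poll() changes above keep the standard NAPI shape, but add two twists: an "active" check so that a concurrent tile_net_stop() makes the loop bail out early, and an explicit re-check after the interrupt is re-enabled to close the "rotting packet" window. A stripped-down sketch of that control flow, with the NetIO queue accesses replaced by hypothetical example_queue_empty()/example_process_one() helpers and the single-argument tile-style enable_percpu_irq():

#include <linux/netdevice.h>
#include <linux/interrupt.h>

struct example_cpu_state {
        struct napi_struct napi;
        bool active;            /* mirrors priv->active */
        int intr_id;            /* ingress interrupt for this queue */
};

/* Stand-ins for the real queue index compare and tile_net_poll_aux(). */
static bool example_queue_empty(struct example_cpu_state *info)
{
        return true;
}
static unsigned int example_process_one(struct example_cpu_state *info)
{
        return 1;
}

static int example_poll(struct napi_struct *napi, int budget)
{
        struct example_cpu_state *info =
                container_of(napi, struct example_cpu_state, napi);
        unsigned int work = 0;

        while (info->active && !example_queue_empty(info)) {
                work += example_process_one(info);
                if (work >= (unsigned int)budget)
                        return work;            /* stay scheduled */
        }

        napi_complete(&info->napi);
        if (!info->active)
                return work;                    /* device is going down */

        enable_percpu_irq(info->intr_id);

        /* "Rotting packet" fix: a packet may have slipped in between the
         * emptiness check above and the interrupt re-enable. */
        if (!example_queue_empty(info))
                (void)napi_reschedule(&info->napi);

        return work;
}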
@@ -961,6 +947,12 @@ done: | |||
961 | 947 | ||
962 | /* | 948 | /* |
963 | * Handle an ingress interrupt for the given device on the current cpu. | 949 | * Handle an ingress interrupt for the given device on the current cpu. |
950 | * | ||
951 | * ISSUE: Sometimes this gets called after "disable_percpu_irq()" has | ||
952 | * been called! This is probably due to "pending hypervisor downcalls". | ||
953 | * | ||
954 | * ISSUE: Is there any race condition between the "napi_schedule()" here | ||
955 | * and the "napi_complete()" call above? | ||
964 | */ | 956 | */ |
965 | static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) | 957 | static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) |
966 | { | 958 | { |
@@ -969,9 +961,15 @@ static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) | |||
969 | int my_cpu = smp_processor_id(); | 961 | int my_cpu = smp_processor_id(); |
970 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | 962 | struct tile_net_cpu *info = priv->cpu[my_cpu]; |
971 | 963 | ||
972 | /* Disable hypervisor interrupt. */ | 964 | /* Disable the ingress interrupt. */ |
973 | disable_percpu_irq(priv->intr_id); | 965 | disable_percpu_irq(priv->intr_id); |
974 | 966 | ||
967 | /* Ignore unwanted interrupts. */ | ||
968 | if (!priv->active) | ||
969 | return IRQ_HANDLED; | ||
970 | |||
971 | /* ISSUE: Sometimes "info->napi_enabled" is false here. */ | ||
972 | |||
975 | napi_schedule(&info->napi); | 973 | napi_schedule(&info->napi); |
976 | 974 | ||
977 | return IRQ_HANDLED; | 975 | return IRQ_HANDLED; |
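For completeness, the matching interrupt handler now does nothing beyond masking the interrupt when the device is going down. The sketch below (reusing the hypothetical example_cpu_state from the previous sketch, and passing it directly as dev_id purely for brevity) shows that shape:

static irqreturn_t example_ingress_irq(int irq, void *dev_id)
{
        struct example_cpu_state *info = dev_id;

        /* Mask the per-cpu ingress interrupt; the poll routine unmasks
         * it again once the queue has been drained. */
        disable_percpu_irq(irq);

        /* Late interrupts can still arrive after shutdown has started
         * (e.g. for packets delivered just before deregistration);
         * simply acknowledge them. */
        if (!info->active)
                return IRQ_HANDLED;

        napi_schedule(&info->napi);
        return IRQ_HANDLED;
}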
@@ -1005,8 +1003,7 @@ static int tile_net_open_aux(struct net_device *dev) | |||
1005 | */ | 1003 | */ |
1006 | { | 1004 | { |
1007 | int epp_home = hv_lotar_to_cpu(epp_lotar); | 1005 | int epp_home = hv_lotar_to_cpu(epp_lotar); |
1008 | struct page *page = virt_to_page(priv->epp_queue); | 1006 | homecache_change_page_home(priv->eq_pages, EQ_ORDER, epp_home); |
1009 | homecache_change_page_home(page, 0, epp_home); | ||
1010 | } | 1007 | } |
1011 | 1008 | ||
1012 | /* | 1009 | /* |
@@ -1015,9 +1012,9 @@ static int tile_net_open_aux(struct net_device *dev) | |||
1015 | { | 1012 | { |
1016 | netio_ipp_address_t ea = { | 1013 | netio_ipp_address_t ea = { |
1017 | .va = 0, | 1014 | .va = 0, |
1018 | .pa = __pa(priv->epp_queue), | 1015 | .pa = __pa(priv->eq), |
1019 | .pte = hv_pte(0), | 1016 | .pte = hv_pte(0), |
1020 | .size = PAGE_SIZE, | 1017 | .size = EQ_SIZE, |
1021 | }; | 1018 | }; |
1022 | ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); | 1019 | ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); |
1023 | ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); | 1020 | ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); |
@@ -1043,7 +1040,7 @@ static int tile_net_open_aux(struct net_device *dev) | |||
1043 | 1040 | ||
1044 | 1041 | ||
1045 | /* | 1042 | /* |
1046 | * Register with hypervisor on each CPU. | 1043 | * Register with hypervisor on the current CPU. |
1047 | * | 1044 | * |
1048 | * Strangely, this function does important things even if it "fails", | 1045 | * Strangely, this function does important things even if it "fails", |
1049 | * which is especially common if the link is not up yet. Hopefully | 1046 | * which is especially common if the link is not up yet. Hopefully |
@@ -1092,7 +1089,8 @@ static void tile_net_register(void *dev_ptr) | |||
1092 | priv->cpu[my_cpu] = info; | 1089 | priv->cpu[my_cpu] = info; |
1093 | 1090 | ||
1094 | /* | 1091 | /* |
1095 | * Register ourselves with the IPP. | 1092 | * Register ourselves with LIPP. This does a lot of stuff, |
1093 | * including invoking the LIPP registration code. | ||
1096 | */ | 1094 | */ |
1097 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, | 1095 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, |
1098 | (HV_VirtAddr)&config, | 1096 | (HV_VirtAddr)&config, |
@@ -1101,8 +1099,11 @@ static void tile_net_register(void *dev_ptr) | |||
1101 | PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", | 1099 | PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", |
1102 | ret); | 1100 | ret); |
1103 | if (ret < 0) { | 1101 | if (ret < 0) { |
1104 | printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF" | 1102 | if (ret != NETIO_LINK_DOWN) { |
1105 | " failure %d\n", ret); | 1103 | printk(KERN_DEBUG "hv_dev_pwrite " |
1104 | "NETIO_IPP_INPUT_REGISTER_OFF failure %d\n", | ||
1105 | ret); | ||
1106 | } | ||
1106 | info->link_down = (ret == NETIO_LINK_DOWN); | 1107 | info->link_down = (ret == NETIO_LINK_DOWN); |
1107 | return; | 1108 | return; |
1108 | } | 1109 | } |
@@ -1145,15 +1146,47 @@ static void tile_net_register(void *dev_ptr) | |||
1145 | NETIO_IPP_GET_FASTIO_OFF); | 1146 | NETIO_IPP_GET_FASTIO_OFF); |
1146 | PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); | 1147 | PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); |
1147 | 1148 | ||
1148 | netif_napi_add(dev, &info->napi, tile_net_poll, 64); | ||
1149 | |||
1150 | /* Now we are registered. */ | 1149 | /* Now we are registered. */ |
1151 | info->registered = true; | 1150 | info->registered = true; |
1152 | } | 1151 | } |
1153 | 1152 | ||
1154 | 1153 | ||
1155 | /* | 1154 | /* |
1156 | * Unregister with hypervisor on each CPU. | 1155 | * Deregister with hypervisor on the current CPU. |
1156 | * | ||
1157 | * This simply discards all our credits, so no more packets will be | ||
1158 | * delivered to this tile. There may still be packets in our queue. | ||
1159 | * | ||
1160 | * Also, disable the ingress interrupt. | ||
1161 | */ | ||
1162 | static void tile_net_deregister(void *dev_ptr) | ||
1163 | { | ||
1164 | struct net_device *dev = (struct net_device *)dev_ptr; | ||
1165 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1166 | int my_cpu = smp_processor_id(); | ||
1167 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | ||
1168 | |||
1169 | /* Disable the ingress interrupt. */ | ||
1170 | disable_percpu_irq(priv->intr_id); | ||
1171 | |||
1172 | /* Do nothing else if not registered. */ | ||
1173 | if (info == NULL || !info->registered) | ||
1174 | return; | ||
1175 | |||
1176 | { | ||
1177 | struct tile_netio_queue *queue = &info->queue; | ||
1178 | netio_queue_user_impl_t *qup = &queue->__user_part; | ||
1179 | |||
1180 | /* Discard all our credits. */ | ||
1181 | __netio_fastio_return_credits(qup->__fastio_index, -1); | ||
1182 | } | ||
1183 | } | ||
1184 | |||
1185 | |||
1186 | /* | ||
1187 | * Unregister with hypervisor on the current CPU. | ||
1188 | * | ||
1189 | * Also, disable the ingress interrupt. | ||
1157 | */ | 1190 | */ |
1158 | static void tile_net_unregister(void *dev_ptr) | 1191 | static void tile_net_unregister(void *dev_ptr) |
1159 | { | 1192 | { |
@@ -1162,35 +1195,23 @@ static void tile_net_unregister(void *dev_ptr) | |||
1162 | int my_cpu = smp_processor_id(); | 1195 | int my_cpu = smp_processor_id(); |
1163 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | 1196 | struct tile_net_cpu *info = priv->cpu[my_cpu]; |
1164 | 1197 | ||
1165 | int ret = 0; | 1198 | int ret; |
1166 | int dummy = 0; | 1199 | int dummy = 0; |
1167 | 1200 | ||
1168 | /* Do nothing if never registered. */ | 1201 | /* Disable the ingress interrupt. */ |
1169 | if (info == NULL) | 1202 | disable_percpu_irq(priv->intr_id); |
1170 | return; | ||
1171 | 1203 | ||
1172 | /* Do nothing if already unregistered. */ | 1204 | /* Do nothing else if not registered. */ |
1173 | if (!info->registered) | 1205 | if (info == NULL || !info->registered) |
1174 | return; | 1206 | return; |
1175 | 1207 | ||
1176 | /* | 1208 | /* Unregister ourselves with LIPP/LEPP. */ |
1177 | * Unregister ourselves with LIPP. | ||
1178 | */ | ||
1179 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | 1209 | ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, |
1180 | sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); | 1210 | sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); |
1181 | PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n", | 1211 | if (ret < 0) |
1182 | ret); | 1212 | panic("Failed to unregister with LIPP/LEPP!\n"); |
1183 | if (ret < 0) { | ||
1184 | /* FIXME: Just panic? */ | ||
1185 | pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF" | ||
1186 | " failure %d\n", ret); | ||
1187 | } | ||
1188 | 1213 | ||
1189 | /* | 1214 | /* Discard all packets still in our NetIO queue. */ |
1190 | * Discard all packets still in our NetIO queue. Hopefully, | ||
1191 | * once the unregister call is complete, there will be no | ||
1192 | * packets still in flight on the IDN. | ||
1193 | */ | ||
1194 | tile_net_discard_packets(dev); | 1215 | tile_net_discard_packets(dev); |
1195 | 1216 | ||
1196 | /* Reset state. */ | 1217 | /* Reset state. */ |
@@ -1200,11 +1221,6 @@ static void tile_net_unregister(void *dev_ptr) | |||
1200 | /* Cancel egress timer. */ | 1221 | /* Cancel egress timer. */ |
1201 | del_timer(&info->egress_timer); | 1222 | del_timer(&info->egress_timer); |
1202 | info->egress_timer_scheduled = false; | 1223 | info->egress_timer_scheduled = false; |
1203 | |||
1204 | netif_napi_del(&info->napi); | ||
1205 | |||
1206 | /* Now we are unregistered. */ | ||
1207 | info->registered = false; | ||
1208 | } | 1224 | } |
1209 | 1225 | ||
1210 | 1226 | ||
@@ -1212,18 +1228,28 @@ static void tile_net_unregister(void *dev_ptr) | |||
1212 | * Helper function for "tile_net_stop()". | 1228 | * Helper function for "tile_net_stop()". |
1213 | * | 1229 | * |
1214 | * Also used to handle registration failure in "tile_net_open_inner()", | 1230 | * Also used to handle registration failure in "tile_net_open_inner()", |
1215 | * when "fully_opened" is known to be false, and the various extra | 1231 | * when the various extra steps in "tile_net_stop()" are not necessary. |
1216 | * steps in "tile_net_stop()" are not necessary. ISSUE: It might be | ||
1217 | * simpler if we could just call "tile_net_stop()" anyway. | ||
1218 | */ | 1232 | */ |
1219 | static void tile_net_stop_aux(struct net_device *dev) | 1233 | static void tile_net_stop_aux(struct net_device *dev) |
1220 | { | 1234 | { |
1221 | struct tile_net_priv *priv = netdev_priv(dev); | 1235 | struct tile_net_priv *priv = netdev_priv(dev); |
1236 | int i; | ||
1222 | 1237 | ||
1223 | int dummy = 0; | 1238 | int dummy = 0; |
1224 | 1239 | ||
1225 | /* Unregister all tiles, so LIPP will stop delivering packets. */ | 1240 | /* |
1241 | * Unregister all tiles, so LIPP will stop delivering packets. | ||
1242 | * Also, delete all the "napi" objects (sequentially, to protect | ||
1243 | * "dev->napi_list"). | ||
1244 | */ | ||
1226 | on_each_cpu(tile_net_unregister, (void *)dev, 1); | 1245 | on_each_cpu(tile_net_unregister, (void *)dev, 1); |
1246 | for_each_online_cpu(i) { | ||
1247 | struct tile_net_cpu *info = priv->cpu[i]; | ||
1248 | if (info != NULL && info->registered) { | ||
1249 | netif_napi_del(&info->napi); | ||
1250 | info->registered = false; | ||
1251 | } | ||
1252 | } | ||
1227 | 1253 | ||
1228 | /* Stop LIPP/LEPP. */ | 1254 | /* Stop LIPP/LEPP. */ |
1229 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | 1255 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, |
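One detail worth calling out in the hunk above: the per-tile unregister still runs in parallel via on_each_cpu(), but netif_napi_del() has moved into a single sequential loop, because it edits the shared dev->napi_list. A sketch of that split, assuming the hypothetical example_cpu_state from the earlier sketches also carries a "registered" flag, and an example_priv holding one pointer per cpu:

#include <linux/netdevice.h>
#include <linux/smp.h>
#include <linux/cpumask.h>

struct example_priv {
        struct example_cpu_state *cpu[NR_CPUS];
};

/* Runs on every cpu, concurrently: per-tile teardown only. */
static void example_unregister_this_cpu(void *dev_ptr)
{
        /* ... mask the ingress interrupt, return credits, etc. ... */
}

static void example_stop_aux(struct net_device *dev)
{
        struct example_priv *priv = netdev_priv(dev);
        int i;

        on_each_cpu(example_unregister_this_cpu, dev, 1);

        /* Sequential on purpose: netif_napi_del() manipulates
         * dev->napi_list, which must not be edited from several
         * cpus at once. */
        for_each_online_cpu(i) {
                struct example_cpu_state *info = priv->cpu[i];
                if (info != NULL && info->registered) {
                        netif_napi_del(&info->napi);
                        info->registered = false;
                }
        }
}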
@@ -1235,18 +1261,15 @@ static void tile_net_stop_aux(struct net_device *dev) | |||
1235 | 1261 | ||
1236 | 1262 | ||
1237 | /* | 1263 | /* |
1238 | * Disable ingress interrupts for the given device on the current cpu. | 1264 | * Disable NAPI for the given device on the current cpu. |
1239 | */ | 1265 | */ |
1240 | static void tile_net_disable_intr(void *dev_ptr) | 1266 | static void tile_net_stop_disable(void *dev_ptr) |
1241 | { | 1267 | { |
1242 | struct net_device *dev = (struct net_device *)dev_ptr; | 1268 | struct net_device *dev = (struct net_device *)dev_ptr; |
1243 | struct tile_net_priv *priv = netdev_priv(dev); | 1269 | struct tile_net_priv *priv = netdev_priv(dev); |
1244 | int my_cpu = smp_processor_id(); | 1270 | int my_cpu = smp_processor_id(); |
1245 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | 1271 | struct tile_net_cpu *info = priv->cpu[my_cpu]; |
1246 | 1272 | ||
1247 | /* Disable hypervisor interrupt. */ | ||
1248 | disable_percpu_irq(priv->intr_id); | ||
1249 | |||
1250 | /* Disable NAPI if needed. */ | 1273 | /* Disable NAPI if needed. */ |
1251 | if (info != NULL && info->napi_enabled) { | 1274 | if (info != NULL && info->napi_enabled) { |
1252 | napi_disable(&info->napi); | 1275 | napi_disable(&info->napi); |
@@ -1256,21 +1279,24 @@ static void tile_net_disable_intr(void *dev_ptr) | |||
1256 | 1279 | ||
1257 | 1280 | ||
1258 | /* | 1281 | /* |
1259 | * Enable ingress interrupts for the given device on the current cpu. | 1282 | * Enable NAPI and the ingress interrupt for the given device |
1283 | * on the current cpu. | ||
1284 | * | ||
1285 | * ISSUE: Only do this for "network cpus"? | ||
1260 | */ | 1286 | */ |
1261 | static void tile_net_enable_intr(void *dev_ptr) | 1287 | static void tile_net_open_enable(void *dev_ptr) |
1262 | { | 1288 | { |
1263 | struct net_device *dev = (struct net_device *)dev_ptr; | 1289 | struct net_device *dev = (struct net_device *)dev_ptr; |
1264 | struct tile_net_priv *priv = netdev_priv(dev); | 1290 | struct tile_net_priv *priv = netdev_priv(dev); |
1265 | int my_cpu = smp_processor_id(); | 1291 | int my_cpu = smp_processor_id(); |
1266 | struct tile_net_cpu *info = priv->cpu[my_cpu]; | 1292 | struct tile_net_cpu *info = priv->cpu[my_cpu]; |
1267 | 1293 | ||
1268 | /* Enable hypervisor interrupt. */ | ||
1269 | enable_percpu_irq(priv->intr_id); | ||
1270 | |||
1271 | /* Enable NAPI. */ | 1294 | /* Enable NAPI. */ |
1272 | napi_enable(&info->napi); | 1295 | napi_enable(&info->napi); |
1273 | info->napi_enabled = true; | 1296 | info->napi_enabled = true; |
1297 | |||
1298 | /* Enable the ingress interrupt. */ | ||
1299 | enable_percpu_irq(priv->intr_id); | ||
1274 | } | 1300 | } |
1275 | 1301 | ||
1276 | 1302 | ||
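The rename from the old "intr" helpers also encodes an ordering rule: on open, NAPI is enabled before the ingress interrupt is unmasked, so an interrupt that fires immediately always finds a usable NAPI context; on close, the interrupt is masked first (in the deregister step) and NAPI is disabled only afterwards. A short sketch of the open-side helper, assuming the hypothetical example_priv/example_cpu_state from the sketches above also carry intr_id and napi_enabled fields:

/* Runs on each cpu via on_each_cpu(); order matters: NAPI first,
 * then the interrupt unmask. */
static void example_open_enable_this_cpu(void *dev_ptr)
{
        struct net_device *dev = dev_ptr;
        struct example_priv *priv = netdev_priv(dev);
        struct example_cpu_state *info = priv->cpu[smp_processor_id()];

        napi_enable(&info->napi);
        info->napi_enabled = true;

        enable_percpu_irq(priv->intr_id);
}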
@@ -1288,8 +1314,9 @@ static int tile_net_open_inner(struct net_device *dev) | |||
1288 | int my_cpu = smp_processor_id(); | 1314 | int my_cpu = smp_processor_id(); |
1289 | struct tile_net_cpu *info; | 1315 | struct tile_net_cpu *info; |
1290 | struct tile_netio_queue *queue; | 1316 | struct tile_netio_queue *queue; |
1291 | unsigned int irq; | 1317 | int result = 0; |
1292 | int i; | 1318 | int i; |
1319 | int dummy = 0; | ||
1293 | 1320 | ||
1294 | /* | 1321 | /* |
1295 | * First try to register just on the local CPU, and handle any | 1322 | * First try to register just on the local CPU, and handle any |
@@ -1307,42 +1334,52 @@ static int tile_net_open_inner(struct net_device *dev) | |||
1307 | /* | 1334 | /* |
1308 | * Now register everywhere else. If any registration fails, | 1335 | * Now register everywhere else. If any registration fails, |
1309 | * even for "link down" (which might not be possible), we | 1336 | * even for "link down" (which might not be possible), we |
1310 | * clean up using "tile_net_stop_aux()". | 1337 | * clean up using "tile_net_stop_aux()". Also, add all the |
1338 | * "napi" objects (sequentially, to protect "dev->napi_list"). | ||
1339 | * ISSUE: Only use "netif_napi_add()" for "network cpus"? | ||
1311 | */ | 1340 | */ |
1312 | smp_call_function(tile_net_register, (void *)dev, 1); | 1341 | smp_call_function(tile_net_register, (void *)dev, 1); |
1313 | for_each_online_cpu(i) { | 1342 | for_each_online_cpu(i) { |
1314 | if (!priv->cpu[i]->registered) { | 1343 | struct tile_net_cpu *info = priv->cpu[i]; |
1315 | tile_net_stop_aux(dev); | 1344 | if (info->registered) |
1316 | return -EAGAIN; | 1345 | netif_napi_add(dev, &info->napi, tile_net_poll, 64); |
1317 | } | 1346 | else |
1347 | result = -EAGAIN; | ||
1348 | } | ||
1349 | if (result != 0) { | ||
1350 | tile_net_stop_aux(dev); | ||
1351 | return result; | ||
1318 | } | 1352 | } |
1319 | 1353 | ||
1320 | queue = &info->queue; | 1354 | queue = &info->queue; |
1321 | 1355 | ||
1322 | /* | 1356 | if (priv->intr_id == 0) { |
1323 | * Set the device intr bit mask. | 1357 | unsigned int irq; |
1324 | * The tile_net_register above sets per tile __intr_id. | ||
1325 | */ | ||
1326 | priv->intr_id = queue->__system_part->__intr_id; | ||
1327 | BUG_ON(!priv->intr_id); | ||
1328 | |||
1329 | /* | ||
1330 | * Register the device interrupt handler. | ||
1331 | * The __ffs() function returns the index into the interrupt handler | ||
1332 | * table from the interrupt bit mask which should have one bit | ||
1333 | * and one bit only set. | ||
1334 | */ | ||
1335 | irq = __ffs(priv->intr_id); | ||
1336 | tile_irq_activate(irq, TILE_IRQ_PERCPU); | ||
1337 | BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt, | ||
1338 | 0, dev->name, (void *)dev) != 0); | ||
1339 | 1358 | ||
1340 | /* ISSUE: How could "priv->fully_opened" ever be "true" here? */ | 1359 | /* |
1341 | 1360 | * Acquire the irq allocated by the hypervisor. Every | |
1342 | if (!priv->fully_opened) { | 1361 | * queue gets the same irq. The "__intr_id" field is |
1362 | * "1 << irq", so we use "__ffs()" to extract "irq". | ||
1363 | */ | ||
1364 | priv->intr_id = queue->__system_part->__intr_id; | ||
1365 | BUG_ON(priv->intr_id == 0); | ||
1366 | irq = __ffs(priv->intr_id); | ||
1343 | 1367 | ||
1344 | int dummy = 0; | 1368 | /* |
1369 | * Register the ingress interrupt handler for this | ||
1370 | * device, permanently. | ||
1371 | * | ||
1372 | * We used to call "free_irq()" in "tile_net_stop()", | ||
1373 | * and then re-register the handler here every time, | ||
1374 | * but that caused DNP errors in "handle_IRQ_event()" | ||
1375 | * because "desc->action" was NULL. See bug 9143. | ||
1376 | */ | ||
1377 | tile_irq_activate(irq, TILE_IRQ_PERCPU); | ||
1378 | BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt, | ||
1379 | 0, dev->name, (void *)dev) != 0); | ||
1380 | } | ||
1345 | 1381 | ||
1382 | { | ||
1346 | /* Allocate initial buffers. */ | 1383 | /* Allocate initial buffers. */ |
1347 | 1384 | ||
1348 | int max_buffers = | 1385 | int max_buffers = |
@@ -1359,18 +1396,21 @@ static int tile_net_open_inner(struct net_device *dev) | |||
1359 | if (info->num_needed_small_buffers != 0 || | 1396 | if (info->num_needed_small_buffers != 0 || |
1360 | info->num_needed_large_buffers != 0) | 1397 | info->num_needed_large_buffers != 0) |
1361 | panic("Insufficient memory for buffer stack!"); | 1398 | panic("Insufficient memory for buffer stack!"); |
1399 | } | ||
1362 | 1400 | ||
1363 | /* Start LIPP/LEPP and activate "ingress" at the shim. */ | 1401 | /* We are about to be active. */ |
1364 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | 1402 | priv->active = true; |
1365 | sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0) | ||
1366 | panic("Failed to activate the LIPP Shim!\n"); | ||
1367 | 1403 | ||
1368 | priv->fully_opened = 1; | 1404 | /* Make sure "active" is visible to all tiles. */ |
1369 | } | 1405 | mb(); |
1370 | 1406 | ||
1371 | /* On each tile, enable the hypervisor to trigger interrupts. */ | 1407 | /* On each tile, enable NAPI and the ingress interrupt. */ |
1372 | /* ISSUE: Do this before starting LIPP/LEPP? */ | 1408 | on_each_cpu(tile_net_open_enable, (void *)dev, 1); |
1373 | on_each_cpu(tile_net_enable_intr, (void *)dev, 1); | 1409 | |
1410 | /* Start LIPP/LEPP and activate "ingress" at the shim. */ | ||
1411 | if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, | ||
1412 | sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0) | ||
1413 | panic("Failed to activate the LIPP Shim!\n"); | ||
1374 | 1414 | ||
1375 | /* Start our transmit queue. */ | 1415 | /* Start our transmit queue. */ |
1376 | netif_start_queue(dev); | 1416 | netif_start_queue(dev); |
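The open path now publishes priv->active with an explicit memory barrier before any tile is allowed to take an ingress interrupt or run NAPI, and only then starts LIPP/LEPP at the shim; the stop path clears the flag, again with a barrier, before tearing anything down. A sketch of that publication pattern in isolation (names hypothetical, the per-tile work reduced to comments):

#include <linux/types.h>
#include <asm/barrier.h>        /* mb() */

static bool example_active;     /* stands in for priv->active */

/* Open side: make the flag visible everywhere before any cpu can
 * process packets. */
static void example_go_active(void)
{
        example_active = true;
        mb();   /* pairs with the readers in the poll/irq paths */
        /* ... on_each_cpu(): napi_enable() + unmask ingress irq ... */
        /* ... start LIPP/LEPP at the shim ... */
}

/* Close side: flip the flag first, so pollers and interrupt handlers
 * start ignoring work before the queues are torn down. */
static void example_go_inactive(void)
{
        example_active = false;
        mb();
        /* ... on_each_cpu(): deregister + mask ingress irq ... */
}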
@@ -1396,9 +1436,9 @@ static void tile_net_open_retry(struct work_struct *w) | |||
1396 | * ourselves to try again later; otherwise, tell Linux we now have | 1436 | * ourselves to try again later; otherwise, tell Linux we now have |
1397 | * a working link. ISSUE: What if the return value is negative? | 1437 | * a working link. ISSUE: What if the return value is negative? |
1398 | */ | 1438 | */ |
1399 | if (tile_net_open_inner(priv->dev)) | 1439 | if (tile_net_open_inner(priv->dev) != 0) |
1400 | schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, | 1440 | schedule_delayed_work(&priv->retry_work, |
1401 | TILE_NET_RETRY_INTERVAL); | 1441 | TILE_NET_RETRY_INTERVAL); |
1402 | else | 1442 | else |
1403 | netif_carrier_on(priv->dev); | 1443 | netif_carrier_on(priv->dev); |
1404 | } | 1444 | } |
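The retry path now uses plain schedule_delayed_work() on the shared workqueue rather than pinning the work to "singlethread_cpu". A small reminder of the pattern, with hypothetical names and an arbitrary interval; INIT_DELAYED_WORK() is assumed to have been done at device-init time, and cancel_delayed_work_sync() on stop:

#include <linux/workqueue.h>
#include <linux/netdevice.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

#define EXAMPLE_RETRY_INTERVAL  (5 * HZ)

struct example_dev {
        struct net_device *netdev;
        struct delayed_work retry_work;
};

/* Stand-in for tile_net_open_inner(); nonzero means "not yet". */
static int example_try_open(struct net_device *dev)
{
        return -EAGAIN;
}

static void example_open_retry(struct work_struct *w)
{
        struct example_dev *ed =
                container_of(w, struct example_dev, retry_work.work);

        /* Keep retrying until the link comes up, then tell the stack. */
        if (example_try_open(ed->netdev) != 0)
                schedule_delayed_work(&ed->retry_work,
                                      EXAMPLE_RETRY_INTERVAL);
        else
                netif_carrier_on(ed->netdev);
}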
@@ -1412,8 +1452,8 @@ static void tile_net_open_retry(struct work_struct *w) | |||
1412 | * The open entry point is called when a network interface is made | 1452 | * The open entry point is called when a network interface is made |
1413 | * active by the system (IFF_UP). At this point all resources needed | 1453 | * active by the system (IFF_UP). At this point all resources needed |
1414 | * for transmit and receive operations are allocated, the interrupt | 1454 | * for transmit and receive operations are allocated, the interrupt |
1415 | * handler is registered with the OS, the watchdog timer is started, | 1455 | * handler is registered with the OS (if needed), the watchdog timer |
1416 | * and the stack is notified that the interface is ready. | 1456 | * is started, and the stack is notified that the interface is ready. |
1417 | * | 1457 | * |
1418 | * If the actual link is not available yet, then we tell Linux that | 1458 | * If the actual link is not available yet, then we tell Linux that |
1419 | * we have no carrier, and we keep checking until the link comes up. | 1459 | * we have no carrier, and we keep checking until the link comes up. |
@@ -1468,6 +1508,10 @@ static int tile_net_open(struct net_device *dev) | |||
1468 | #endif | 1508 | #endif |
1469 | 1509 | ||
1470 | priv->partly_opened = 1; | 1510 | priv->partly_opened = 1; |
1511 | |||
1512 | } else { | ||
1513 | /* FIXME: Is this possible? */ | ||
1514 | /* printk("Already partly opened.\n"); */ | ||
1471 | } | 1515 | } |
1472 | 1516 | ||
1473 | /* | 1517 | /* |
@@ -1487,57 +1531,17 @@ static int tile_net_open(struct net_device *dev) | |||
1487 | * and then remember to try again later. | 1531 | * and then remember to try again later. |
1488 | */ | 1532 | */ |
1489 | netif_carrier_off(dev); | 1533 | netif_carrier_off(dev); |
1490 | schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, | 1534 | schedule_delayed_work(&priv->retry_work, TILE_NET_RETRY_INTERVAL); |
1491 | TILE_NET_RETRY_INTERVAL); | ||
1492 | 1535 | ||
1493 | return 0; | 1536 | return 0; |
1494 | } | 1537 | } |
1495 | 1538 | ||
1496 | 1539 | ||
1497 | /* | 1540 | static int tile_net_drain_lipp_buffers(struct tile_net_priv *priv) |
1498 | * Disables a network interface. | ||
1499 | * | ||
1500 | * Returns 0, this is not allowed to fail. | ||
1501 | * | ||
1502 | * The close entry point is called when an interface is de-activated | ||
1503 | * by the OS. The hardware is still under the drivers control, but | ||
1504 | * needs to be disabled. A global MAC reset is issued to stop the | ||
1505 | * hardware, and all transmit and receive resources are freed. | ||
1506 | * | ||
1507 | * ISSUE: Can this can be called while "tile_net_poll()" is running? | ||
1508 | */ | ||
1509 | static int tile_net_stop(struct net_device *dev) | ||
1510 | { | 1541 | { |
1511 | struct tile_net_priv *priv = netdev_priv(dev); | 1542 | int n = 0; |
1512 | |||
1513 | bool pending = true; | ||
1514 | |||
1515 | PDEBUG("tile_net_stop()\n"); | ||
1516 | |||
1517 | /* ISSUE: Only needed if not yet fully open. */ | ||
1518 | cancel_delayed_work_sync(&priv->retry_work); | ||
1519 | |||
1520 | /* Can't transmit any more. */ | ||
1521 | netif_stop_queue(dev); | ||
1522 | |||
1523 | /* | ||
1524 | * Disable hypervisor interrupts on each tile. | ||
1525 | */ | ||
1526 | on_each_cpu(tile_net_disable_intr, (void *)dev, 1); | ||
1527 | |||
1528 | /* | ||
1529 | * Unregister the interrupt handler. | ||
1530 | * The __ffs() function returns the index into the interrupt handler | ||
1531 | * table from the interrupt bit mask which should have one bit | ||
1532 | * and one bit only set. | ||
1533 | */ | ||
1534 | if (priv->intr_id) | ||
1535 | free_irq(__ffs(priv->intr_id), dev); | ||
1536 | |||
1537 | /* | ||
1538 | * Drain all the LIPP buffers. | ||
1539 | */ | ||
1540 | 1543 | ||
1544 | /* Drain all the LIPP buffers. */ | ||
1541 | while (true) { | 1545 | while (true) { |
1542 | int buffer; | 1546 | int buffer; |
1543 | 1547 | ||
@@ -1560,43 +1564,105 @@ static int tile_net_stop(struct net_device *dev) | |||
1560 | 1564 | ||
1561 | kfree_skb(skb); | 1565 | kfree_skb(skb); |
1562 | } | 1566 | } |
1567 | |||
1568 | n++; | ||
1563 | } | 1569 | } |
1564 | 1570 | ||
1565 | /* Stop LIPP/LEPP. */ | 1571 | return n; |
1566 | tile_net_stop_aux(dev); | 1572 | } |
1567 | 1573 | ||
1568 | 1574 | ||
1569 | priv->fully_opened = 0; | 1575 | /* |
1576 | * Disables a network interface. | ||
1577 | * | ||
1578 | * Returns 0, this is not allowed to fail. | ||
1579 | * | ||
1580 | * The close entry point is called when an interface is de-activated | ||
1581 | * by the OS. The hardware is still under the drivers control, but | ||
1582 | * needs to be disabled. A global MAC reset is issued to stop the | ||
1583 | * hardware, and all transmit and receive resources are freed. | ||
1584 | * | ||
1585 | * ISSUE: How closely does "netif_running(dev)" mirror "priv->active"? | ||
1586 | * | ||
1587 | * Before we are called by "__dev_close()", "netif_running()" will | ||
1588 | * have been cleared, so no NEW calls to "tile_net_poll()" will be | ||
1589 | * made by "netpoll_poll_dev()". | ||
1590 | * | ||
1591 | * Often, this can cause some tiles to still have packets in their | ||
1592 | * queues, so we must call "tile_net_discard_packets()" later. | ||
1593 | * | ||
1594 | * Note that some other tile may still be INSIDE "tile_net_poll()", | ||
1595 | * and in fact, many will be, if there is heavy network load. | ||
1596 | * | ||
1597 | * Calling "on_each_cpu(tile_net_stop_disable, (void *)dev, 1)" when | ||
1598 | * any tile is still "napi_schedule()"'d will induce a horrible crash | ||
1599 | * when "msleep()" is called. This includes tiles which are inside | ||
1600 | * "tile_net_poll()" which have not yet called "napi_complete()". | ||
1601 | * | ||
1602 | * So, we must first try to wait long enough for other tiles to finish | ||
1603 | * with any current "tile_net_poll()" call, and, hopefully, to clear | ||
1604 | * the "scheduled" flag. ISSUE: It is unclear what happens to tiles | ||
1605 | * which have called "napi_schedule()" but which had not yet tried to | ||
1606 | * call "tile_net_poll()", or which exhausted their budget inside | ||
1607 | * "tile_net_poll()" just before this function was called. | ||
1608 | */ | ||
1609 | static int tile_net_stop(struct net_device *dev) | ||
1610 | { | ||
1611 | struct tile_net_priv *priv = netdev_priv(dev); | ||
1612 | |||
1613 | PDEBUG("tile_net_stop()\n"); | ||
1570 | 1614 | ||
1615 | /* Start discarding packets. */ | ||
1616 | priv->active = false; | ||
1617 | |||
1618 | /* Make sure "active" is visible to all tiles. */ | ||
1619 | mb(); | ||
1571 | 1620 | ||
1572 | /* | 1621 | /* |
1573 | * XXX: ISSUE: It appears that, in practice anyway, by the | 1622 | * On each tile, make sure no NEW packets get delivered, and |
1574 | * time we get here, there are no pending completions. | 1623 | * disable the ingress interrupt. |
1624 | * | ||
1625 | * Note that the ingress interrupt can fire AFTER this, | ||
1626 | * presumably due to packets which were recently delivered, | ||
1627 | * but it will have no effect. | ||
1575 | */ | 1628 | */ |
1576 | while (pending) { | 1629 | on_each_cpu(tile_net_deregister, (void *)dev, 1); |
1577 | 1630 | ||
1578 | struct sk_buff *olds[32]; | 1631 | /* Optimistically drain LIPP buffers. */ |
1579 | unsigned int wanted = 32; | 1632 | (void)tile_net_drain_lipp_buffers(priv); |
1580 | unsigned int i, nolds = 0; | ||
1581 | 1633 | ||
1582 | nolds = tile_net_lepp_grab_comps(dev, olds, | 1634 | /* ISSUE: Only needed if not yet fully open. */ |
1583 | wanted, &pending); | 1635 | cancel_delayed_work_sync(&priv->retry_work); |
1584 | 1636 | ||
1585 | /* ISSUE: We have never actually seen this debug spew. */ | 1637 | /* Can't transmit any more. */ |
1586 | if (nolds != 0) | 1638 | netif_stop_queue(dev); |
1587 | pr_info("During tile_net_stop(), grabbed %d comps.\n", | ||
1588 | nolds); | ||
1589 | 1639 | ||
1590 | for (i = 0; i < nolds; i++) | 1640 | /* Disable NAPI on each tile. */ |
1591 | kfree_skb(olds[i]); | 1641 | on_each_cpu(tile_net_stop_disable, (void *)dev, 1); |
1592 | } | 1642 | |
1643 | /* | ||
1644 | * Drain any remaining LIPP buffers. NOTE: This "printk()" | ||
1645 | * has never been observed, but in theory it could happen. | ||
1646 | */ | ||
1647 | if (tile_net_drain_lipp_buffers(priv) != 0) | ||
1648 | printk("Had to drain some extra LIPP buffers!\n"); | ||
1593 | 1649 | ||
1650 | /* Stop LIPP/LEPP. */ | ||
1651 | tile_net_stop_aux(dev); | ||
1652 | |||
1653 | /* | ||
1654 | * ISSUE: It appears that, in practice anyway, by the time we | ||
1655 | * get here, there are no pending completions, but just in case, | ||
1656 | * we free (all of) them anyway. | ||
1657 | */ | ||
1658 | while (tile_net_lepp_free_comps(dev, true)) | ||
1659 | /* loop */; | ||
1594 | 1660 | ||
1595 | /* Wipe the EPP queue. */ | 1661 | /* Wipe the EPP queue. */ |
1596 | memset(priv->epp_queue, 0, sizeof(lepp_queue_t)); | 1662 | memset(priv->eq, 0, sizeof(lepp_queue_t)); |
1597 | 1663 | ||
1598 | /* Evict the EPP queue. */ | 1664 | /* Evict the EPP queue. */ |
1599 | finv_buffer(priv->epp_queue, PAGE_SIZE); | 1665 | finv_buffer(priv->eq, EQ_SIZE); |
1600 | 1666 | ||
1601 | return 0; | 1667 | return 0; |
1602 | } | 1668 | } |
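The rewritten tile_net_stop() is mostly an ordering exercise. A commented outline of the sequence, mirroring the code above with the driver-specific calls reduced to comments (assuming an example_priv with an "active" flag, as in the driver):

static int example_stop(struct net_device *dev)
{
        struct example_priv *priv = netdev_priv(dev);

        priv->active = false;   /* 1. start discarding packets ... */
        mb();                   /*    ... visibly, on every tile */

        /* 2. on_each_cpu(): stop NEW deliveries, mask the ingress irq */
        /* 3. optimistically drain the LIPP buffers */
        /* 4. cancel the carrier-retry work */
        netif_stop_queue(dev);  /* 5. no more transmits */
        /* 6. on_each_cpu(): napi_disable(), once no tile should still
         *    be napi_schedule()'d */
        /* 7. drain any stragglers, stop LIPP/LEPP, free any pending
         *    completions, then wipe and evict the egress queue */
        return 0;
}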
@@ -1620,7 +1686,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, | |||
1620 | if (b_len != 0) { | 1686 | if (b_len != 0) { |
1621 | 1687 | ||
1622 | if (!hash_default) | 1688 | if (!hash_default) |
1623 | finv_buffer_remote(b_data, b_len); | 1689 | finv_buffer_remote(b_data, b_len, 0); |
1624 | 1690 | ||
1625 | cpa = __pa(b_data); | 1691 | cpa = __pa(b_data); |
1626 | frags[n].cpa_lo = cpa; | 1692 | frags[n].cpa_lo = cpa; |
@@ -1643,7 +1709,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, | |||
1643 | if (!hash_default) { | 1709 | if (!hash_default) { |
1644 | void *va = pfn_to_kaddr(pfn) + f->page_offset; | 1710 | void *va = pfn_to_kaddr(pfn) + f->page_offset; |
1645 | BUG_ON(PageHighMem(f->page)); | 1711 | BUG_ON(PageHighMem(f->page)); |
1646 | finv_buffer_remote(va, f->size); | 1712 | finv_buffer_remote(va, f->size, 0); |
1647 | } | 1713 | } |
1648 | 1714 | ||
1649 | cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; | 1715 | cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; |
@@ -1742,17 +1808,15 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) | |||
1742 | 1808 | ||
1743 | unsigned long irqflags; | 1809 | unsigned long irqflags; |
1744 | 1810 | ||
1745 | lepp_queue_t *eq = priv->epp_queue; | 1811 | lepp_queue_t *eq = priv->eq; |
1746 | 1812 | ||
1747 | struct sk_buff *olds[4]; | 1813 | struct sk_buff *olds[8]; |
1748 | unsigned int wanted = 4; | 1814 | unsigned int wanted = 8; |
1749 | unsigned int i, nolds = 0; | 1815 | unsigned int i, nolds = 0; |
1750 | 1816 | ||
1751 | unsigned int cmd_head, cmd_tail, cmd_next; | 1817 | unsigned int cmd_head, cmd_tail, cmd_next; |
1752 | unsigned int comp_tail; | 1818 | unsigned int comp_tail; |
1753 | 1819 | ||
1754 | unsigned int free_slots; | ||
1755 | |||
1756 | 1820 | ||
1757 | /* Paranoia. */ | 1821 | /* Paranoia. */ |
1758 | BUG_ON(skb->protocol != htons(ETH_P_IP)); | 1822 | BUG_ON(skb->protocol != htons(ETH_P_IP)); |
@@ -1780,34 +1844,32 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) | |||
1780 | 1844 | ||
1781 | /* Enqueue the command. */ | 1845 | /* Enqueue the command. */ |
1782 | 1846 | ||
1783 | spin_lock_irqsave(&priv->cmd_lock, irqflags); | 1847 | spin_lock_irqsave(&priv->eq_lock, irqflags); |
1784 | 1848 | ||
1785 | /* | 1849 | /* |
1786 | * Handle completions if needed to make room. | 1850 | * Handle completions if needed to make room. |
1787 | * HACK: Spin until there is sufficient room. | 1851 | * HACK: Spin until there is sufficient room. |
1788 | */ | 1852 | */ |
1789 | free_slots = lepp_num_free_comp_slots(eq); | 1853 | if (lepp_num_free_comp_slots(eq) == 0) { |
1790 | if (free_slots < 1) { | 1854 | nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0); |
1791 | spin: | 1855 | if (nolds == 0) { |
1792 | nolds += tile_net_lepp_grab_comps(dev, olds + nolds, | 1856 | busy: |
1793 | wanted - nolds, NULL); | 1857 | spin_unlock_irqrestore(&priv->eq_lock, irqflags); |
1794 | if (lepp_num_free_comp_slots(eq) < 1) | 1858 | return NETDEV_TX_BUSY; |
1795 | goto spin; | 1859 | } |
1796 | } | 1860 | } |
1797 | 1861 | ||
1798 | cmd_head = eq->cmd_head; | 1862 | cmd_head = eq->cmd_head; |
1799 | cmd_tail = eq->cmd_tail; | 1863 | cmd_tail = eq->cmd_tail; |
1800 | 1864 | ||
1801 | /* NOTE: The "gotos" below are untested. */ | ||
1802 | |||
1803 | /* Prepare to advance, detecting full queue. */ | 1865 | /* Prepare to advance, detecting full queue. */ |
1804 | cmd_next = cmd_tail + cmd_size; | 1866 | cmd_next = cmd_tail + cmd_size; |
1805 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) | 1867 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) |
1806 | goto spin; | 1868 | goto busy; |
1807 | if (cmd_next > LEPP_CMD_LIMIT) { | 1869 | if (cmd_next > LEPP_CMD_LIMIT) { |
1808 | cmd_next = 0; | 1870 | cmd_next = 0; |
1809 | if (cmd_next == cmd_head) | 1871 | if (cmd_next == cmd_head) |
1810 | goto spin; | 1872 | goto busy; |
1811 | } | 1873 | } |
1812 | 1874 | ||
1813 | /* Copy the command. */ | 1875 | /* Copy the command. */ |
@@ -1823,14 +1885,18 @@ spin: | |||
1823 | eq->comp_tail = comp_tail; | 1885 | eq->comp_tail = comp_tail; |
1824 | 1886 | ||
1825 | /* Flush before allowing LEPP to handle the command. */ | 1887 | /* Flush before allowing LEPP to handle the command. */ |
1888 | /* ISSUE: Is this the optimal location for the flush? */ | ||
1826 | __insn_mf(); | 1889 | __insn_mf(); |
1827 | 1890 | ||
1828 | eq->cmd_tail = cmd_tail; | 1891 | eq->cmd_tail = cmd_tail; |
1829 | 1892 | ||
1830 | spin_unlock_irqrestore(&priv->cmd_lock, irqflags); | 1893 | /* NOTE: Using "4" here is more efficient than "0" or "2", */ |
1831 | 1894 | /* and, strangely, more efficient than pre-checking the number */ | |
1895 | /* of available completions, and comparing it to 4. */ | ||
1832 | if (nolds == 0) | 1896 | if (nolds == 0) |
1833 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); | 1897 | nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 4); |
1898 | |||
1899 | spin_unlock_irqrestore(&priv->eq_lock, irqflags); | ||
1834 | 1900 | ||
1835 | /* Handle completions. */ | 1901 | /* Handle completions. */ |
1836 | for (i = 0; i < nolds; i++) | 1902 | for (i = 0; i < nolds; i++) |
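The egress path no longer spins inside the lock waiting for completion slots: if no room can be made, it drops the lock and returns NETDEV_TX_BUSY so the stack will requeue the skb. The full-ring test on the byte-indexed command ring is the subtle part; a sketch of just that check, with a hypothetical queue layout and EXAMPLE_CMD_LIMIT playing the role of LEPP_CMD_LIMIT:

#define EXAMPLE_CMD_LIMIT       4096    /* wrap point, in bytes */

struct example_eq {
        unsigned int cmd_head;          /* consumer offset (bytes) */
        unsigned int cmd_tail;          /* producer offset (bytes) */
};

/* Return the new producer offset after reserving "cmd_size" bytes at
 * the current tail, or -1 if the ring is full (the caller then
 * unlocks and returns NETDEV_TX_BUSY). */
static int example_ring_advance(struct example_eq *eq, unsigned int cmd_size)
{
        unsigned int cmd_head = eq->cmd_head;
        unsigned int cmd_tail = eq->cmd_tail;
        unsigned int cmd_next = cmd_tail + cmd_size;

        /* Full if the new tail would catch up with the head... */
        if (cmd_tail < cmd_head && cmd_next >= cmd_head)
                return -1;
        /* ...including when the tail wraps around onto the head. */
        if (cmd_next > EXAMPLE_CMD_LIMIT) {
                cmd_next = 0;
                if (cmd_next == cmd_head)
                        return -1;
        }
        return cmd_next;
}

Returning NETDEV_TX_BUSY here replaces the old "goto spin" retry loop, whose failure paths the removed comment admitted were untested.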
@@ -1870,10 +1936,10 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | |||
1870 | 1936 | ||
1871 | unsigned int num_frags; | 1937 | unsigned int num_frags; |
1872 | 1938 | ||
1873 | lepp_queue_t *eq = priv->epp_queue; | 1939 | lepp_queue_t *eq = priv->eq; |
1874 | 1940 | ||
1875 | struct sk_buff *olds[4]; | 1941 | struct sk_buff *olds[8]; |
1876 | unsigned int wanted = 4; | 1942 | unsigned int wanted = 8; |
1877 | unsigned int i, nolds = 0; | 1943 | unsigned int i, nolds = 0; |
1878 | 1944 | ||
1879 | unsigned int cmd_size = sizeof(lepp_cmd_t); | 1945 | unsigned int cmd_size = sizeof(lepp_cmd_t); |
@@ -1883,8 +1949,6 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | |||
1883 | 1949 | ||
1884 | lepp_cmd_t cmds[LEPP_MAX_FRAGS]; | 1950 | lepp_cmd_t cmds[LEPP_MAX_FRAGS]; |
1885 | 1951 | ||
1886 | unsigned int free_slots; | ||
1887 | |||
1888 | 1952 | ||
1889 | /* | 1953 | /* |
1890 | * This is paranoia, since we think that if the link doesn't come | 1954 | * This is paranoia, since we think that if the link doesn't come |
@@ -1905,7 +1969,8 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | |||
1905 | if (hash_default) { | 1969 | if (hash_default) { |
1906 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data); | 1970 | HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data); |
1907 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) | 1971 | if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) |
1908 | panic("Non-coherent egress buffer!"); | 1972 | panic("Non-HFH egress buffer! VA=%p Mode=%d PTE=%llx", |
1973 | data, hv_pte_get_mode(pte), hv_pte_val(pte)); | ||
1909 | } | 1974 | } |
1910 | #endif | 1975 | #endif |
1911 | #endif | 1976 | #endif |
@@ -1958,37 +2023,35 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) | |||
1958 | 2023 | ||
1959 | /* Enqueue the commands. */ | 2024 | /* Enqueue the commands. */ |
1960 | 2025 | ||
1961 | spin_lock_irqsave(&priv->cmd_lock, irqflags); | 2026 | spin_lock_irqsave(&priv->eq_lock, irqflags); |
1962 | 2027 | ||
1963 | /* | 2028 | /* |
1964 | * Handle completions if needed to make room. | 2029 | * Handle completions if needed to make room. |
1965 | * HACK: Spin until there is sufficient room. | 2030 | * HACK: Spin until there is sufficient room. |
1966 | */ | 2031 | */ |
1967 | free_slots = lepp_num_free_comp_slots(eq); | 2032 | if (lepp_num_free_comp_slots(eq) == 0) { |
1968 | if (free_slots < 1) { | 2033 | nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0); |
1969 | spin: | 2034 | if (nolds == 0) { |
1970 | nolds += tile_net_lepp_grab_comps(dev, olds + nolds, | 2035 | busy: |
1971 | wanted - nolds, NULL); | 2036 | spin_unlock_irqrestore(&priv->eq_lock, irqflags); |
1972 | if (lepp_num_free_comp_slots(eq) < 1) | 2037 | return NETDEV_TX_BUSY; |
1973 | goto spin; | 2038 | } |
1974 | } | 2039 | } |
1975 | 2040 | ||
1976 | cmd_head = eq->cmd_head; | 2041 | cmd_head = eq->cmd_head; |
1977 | cmd_tail = eq->cmd_tail; | 2042 | cmd_tail = eq->cmd_tail; |
1978 | 2043 | ||
1979 | /* NOTE: The "gotos" below are untested. */ | ||
1980 | |||
1981 | /* Copy the commands, or fail. */ | 2044 | /* Copy the commands, or fail. */ |
1982 | for (i = 0; i < num_frags; i++) { | 2045 | for (i = 0; i < num_frags; i++) { |
1983 | 2046 | ||
1984 | /* Prepare to advance, detecting full queue. */ | 2047 | /* Prepare to advance, detecting full queue. */ |
1985 | cmd_next = cmd_tail + cmd_size; | 2048 | cmd_next = cmd_tail + cmd_size; |
1986 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) | 2049 | if (cmd_tail < cmd_head && cmd_next >= cmd_head) |
1987 | goto spin; | 2050 | goto busy; |
1988 | if (cmd_next > LEPP_CMD_LIMIT) { | 2051 | if (cmd_next > LEPP_CMD_LIMIT) { |
1989 | cmd_next = 0; | 2052 | cmd_next = 0; |
1990 | if (cmd_next == cmd_head) | 2053 | if (cmd_next == cmd_head) |
1991 | goto spin; | 2054 | goto busy; |
1992 | } | 2055 | } |
1993 | 2056 | ||
1994 | /* Copy the command. */ | 2057 | /* Copy the command. */ |
@@ -2005,14 +2068,18 @@ spin: | |||
2005 | eq->comp_tail = comp_tail; | 2068 | eq->comp_tail = comp_tail; |
2006 | 2069 | ||
2007 | /* Flush before allowing LEPP to handle the command. */ | 2070 | /* Flush before allowing LEPP to handle the command. */ |
2071 | /* ISSUE: Is this the optimal location for the flush? */ | ||
2008 | __insn_mf(); | 2072 | __insn_mf(); |
2009 | 2073 | ||
2010 | eq->cmd_tail = cmd_tail; | 2074 | eq->cmd_tail = cmd_tail; |
2011 | 2075 | ||
2012 | spin_unlock_irqrestore(&priv->cmd_lock, irqflags); | 2076 | /* NOTE: Using "4" here is more efficient than "0" or "2", */ |
2013 | 2077 | /* and, strangely, more efficient than pre-checking the number */ | |
2078 | /* of available completions, and comparing it to 4. */ | ||
2014 | if (nolds == 0) | 2079 | if (nolds == 0) |
2015 | nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); | 2080 | nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 4); |
2081 | |||
2082 | spin_unlock_irqrestore(&priv->eq_lock, irqflags); | ||
2016 | 2083 | ||
2017 | /* Handle completions. */ | 2084 | /* Handle completions. */ |
2018 | for (i = 0; i < nolds; i++) | 2085 | for (i = 0; i < nolds; i++) |
@@ -2261,7 +2328,6 @@ static struct net_device *tile_net_dev_init(const char *name) | |||
2261 | int ret; | 2328 | int ret; |
2262 | struct net_device *dev; | 2329 | struct net_device *dev; |
2263 | struct tile_net_priv *priv; | 2330 | struct tile_net_priv *priv; |
2264 | struct page *page; | ||
2265 | 2331 | ||
2266 | /* | 2332 | /* |
2267 | * Allocate the device structure. This allocates "priv", calls | 2333 | * Allocate the device structure. This allocates "priv", calls |
@@ -2285,23 +2351,21 @@ static struct net_device *tile_net_dev_init(const char *name) | |||
2285 | 2351 | ||
2286 | INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry); | 2352 | INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry); |
2287 | 2353 | ||
2288 | spin_lock_init(&priv->cmd_lock); | 2354 | spin_lock_init(&priv->eq_lock); |
2289 | spin_lock_init(&priv->comp_lock); | ||
2290 | 2355 | ||
2291 | /* Allocate "epp_queue". */ | 2356 | /* Allocate "eq". */ |
2292 | BUG_ON(get_order(sizeof(lepp_queue_t)) != 0); | 2357 | priv->eq_pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, EQ_ORDER); |
2293 | page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); | 2358 | if (!priv->eq_pages) { |
2294 | if (!page) { | ||
2295 | free_netdev(dev); | 2359 | free_netdev(dev); |
2296 | return NULL; | 2360 | return NULL; |
2297 | } | 2361 | } |
2298 | priv->epp_queue = page_address(page); | 2362 | priv->eq = page_address(priv->eq_pages); |
2299 | 2363 | ||
2300 | /* Register the network device. */ | 2364 | /* Register the network device. */ |
2301 | ret = register_netdev(dev); | 2365 | ret = register_netdev(dev); |
2302 | if (ret) { | 2366 | if (ret) { |
2303 | pr_err("register_netdev %s failed %d\n", dev->name, ret); | 2367 | pr_err("register_netdev %s failed %d\n", dev->name, ret); |
2304 | free_page((unsigned long)priv->epp_queue); | 2368 | __free_pages(priv->eq_pages, EQ_ORDER); |
2305 | free_netdev(dev); | 2369 | free_netdev(dev); |
2306 | return NULL; | 2370 | return NULL; |
2307 | } | 2371 | } |
@@ -2310,7 +2374,7 @@ static struct net_device *tile_net_dev_init(const char *name) | |||
2310 | ret = tile_net_get_mac(dev); | 2374 | ret = tile_net_get_mac(dev); |
2311 | if (ret < 0) { | 2375 | if (ret < 0) { |
2312 | unregister_netdev(dev); | 2376 | unregister_netdev(dev); |
2313 | free_page((unsigned long)priv->epp_queue); | 2377 | __free_pages(priv->eq_pages, EQ_ORDER); |
2314 | free_netdev(dev); | 2378 | free_netdev(dev); |
2315 | return NULL; | 2379 | return NULL; |
2316 | } | 2380 | } |
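The egress queue ("eq") is no longer assumed to fit in one page: it is allocated as EQ_ORDER pages and released with __free_pages() instead of free_page(). The EQ_SIZE/EQ_ORDER definitions are not visible in this hunk; presumably they are derived from sizeof(lepp_queue_t) roughly as sketched below (an assumption, not quoted from the driver; lepp_queue_t comes from the driver's LEPP interface header):

#include <linux/gfp.h>
#include <linux/mm.h>           /* get_order(), page_address() */

/* Assumed definitions -- the real driver defines these elsewhere. */
#define EQ_SIZE         sizeof(lepp_queue_t)
#define EQ_ORDER        get_order(EQ_SIZE)

static lepp_queue_t *example_alloc_eq(struct page **pages_out)
{
        struct page *pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, EQ_ORDER);

        if (pages == NULL)
                return NULL;
        *pages_out = pages;             /* kept for __free_pages() later */
        return page_address(pages);     /* physically contiguous block */
}

static void example_free_eq(struct page *pages)
{
        __free_pages(pages, EQ_ORDER);
}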
@@ -2321,6 +2385,9 @@ static struct net_device *tile_net_dev_init(const char *name) | |||
2321 | 2385 | ||
2322 | /* | 2386 | /* |
2323 | * Module cleanup. | 2387 | * Module cleanup. |
2388 | * | ||
2389 | * FIXME: If compiled as a module, this module cannot be "unloaded", | ||
2390 | * because the "ingress interrupt handler" is registered permanently. | ||
2324 | */ | 2391 | */ |
2325 | static void tile_net_cleanup(void) | 2392 | static void tile_net_cleanup(void) |
2326 | { | 2393 | { |
@@ -2331,8 +2398,8 @@ static void tile_net_cleanup(void) | |||
2331 | struct net_device *dev = tile_net_devs[i]; | 2398 | struct net_device *dev = tile_net_devs[i]; |
2332 | struct tile_net_priv *priv = netdev_priv(dev); | 2399 | struct tile_net_priv *priv = netdev_priv(dev); |
2333 | unregister_netdev(dev); | 2400 | unregister_netdev(dev); |
2334 | finv_buffer(priv->epp_queue, PAGE_SIZE); | 2401 | finv_buffer(priv->eq, EQ_SIZE); |
2335 | free_page((unsigned long)priv->epp_queue); | 2402 | __free_pages(priv->eq_pages, EQ_ORDER); |
2336 | free_netdev(dev); | 2403 | free_netdev(dev); |
2337 | } | 2404 | } |
2338 | } | 2405 | } |
@@ -2355,7 +2422,12 @@ static int tile_net_init_module(void) | |||
2355 | } | 2422 | } |
2356 | 2423 | ||
2357 | 2424 | ||
2425 | module_init(tile_net_init_module); | ||
2426 | module_exit(tile_net_cleanup); | ||
2427 | |||
2428 | |||
2358 | #ifndef MODULE | 2429 | #ifndef MODULE |
2430 | |||
2359 | /* | 2431 | /* |
2360 | * The "network_cpus" boot argument specifies the cpus that are dedicated | 2432 | * The "network_cpus" boot argument specifies the cpus that are dedicated |
2361 | * to handle ingress packets. | 2433 | * to handle ingress packets. |
@@ -2391,8 +2463,5 @@ static int __init network_cpus_setup(char *str) | |||
2391 | return 0; | 2463 | return 0; |
2392 | } | 2464 | } |
2393 | __setup("network_cpus=", network_cpus_setup); | 2465 | __setup("network_cpus=", network_cpus_setup); |
2394 | #endif | ||
2395 | |||
2396 | 2466 | ||
2397 | module_init(tile_net_init_module); | 2467 | #endif |
2398 | module_exit(tile_net_cleanup); | ||
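Finally, module_init()/module_exit() now sit above the built-in-only block, and the "network_cpus=" boot argument stays under #ifndef MODULE, since __setup() handlers only exist for built-in code. A minimal sketch of such a handler, with a hypothetical option name and mask, and the same "return 0" convention as the handler above:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/cpumask.h>

#ifndef MODULE

static struct cpumask example_cpus_map;

static int __init example_cpus_setup(char *str)
{
        /* Accept a cpulist such as "1-3,5". */
        if (cpulist_parse(str, &example_cpus_map) != 0)
                pr_warning("example_cpus=%s: malformed cpu list\n", str);
        return 0;
}
__setup("example_cpus=", example_cpus_setup);

#endif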