-rw-r--r--  MAINTAINERS                              |    1
-rw-r--r--  README                                   |    2
-rw-r--r--  arch/tile/Kconfig                        |   39
-rw-r--r--  arch/tile/include/arch/interrupts_32.h   |    9
-rw-r--r--  arch/tile/include/arch/sim.h             |   48
-rw-r--r--  arch/tile/include/arch/sim_def.h         |    3
-rw-r--r--  arch/tile/include/asm/Kbuild             |    1
-rw-r--r--  arch/tile/include/asm/atomic.h           |    2
-rw-r--r--  arch/tile/include/asm/bitops_32.h        |    2
-rw-r--r--  arch/tile/include/asm/cache.h            |    2
-rw-r--r--  arch/tile/include/asm/cacheflush.h       |   55
-rw-r--r--  arch/tile/include/asm/edac.h             |   29
-rw-r--r--  arch/tile/include/asm/hugetlb.h          |    2
-rw-r--r--  arch/tile/include/asm/irqflags.h         |   18
-rw-r--r--  arch/tile/include/asm/page.h             |   34
-rw-r--r--  arch/tile/include/asm/pgalloc.h          |    7
-rw-r--r--  arch/tile/include/asm/pgtable.h          |   31
-rw-r--r--  arch/tile/include/asm/pgtable_32.h       |    8
-rw-r--r--  arch/tile/include/asm/processor.h        |    1
-rw-r--r--  arch/tile/include/asm/ptrace.h           |    3
-rw-r--r--  arch/tile/include/asm/spinlock_32.h      |   83
-rw-r--r--  arch/tile/include/asm/stack.h            |    3
-rw-r--r--  arch/tile/include/asm/system.h           |   19
-rw-r--r--  arch/tile/include/asm/thread_info.h      |    1
-rw-r--r--  arch/tile/include/asm/timex.h            |    3
-rw-r--r--  arch/tile/include/hv/drv_mshim_intf.h    |   50
-rw-r--r--  arch/tile/include/hv/hypervisor.h        |   46
-rw-r--r--  arch/tile/kernel/entry.S                 |   22
-rw-r--r--  arch/tile/kernel/head_32.S               |   15
-rw-r--r--  arch/tile/kernel/intvec_32.S             |   74
-rw-r--r--  arch/tile/kernel/irq.c                   |   38
-rw-r--r--  arch/tile/kernel/machine_kexec.c         |    7
-rw-r--r--  arch/tile/kernel/pci-dma.c               |   38
-rw-r--r--  arch/tile/kernel/process.c               |    6
-rw-r--r--  arch/tile/kernel/setup.c                 |   20
-rw-r--r--  arch/tile/kernel/single_step.c           |   21
-rw-r--r--  arch/tile/kernel/smp.c                   |   33
-rw-r--r--  arch/tile/kernel/stack.c                 |   28
-rw-r--r--  arch/tile/kernel/time.c                  |   10
-rw-r--r--  arch/tile/kernel/vmlinux.lds.S           |    5
-rw-r--r--  arch/tile/lib/Makefile                   |    5
-rw-r--r--  arch/tile/lib/atomic_32.c                |    5
-rw-r--r--  arch/tile/lib/atomic_asm_32.S            |    2
-rw-r--r--  arch/tile/lib/cacheflush.c               |  102
-rw-r--r--  arch/tile/lib/delay.c                    |   21
-rw-r--r--  arch/tile/lib/exports.c                  |   10
-rw-r--r--  arch/tile/lib/mb_incoherent.S            |   34
-rw-r--r--  arch/tile/lib/memcpy_tile64.c            |    4
-rw-r--r--  arch/tile/lib/spinlock_32.c              |  161
-rw-r--r--  arch/tile/mm/fault.c                     |    8
-rw-r--r--  arch/tile/mm/homecache.c                 |   38
-rw-r--r--  arch/tile/mm/init.c                      |   34
-rw-r--r--  arch/tile/mm/migrate_32.S                |    1
-rw-r--r--  arch/tile/mm/pgtable.c                   |  181
-rw-r--r--  drivers/edac/Kconfig                     |   10
-rw-r--r--  drivers/edac/Makefile                    |    1
-rw-r--r--  drivers/edac/tile_edac.c                 |  254
-rw-r--r--  drivers/net/tile/tilepro.c               |  965
58 files changed, 1648 insertions(+), 1007 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 6e696bd37cf9..7d6e12dbdffa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6127,6 +6127,7 @@ S:	Supported
 F:	arch/tile/
 F:	drivers/tty/hvc/hvc_tile.c
 F:	drivers/net/tile/
+F:	drivers/edac/tile_edac.c

 TLAN NETWORK DRIVER
 M:	Samuel Chessman <chessman@tux.org>
diff --git a/README b/README
index 1b81d2836873..8510017a3576 100644
--- a/README
+++ b/README
@@ -24,7 +24,7 @@ ON WHAT HARDWARE DOES IT RUN?
 today Linux also runs on (at least) the Compaq Alpha AXP, Sun SPARC and
 UltraSPARC, Motorola 68000, PowerPC, PowerPC64, ARM, Hitachi SuperH, Cell,
 IBM S/390, MIPS, HP PA-RISC, Intel IA-64, DEC VAX, AMD x86-64, AXIS CRIS,
-Xtensa, AVR32 and Renesas M32R architectures.
+Xtensa, Tilera TILE, AVR32 and Renesas M32R architectures.

 Linux is easily portable to most general-purpose 32- or 64-bit architectures
 as long as they have a paged memory management unit (PMMU) and a port of the
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 08948e4e1503..f3b78701c219 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -1,5 +1,5 @@
 # For a description of the syntax of this configuration file,
-# see Documentation/kbuild/config-language.txt.
+# see Documentation/kbuild/kconfig-language.txt.

 config TILE
 	def_bool y
@@ -11,17 +11,18 @@ config TILE
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
+	select GENERIC_HARDIRQS_NO_DEPRECATED

 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
 #	select HAVE_OPTPROBES
 #	select HAVE_REGS_AND_STACK_ACCESS_API
 #	select HAVE_HW_BREAKPOINT
 #	select PERF_EVENTS
 #	select HAVE_USER_RETURN_NOTIFIER
 #	config NO_BOOTMEM
 #	config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 #	config HUGETLB_PAGE_SIZE_VARIABLE

 config MMU
 	def_bool y
@@ -39,7 +40,7 @@ config HAVE_SETUP_PER_CPU_AREA
 	def_bool y

 config NEED_PER_CPU_PAGE_FIRST_CHUNK
 	def_bool y

 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
@@ -201,12 +202,6 @@ config NODES_SHIFT
 	  By default, 2, i.e. 2^2 == 4 DDR2 controllers.
 	  In a system with more controllers, this value should be raised.

-# Need 16MB areas to enable hugetlb
-# See build-time check in arch/tile/mm/init.c.
-config FORCE_MAX_ZONEORDER
-	int
-	default 9
-
 choice
 	depends on !TILEGX
 	prompt "Memory split" if EXPERT
@@ -233,8 +228,12 @@ choice
 		bool "3.5G/0.5G user/kernel split"
 	config VMSPLIT_3G
 		bool "3G/1G user/kernel split"
-	config VMSPLIT_3G_OPT
-		bool "3G/1G user/kernel split (for full 1G low memory)"
+	config VMSPLIT_2_75G
+		bool "2.75G/1.25G user/kernel split (for full 1G low memory)"
+	config VMSPLIT_2_5G
+		bool "2.5G/1.5G user/kernel split"
+	config VMSPLIT_2_25G
+		bool "2.25G/1.75G user/kernel split"
 	config VMSPLIT_2G
 		bool "2G/2G user/kernel split"
 	config VMSPLIT_1G
@@ -245,7 +244,9 @@ config PAGE_OFFSET
 	hex
 	default 0xF0000000 if VMSPLIT_3_75G
 	default 0xE0000000 if VMSPLIT_3_5G
-	default 0xB0000000 if VMSPLIT_3G_OPT
+	default 0xB0000000 if VMSPLIT_2_75G
+	default 0xA0000000 if VMSPLIT_2_5G
+	default 0x90000000 if VMSPLIT_2_25G
 	default 0x80000000 if VMSPLIT_2G
 	default 0x40000000 if VMSPLIT_1G
 	default 0xC0000000
diff --git a/arch/tile/include/arch/interrupts_32.h b/arch/tile/include/arch/interrupts_32.h
index 9d0bfa7e59be..96b5710505b6 100644
--- a/arch/tile/include/arch/interrupts_32.h
+++ b/arch/tile/include/arch/interrupts_32.h
@@ -16,10 +16,11 @@
 #define __ARCH_INTERRUPTS_H__

 /** Mask for an interrupt. */
-#ifdef __ASSEMBLER__
 /* Note: must handle breaking interrupts into high and low words manually. */
-#define INT_MASK(intno) (1 << (intno))
-#else
+#define INT_MASK_LO(intno) (1 << (intno))
+#define INT_MASK_HI(intno) (1 << ((intno) - 32))
+
+#ifndef __ASSEMBLER__
 #define INT_MASK(intno) (1ULL << (intno))
 #endif

@@ -89,6 +90,7 @@

 #define NUM_INTERRUPTS 49

+#ifndef __ASSEMBLER__
 #define QUEUED_INTERRUPTS ( \
 	INT_MASK(INT_MEM_ERROR) | \
 	INT_MASK(INT_DMATLB_MISS) | \
@@ -301,4 +303,5 @@
 	INT_MASK(INT_DOUBLE_FAULT) | \
 	INT_MASK(INT_AUX_PERF_COUNT) | \
 	0)
+#endif /* !__ASSEMBLER__ */
 #endif /* !__ARCH_INTERRUPTS_H__ */
diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/arch/sim.h
index 74b7c1624d34..e54b7b0527f3 100644
--- a/arch/tile/include/arch/sim.h
+++ b/arch/tile/include/arch/sim.h
@@ -152,16 +152,33 @@ sim_dump(unsigned int mask)
 /**
  * Print a string to the simulator stdout.
  *
- * @param str The string to be written; a newline is automatically added.
+ * @param str The string to be written.
+ */
+static __inline void
+sim_print(const char* str)
+{
+  for ( ; *str != '\0'; str++)
+  {
+    __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+                 (*str << _SIM_CONTROL_OPERATOR_BITS));
+  }
+  __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
+               (SIM_PUTC_FLUSH_BINARY << _SIM_CONTROL_OPERATOR_BITS));
+}
+
+
+/**
+ * Print a string to the simulator stdout.
+ *
+ * @param str The string to be written (a newline is automatically added).
  */
 static __inline void
 sim_print_string(const char* str)
 {
-  int i;
-  for (i = 0; str[i] != 0; i++)
+  for ( ; *str != '\0'; str++)
   {
     __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
-                 (str[i] << _SIM_CONTROL_OPERATOR_BITS));
+                 (*str << _SIM_CONTROL_OPERATOR_BITS));
   }
   __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC |
                (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS));
@@ -203,7 +220,7 @@ sim_command(const char* str)
  * we are passing to the simulator are actually valid in the registers
  * (i.e. returned from memory) prior to the SIM_CONTROL spr.
  */
-static __inline int _sim_syscall0(int val)
+static __inline long _sim_syscall0(int val)
 {
   long result;
   __asm__ __volatile__ ("mtspr SIM_CONTROL, r0"
@@ -211,7 +228,7 @@ static __inline int _sim_syscall0(int val)
   return result;
 }

-static __inline int _sim_syscall1(int val, long arg1)
+static __inline long _sim_syscall1(int val, long arg1)
 {
   long result;
   __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }"
@@ -219,7 +236,7 @@ static __inline int _sim_syscall1(int val, long arg1)
   return result;
 }

-static __inline int _sim_syscall2(int val, long arg1, long arg2)
+static __inline long _sim_syscall2(int val, long arg1, long arg2)
 {
   long result;
   __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }"
@@ -233,7 +250,7 @@ static __inline int _sim_syscall2(int val, long arg1, long arg2)
    the register values for arguments 3 and up may still be in flight
    to the core from a stack frame reload. */

-static __inline int _sim_syscall3(int val, long arg1, long arg2, long arg3)
+static __inline long _sim_syscall3(int val, long arg1, long arg2, long arg3)
 {
   long result;
   __asm__ __volatile__ ("{ and zero, r3, r3 };"
@@ -244,7 +261,7 @@ static __inline int _sim_syscall3(int val, long arg1, long arg2, long arg3)
   return result;
 }

-static __inline int _sim_syscall4(int val, long arg1, long arg2, long arg3,
+static __inline long _sim_syscall4(int val, long arg1, long arg2, long arg3,
                                   long arg4)
 {
   long result;
@@ -256,7 +273,7 @@ static __inline int _sim_syscall4(int val, long arg1, long arg2, long arg3,
   return result;
 }

-static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3,
+static __inline long _sim_syscall5(int val, long arg1, long arg2, long arg3,
                                   long arg4, long arg5)
 {
   long result;
@@ -268,7 +285,6 @@ static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3,
   return result;
 }

-
 /**
  * Make a special syscall to the simulator itself, if running under
  * simulation. This is used as the implementation of other functions
@@ -281,7 +297,8 @@ static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3,
  */
 #define _sim_syscall(syscall_num, nr, args...) \
   _sim_syscall##nr( \
-    ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, args)
+    ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, \
+    ##args)


 /* Values for the "access_mask" parameters below. */
@@ -365,6 +382,13 @@ sim_validate_lines_evicted(unsigned long long pa, unsigned long length)
 }


+/* Return the current CPU speed in cycles per second. */
+static __inline long
+sim_query_cpu_speed(void)
+{
+  return _sim_syscall(SIM_SYSCALL_QUERY_CPU_SPEED, 0);
+}
+
 #endif /* !__DOXYGEN__ */


diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h
index 7a17082c3773..4b44a2b6a09a 100644
--- a/arch/tile/include/arch/sim_def.h
+++ b/arch/tile/include/arch/sim_def.h
@@ -243,6 +243,9 @@
  */
 #define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5

+/** Syscall number for sim_query_cpu_speed(). */
+#define SIM_SYSCALL_QUERY_CPU_SPEED 6
+

 /*
  * Bit masks which can be shifted by 8, combined with
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 3b8f55b82dee..849ab2fa1f5c 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm

 header-y += ucontext.h
+header-y += hardwall.h
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index b8c49f98a44c..75a16028a952 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -32,7 +32,7 @@
  */
 static inline int atomic_read(const atomic_t *v)
 {
-	return v->counter;
+	return ACCESS_ONCE(v->counter);
 }

 /**
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index 7a93c001ac19..2638be51a164 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -122,7 +122,7 @@ static inline int test_and_change_bit(unsigned nr,
 	return (_atomic_xor(addr, mask) & mask) != 0;
 }

-/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic.h>. */
+/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */
 #define smp_mb__before_clear_bit()	smp_mb()
 #define smp_mb__after_clear_bit()	do {} while (0)

diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 08a2815b5e4e..392e5333dd8b 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -40,7 +40,7 @@
 #define INTERNODE_CACHE_BYTES L2_CACHE_BYTES

 /* Group together read-mostly things to avoid cache false sharing */
-#define __read_mostly __attribute__((__section__(".data.read_mostly")))
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))

 /*
  * Attribute for data that is kept read/write coherent until the end of
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index 14a3f8556ace..12fb0fb330ee 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -138,55 +138,12 @@ static inline void finv_buffer(void *buffer, size_t size)
 }

 /*
- * Flush & invalidate a VA range that is homed remotely on a single core,
- * waiting until the memory controller holds the flushed values.
+ * Flush and invalidate a VA range that is homed remotely, waiting
+ * until the memory controller holds the flushed values.  If "hfh" is
+ * true, we will do a more expensive flush involving additional loads
+ * to make sure we have touched all the possible home cpus of a buffer
+ * that is homed with "hash for home".
  */
-static inline void finv_buffer_remote(void *buffer, size_t size)
-{
-	char *p;
-	int i;
-
-	/*
-	 * Flush and invalidate the buffer out of the local L1/L2
-	 * and request the home cache to flush and invalidate as well.
-	 */
-	__finv_buffer(buffer, size);
-
-	/*
-	 * Wait for the home cache to acknowledge that it has processed
-	 * all the flush-and-invalidate requests.  This does not mean
-	 * that the flushed data has reached the memory controller yet,
-	 * but it does mean the home cache is processing the flushes.
-	 */
-	__insn_mf();
-
-	/*
-	 * Issue a load to the last cache line, which can't complete
-	 * until all the previously-issued flushes to the same memory
-	 * controller have also completed.  If we weren't striping
-	 * memory, that one load would be sufficient, but since we may
-	 * be, we also need to back up to the last load issued to
-	 * another memory controller, which would be the point where
-	 * we crossed an 8KB boundary (the granularity of striping
-	 * across memory controllers).  Keep backing up and doing this
-	 * until we are before the beginning of the buffer, or have
-	 * hit all the controllers.
-	 */
-	for (i = 0, p = (char *)buffer + size - 1;
-	     i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer;
-	     ++i) {
-		const unsigned long STRIPE_WIDTH = 8192;
-
-		/* Force a load instruction to issue. */
-		*(volatile char *)p;
-
-		/* Jump to end of previous stripe. */
-		p -= STRIPE_WIDTH;
-		p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1));
-	}
-
-	/* Wait for the loads (and thus flushes) to have completed. */
-	__insn_mf();
-}
+void finv_buffer_remote(void *buffer, size_t size, int hfh);

 #endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/edac.h b/arch/tile/include/asm/edac.h
new file mode 100644
index 000000000000..87fc83eeaffd
--- /dev/null
+++ b/arch/tile/include/asm/edac.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_EDAC_H
+#define _ASM_TILE_EDAC_H
+
+/* ECC atomic, DMA, SMP and interrupt safe scrub function */
+
+static inline void atomic_scrub(void *va, u32 size)
+{
+	/*
+	 * These is nothing to be done here because CE is
+	 * corrected by the mshim.
+	 */
+	return;
+}
+
+#endif /* _ASM_TILE_EDAC_H */
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index 0521c277bbde..d396d1805163 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -54,7 +54,7 @@ static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 				   pte_t *ptep, pte_t pte)
 {
-	set_pte_order(ptep, pte, HUGETLB_PAGE_ORDER);
+	set_pte(ptep, pte);
 }

 static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index 641e4ff3d805..5db0ce54284d 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -18,6 +18,8 @@
 #include <arch/interrupts.h>
 #include <arch/chip.h>

+#if !defined(__tilegx__) && defined(__ASSEMBLY__)
+
 /*
  * The set of interrupts we want to allow when interrupts are nominally
  * disabled.  The remainder are effectively "NMI" interrupts from
@@ -25,6 +27,16 @@
  * interrupts (aka "non-queued") are not blocked by the mask in any case.
  */
 #if CHIP_HAS_AUX_PERF_COUNTERS()
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+	(~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
+#else
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+	(~(INT_MASK_HI(INT_PERF_COUNT)))
+#endif
+
+#else
+
+#if CHIP_HAS_AUX_PERF_COUNTERS()
 #define LINUX_MASKABLE_INTERRUPTS \
 	(~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
 #else
@@ -32,6 +44,8 @@
 	(~(INT_MASK(INT_PERF_COUNT)))
 #endif

+#endif
+
 #ifndef __ASSEMBLY__

 /* NOTE: we can't include <linux/percpu.h> due to #include dependencies. */
@@ -224,11 +238,11 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
 #define IRQ_DISABLE(tmp0, tmp1)					\
 	{							\
 	 movei  tmp0, -1;					\
-	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS)		\
+	 moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI)	\
 	};							\
 	{							\
 	 mtspr  SPR_INTERRUPT_MASK_SET_K_0, tmp0;		\
-	 auli   tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS)	\
+	 auli   tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS_HI)	\
 	};							\
 	mtspr  SPR_INTERRUPT_MASK_SET_K_1, tmp1

diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 7979a45430d3..3eb53525bf9d 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -16,10 +16,11 @@
 #define _ASM_TILE_PAGE_H

 #include <linux/const.h>
+#include <hv/pagesize.h>

 /* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#define PAGE_SHIFT	16
-#define HPAGE_SHIFT	24
+#define PAGE_SHIFT	HV_LOG2_PAGE_SIZE_SMALL
+#define HPAGE_SHIFT	HV_LOG2_PAGE_SIZE_LARGE

 #define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
 #define HPAGE_SIZE	(_AC(1, UL) << HPAGE_SHIFT)
@@ -29,25 +30,18 @@

 #ifdef __KERNEL__

-#include <hv/hypervisor.h>
-#include <arch/chip.h>
-
 /*
- * The {,H}PAGE_SHIFT values must match the HV_LOG2_PAGE_SIZE_xxx
- * definitions in <hv/hypervisor.h>.  We validate this at build time
- * here, and again at runtime during early boot.  We provide a
- * separate definition since userspace doesn't have <hv/hypervisor.h>.
- *
- * Be careful to distinguish PAGE_SHIFT from HV_PTE_INDEX_PFN, since
- * they are the same on i386 but not TILE.
+ * If the Kconfig doesn't specify, set a maximum zone order that
+ * is enough so that we can create huge pages from small pages given
+ * the respective sizes of the two page types.  See <linux/mmzone.h>.
  */
-#if HV_LOG2_PAGE_SIZE_SMALL != PAGE_SHIFT
-# error Small page size mismatch in Linux
-#endif
-#if HV_LOG2_PAGE_SIZE_LARGE != HPAGE_SHIFT
-# error Huge page size mismatch in Linux
+#ifndef CONFIG_FORCE_MAX_ZONEORDER
+#define CONFIG_FORCE_MAX_ZONEORDER (HPAGE_SHIFT - PAGE_SHIFT + 1)
 #endif

+#include <hv/hypervisor.h>
+#include <arch/chip.h>
+
 #ifndef __ASSEMBLY__

 #include <linux/types.h>
@@ -81,12 +75,6 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
  * Hypervisor page tables are made of the same basic structure.
  */

-typedef __u64 pteval_t;
-typedef __u64 pmdval_t;
-typedef __u64 pudval_t;
-typedef __u64 pgdval_t;
-typedef __u64 pgprotval_t;
-
 typedef HV_PTE pte_t;
 typedef HV_PTE pgd_t;
 typedef HV_PTE pgprot_t;
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
index cf52791a5501..e919c0bdc22d 100644
--- a/arch/tile/include/asm/pgalloc.h
+++ b/arch/tile/include/asm/pgalloc.h
@@ -41,9 +41,9 @@
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 #ifdef CONFIG_64BIT
-	set_pte_order(pmdp, pmd, L2_USER_PGTABLE_ORDER);
+	set_pte(pmdp, pmd);
 #else
-	set_pte_order(&pmdp->pud.pgd, pmd.pud.pgd, L2_USER_PGTABLE_ORDER);
+	set_pte(&pmdp->pud.pgd, pmd.pud.pgd);
 #endif
 }

@@ -100,6 +100,9 @@ pte_t *get_prealloc_pte(unsigned long pfn);
 /* During init, we can shatter kernel huge pages if needed. */
 void shatter_pmd(pmd_t *pmd);

+/* After init, a more complex technique is required. */
+void shatter_huge_page(unsigned long addr);
+
 #ifdef __tilegx__
 /* We share a single page allocator for both L1 and L2 page tables. */
 #if HV_L1_SIZE != HV_L2_SIZE
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index a6604e9485da..1a20b7ef8ea2 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -233,15 +233,23 @@ static inline void __pte_clear(pte_t *ptep)
 #define pgd_ERROR(e) \
 	pr_err("%s:%d: bad pgd 0x%016llx.\n", __FILE__, __LINE__, pgd_val(e))

+/* Return PA and protection info for a given kernel VA. */
+int va_to_cpa_and_pte(void *va, phys_addr_t *cpa, pte_t *pte);
+
+/*
+ * __set_pte() ensures we write the 64-bit PTE with 32-bit words in
+ * the right order on 32-bit platforms and also allows us to write
+ * hooks to check valid PTEs, etc., if we want.
+ */
+void __set_pte(pte_t *ptep, pte_t pte);
+
 /*
- * set_pte_order() sets the given PTE and also sanity-checks the
+ * set_pte() sets the given PTE and also sanity-checks the
  * requested PTE against the page homecaching.  Unspecified parts
  * of the PTE are filled in when it is written to memory, i.e. all
  * caching attributes if "!forcecache", or the home cpu if "anyhome".
  */
-extern void set_pte_order(pte_t *ptep, pte_t pte, int order);
-
-#define set_pte(ptep, pteval) set_pte_order(ptep, pteval, 0)
+extern void set_pte(pte_t *ptep, pte_t pte);
 #define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
 #define set_pte_atomic(pteptr, pteval) set_pte(pteptr, pteval)

@@ -293,21 +301,6 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
 #define __swp_entry_to_pte(swp)	((pte_t) { (((long long) ((swp).val)) << 32) })

 /*
- * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
- *
- *  dst - pointer to pgd range anwhere on a pgd page
- *  src - ""
- *  count - the number of pgds to copy.
- *
- * dst and src can be on the same page, but the range must not overlap,
- * and must not cross a page boundary.
- */
-static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
-{
-	memcpy(dst, src, count * sizeof(pgd_t));
-}
-
-/*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  */
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 53ec34884744..9f98529761fd 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -24,6 +24,7 @@
 #define PGDIR_SIZE	HV_PAGE_SIZE_LARGE
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 #define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+#define SIZEOF_PGD	(PTRS_PER_PGD * sizeof(pgd_t))

 /*
  * The level-2 index is defined by the difference between the huge
@@ -33,6 +34,7 @@
  * this nomenclature is somewhat confusing.
  */
 #define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
+#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))

 #ifndef __ASSEMBLY__

@@ -94,7 +96,6 @@ static inline int pgd_addr_invalid(unsigned long addr)
  */
 #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR

 extern int ptep_test_and_clear_young(struct vm_area_struct *,
 				     unsigned long addr, pte_t *);
@@ -110,6 +111,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
 	return pte;
 }

+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+	set_pte(&pmdp->pud.pgd, pmdval.pud.pgd);
+}
+
 /* Create a pmd from a PTFN. */
 static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
 {
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index a9e7c8760334..e6889474038a 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -269,7 +269,6 @@ extern char chip_model[64];
 /* Data on which physical memory controller corresponds to which NUMA node. */
 extern int node_controller[];

-
 /* Do we dump information to the console when a user application crashes? */
 extern int show_crashinfo;

diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index ac6d343129d3..6be2246e015c 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -141,6 +141,9 @@ struct single_step_state {
 /* Single-step the instruction at regs->pc */
 extern void single_step_once(struct pt_regs *regs);

+/* Clean up after execve(). */
+extern void single_step_execve(void);
+
 struct task_struct;

 extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index 88efdde8dd2b..a8f2c6e31a87 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -78,13 +78,6 @@ void arch_spin_unlock_wait(arch_spinlock_t *lock);
 #define _RD_COUNT_SHIFT 24
 #define _RD_COUNT_WIDTH 8

-/* Internal functions; do not use. */
-void arch_read_lock_slow(arch_rwlock_t *, u32);
-int arch_read_trylock_slow(arch_rwlock_t *);
-void arch_read_unlock_slow(arch_rwlock_t *);
-void arch_write_lock_slow(arch_rwlock_t *, u32);
-void arch_write_unlock_slow(arch_rwlock_t *, u32);
-
 /**
  * arch_read_can_lock() - would read_trylock() succeed?
  */
@@ -104,94 +97,32 @@ static inline int arch_write_can_lock(arch_rwlock_t *rwlock)
 /**
  * arch_read_lock() - acquire a read lock.
  */
-static inline void arch_read_lock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val << _RD_COUNT_WIDTH)) {
-		arch_read_lock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = val + (1 << _RD_COUNT_SHIFT);
-}
+void arch_read_lock(arch_rwlock_t *rwlock);

 /**
- * arch_read_lock() - acquire a write lock.
+ * arch_write_lock() - acquire a write lock.
  */
-static inline void arch_write_lock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val != 0)) {
-		arch_write_lock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = 1 << _WR_NEXT_SHIFT;
-}
+void arch_write_lock(arch_rwlock_t *rwlock);

 /**
  * arch_read_trylock() - try to acquire a read lock.
  */
-static inline int arch_read_trylock(arch_rwlock_t *rwlock)
-{
-	int locked;
-	u32 val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val & 1))
-		return arch_read_trylock_slow(rwlock);
-	locked = (val << _RD_COUNT_WIDTH) == 0;
-	rwlock->lock = val + (locked << _RD_COUNT_SHIFT);
-	return locked;
-}
+int arch_read_trylock(arch_rwlock_t *rwlock);

 /**
  * arch_write_trylock() - try to acquire a write lock.
  */
-static inline int arch_write_trylock(arch_rwlock_t *rwlock)
-{
-	u32 val = __insn_tns((int *)&rwlock->lock);
-
-	/*
-	 * If a tns is in progress, or there's a waiting or active locker,
-	 * or active readers, we can't take the lock, so give up.
-	 */
-	if (unlikely(val != 0)) {
-		if (!(val & 1))
-			rwlock->lock = val;
-		return 0;
-	}
-
-	/* Set the "next" field to mark it locked. */
-	rwlock->lock = 1 << _WR_NEXT_SHIFT;
-	return 1;
-}
+int arch_write_trylock(arch_rwlock_t *rwlock);

 /**
  * arch_read_unlock() - release a read lock.
  */
-static inline void arch_read_unlock(arch_rwlock_t *rwlock)
-{
-	u32 val;
-	mb();  /* guarantee anything modified under the lock is visible */
-	val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val & 1)) {
-		arch_read_unlock_slow(rwlock);
-		return;
-	}
-	rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
-}
+void arch_read_unlock(arch_rwlock_t *rwlock);

 /**
  * arch_write_unlock() - release a write lock.
  */
-static inline void arch_write_unlock(arch_rwlock_t *rwlock)
-{
-	u32 val;
-	mb();  /* guarantee anything modified under the lock is visible */
-	val = __insn_tns((int *)&rwlock->lock);
-	if (unlikely(val != (1 << _WR_NEXT_SHIFT))) {
-		arch_write_unlock_slow(rwlock, val);
-		return;
-	}
-	rwlock->lock = 0;
-}
+void arch_write_unlock(arch_rwlock_t *rwlock);

 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index f908473c322d..4d97a2db932e 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -18,13 +18,14 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <asm/backtrace.h>
+#include <asm/page.h>
 #include <hv/hypervisor.h>

 /* Everything we need to keep track of a backtrace iteration */
 struct KBacktraceIterator {
 	BacktraceIterator it;
 	struct task_struct *task;     /* task we are backtracing */
-	HV_PTE *pgtable;	/* page table for user space access */
+	pte_t *pgtable;		/* page table for user space access */
 	int end;		/* iteration complete. */
 	int new_context;        /* new context is starting */
 	int profile;            /* profiling, so stop on async intrpt */
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
index 5388850deeb2..23d1842f4839 100644
--- a/arch/tile/include/asm/system.h
+++ b/arch/tile/include/asm/system.h
@@ -90,7 +90,24 @@
 #endif

 #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
-int __mb_incoherent(void);  /* Helper routine for mb_incoherent(). */
+#include <hv/syscall_public.h>
+/*
+ * Issue an uncacheable load to each memory controller, then
+ * wait until those loads have completed.
+ */
+static inline void __mb_incoherent(void)
+{
+	long clobber_r10;
+	asm volatile("swint2"
+		     : "=R10" (clobber_r10)
+		     : "R10" (HV_SYS_fence_incoherent)
+		     : "r0", "r1", "r2", "r3", "r4",
+		       "r5", "r6", "r7", "r8", "r9",
+		       "r11", "r12", "r13", "r14",
+		       "r15", "r16", "r17", "r18", "r19",
+		       "r20", "r21", "r22", "r23", "r24",
+		       "r25", "r26", "r27", "r28", "r29");
+}
 #endif

 /* Fence to guarantee visibility of stores to incoherent memory. */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 3872f2b345d2..9e8e9c4dfa2a 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -68,6 +68,7 @@ struct thread_info {
 #else
 #define THREAD_SIZE_ORDER (0)
 #endif
+#define THREAD_SIZE_PAGES (1 << THREAD_SIZE_ORDER)

 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
 #define LOG2_THREAD_SIZE (PAGE_SHIFT + THREAD_SIZE_ORDER)
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h
index 3baf5fc4c0a1..29921f0b86da 100644
--- a/arch/tile/include/asm/timex.h
+++ b/arch/tile/include/asm/timex.h
@@ -38,6 +38,9 @@ static inline cycles_t get_cycles(void)

 cycles_t get_clock_rate(void);

+/* Convert nanoseconds to core clock cycles. */
+cycles_t ns2cycles(unsigned long nsecs);
+
 /* Called at cpu initialization to set some low-level constants. */
 void setup_clock(void);

diff --git a/arch/tile/include/hv/drv_mshim_intf.h b/arch/tile/include/hv/drv_mshim_intf.h
new file mode 100644
index 000000000000..c6ef3bdc55cf
--- /dev/null
+++ b/arch/tile/include/hv/drv_mshim_intf.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * @file drv_mshim_intf.h
+ * Interface definitions for the Linux EDAC memory controller driver.
+ */
+
+#ifndef _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
+#define _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H
+
+/** Number of memory controllers in the public API. */
+#define TILE_MAX_MSHIMS 4
+
+/** Memory info under each memory controller. */
+struct mshim_mem_info
+{
+  uint64_t mem_size;     /**< Total memory size in bytes. */
+  uint8_t mem_type;      /**< Memory type, DDR2 or DDR3. */
+  uint8_t mem_ecc;       /**< Memory supports ECC. */
+};
+
+/**
+ * DIMM error structure.
+ * For now, only correctable errors are counted and the mshim doesn't record
+ * the error PA. HV takes panic upon uncorrectable errors.
+ */
+struct mshim_mem_error
+{
+  uint32_t sbe_count;     /**< Number of single-bit errors. */
+};
+
+/** Read this offset to get the memory info per mshim. */
+#define MSHIM_MEM_INFO_OFF 0x100
+
+/** Read this offset to check DIMM error. */
+#define MSHIM_MEM_ERROR_OFF 0x200
+
+#endif /* _SYS_HV_INCLUDE_DRV_MSHIM_INTF_H */
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index f672544cd4f9..1b8bf03d62a0 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -338,9 +338,10 @@ typedef int HV_Errno;
 #define HV_ENOTREADY   -812  /**< Device not ready */
 #define HV_EIO         -813  /**< I/O error */
 #define HV_ENOMEM      -814  /**< Out of memory */
+#define HV_EAGAIN      -815  /**< Try again */

 #define HV_ERR_MAX     -801  /**< Largest HV error code */
-#define HV_ERR_MIN     -814  /**< Smallest HV error code */
+#define HV_ERR_MIN     -815  /**< Smallest HV error code */

 #ifndef __ASSEMBLER__

@@ -867,6 +868,43 @@ typedef struct
  */
 HV_PhysAddrRange hv_inquire_physical(int idx);

+/** Possible DIMM types. */
+typedef enum
+{
+  NO_DIMM = 0,               /**< No DIMM */
+  DDR2 = 1,                  /**< DDR2 */
+  DDR3 = 2                   /**< DDR3 */
+} HV_DIMM_Type;
+
+#ifdef __tilegx__
+
+/** Log2 of minimum DIMM bytes supported by the memory controller. */
+#define HV_MSH_MIN_DIMM_SIZE_SHIFT 29
+
+/** Max number of DIMMs contained by one memory controller. */
+#define HV_MSH_MAX_DIMMS 8
+
+#else
+
+/** Log2 of minimum DIMM bytes supported by the memory controller. */
+#define HV_MSH_MIN_DIMM_SIZE_SHIFT 26
+
+/** Max number of DIMMs contained by one memory controller. */
+#define HV_MSH_MAX_DIMMS 2
+
+#endif
+
+/** Number of bits to right-shift to get the DIMM type. */
+#define HV_DIMM_TYPE_SHIFT 0
+
+/** Bits to mask to get the DIMM type. */
+#define HV_DIMM_TYPE_MASK 0xf
+
+/** Number of bits to right-shift to get the DIMM size. */
+#define HV_DIMM_SIZE_SHIFT 4
+
+/** Bits to mask to get the DIMM size. */
+#define HV_DIMM_SIZE_MASK 0xf

 /** Memory controller information. */
 typedef struct
@@ -964,6 +1002,11 @@ HV_ASIDRange hv_inquire_asid(int idx);

 /** Waits for at least the specified number of nanoseconds then returns.
  *
+ * NOTE: this deprecated function currently assumes a 750 MHz clock,
+ * and is thus not generally suitable for use.  New code should call
+ * hv_sysconf(HV_SYSCONF_CPU_SPEED), compute a cycle count to wait for,
+ * and delay by looping while checking the cycle counter SPR.
+ *
  * @param nanosecs The number of nanoseconds to sleep.
  */
 void hv_nanosleep(int nanosecs);
@@ -1038,6 +1081,7 @@ int hv_console_write(HV_VirtAddr bytes, int len);
  * downcall:
  *
  *   INT_MESSAGE_RCV_DWNCL   (hypervisor message available)
+ *   INT_DEV_INTR_DWNCL      (device interrupt)
  *   INT_DMATLB_MISS_DWNCL   (DMA TLB miss)
  *   INT_SNITLB_MISS_DWNCL   (SNI TLB miss)
  *   INT_DMATLB_ACCESS_DWNCL (DMA TLB access violation)
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index fd8dc42abdcb..431e9ae60488 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -38,12 +38,6 @@ STD_ENTRY(kernel_execve)
 	jrp lr
 	STD_ENDPROC(kernel_execve)

-/* Delay a fixed number of cycles. */
-STD_ENTRY(__delay)
-	{ addi r0, r0, -1; bnzt r0, . }
-	jrp lr
-	STD_ENDPROC(__delay)
-
 /*
  * We don't run this function directly, but instead copy it to a page
  * we map into every user process.  See vdso_setup().
@@ -97,23 +91,17 @@ STD_ENTRY(smp_nap)

 /*
  * Enable interrupts racelessly and then nap until interrupted.
+ * Architecturally, we are guaranteed that enabling interrupts via
+ * mtspr to INTERRUPT_CRITICAL_SECTION only interrupts at the next PC.
  * This function's _cpu_idle_nap address is special; see intvec.S.
  * When interrupted at _cpu_idle_nap, we bump the PC forward 8, and
  * as a result return to the function that called _cpu_idle().
  */
 STD_ENTRY(_cpu_idle)
-	{
-	 lnk r0
-	 movei r1, KERNEL_PL
-	}
-	{
-	 addli r0, r0, _cpu_idle_nap - .
-	 mtspr INTERRUPT_CRITICAL_SECTION, r1
-	}
+	movei r1, 1
+	mtspr INTERRUPT_CRITICAL_SECTION, r1
 	IRQ_ENABLE(r2, r3)             /* unmask, but still with ICS set */
-	mtspr SPR_EX_CONTEXT_K_1, r1   /* Kernel PL, ICS clear */
-	mtspr SPR_EX_CONTEXT_K_0, r0
-	iret
+	mtspr INTERRUPT_CRITICAL_SECTION, zero
 	.global _cpu_idle_nap
 _cpu_idle_nap:
 	nap
diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S
index 90e7c4435693..1a39b7c1c87e 100644
--- a/arch/tile/kernel/head_32.S
+++ b/arch/tile/kernel/head_32.S
@@ -133,7 +133,7 @@ ENTRY(_start)
 	}
 	ENDPROC(_start)

-.section ".bss.page_aligned","w"
+__PAGE_ALIGNED_BSS
 	.align PAGE_SIZE
 ENTRY(empty_zero_page)
 	.fill PAGE_SIZE,1,0
@@ -145,10 +145,10 @@ ENTRY(empty_zero_page)
 	.endif
 	.word HV_PTE_PAGE | HV_PTE_DIRTY | HV_PTE_PRESENT | HV_PTE_ACCESSED | \
 	      (HV_PTE_MODE_CACHE_NO_L3 << HV_PTE_INDEX_MODE)
-	.word (\bits1) | (HV_CPA_TO_PFN(\cpa) << HV_PTE_INDEX_PFN)
+	.word (\bits1) | (HV_CPA_TO_PFN(\cpa) << (HV_PTE_INDEX_PFN - 32))
 	.endm

-.section ".data.page_aligned","wa"
+__PAGE_ALIGNED_DATA
 	.align PAGE_SIZE
 ENTRY(swapper_pg_dir)
 	/*
@@ -158,12 +158,14 @@ ENTRY(swapper_pg_dir)
 	 */
 	.set addr, 0
 	.rept (MEM_USER_INTRPT - PAGE_OFFSET) >> PGDIR_SHIFT
-	PTE addr + PAGE_OFFSET, addr, HV_PTE_READABLE | HV_PTE_WRITABLE
+	PTE addr + PAGE_OFFSET, addr, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
+	      (1 << (HV_PTE_INDEX_WRITABLE - 32))
 	.set addr, addr + PGDIR_SIZE
 	.endr

 	/* The true text VAs are mapped as VA = PA + MEM_SV_INTRPT */
-	PTE MEM_SV_INTRPT, 0, HV_PTE_READABLE | HV_PTE_EXECUTABLE
+	PTE MEM_SV_INTRPT, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
+	      (1 << (HV_PTE_INDEX_EXECUTABLE - 32))
 	.org swapper_pg_dir + HV_L1_SIZE
 	END(swapper_pg_dir)

@@ -176,6 +178,7 @@ ENTRY(swapper_pg_dir)
 	__INITDATA
 	.align CHIP_L2_LINE_SIZE()
 ENTRY(swapper_pgprot)
-	PTE	0, 0, HV_PTE_READABLE | HV_PTE_WRITABLE, 1
+	PTE	0, 0, (1 << (HV_PTE_INDEX_READABLE - 32)) | \
+		      (1 << (HV_PTE_INDEX_WRITABLE - 32)), 1
 	.align CHIP_L2_LINE_SIZE()
 	END(swapper_pgprot)
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 5eed4a02bf62..fffcfa6b3a62 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -32,10 +32,6 @@
 # error "No support for kernel preemption currently"
 #endif

-#if INT_INTCTRL_K < 32 || INT_INTCTRL_K >= 48
-# error INT_INTCTRL_K coded to set high interrupt mask
-#endif
-
 #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)

 #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
@@ -1199,46 +1195,6 @@ STD_ENTRY(interrupt_return)
 	STD_ENDPROC(interrupt_return)

 	/*
-	 * This interrupt variant clears the INT_INTCTRL_K interrupt mask bit
-	 * before returning, so we can properly get more downcalls.
-	 */
-	.pushsection .text.handle_interrupt_downcall,"ax"
-handle_interrupt_downcall:
-	finish_interrupt_save handle_interrupt_downcall
-	check_single_stepping normal, .Ldispatch_downcall
-.Ldispatch_downcall:
-
-	/* Clear INTCTRL_K from the set of interrupts we ever enable. */
-	GET_INTERRUPTS_ENABLED_MASK_PTR(r30)
-	{
-	 addi   r30, r30, 4
-	 movei  r31, INT_MASK(INT_INTCTRL_K)
-	}
-	{
-	 lw     r20, r30
-	 nor    r21, r31, zero
-	}
-	and    r20, r20, r21
-	sw     r30, r20
-
-	{
-	 jalr   r0
-	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	}
-	FEEDBACK_REENTER(handle_interrupt_downcall)
-
-	/* Allow INTCTRL_K to be enabled next time we enable interrupts. */
-	lw     r20, r30
-	or     r20, r20, r31
-	sw     r30, r20
-
-	{
-	 movei  r30, 0   /* not an NMI */
-	 j      interrupt_return
-	}
-	STD_ENDPROC(handle_interrupt_downcall)
-
-	/*
 	 * Some interrupts don't check for single stepping
 	 */
 	.pushsection .text.handle_interrupt_no_single_step,"ax"
@@ -1600,7 +1556,10 @@ STD_ENTRY(_sys_clone)
 	.align 64
 	/* Align much later jump on the start of a cache line. */
 #if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	nop; nop
+	nop
+#if PAGE_SIZE >= 0x10000
+	nop
+#endif
 #endif
 ENTRY(sys_cmpxchg)

@@ -1628,9 +1587,13 @@ ENTRY(sys_cmpxchg)
  * about aliasing among multiple mappings of the same physical page,
  * and we ignore the low 3 bits so we have one lock that covers
  * both a cmpxchg64() and a cmpxchg() on either its low or high word.
- * NOTE: this code must match __atomic_hashed_lock() in lib/atomic.c.
+ * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c.
  */

+#if (PAGE_OFFSET & 0xffff) != 0
+# error Code here assumes PAGE_OFFSET can be loaded with just hi16()
+#endif
+
 #if ATOMIC_LOCKS_FOUND_VIA_TABLE()
 	{
 	 /* Check for unaligned input. */
@@ -1723,11 +1686,14 @@ ENTRY(sys_cmpxchg)
 	 lw	r26, r0
 	}
 	{
-	 /* atomic_locks is page aligned so this suffices to get its addr. */
-	 auli	r21, zero, hi16(atomic_locks)
+	 auli	r21, zero, ha16(atomic_locks)

 	 bbns   r23, .Lcmpxchg_badaddr
 	}
+#if PAGE_SIZE < 0x10000
+	/* atomic_locks is page-aligned so for big pages we don't need this. */
+	addli	r21, r21, lo16(atomic_locks)
+#endif
 	{
 	 /*
 	  * Insert the hash bits into the page-aligned pointer.
@@ -1762,7 +1728,7 @@ ENTRY(sys_cmpxchg)

 	/*
 	 * Perform the actual cmpxchg or atomic_update.
-	 * Note that __futex_mark_unlocked() in uClibc relies on
+	 * Note that the system <arch/atomic.h> header relies on
 	 * atomic_update() to always perform an "mf", so don't make
 	 * it optional or conditional without modifying that code.
 	 */
@@ -2014,17 +1980,17 @@ int_unalign:
 #endif
 	int_hand     INT_INTCTRL_0, INTCTRL_0, bad_intr
 	int_hand     INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \
-		     hv_message_intr, handle_interrupt_downcall
+		     hv_message_intr
 	int_hand     INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \
-		     tile_dev_intr, handle_interrupt_downcall
+		     tile_dev_intr
 	int_hand     INT_I_ASID, I_ASID, bad_intr
 	int_hand     INT_D_ASID, D_ASID, bad_intr
 	int_hand     INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \
-		     do_page_fault, handle_interrupt_downcall
+		     do_page_fault
 	int_hand     INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \
-		     do_page_fault, handle_interrupt_downcall
+		     do_page_fault
 	int_hand     INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \
-		     do_page_fault, handle_interrupt_downcall
+		     do_page_fault
 	int_hand     INT_SN_CPL, SN_CPL, bad_intr
 	int_hand     INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
 #if CHIP_HAS_AUX_PERF_COUNTERS()
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 128805ef8f2c..0baa7580121f 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -176,43 +176,43 @@ void disable_percpu_irq(unsigned int irq)
176EXPORT_SYMBOL(disable_percpu_irq); 176EXPORT_SYMBOL(disable_percpu_irq);
177 177
178/* Mask an interrupt. */ 178/* Mask an interrupt. */
179static void tile_irq_chip_mask(unsigned int irq) 179static void tile_irq_chip_mask(struct irq_data *d)
180{ 180{
181 mask_irqs(1UL << irq); 181 mask_irqs(1UL << d->irq);
182} 182}
183 183
184/* Unmask an interrupt. */ 184/* Unmask an interrupt. */
185static void tile_irq_chip_unmask(unsigned int irq) 185static void tile_irq_chip_unmask(struct irq_data *d)
186{ 186{
187 unmask_irqs(1UL << irq); 187 unmask_irqs(1UL << d->irq);
188} 188}
189 189
190/* 190/*
191 * Clear an interrupt before processing it so that any new assertions 191 * Clear an interrupt before processing it so that any new assertions
192 * will trigger another irq. 192 * will trigger another irq.
193 */ 193 */
194static void tile_irq_chip_ack(unsigned int irq) 194static void tile_irq_chip_ack(struct irq_data *d)
195{ 195{
196 if ((unsigned long)get_irq_chip_data(irq) != IS_HW_CLEARED) 196 if ((unsigned long)irq_data_get_irq_chip_data(d) != IS_HW_CLEARED)
197 clear_irqs(1UL << irq); 197 clear_irqs(1UL << d->irq);
198} 198}
199 199
200/* 200/*
201 * For per-cpu interrupts, we need to avoid unmasking any interrupts 201 * For per-cpu interrupts, we need to avoid unmasking any interrupts
202 * that we disabled via disable_percpu_irq(). 202 * that we disabled via disable_percpu_irq().
203 */ 203 */
204static void tile_irq_chip_eoi(unsigned int irq) 204static void tile_irq_chip_eoi(struct irq_data *d)
205{ 205{
206 if (!(__get_cpu_var(irq_disable_mask) & (1UL << irq))) 206 if (!(__get_cpu_var(irq_disable_mask) & (1UL << d->irq)))
207 unmask_irqs(1UL << irq); 207 unmask_irqs(1UL << d->irq);
208} 208}
209 209
210static struct irq_chip tile_irq_chip = { 210static struct irq_chip tile_irq_chip = {
211 .name = "tile_irq_chip", 211 .name = "tile_irq_chip",
212 .ack = tile_irq_chip_ack, 212 .irq_ack = tile_irq_chip_ack,
213 .eoi = tile_irq_chip_eoi, 213 .irq_eoi = tile_irq_chip_eoi,
214 .mask = tile_irq_chip_mask, 214 .irq_mask = tile_irq_chip_mask,
215 .unmask = tile_irq_chip_unmask, 215 .irq_unmask = tile_irq_chip_unmask,
216}; 216};
217 217
218void __init init_IRQ(void) 218void __init init_IRQ(void)
@@ -277,8 +277,10 @@ int show_interrupts(struct seq_file *p, void *v)
277 } 277 }
278 278
279 if (i < NR_IRQS) { 279 if (i < NR_IRQS) {
280 raw_spin_lock_irqsave(&irq_desc[i].lock, flags); 280 struct irq_desc *desc = irq_to_desc(i);
281 action = irq_desc[i].action; 281
282 raw_spin_lock_irqsave(&desc->lock, flags);
283 action = desc->action;
282 if (!action) 284 if (!action)
283 goto skip; 285 goto skip;
284 seq_printf(p, "%3d: ", i); 286 seq_printf(p, "%3d: ", i);
@@ -288,7 +290,7 @@ int show_interrupts(struct seq_file *p, void *v)
288 for_each_online_cpu(j) 290 for_each_online_cpu(j)
289 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); 291 seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
290#endif 292#endif
291 seq_printf(p, " %14s", irq_desc[i].chip->name); 293 seq_printf(p, " %14s", get_irq_desc_chip(desc)->name);
292 seq_printf(p, " %s", action->name); 294 seq_printf(p, " %s", action->name);
293 295
294 for (action = action->next; action; action = action->next) 296 for (action = action->next; action; action = action->next)
@@ -296,7 +298,7 @@ int show_interrupts(struct seq_file *p, void *v)
296 298
297 seq_putc(p, '\n'); 299 seq_putc(p, '\n');
298skip: 300skip:
299 raw_spin_unlock_irqrestore(&irq_desc[i].lock, flags); 301 raw_spin_unlock_irqrestore(&desc->lock, flags);
300 } 302 }
301 return 0; 303 return 0;
302} 304}
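
The irq_chip callbacks above now take a struct irq_data pointer instead of a bare irq number, and the eoi path only re-unmasks an interrupt that was not disabled via disable_percpu_irq(). A standalone sketch of that unmask test (plain C with made-up globals; the real code uses per-cpu state and hypervisor mask registers):

#include <stdio.h>

static unsigned long irq_disable_mask;   /* bit n set: irq n disabled on this cpu */
static unsigned long hw_unmasked;        /* bits currently unmasked in "hardware" */

/* Mirror tile_irq_chip_eoi(): only re-unmask if the irq was not disabled. */
static void fake_eoi(unsigned int irq)
{
        if (!(irq_disable_mask & (1UL << irq)))
                hw_unmasked |= 1UL << irq;
}

int main(void)
{
        irq_disable_mask = 1UL << 3;     /* as if disable_percpu_irq(3) had run */
        fake_eoi(3);
        fake_eoi(5);
        printf("unmasked = %#lx\n", hw_unmasked);   /* only bit 5 set */
        return 0;
}
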
diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c
index 0d8b9e933487..e00d7179989e 100644
--- a/arch/tile/kernel/machine_kexec.c
+++ b/arch/tile/kernel/machine_kexec.c
@@ -240,8 +240,11 @@ static void setup_quasi_va_is_pa(void)
240 pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE); 240 pte = hv_pte(_PAGE_KERNEL | _PAGE_HUGE_PAGE);
241 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); 241 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
242 242
243 for (i = 0; i < pgd_index(PAGE_OFFSET); i++) 243 for (i = 0; i < pgd_index(PAGE_OFFSET); i++) {
244 pgtable[i] = pfn_pte(i << (HPAGE_SHIFT - PAGE_SHIFT), pte); 244 unsigned long pfn = i << (HPAGE_SHIFT - PAGE_SHIFT);
245 if (pfn_valid(pfn))
246 __set_pte(&pgtable[i], pfn_pte(pfn, pte));
247 }
245} 248}
246 249
247 250
diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c
index 5ad5e13b0fa6..658752b2835e 100644
--- a/arch/tile/kernel/pci-dma.c
+++ b/arch/tile/kernel/pci-dma.c
@@ -86,6 +86,21 @@ EXPORT_SYMBOL(dma_free_coherent);
86 * can count on nothing having been touched. 86 * can count on nothing having been touched.
87 */ 87 */
88 88
89/* Flush a PA range from cache page by page. */
90static void __dma_map_pa_range(dma_addr_t dma_addr, size_t size)
91{
92 struct page *page = pfn_to_page(PFN_DOWN(dma_addr));
93 size_t bytesleft = PAGE_SIZE - (dma_addr & (PAGE_SIZE - 1));
94
95 while ((ssize_t)size > 0) {
96 /* Flush the page. */
97 homecache_flush_cache(page++, 0);
98
99 /* Figure out if we need to continue on the next page. */
100 size -= bytesleft;
101 bytesleft = PAGE_SIZE;
102 }
103}
89 104
90/* 105/*
91 * dma_map_single can be passed any memory address, and there appear 106 * dma_map_single can be passed any memory address, and there appear
@@ -97,26 +112,12 @@ EXPORT_SYMBOL(dma_free_coherent);
97dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, 112dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
98 enum dma_data_direction direction) 113 enum dma_data_direction direction)
99{ 114{
100 struct page *page; 115 dma_addr_t dma_addr = __pa(ptr);
101 dma_addr_t dma_addr;
102 int thispage;
103 116
104 BUG_ON(!valid_dma_direction(direction)); 117 BUG_ON(!valid_dma_direction(direction));
105 WARN_ON(size == 0); 118 WARN_ON(size == 0);
106 119
107 dma_addr = __pa(ptr); 120 __dma_map_pa_range(dma_addr, size);
108
109 /* We might have been handed a buffer that wraps a page boundary */
110 while ((int)size > 0) {
111 /* The amount to flush that's on this page */
112 thispage = PAGE_SIZE - ((unsigned long)ptr & (PAGE_SIZE - 1));
113 thispage = min((int)thispage, (int)size);
114 /* Is this valid for any page we could be handed? */
115 page = pfn_to_page(kaddr_to_pfn(ptr));
116 homecache_flush_cache(page, 0);
117 ptr += thispage;
118 size -= thispage;
119 }
120 121
121 return dma_addr; 122 return dma_addr;
122} 123}
@@ -140,10 +141,8 @@ int dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
140 WARN_ON(nents == 0 || sglist->length == 0); 141 WARN_ON(nents == 0 || sglist->length == 0);
141 142
142 for_each_sg(sglist, sg, nents, i) { 143 for_each_sg(sglist, sg, nents, i) {
143 struct page *page;
144 sg->dma_address = sg_phys(sg); 144 sg->dma_address = sg_phys(sg);
145 page = pfn_to_page(sg->dma_address >> PAGE_SHIFT); 145 __dma_map_pa_range(sg->dma_address, sg->length);
146 homecache_flush_cache(page, 0);
147 } 146 }
148 147
149 return nents; 148 return nents;
@@ -163,6 +162,7 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page,
163{ 162{
164 BUG_ON(!valid_dma_direction(direction)); 163 BUG_ON(!valid_dma_direction(direction));
165 164
165 BUG_ON(offset + size > PAGE_SIZE);
166 homecache_flush_cache(page, 0); 166 homecache_flush_cache(page, 0);
167 167
168 return page_to_pa(page) + offset; 168 return page_to_pa(page) + offset;
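
The new __dma_map_pa_range() helper walks a physical range one page at a time and flushes each page's home cache; the first iteration may cover only the tail of a page. A small userspace sketch of the same walk, with a stand-in page size and a printf in place of homecache_flush_cache():

#include <stddef.h>
#include <stdio.h>

#define FAKE_PAGE_SIZE 4096UL

static void fake_flush_page(unsigned long pfn)
{
        printf("flush pfn %lu\n", pfn);
}

/* Flush every page overlapped by the physical range [addr, addr + size). */
static void flush_pa_range(unsigned long addr, size_t size)
{
        unsigned long pfn = addr / FAKE_PAGE_SIZE;
        size_t bytesleft = FAKE_PAGE_SIZE - (addr & (FAKE_PAGE_SIZE - 1));

        while ((long)size > 0) {          /* signed test, like the kernel's cast */
                fake_flush_page(pfn++);
                size -= bytesleft;        /* first pass covers only the tail */
                bytesleft = FAKE_PAGE_SIZE;
        }
}

int main(void)
{
        flush_pa_range(2 * FAKE_PAGE_SIZE + 4000, 200);  /* spans two pages */
        return 0;
}
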
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index e90eb53173b0..b9cd962e1d30 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -165,7 +165,7 @@ void free_thread_info(struct thread_info *info)
165 kfree(step_state); 165 kfree(step_state);
166 } 166 }
167 167
168 free_page((unsigned long)info); 168 free_pages((unsigned long)info, THREAD_SIZE_ORDER);
169} 169}
170 170
171static void save_arch_state(struct thread_struct *t); 171static void save_arch_state(struct thread_struct *t);
@@ -574,6 +574,8 @@ SYSCALL_DEFINE4(execve, const char __user *, path,
574 goto out; 574 goto out;
575 error = do_execve(filename, argv, envp, regs); 575 error = do_execve(filename, argv, envp, regs);
576 putname(filename); 576 putname(filename);
577 if (error == 0)
578 single_step_execve();
577out: 579out:
578 return error; 580 return error;
579} 581}
@@ -593,6 +595,8 @@ long compat_sys_execve(const char __user *path,
593 goto out; 595 goto out;
594 error = compat_do_execve(filename, argv, envp, regs); 596 error = compat_do_execve(filename, argv, envp, regs);
595 putname(filename); 597 putname(filename);
598 if (error == 0)
599 single_step_execve();
596out: 600out:
597 return error; 601 return error;
598} 602}
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index f18573643ed1..3696b1832566 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -59,6 +59,8 @@ unsigned long __initdata node_memmap_pfn[MAX_NUMNODES];
59unsigned long __initdata node_percpu_pfn[MAX_NUMNODES]; 59unsigned long __initdata node_percpu_pfn[MAX_NUMNODES];
60unsigned long __initdata node_free_pfn[MAX_NUMNODES]; 60unsigned long __initdata node_free_pfn[MAX_NUMNODES];
61 61
62static unsigned long __initdata node_percpu[MAX_NUMNODES];
63
62#ifdef CONFIG_HIGHMEM 64#ifdef CONFIG_HIGHMEM
63/* Page frame index of end of lowmem on each controller. */ 65/* Page frame index of end of lowmem on each controller. */
64unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES]; 66unsigned long __cpuinitdata node_lowmem_end_pfn[MAX_NUMNODES];
@@ -554,7 +556,6 @@ static void __init setup_bootmem_allocator(void)
554 reserve_bootmem(crashk_res.start, 556 reserve_bootmem(crashk_res.start,
555 crashk_res.end - crashk_res.start + 1, 0); 557 crashk_res.end - crashk_res.start + 1, 0);
556#endif 558#endif
557
558} 559}
559 560
560void *__init alloc_remap(int nid, unsigned long size) 561void *__init alloc_remap(int nid, unsigned long size)
@@ -568,11 +569,13 @@ void *__init alloc_remap(int nid, unsigned long size)
568 569
569static int __init percpu_size(void) 570static int __init percpu_size(void)
570{ 571{
571 int size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); 572 int size = __per_cpu_end - __per_cpu_start;
572#ifdef CONFIG_MODULES 573 size += PERCPU_MODULE_RESERVE;
573 if (size < PERCPU_ENOUGH_ROOM) 574 size += PERCPU_DYNAMIC_EARLY_SIZE;
574 size = PERCPU_ENOUGH_ROOM; 575 if (size < PCPU_MIN_UNIT_SIZE)
575#endif 576 size = PCPU_MIN_UNIT_SIZE;
577 size = roundup(size, PAGE_SIZE);
578
576 /* In several places we assume the per-cpu data fits on a huge page. */ 579 /* In several places we assume the per-cpu data fits on a huge page. */
577 BUG_ON(kdata_huge && size > HPAGE_SIZE); 580 BUG_ON(kdata_huge && size > HPAGE_SIZE);
578 return size; 581 return size;
@@ -589,7 +592,6 @@ static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
589static void __init zone_sizes_init(void) 592static void __init zone_sizes_init(void)
590{ 593{
591 unsigned long zones_size[MAX_NR_ZONES] = { 0 }; 594 unsigned long zones_size[MAX_NR_ZONES] = { 0 };
592 unsigned long node_percpu[MAX_NUMNODES] = { 0 };
593 int size = percpu_size(); 595 int size = percpu_size();
594 int num_cpus = smp_height * smp_width; 596 int num_cpus = smp_height * smp_width;
595 int i; 597 int i;
@@ -674,7 +676,7 @@ static void __init zone_sizes_init(void)
674 NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; 676 NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
675 677
676 free_area_init_node(i, zones_size, start, NULL); 678 free_area_init_node(i, zones_size, start, NULL);
677 printk(KERN_DEBUG " DMA zone: %ld per-cpu pages\n", 679 printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n",
678 PFN_UP(node_percpu[i])); 680 PFN_UP(node_percpu[i]));
679 681
680 /* Track the type of memory on each node */ 682 /* Track the type of memory on each node */
@@ -1312,6 +1314,8 @@ static void *__init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
1312 1314
1313 BUG_ON(size % PAGE_SIZE != 0); 1315 BUG_ON(size % PAGE_SIZE != 0);
1314 pfn_offset[nid] += size / PAGE_SIZE; 1316 pfn_offset[nid] += size / PAGE_SIZE;
1317 BUG_ON(node_percpu[nid] < size);
1318 node_percpu[nid] -= size;
1315 if (percpu_pfn[cpu] == 0) 1319 if (percpu_pfn[cpu] == 0)
1316 percpu_pfn[cpu] = pfn; 1320 percpu_pfn[cpu] = pfn;
1317 return pfn_to_kaddr(pfn); 1321 return pfn_to_kaddr(pfn);
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 1eb3b39e36c7..84a729e06ec4 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -56,7 +56,7 @@ enum mem_op {
56 MEMOP_STORE_POSTINCR 56 MEMOP_STORE_POSTINCR
57}; 57};
58 58
59static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, int32_t offset) 59static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
60{ 60{
61 tile_bundle_bits result; 61 tile_bundle_bits result;
62 62
@@ -254,6 +254,18 @@ P("\n");
254 return bundle; 254 return bundle;
255} 255}
256 256
257/*
258 * Called after execve() has started the new image. This allows us
 259 * to reset the info state. Note that the mmap'ed memory, if there
260 * was any, has already been unmapped by the exec.
261 */
262void single_step_execve(void)
263{
264 struct thread_info *ti = current_thread_info();
265 kfree(ti->step_state);
266 ti->step_state = NULL;
267}
268
257/** 269/**
258 * single_step_once() - entry point when single stepping has been triggered. 270 * single_step_once() - entry point when single stepping has been triggered.
259 * @regs: The machine register state 271 * @regs: The machine register state
@@ -373,7 +385,7 @@ void single_step_once(struct pt_regs *regs)
373 /* branches */ 385 /* branches */
374 case BRANCH_OPCODE_X1: 386 case BRANCH_OPCODE_X1:
375 { 387 {
376 int32_t offset = signExtend17(get_BrOff_X1(bundle)); 388 s32 offset = signExtend17(get_BrOff_X1(bundle));
377 389
378 /* 390 /*
379 * For branches, we use a rewriting trick to let the 391 * For branches, we use a rewriting trick to let the
@@ -731,4 +743,9 @@ void single_step_once(struct pt_regs *regs)
731 __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); 743 __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL);
732} 744}
733 745
746void single_step_execve(void)
747{
748 /* Nothing */
749}
750
734#endif /* !__tilegx__ */ 751#endif /* !__tilegx__ */
diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c
index 9575b37a8b75..a4293102ef81 100644
--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
@@ -36,6 +36,22 @@ static unsigned long __iomem *ipi_mappings[NR_CPUS];
36/* Set by smp_send_stop() to avoid recursive panics. */ 36/* Set by smp_send_stop() to avoid recursive panics. */
37static int stopping_cpus; 37static int stopping_cpus;
38 38
39static void __send_IPI_many(HV_Recipient *recip, int nrecip, int tag)
40{
41 int sent = 0;
42 while (sent < nrecip) {
43 int rc = hv_send_message(recip, nrecip,
44 (HV_VirtAddr)&tag, sizeof(tag));
45 if (rc < 0) {
46 if (!stopping_cpus) /* avoid recursive panic */
47 panic("hv_send_message returned %d", rc);
48 break;
49 }
50 WARN_ONCE(rc == 0, "hv_send_message() returned zero\n");
51 sent += rc;
52 }
53}
54
39void send_IPI_single(int cpu, int tag) 55void send_IPI_single(int cpu, int tag)
40{ 56{
41 HV_Recipient recip = { 57 HV_Recipient recip = {
@@ -43,14 +59,13 @@ void send_IPI_single(int cpu, int tag)
43 .x = cpu % smp_width, 59 .x = cpu % smp_width,
44 .state = HV_TO_BE_SENT 60 .state = HV_TO_BE_SENT
45 }; 61 };
46 int rc = hv_send_message(&recip, 1, (HV_VirtAddr)&tag, sizeof(tag)); 62 __send_IPI_many(&recip, 1, tag);
47 BUG_ON(rc <= 0);
48} 63}
49 64
50void send_IPI_many(const struct cpumask *mask, int tag) 65void send_IPI_many(const struct cpumask *mask, int tag)
51{ 66{
52 HV_Recipient recip[NR_CPUS]; 67 HV_Recipient recip[NR_CPUS];
53 int cpu, sent; 68 int cpu;
54 int nrecip = 0; 69 int nrecip = 0;
55 int my_cpu = smp_processor_id(); 70 int my_cpu = smp_processor_id();
56 for_each_cpu(cpu, mask) { 71 for_each_cpu(cpu, mask) {
@@ -61,17 +76,7 @@ void send_IPI_many(const struct cpumask *mask, int tag)
61 r->x = cpu % smp_width; 76 r->x = cpu % smp_width;
62 r->state = HV_TO_BE_SENT; 77 r->state = HV_TO_BE_SENT;
63 } 78 }
64 sent = 0; 79 __send_IPI_many(recip, nrecip, tag);
65 while (sent < nrecip) {
66 int rc = hv_send_message(recip, nrecip,
67 (HV_VirtAddr)&tag, sizeof(tag));
68 if (rc <= 0) {
69 if (!stopping_cpus) /* avoid recursive panic */
70 panic("hv_send_message returned %d", rc);
71 break;
72 }
73 sent += rc;
74 }
75} 80}
76 81
77void send_IPI_allbutself(int tag) 82void send_IPI_allbutself(int tag)
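
__send_IPI_many() loops because hv_send_message() may deliver to only some of the recipients per call; the kernel simply re-issues the call (with the hypervisor tracking per-recipient state) until everything is sent. A userspace sketch of that partial-send retry pattern, with fake_send() standing in for the hypervisor call:

#include <stdio.h>

/* Pretend transport that, like hv_send_message(), may accept only some
 * of the recipients per call; here it takes at most two at a time. */
static int fake_send(int nrecip)
{
        return nrecip > 2 ? 2 : nrecip;
}

static void send_all(int nrecip)
{
        int sent = 0;

        while (sent < nrecip) {
                int rc = fake_send(nrecip - sent);
                if (rc < 0)
                        break;            /* the kernel panics here instead */
                sent += rc;
        }
        printf("delivered %d of %d\n", sent, nrecip);
}

int main(void)
{
        send_all(5);                      /* takes three calls: 2 + 2 + 1 */
        return 0;
}
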
diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c
index 0d54106be3d6..dd81713a90dc 100644
--- a/arch/tile/kernel/stack.c
+++ b/arch/tile/kernel/stack.c
@@ -44,13 +44,6 @@ static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp)
44 return sp >= kstack_base && sp < kstack_base + THREAD_SIZE; 44 return sp >= kstack_base && sp < kstack_base + THREAD_SIZE;
45} 45}
46 46
47/* Is address in the specified kernel code? */
48static int in_kernel_text(VirtualAddress address)
49{
50 return (address >= MEM_SV_INTRPT &&
51 address < MEM_SV_INTRPT + HPAGE_SIZE);
52}
53
54/* Is address valid for reading? */ 47/* Is address valid for reading? */
55static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address) 48static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address)
56{ 49{
@@ -63,6 +56,23 @@ static int valid_address(struct KBacktraceIterator *kbt, VirtualAddress address)
63 if (l1_pgtable == NULL) 56 if (l1_pgtable == NULL)
64 return 0; /* can't read user space in other tasks */ 57 return 0; /* can't read user space in other tasks */
65 58
59#ifdef CONFIG_64BIT
60 /* Find the real l1_pgtable by looking in the l0_pgtable. */
61 pte = l1_pgtable[HV_L0_INDEX(address)];
62 if (!hv_pte_get_present(pte))
63 return 0;
64 pfn = hv_pte_get_pfn(pte);
65 if (pte_huge(pte)) {
66 if (!pfn_valid(pfn)) {
67 pr_err("L0 huge page has bad pfn %#lx\n", pfn);
68 return 0;
69 }
70 return hv_pte_get_present(pte) && hv_pte_get_readable(pte);
71 }
72 page = pfn_to_page(pfn);
73 BUG_ON(PageHighMem(page)); /* No HIGHMEM on 64-bit. */
74 l1_pgtable = (HV_PTE *)pfn_to_kaddr(pfn);
75#endif
66 pte = l1_pgtable[HV_L1_INDEX(address)]; 76 pte = l1_pgtable[HV_L1_INDEX(address)];
67 if (!hv_pte_get_present(pte)) 77 if (!hv_pte_get_present(pte))
68 return 0; 78 return 0;
@@ -92,7 +102,7 @@ static bool read_memory_func(void *result, VirtualAddress address,
92{ 102{
93 int retval; 103 int retval;
94 struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt; 104 struct KBacktraceIterator *kbt = (struct KBacktraceIterator *)vkbt;
95 if (in_kernel_text(address)) { 105 if (__kernel_text_address(address)) {
96 /* OK to read kernel code. */ 106 /* OK to read kernel code. */
97 } else if (address >= PAGE_OFFSET) { 107 } else if (address >= PAGE_OFFSET) {
98 /* We only tolerate kernel-space reads of this task's stack */ 108 /* We only tolerate kernel-space reads of this task's stack */
@@ -132,7 +142,7 @@ static struct pt_regs *valid_fault_handler(struct KBacktraceIterator* kbt)
132 } 142 }
133 } 143 }
134 if (EX1_PL(p->ex1) == KERNEL_PL && 144 if (EX1_PL(p->ex1) == KERNEL_PL &&
135 in_kernel_text(p->pc) && 145 __kernel_text_address(p->pc) &&
136 in_kernel_stack(kbt, p->sp) && 146 in_kernel_stack(kbt, p->sp) &&
137 p->sp >= sp) { 147 p->sp >= sp) {
138 if (kbt->verbose) 148 if (kbt->verbose)
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index f2e156e44692..49a605be94c5 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -224,3 +224,13 @@ int setup_profiling_timer(unsigned int multiplier)
224{ 224{
225 return -EINVAL; 225 return -EINVAL;
226} 226}
227
228/*
229 * Use the tile timer to convert nsecs to core clock cycles, relying
230 * on it having the same frequency as SPR_CYCLE.
231 */
232cycles_t ns2cycles(unsigned long nsecs)
233{
234 struct clock_event_device *dev = &__get_cpu_var(tile_timer);
235 return ((u64)nsecs * dev->mult) >> dev->shift;
236}
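
ns2cycles() reuses the clockevent's mult/shift pair, which encodes cycles-per-nanosecond as a fixed-point fraction: cycles = (nsecs * mult) >> shift. A standalone sketch of that conversion; the 866 MHz figure and the calc_mult() helper are assumptions (the kernel derives the pair with clocks_calc_mult_shift()):

#include <stdint.h>
#include <stdio.h>

/* Build a mult/shift pair so that (ns * mult) >> shift ~= ns * hz / 1e9. */
static uint32_t calc_mult(uint32_t hz, uint32_t shift)
{
        return (uint32_t)(((uint64_t)hz << shift) / 1000000000ULL);
}

static uint64_t ns2cycles(uint64_t nsecs, uint32_t mult, uint32_t shift)
{
        return (nsecs * mult) >> shift;
}

int main(void)
{
        uint32_t shift = 31;
        uint32_t mult = calc_mult(866000000u, shift);   /* assumed 866 MHz clock */

        printf("1500 ns ~= %llu cycles\n",
               (unsigned long long)ns2cycles(1500, mult, shift));
        return 0;
}
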
diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S
index c6ce378e0678..38f64fafdc10 100644
--- a/arch/tile/kernel/vmlinux.lds.S
+++ b/arch/tile/kernel/vmlinux.lds.S
@@ -59,10 +59,7 @@ SECTIONS
59 59
60 . = ALIGN(PAGE_SIZE); 60 . = ALIGN(PAGE_SIZE);
61 VMLINUX_SYMBOL(_sinitdata) = .; 61 VMLINUX_SYMBOL(_sinitdata) = .;
62 .init.page : AT (ADDR(.init.page) - LOAD_OFFSET) { 62 INIT_DATA_SECTION(16) :data =0
63 *(.init.page)
64 } :data =0
65 INIT_DATA_SECTION(16)
66 PERCPU(L2_CACHE_BYTES, PAGE_SIZE) 63 PERCPU(L2_CACHE_BYTES, PAGE_SIZE)
67 . = ALIGN(PAGE_SIZE); 64 . = ALIGN(PAGE_SIZE);
68 VMLINUX_SYMBOL(_einitdata) = .; 65 VMLINUX_SYMBOL(_einitdata) = .;
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 93122d5b1558..0c26086ecbef 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -2,9 +2,8 @@
2# Makefile for TILE-specific library files.. 2# Makefile for TILE-specific library files..
3# 3#
4 4
5lib-y = cacheflush.o checksum.o cpumask.o delay.o \ 5lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
6 mb_incoherent.o uaccess.o memmove.o \ 6 memmove.o memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
7 memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \
8 strchr_$(BITS).o strlen_$(BITS).o 7 strchr_$(BITS).o strlen_$(BITS).o
9 8
10ifeq ($(CONFIG_TILEGX),y) 9ifeq ($(CONFIG_TILEGX),y)
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 7a5cc706ab62..f02040d3614e 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -46,14 +46,13 @@ struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
46#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ 46#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
47 47
48/* This page is remapped on startup to be hash-for-home. */ 48/* This page is remapped on startup to be hash-for-home. */
49int atomic_locks[PAGE_SIZE / sizeof(int) /* Only ATOMIC_HASH_SIZE is used */] 49int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
50 __attribute__((aligned(PAGE_SIZE), section(".bss.page_aligned")));
51 50
52#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ 51#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
53 52
54static inline int *__atomic_hashed_lock(volatile void *v) 53static inline int *__atomic_hashed_lock(volatile void *v)
55{ 54{
56 /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec.S */ 55 /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
57#if ATOMIC_LOCKS_FOUND_VIA_TABLE() 56#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
58 unsigned long i = 57 unsigned long i =
59 (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long)); 58 (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
index 5a5514b77e78..82f64cc63658 100644
--- a/arch/tile/lib/atomic_asm_32.S
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -14,7 +14,7 @@
14 * Support routines for atomic operations. Each function takes: 14 * Support routines for atomic operations. Each function takes:
15 * 15 *
16 * r0: address to manipulate 16 * r0: address to manipulate
17 * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG) 17 * r1: pointer to atomic lock guarding this operation (for ATOMIC_LOCK_REG)
18 * r2: new value to write, or for cmpxchg/add_unless, value to compare against 18 * r2: new value to write, or for cmpxchg/add_unless, value to compare against
19 * r3: (cmpxchg/xchg_add_unless) new value to write or add; 19 * r3: (cmpxchg/xchg_add_unless) new value to write or add;
20 * (atomic64 ops) high word of value to write 20 * (atomic64 ops) high word of value to write
diff --git a/arch/tile/lib/cacheflush.c b/arch/tile/lib/cacheflush.c
index 11b6164c2097..35c1d8ca5f38 100644
--- a/arch/tile/lib/cacheflush.c
+++ b/arch/tile/lib/cacheflush.c
@@ -21,3 +21,105 @@ void __flush_icache_range(unsigned long start, unsigned long end)
21{ 21{
22 invalidate_icache((const void *)start, end - start, PAGE_SIZE); 22 invalidate_icache((const void *)start, end - start, PAGE_SIZE);
23} 23}
24
25
26/* Force a load instruction to issue. */
27static inline void force_load(char *p)
28{
29 *(volatile char *)p;
30}
31
32/*
33 * Flush and invalidate a VA range that is homed remotely on a single
34 * core (if "!hfh") or homed via hash-for-home (if "hfh"), waiting
35 * until the memory controller holds the flushed values.
36 */
37void finv_buffer_remote(void *buffer, size_t size, int hfh)
38{
39 char *p, *base;
40 size_t step_size, load_count;
41 const unsigned long STRIPE_WIDTH = 8192;
42
43 /*
44 * Flush and invalidate the buffer out of the local L1/L2
45 * and request the home cache to flush and invalidate as well.
46 */
47 __finv_buffer(buffer, size);
48
49 /*
50 * Wait for the home cache to acknowledge that it has processed
51 * all the flush-and-invalidate requests. This does not mean
52 * that the flushed data has reached the memory controller yet,
53 * but it does mean the home cache is processing the flushes.
54 */
55 __insn_mf();
56
57 /*
58 * Issue a load to the last cache line, which can't complete
59 * until all the previously-issued flushes to the same memory
60 * controller have also completed. If we weren't striping
61 * memory, that one load would be sufficient, but since we may
62 * be, we also need to back up to the last load issued to
63 * another memory controller, which would be the point where
64 * we crossed an 8KB boundary (the granularity of striping
65 * across memory controllers). Keep backing up and doing this
66 * until we are before the beginning of the buffer, or have
67 * hit all the controllers.
68 *
69 * If we are flushing a hash-for-home buffer, it's even worse.
70 * Each line may be homed on a different tile, and each tile
71 * may have up to four lines that are on different
72 * controllers. So as we walk backwards, we have to touch
73 * enough cache lines to satisfy these constraints. In
74 * practice this ends up being close enough to "load from
75 * every cache line on a full memory stripe on each
76 * controller" that we simply do that, to simplify the logic.
77 *
78 * FIXME: See bug 9535 for some issues with this code.
79 */
80 if (hfh) {
81 step_size = L2_CACHE_BYTES;
82 load_count = (STRIPE_WIDTH / L2_CACHE_BYTES) *
83 (1 << CHIP_LOG_NUM_MSHIMS());
84 } else {
85 step_size = STRIPE_WIDTH;
86 load_count = (1 << CHIP_LOG_NUM_MSHIMS());
87 }
88
89 /* Load the last byte of the buffer. */
90 p = (char *)buffer + size - 1;
91 force_load(p);
92
93 /* Bump down to the end of the previous stripe or cache line. */
94 p -= step_size;
95 p = (char *)((unsigned long)p | (step_size - 1));
96
97 /* Figure out how far back we need to go. */
98 base = p - (step_size * (load_count - 2));
99 if ((long)base < (long)buffer)
100 base = buffer;
101
102 /*
103 * Fire all the loads we need. The MAF only has eight entries
104 * so we can have at most eight outstanding loads, so we
105 * unroll by that amount.
106 */
107#pragma unroll 8
108 for (; p >= base; p -= step_size)
109 force_load(p);
110
111 /*
112 * Repeat, but with inv's instead of loads, to get rid of the
113 * data we just loaded into our own cache and the old home L3.
114 * No need to unroll since inv's don't target a register.
115 */
116 p = (char *)buffer + size - 1;
117 __insn_inv(p);
118 p -= step_size;
119 p = (char *)((unsigned long)p | (step_size - 1));
120 for (; p >= base; p -= step_size)
121 __insn_inv(p);
122
123 /* Wait for the load+inv's (and thus finvs) to have completed. */
124 __insn_mf();
125}
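
The long comment in finv_buffer_remote() describes walking backwards from the end of the buffer, touching one address per memory stripe (or per cache line for hash-for-home) until every controller has been reached. A userspace sketch of just that address arithmetic, printing the addresses the loop would load from; the 8KB stripe width is the value quoted in the comment, and four controllers are assumed:

#include <stdio.h>

#define STRIPE_WIDTH 8192UL               /* striping granularity from the comment */

/* Print the addresses the backing-off loop would load from. */
static void walk(unsigned long buffer, unsigned long size,
                 unsigned long step_size, unsigned long load_count)
{
        unsigned long p = buffer + size - 1;      /* last byte of the buffer */
        unsigned long base;

        printf("touch %#lx\n", p);
        /* Back up to the end of the previous stripe (or cache line). */
        p -= step_size;
        p |= step_size - 1;
        base = p - step_size * (load_count - 2);
        if (base < buffer)
                base = buffer;
        for (; p >= base; p -= step_size)
                printf("touch %#lx\n", p);
}

int main(void)
{
        walk(0x100000, 4 * STRIPE_WIDTH, STRIPE_WIDTH, 4);  /* assume 4 controllers */
        return 0;
}
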
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c
index 5801b03c13ef..cdacdd11d360 100644
--- a/arch/tile/lib/delay.c
+++ b/arch/tile/lib/delay.c
@@ -15,20 +15,31 @@
15#include <linux/module.h> 15#include <linux/module.h>
16#include <linux/delay.h> 16#include <linux/delay.h>
17#include <linux/thread_info.h> 17#include <linux/thread_info.h>
18#include <asm/fixmap.h> 18#include <asm/timex.h>
19#include <hv/hypervisor.h>
20 19
21void __udelay(unsigned long usecs) 20void __udelay(unsigned long usecs)
22{ 21{
23 hv_nanosleep(usecs * 1000); 22 if (usecs > ULONG_MAX / 1000) {
23 WARN_ON_ONCE(usecs > ULONG_MAX / 1000);
24 usecs = ULONG_MAX / 1000;
25 }
26 __ndelay(usecs * 1000);
24} 27}
25EXPORT_SYMBOL(__udelay); 28EXPORT_SYMBOL(__udelay);
26 29
27void __ndelay(unsigned long nsecs) 30void __ndelay(unsigned long nsecs)
28{ 31{
29 hv_nanosleep(nsecs); 32 cycles_t target = get_cycles();
33 target += ns2cycles(nsecs);
34 while (get_cycles() < target)
35 cpu_relax();
30} 36}
31EXPORT_SYMBOL(__ndelay); 37EXPORT_SYMBOL(__ndelay);
32 38
33/* FIXME: should be declared in a header somewhere. */ 39void __delay(unsigned long cycles)
40{
41 cycles_t target = get_cycles() + cycles;
42 while (get_cycles() < target)
43 cpu_relax();
44}
34EXPORT_SYMBOL(__delay); 45EXPORT_SYMBOL(__delay);
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
index 1509c5597653..49284fae9d09 100644
--- a/arch/tile/lib/exports.c
+++ b/arch/tile/lib/exports.c
@@ -29,6 +29,9 @@ EXPORT_SYMBOL(__put_user_8);
29EXPORT_SYMBOL(strnlen_user_asm); 29EXPORT_SYMBOL(strnlen_user_asm);
30EXPORT_SYMBOL(strncpy_from_user_asm); 30EXPORT_SYMBOL(strncpy_from_user_asm);
31EXPORT_SYMBOL(clear_user_asm); 31EXPORT_SYMBOL(clear_user_asm);
32EXPORT_SYMBOL(flush_user_asm);
33EXPORT_SYMBOL(inv_user_asm);
34EXPORT_SYMBOL(finv_user_asm);
32 35
33/* arch/tile/kernel/entry.S */ 36/* arch/tile/kernel/entry.S */
34#include <linux/kernel.h> 37#include <linux/kernel.h>
@@ -45,9 +48,6 @@ EXPORT_SYMBOL(__copy_from_user_zeroing);
45EXPORT_SYMBOL(__copy_in_user_inatomic); 48EXPORT_SYMBOL(__copy_in_user_inatomic);
46#endif 49#endif
47 50
48/* arch/tile/lib/mb_incoherent.S */
49EXPORT_SYMBOL(__mb_incoherent);
50
51/* hypervisor glue */ 51/* hypervisor glue */
52#include <hv/hypervisor.h> 52#include <hv/hypervisor.h>
53EXPORT_SYMBOL(hv_dev_open); 53EXPORT_SYMBOL(hv_dev_open);
@@ -85,4 +85,8 @@ int64_t __muldi3(int64_t, int64_t);
85EXPORT_SYMBOL(__muldi3); 85EXPORT_SYMBOL(__muldi3);
86uint64_t __lshrdi3(uint64_t, unsigned int); 86uint64_t __lshrdi3(uint64_t, unsigned int);
87EXPORT_SYMBOL(__lshrdi3); 87EXPORT_SYMBOL(__lshrdi3);
88uint64_t __ashrdi3(uint64_t, unsigned int);
89EXPORT_SYMBOL(__ashrdi3);
90uint64_t __ashldi3(uint64_t, unsigned int);
91EXPORT_SYMBOL(__ashldi3);
88#endif 92#endif
diff --git a/arch/tile/lib/mb_incoherent.S b/arch/tile/lib/mb_incoherent.S
deleted file mode 100644
index 989ad7b68d5a..000000000000
--- a/arch/tile/lib/mb_incoherent.S
+++ /dev/null
@@ -1,34 +0,0 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 *
14 * Assembly code for invoking the HV's fence_incoherent syscall.
15 */
16
17#include <linux/linkage.h>
18#include <hv/syscall_public.h>
19#include <arch/abi.h>
20#include <arch/chip.h>
21
22#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
23
24/*
25 * Invoke the hypervisor's fence_incoherent syscall, which guarantees
26 * that all victims for cachelines homed on this tile have reached memory.
27 */
28STD_ENTRY(__mb_incoherent)
29 moveli TREG_SYSCALL_NR_NAME, HV_SYS_fence_incoherent
30 swint2
31 jrp lr
32 STD_ENDPROC(__mb_incoherent)
33
34#endif
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
index f7d4a6ad61e8..b2fe15e01075 100644
--- a/arch/tile/lib/memcpy_tile64.c
+++ b/arch/tile/lib/memcpy_tile64.c
@@ -96,7 +96,7 @@ static void memcpy_multicache(void *dest, const void *source,
96 newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); 96 newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
97 pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc); 97 pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
98 ptep = pte_offset_kernel(pmdp, newsrc); 98 ptep = pte_offset_kernel(pmdp, newsrc);
99 *ptep = src_pte; /* set_pte() would be confused by this */ 99 __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
100 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); 100 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
101 101
102 /* Actually move the data. */ 102 /* Actually move the data. */
@@ -109,7 +109,7 @@ static void memcpy_multicache(void *dest, const void *source,
109 */ 109 */
110 src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3); 110 src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
111 src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */ 111 src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
112 *ptep = src_pte; /* set_pte() would be confused by this */ 112 __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
113 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE); 113 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
114 114
115 /* 115 /*
diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c
index 5cd1c4004eca..cb0999fb64b4 100644
--- a/arch/tile/lib/spinlock_32.c
+++ b/arch/tile/lib/spinlock_32.c
@@ -15,6 +15,7 @@
15#include <linux/spinlock.h> 15#include <linux/spinlock.h>
16#include <linux/module.h> 16#include <linux/module.h>
17#include <asm/processor.h> 17#include <asm/processor.h>
18#include <arch/spr_def.h>
18 19
19#include "spinlock_common.h" 20#include "spinlock_common.h"
20 21
@@ -91,75 +92,75 @@ EXPORT_SYMBOL(arch_spin_unlock_wait);
91#define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1) 92#define RD_COUNT_MASK ((1 << RD_COUNT_WIDTH) - 1)
92 93
93 94
94/* Lock the word, spinning until there are no tns-ers. */ 95/*
95static inline u32 get_rwlock(arch_rwlock_t *rwlock) 96 * We can get the read lock if everything but the reader bits (which
96{ 97 * are in the high part of the word) is zero, i.e. no active or
97 u32 iterations = 0; 98 * waiting writers, no tns.
98 for (;;) { 99 *
99 u32 val = __insn_tns((int *)&rwlock->lock); 100 * We guard the tns/store-back with an interrupt critical section to
100 if (unlikely(val & 1)) { 101 * preserve the semantic that the same read lock can be acquired in an
101 delay_backoff(iterations++); 102 * interrupt context.
102 continue; 103 */
103 } 104inline int arch_read_trylock(arch_rwlock_t *rwlock)
104 return val;
105 }
106}
107
108int arch_read_trylock_slow(arch_rwlock_t *rwlock)
109{
110 u32 val = get_rwlock(rwlock);
111 int locked = (val << RD_COUNT_WIDTH) == 0;
112 rwlock->lock = val + (locked << RD_COUNT_SHIFT);
113 return locked;
114}
115EXPORT_SYMBOL(arch_read_trylock_slow);
116
117void arch_read_unlock_slow(arch_rwlock_t *rwlock)
118{
119 u32 val = get_rwlock(rwlock);
120 rwlock->lock = val - (1 << RD_COUNT_SHIFT);
121}
122EXPORT_SYMBOL(arch_read_unlock_slow);
123
124void arch_write_unlock_slow(arch_rwlock_t *rwlock, u32 val)
125{ 105{
126 u32 eq, mask = 1 << WR_CURR_SHIFT; 106 u32 val;
127 while (unlikely(val & 1)) { 107 __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
128 /* Limited backoff since we are the highest-priority task. */ 108 val = __insn_tns((int *)&rwlock->lock);
129 relax(4); 109 if (likely((val << _RD_COUNT_WIDTH) == 0)) {
130 val = __insn_tns((int *)&rwlock->lock); 110 val += 1 << RD_COUNT_SHIFT;
111 rwlock->lock = val;
112 __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
113 BUG_ON(val == 0); /* we don't expect wraparound */
114 return 1;
131 } 115 }
132 val = __insn_addb(val, mask); 116 if ((val & 1) == 0)
133 eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT)); 117 rwlock->lock = val;
134 val = __insn_mz(eq & mask, val); 118 __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
135 rwlock->lock = val; 119 return 0;
136} 120}
137EXPORT_SYMBOL(arch_write_unlock_slow); 121EXPORT_SYMBOL(arch_read_trylock);
138 122
139/* 123/*
140 * We spin until everything but the reader bits (which are in the high 124 * Spin doing arch_read_trylock() until we acquire the lock.
141 * part of the word) are zero, i.e. no active or waiting writers, no tns.
142 *
143 * ISSUE: This approach can permanently starve readers. A reader who sees 125 * ISSUE: This approach can permanently starve readers. A reader who sees
144 * a writer could instead take a ticket lock (just like a writer would), 126 * a writer could instead take a ticket lock (just like a writer would),
145 * and atomically enter read mode (with 1 reader) when it gets the ticket. 127 * and atomically enter read mode (with 1 reader) when it gets the ticket.
146 * This way both readers and writers will always make forward progress 128 * This way both readers and writers would always make forward progress
147 * in a finite time. 129 * in a finite time.
148 */ 130 */
149void arch_read_lock_slow(arch_rwlock_t *rwlock, u32 val) 131void arch_read_lock(arch_rwlock_t *rwlock)
150{ 132{
151 u32 iterations = 0; 133 u32 iterations = 0;
152 do { 134 while (unlikely(!arch_read_trylock(rwlock)))
153 if (!(val & 1))
154 rwlock->lock = val;
155 delay_backoff(iterations++); 135 delay_backoff(iterations++);
136}
137EXPORT_SYMBOL(arch_read_lock);
138
139void arch_read_unlock(arch_rwlock_t *rwlock)
140{
141 u32 val, iterations = 0;
142
143 mb(); /* guarantee anything modified under the lock is visible */
144 for (;;) {
145 __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1);
156 val = __insn_tns((int *)&rwlock->lock); 146 val = __insn_tns((int *)&rwlock->lock);
157 } while ((val << RD_COUNT_WIDTH) != 0); 147 if (likely((val & 1) == 0)) {
158 rwlock->lock = val + (1 << RD_COUNT_SHIFT); 148 rwlock->lock = val - (1 << _RD_COUNT_SHIFT);
149 __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
150 break;
151 }
152 __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0);
153 delay_backoff(iterations++);
154 }
159} 155}
160EXPORT_SYMBOL(arch_read_lock_slow); 156EXPORT_SYMBOL(arch_read_unlock);
161 157
162void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) 158/*
159 * We don't need an interrupt critical section here (unlike for
160 * arch_read_lock) since we should never use a bare write lock where
161 * it could be interrupted by code that could try to re-acquire it.
162 */
163void arch_write_lock(arch_rwlock_t *rwlock)
163{ 164{
164 /* 165 /*
165 * The trailing underscore on this variable (and curr_ below) 166 * The trailing underscore on this variable (and curr_ below)
@@ -168,6 +169,12 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
168 */ 169 */
169 u32 my_ticket_; 170 u32 my_ticket_;
170 u32 iterations = 0; 171 u32 iterations = 0;
172 u32 val = __insn_tns((int *)&rwlock->lock);
173
174 if (likely(val == 0)) {
175 rwlock->lock = 1 << _WR_NEXT_SHIFT;
176 return;
177 }
171 178
172 /* 179 /*
173 * Wait until there are no readers, then bump up the next 180 * Wait until there are no readers, then bump up the next
@@ -206,23 +213,47 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val)
206 relax(4); 213 relax(4);
207 } 214 }
208} 215}
209EXPORT_SYMBOL(arch_write_lock_slow); 216EXPORT_SYMBOL(arch_write_lock);
210 217
211int __tns_atomic_acquire(atomic_t *lock) 218int arch_write_trylock(arch_rwlock_t *rwlock)
212{ 219{
213 int ret; 220 u32 val = __insn_tns((int *)&rwlock->lock);
214 u32 iterations = 0;
215 221
216 BUG_ON(__insn_mfspr(SPR_INTERRUPT_CRITICAL_SECTION)); 222 /*
217 __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 1); 223 * If a tns is in progress, or there's a waiting or active locker,
224 * or active readers, we can't take the lock, so give up.
225 */
226 if (unlikely(val != 0)) {
227 if (!(val & 1))
228 rwlock->lock = val;
229 return 0;
230 }
218 231
219 while ((ret = __insn_tns((void *)&lock->counter)) == 1) 232 /* Set the "next" field to mark it locked. */
220 delay_backoff(iterations++); 233 rwlock->lock = 1 << _WR_NEXT_SHIFT;
221 return ret; 234 return 1;
222} 235}
236EXPORT_SYMBOL(arch_write_trylock);
223 237
224void __tns_atomic_release(atomic_t *p, int v) 238void arch_write_unlock(arch_rwlock_t *rwlock)
225{ 239{
226 p->counter = v; 240 u32 val, eq, mask;
227 __insn_mtspr(SPR_INTERRUPT_CRITICAL_SECTION, 0); 241
242 mb(); /* guarantee anything modified under the lock is visible */
243 val = __insn_tns((int *)&rwlock->lock);
244 if (likely(val == (1 << _WR_NEXT_SHIFT))) {
245 rwlock->lock = 0;
246 return;
247 }
248 while (unlikely(val & 1)) {
249 /* Limited backoff since we are the highest-priority task. */
250 relax(4);
251 val = __insn_tns((int *)&rwlock->lock);
252 }
253 mask = 1 << WR_CURR_SHIFT;
254 val = __insn_addb(val, mask);
255 eq = __insn_seqb(val, val << (WR_CURR_SHIFT - WR_NEXT_SHIFT));
256 val = __insn_mz(eq & mask, val);
257 rwlock->lock = val;
228} 258}
259EXPORT_SYMBOL(arch_write_unlock);
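
The rewritten rwlock keeps the reader count in the high bits of a single word, with bit 0 meaning a tns is in flight. A userspace sketch of the read-trylock test, using a C11 exchange as a stand-in for tns and an assumed 8-bit reader field; the real code additionally wraps this in an interrupt critical section, which the sketch omits:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define RD_COUNT_SHIFT 24                 /* assumed layout: readers in the top byte */

static _Atomic uint32_t lock_word;

/* Grab the word with a tns-like exchange; a returned value with bit 0 set
 * means another cpu's tns is still in flight, so we must not store it back. */
static int read_trylock(void)
{
        uint32_t val = atomic_exchange(&lock_word, 1);

        if ((val << (32 - RD_COUNT_SHIFT)) == 0) {   /* no writers, no tns */
                atomic_store(&lock_word, val + (1u << RD_COUNT_SHIFT));
                return 1;
        }
        if (!(val & 1))
                atomic_store(&lock_word, val);       /* put back what we displaced */
        return 0;
}

int main(void)
{
        printf("%d %d\n", read_trylock(), read_trylock());      /* both succeed */
        printf("readers = %u\n", atomic_load(&lock_word) >> RD_COUNT_SHIFT);
        return 0;
}
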
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index dcebfc831cd6..758f597f488c 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -655,14 +655,6 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num,
655 } 655 }
656 656
657 /* 657 /*
658 * NOTE: the one other type of access that might bring us here
659 * are the memory ops in __tns_atomic_acquire/__tns_atomic_release,
660 * but we don't have to check specially for them since we can
661 * always safely return to the address of the fault and retry,
662 * since no separate atomic locks are involved.
663 */
664
665 /*
666 * Now that we have released the atomic lock (if necessary), 658 * Now that we have released the atomic lock (if necessary),
667 * it's safe to spin if the PTE that caused the fault was migrating. 659 * it's safe to spin if the PTE that caused the fault was migrating.
668 */ 660 */
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index d78df3a6ee15..cbe6f4f9eca3 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -179,23 +179,46 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control,
179 panic("Unsafe to continue."); 179 panic("Unsafe to continue.");
180} 180}
181 181
182void flush_remote_page(struct page *page, int order)
183{
184 int i, pages = (1 << order);
185 for (i = 0; i < pages; ++i, ++page) {
186 void *p = kmap_atomic(page);
187 int hfh = 0;
188 int home = page_home(page);
189#if CHIP_HAS_CBOX_HOME_MAP()
190 if (home == PAGE_HOME_HASH)
191 hfh = 1;
192 else
193#endif
194 BUG_ON(home < 0 || home >= NR_CPUS);
195 finv_buffer_remote(p, PAGE_SIZE, hfh);
196 kunmap_atomic(p);
197 }
198}
199
182void homecache_evict(const struct cpumask *mask) 200void homecache_evict(const struct cpumask *mask)
183{ 201{
184 flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0); 202 flush_remote(0, HV_FLUSH_EVICT_L2, mask, 0, 0, 0, NULL, NULL, 0);
185} 203}
186 204
187/* Return a mask of the cpus whose caches currently own these pages. */ 205/*
188static void homecache_mask(struct page *page, int pages, 206 * Return a mask of the cpus whose caches currently own these pages.
189 struct cpumask *home_mask) 207 * The return value is whether the pages are all coherently cached
208 * (i.e. none are immutable, incoherent, or uncached).
209 */
210static int homecache_mask(struct page *page, int pages,
211 struct cpumask *home_mask)
190{ 212{
191 int i; 213 int i;
214 int cached_coherently = 1;
192 cpumask_clear(home_mask); 215 cpumask_clear(home_mask);
193 for (i = 0; i < pages; ++i) { 216 for (i = 0; i < pages; ++i) {
194 int home = page_home(&page[i]); 217 int home = page_home(&page[i]);
195 if (home == PAGE_HOME_IMMUTABLE || 218 if (home == PAGE_HOME_IMMUTABLE ||
196 home == PAGE_HOME_INCOHERENT) { 219 home == PAGE_HOME_INCOHERENT) {
197 cpumask_copy(home_mask, cpu_possible_mask); 220 cpumask_copy(home_mask, cpu_possible_mask);
198 return; 221 return 0;
199 } 222 }
200#if CHIP_HAS_CBOX_HOME_MAP() 223#if CHIP_HAS_CBOX_HOME_MAP()
201 if (home == PAGE_HOME_HASH) { 224 if (home == PAGE_HOME_HASH) {
@@ -203,11 +226,14 @@ static void homecache_mask(struct page *page, int pages,
203 continue; 226 continue;
204 } 227 }
205#endif 228#endif
206 if (home == PAGE_HOME_UNCACHED) 229 if (home == PAGE_HOME_UNCACHED) {
230 cached_coherently = 0;
207 continue; 231 continue;
232 }
208 BUG_ON(home < 0 || home >= NR_CPUS); 233 BUG_ON(home < 0 || home >= NR_CPUS);
209 cpumask_set_cpu(home, home_mask); 234 cpumask_set_cpu(home, home_mask);
210 } 235 }
236 return cached_coherently;
211} 237}
212 238
213/* 239/*
@@ -386,7 +412,7 @@ void homecache_change_page_home(struct page *page, int order, int home)
386 pte_t *ptep = virt_to_pte(NULL, kva); 412 pte_t *ptep = virt_to_pte(NULL, kva);
387 pte_t pteval = *ptep; 413 pte_t pteval = *ptep;
388 BUG_ON(!pte_present(pteval) || pte_huge(pteval)); 414 BUG_ON(!pte_present(pteval) || pte_huge(pteval));
389 *ptep = pte_set_home(pteval, home); 415 __set_pte(ptep, pte_set_home(pteval, home));
390 } 416 }
391} 417}
392 418
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index 0b9ce69b0ee5..d6e87fda2fb2 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -53,22 +53,11 @@
53 53
54#include "migrate.h" 54#include "migrate.h"
55 55
56/*
57 * We could set FORCE_MAX_ZONEORDER to "(HPAGE_SHIFT - PAGE_SHIFT + 1)"
58 * in the Tile Kconfig, but this generates configure warnings.
59 * Do it here and force people to get it right to compile this file.
60 * The problem is that with 4KB small pages and 16MB huge pages,
61 * the default value doesn't allow us to group enough small pages
62 * together to make up a huge page.
63 */
64#if CONFIG_FORCE_MAX_ZONEORDER < HPAGE_SHIFT - PAGE_SHIFT + 1
65# error "Change FORCE_MAX_ZONEORDER in arch/tile/Kconfig to match page size"
66#endif
67
68#define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0)) 56#define clear_pgd(pmdptr) (*(pmdptr) = hv_pte(0))
69 57
70#ifndef __tilegx__ 58#ifndef __tilegx__
71unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE; 59unsigned long VMALLOC_RESERVE = CONFIG_VMALLOC_RESERVE;
60EXPORT_SYMBOL(VMALLOC_RESERVE);
72#endif 61#endif
73 62
74DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); 63DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -445,7 +434,7 @@ static pmd_t *__init get_pmd(pgd_t pgtables[], unsigned long va)
445 434
446/* Temporary page table we use for staging. */ 435/* Temporary page table we use for staging. */
447static pgd_t pgtables[PTRS_PER_PGD] 436static pgd_t pgtables[PTRS_PER_PGD]
448 __attribute__((section(".init.page"))); 437 __attribute__((aligned(HV_PAGE_TABLE_ALIGN)));
449 438
450/* 439/*
451 * This maps the physical memory to kernel virtual address space, a total 440 * This maps the physical memory to kernel virtual address space, a total
@@ -653,6 +642,17 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
653 memcpy(pgd_base, pgtables, sizeof(pgtables)); 642 memcpy(pgd_base, pgtables, sizeof(pgtables));
654 __install_page_table(pgd_base, __get_cpu_var(current_asid), 643 __install_page_table(pgd_base, __get_cpu_var(current_asid),
655 swapper_pgprot); 644 swapper_pgprot);
645
646 /*
647 * We just read swapper_pgprot and thus brought it into the cache,
648 * with its new home & caching mode. When we start the other CPUs,
649 * they're going to reference swapper_pgprot via their initial fake
650 * VA-is-PA mappings, which cache everything locally. At that
651 * time, if it's in our cache with a conflicting home, the
652 * simulator's coherence checker will complain. So, flush it out
653 * of our cache; we're not going to ever use it again anyway.
654 */
655 __insn_finv(&swapper_pgprot);
656} 656}
657 657
658/* 658/*
@@ -950,11 +950,7 @@ struct kmem_cache *pgd_cache;
950 950
951void __init pgtable_cache_init(void) 951void __init pgtable_cache_init(void)
952{ 952{
953 pgd_cache = kmem_cache_create("pgd", 953 pgd_cache = kmem_cache_create("pgd", SIZEOF_PGD, SIZEOF_PGD, 0, NULL);
954 PTRS_PER_PGD*sizeof(pgd_t),
955 PTRS_PER_PGD*sizeof(pgd_t),
956 0,
957 NULL);
958 if (!pgd_cache) 954 if (!pgd_cache)
959 panic("pgtable_cache_init(): Cannot create pgd cache"); 955 panic("pgtable_cache_init(): Cannot create pgd cache");
960} 956}
@@ -989,7 +985,7 @@ static long __write_once initfree = 1;
989static int __init set_initfree(char *str) 985static int __init set_initfree(char *str)
990{ 986{
991 long val; 987 long val;
992 if (strict_strtol(str, 0, &val)) { 988 if (strict_strtol(str, 0, &val) == 0) {
993 initfree = val; 989 initfree = val;
994 pr_info("initfree: %s free init pages\n", 990 pr_info("initfree: %s free init pages\n",
995 initfree ? "will" : "won't"); 991 initfree ? "will" : "won't");
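
The initfree fix matters because strict_strtol() returns 0 on success, so the old check only took effect when parsing failed. A hedged userspace sketch of the corrected logic, using strtol() in place of strict_strtol():

#include <stdio.h>
#include <stdlib.h>

static long initfree = 1;

/* Parse "initfree=<n>"; mirror the fixed check: only act when parsing succeeds. */
static int set_initfree(const char *str)
{
        char *end;
        long val = strtol(str, &end, 0);

        if (end != str && *end == '\0') {     /* stand-in for strict_strtol() == 0 */
                initfree = val;
                printf("initfree: %s free init pages\n",
                       initfree ? "will" : "won't");
        }
        return 1;
}

int main(void)
{
        set_initfree("0");
        set_initfree("bogus");                /* ignored, keeps the previous value */
        return 0;
}
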
diff --git a/arch/tile/mm/migrate_32.S b/arch/tile/mm/migrate_32.S
index f738765cd1e6..ac01a7cdf77f 100644
--- a/arch/tile/mm/migrate_32.S
+++ b/arch/tile/mm/migrate_32.S
@@ -18,6 +18,7 @@
18#include <linux/linkage.h> 18#include <linux/linkage.h>
19#include <linux/threads.h> 19#include <linux/threads.h>
20#include <asm/page.h> 20#include <asm/page.h>
21#include <asm/thread_info.h>
21#include <asm/types.h> 22#include <asm/types.h>
22#include <asm/asm-offsets.h> 23#include <asm/asm-offsets.h>
23#include <hv/hypervisor.h> 24#include <hv/hypervisor.h>
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 1f5430c53d0d..1a2b36f8866d 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -142,6 +142,76 @@ pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
142} 142}
143#endif 143#endif
144 144
145/**
146 * shatter_huge_page() - ensure a given address is mapped by a small page.
147 *
148 * This function converts a huge PTE mapping kernel LOWMEM into a bunch
149 * of small PTEs with the same caching. No cache flush required, but we
150 * must do a global TLB flush.
151 *
152 * Any caller that wishes to modify a kernel mapping that might
153 * have been made with a huge page should call this function,
154 * since doing so properly avoids race conditions with installing the
155 * newly-shattered page and then flushing all the TLB entries.
156 *
157 * @addr: Address at which to shatter any existing huge page.
158 */
159void shatter_huge_page(unsigned long addr)
160{
161 pgd_t *pgd;
162 pud_t *pud;
163 pmd_t *pmd;
164 unsigned long flags = 0; /* happy compiler */
165#ifdef __PAGETABLE_PMD_FOLDED
166 struct list_head *pos;
167#endif
168
169 /* Get a pointer to the pmd entry that we need to change. */
170 addr &= HPAGE_MASK;
171 BUG_ON(pgd_addr_invalid(addr));
172 BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */
173 pgd = swapper_pg_dir + pgd_index(addr);
174 pud = pud_offset(pgd, addr);
175 BUG_ON(!pud_present(*pud));
176 pmd = pmd_offset(pud, addr);
177 BUG_ON(!pmd_present(*pmd));
178 if (!pmd_huge_page(*pmd))
179 return;
180
181 /*
182 * Grab the pgd_lock, since we may need it to walk the pgd_list,
183 * and since we need some kind of lock here to avoid races.
184 */
185 spin_lock_irqsave(&pgd_lock, flags);
186 if (!pmd_huge_page(*pmd)) {
187 /* Lost the race to convert the huge page. */
188 spin_unlock_irqrestore(&pgd_lock, flags);
189 return;
190 }
191
192 /* Shatter the huge page into the preallocated L2 page table. */
193 pmd_populate_kernel(&init_mm, pmd,
194 get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
195
196#ifdef __PAGETABLE_PMD_FOLDED
197 /* Walk every pgd on the system and update the pmd there. */
198 list_for_each(pos, &pgd_list) {
199 pmd_t *copy_pmd;
200 pgd = list_to_pgd(pos) + pgd_index(addr);
201 pud = pud_offset(pgd, addr);
202 copy_pmd = pmd_offset(pud, addr);
203 __set_pmd(copy_pmd, *pmd);
204 }
205#endif
206
207 /* Tell every cpu to notice the change. */
208 flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
209 cpu_possible_mask, NULL, 0);
210
211 /* Hold the lock until the TLB flush is finished to avoid races. */
212 spin_unlock_irqrestore(&pgd_lock, flags);
213}
214
145/* 215/*
146 * List of all pgd's needed so it can invalidate entries in both cached 216 * List of all pgd's needed so it can invalidate entries in both cached
147 * and uncached pgd's. This is essentially codepath-based locking 217 * and uncached pgd's. This is essentially codepath-based locking
@@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd)
184 BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); 254 BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
185#endif 255#endif
186 256
187 clone_pgd_range(pgd + KERNEL_PGD_INDEX_START, 257 memcpy(pgd + KERNEL_PGD_INDEX_START,
188 swapper_pg_dir + KERNEL_PGD_INDEX_START, 258 swapper_pg_dir + KERNEL_PGD_INDEX_START,
189 KERNEL_PGD_PTRS); 259 KERNEL_PGD_PTRS * sizeof(pgd_t));
190 260
191 pgd_list_add(pgd); 261 pgd_list_add(pgd);
192 spin_unlock_irqrestore(&pgd_lock, flags); 262 spin_unlock_irqrestore(&pgd_lock, flags);
@@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
220 290
221struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) 291struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
222{ 292{
223 gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP; 293 gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
224 struct page *p; 294 struct page *p;
295#if L2_USER_PGTABLE_ORDER > 0
296 int i;
297#endif
225 298
226#ifdef CONFIG_HIGHPTE 299#ifdef CONFIG_HIGHPTE
227 flags |= __GFP_HIGHMEM; 300 flags |= __GFP_HIGHMEM;
@@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
231 if (p == NULL) 304 if (p == NULL)
232 return NULL; 305 return NULL;
233 306
307#if L2_USER_PGTABLE_ORDER > 0
308 /*
309 * Make every page have a page_count() of one, not just the first.
310 * We don't use __GFP_COMP since it doesn't look like it works
311 * correctly with tlb_remove_page().
312 */
313 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
314 init_page_count(p+i);
315 inc_zone_page_state(p+i, NR_PAGETABLE);
316 }
317#endif
318
234 pgtable_page_ctor(p); 319 pgtable_page_ctor(p);
235 return p; 320 return p;
236} 321}
@@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
242 */ 327 */
243void pte_free(struct mm_struct *mm, struct page *p) 328void pte_free(struct mm_struct *mm, struct page *p)
244{ 329{
330 int i;
331
245 pgtable_page_dtor(p); 332 pgtable_page_dtor(p);
246 __free_pages(p, L2_USER_PGTABLE_ORDER); 333 __free_page(p);
334
335 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
336 __free_page(p+i);
337 dec_zone_page_state(p+i, NR_PAGETABLE);
338 }
247} 339}
248 340
249void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, 341void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
@@ -252,18 +344,11 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
252 int i; 344 int i;
253 345
254 pgtable_page_dtor(pte); 346 pgtable_page_dtor(pte);
255 tlb->need_flush = 1; 347 tlb_remove_page(tlb, pte);
256 if (tlb_fast_mode(tlb)) { 348
257 struct page *pte_pages[L2_USER_PGTABLE_PAGES]; 349 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
258 for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) 350 tlb_remove_page(tlb, pte + i);
259 pte_pages[i] = pte + i; 351 dec_zone_page_state(pte + i, NR_PAGETABLE);
260 free_pages_and_swap_cache(pte_pages, L2_USER_PGTABLE_PAGES);
261 return;
262 }
263 for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) {
264 tlb->pages[tlb->nr++] = pte + i;
265 if (tlb->nr >= FREE_PTE_NR)
266 tlb_flush_mmu(tlb, 0, 0);
267 } 352 }
268} 353}
269 354
@@ -346,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot)
346 return x + y * smp_width; 431 return x + y * smp_width;
347} 432}
348 433
349void set_pte_order(pte_t *ptep, pte_t pte, int order) 434/*
435 * Convert a kernel VA to a PA and homing information.
436 */
437int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
350{ 438{
351 unsigned long pfn = pte_pfn(pte); 439 struct page *page = virt_to_page(va);
352 struct page *page = pfn_to_page(pfn); 440 pte_t null_pte = { 0 };
353 441
354 /* Update the home of a PTE if necessary */ 442 *cpa = __pa(va);
355 pte = pte_set_home(pte, page_home(page));
356 443
444 /* Note that this is not writing a page table, just returning a pte. */
445 *pte = pte_set_home(null_pte, page_home(page));
446
447 return 0; /* return non-zero if not hfh? */
448}
449EXPORT_SYMBOL(va_to_cpa_and_pte);
450
451void __set_pte(pte_t *ptep, pte_t pte)
452{
357#ifdef __tilegx__ 453#ifdef __tilegx__
358 *ptep = pte; 454 *ptep = pte;
359#else 455#else
360 /* 456# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
361 * When setting a PTE, write the high bits first, then write 457# error Must write the present and migrating bits last
362 * the low bits. This sets the "present" bit only after the 458# endif
363 * other bits are in place. If a particular PTE update 459 if (pte_present(pte)) {
364 * involves transitioning from one valid PTE to another, it 460 ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
365 * may be necessary to call set_pte_order() more than once, 461 barrier();
366 * transitioning via a suitable intermediate state. 462 ((u32 *)ptep)[0] = (u32)(pte_val(pte));
367 * Note that this sequence also means that if we are transitioning 463 } else {
368 * from any migrating PTE to a non-migrating one, we will not 464 ((u32 *)ptep)[0] = (u32)(pte_val(pte));
369 * see a half-updated PTE with the migrating bit off. 465 barrier();
370 */ 466 ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
371#if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 467 }
372# error Must write the present and migrating bits last 468#endif /* __tilegx__ */
373#endif 469}
374 ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); 470
375 barrier(); 471void set_pte(pte_t *ptep, pte_t pte)
376 ((u32 *)ptep)[0] = (u32)(pte_val(pte)); 472{
377#endif 473 struct page *page = pfn_to_page(pte_pfn(pte));
474
475 /* Update the home of a PTE if necessary */
476 pte = pte_set_home(pte, page_home(page));
477
478 __set_pte(ptep, pte);
378} 479}
379 480
380/* Can this mm load a PTE with cached_priority set? */ 481/* Can this mm load a PTE with cached_priority set? */
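Note on the __set_pte() hunk above: on 32-bit TILEPro the 64-bit PTE is written as two 32-bit stores, and the word carrying the PRESENT and MIGRATING bits (the low word, per the #error check) must be stored last when the entry becomes valid and first when it becomes invalid, so a concurrent walker never sees a half-written entry that looks valid. The sketch below is a standalone userspace model of that rule, not part of the patch; the bit position and names are illustrative assumptions.

    #include <stdint.h>

    #define DEMO_PTE_PRESENT  (1u << 0)   /* assumed: present bit lives in the low word */
    #define demo_barrier()    __asm__ __volatile__("" ::: "memory")

    /* Write a 64-bit "pte" as two 32-bit halves so a concurrent reader
     * never observes a half-updated entry that appears present. */
    static void demo_set_pte(volatile uint32_t word[2], uint64_t pte)
    {
            uint32_t lo = (uint32_t)pte;          /* carries present/migrating */
            uint32_t hi = (uint32_t)(pte >> 32);

            if (lo & DEMO_PTE_PRESENT) {
                    word[1] = hi;                 /* everything else first */
                    demo_barrier();
                    word[0] = lo;                 /* present becomes visible last */
            } else {
                    word[0] = lo;                 /* present goes away first */
                    demo_barrier();
                    word[1] = hi;
            }
    }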
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index fe70a341bd8b..fac1a2002e67 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -7,7 +7,7 @@
7menuconfig EDAC 7menuconfig EDAC
8 bool "EDAC (Error Detection And Correction) reporting" 8 bool "EDAC (Error Detection And Correction) reporting"
9 depends on HAS_IOMEM 9 depends on HAS_IOMEM
10 depends on X86 || PPC 10 depends on X86 || PPC || TILE
11 help 11 help
12 EDAC is designed to report errors in the core system. 12 EDAC is designed to report errors in the core system.
13 These are low-level errors that are reported in the CPU or 13 These are low-level errors that are reported in the CPU or
@@ -282,4 +282,12 @@ config EDAC_CPC925
282 a companion chip to the PowerPC 970 family of 282 a companion chip to the PowerPC 970 family of
283 processors. 283 processors.
284 284
285config EDAC_TILE
286 tristate "Tilera Memory Controller"
287 depends on EDAC_MM_EDAC && TILE
288 default y
289 help
290 Support for error detection and correction on the
291 Tilera memory controller.
292
285endif # EDAC 293endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index ba2898b3639b..3e239133e29e 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -54,3 +54,4 @@ obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o
54obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o 54obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o
55obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o 55obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o
56 56
57obj-$(CONFIG_EDAC_TILE) += tile_edac.o
diff --git a/drivers/edac/tile_edac.c b/drivers/edac/tile_edac.c
new file mode 100644
index 000000000000..1d5cf06f6c6b
--- /dev/null
+++ b/drivers/edac/tile_edac.c
@@ -0,0 +1,254 @@
1/*
2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 * Tilera-specific EDAC driver.
14 *
15 * This source code is derived from the following driver:
16 *
17 * Cell MIC driver for ECC counting
18 *
19 * Copyright 2007 Benjamin Herrenschmidt, IBM Corp.
20 * <benh@kernel.crashing.org>
21 *
22 */
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/platform_device.h>
27#include <linux/io.h>
28#include <linux/uaccess.h>
29#include <linux/edac.h>
30#include <hv/hypervisor.h>
31#include <hv/drv_mshim_intf.h>
32
33#include "edac_core.h"
34
35#define DRV_NAME "tile-edac"
36
37/* Number of cs_rows needed per memory controller on TILEPro. */
38#define TILE_EDAC_NR_CSROWS 1
39
40/* Number of channels per memory controller on TILEPro. */
41#define TILE_EDAC_NR_CHANS 1
42
43/* Granularity of reported error in bytes on TILEPro. */
44#define TILE_EDAC_ERROR_GRAIN 8
45
46/* TILE processor has multiple independent memory controllers. */
47struct platform_device *mshim_pdev[TILE_MAX_MSHIMS];
48
49struct tile_edac_priv {
50 int hv_devhdl; /* Hypervisor device handle. */
51 int node; /* Memory controller instance #. */
52 unsigned int ce_count; /*
53 * Correctable-error counter
54 * kept by the driver.
55 */
56};
57
58static void tile_edac_check(struct mem_ctl_info *mci)
59{
60 struct tile_edac_priv *priv = mci->pvt_info;
61 struct mshim_mem_error mem_error;
62
63 if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_error,
64 sizeof(struct mshim_mem_error), MSHIM_MEM_ERROR_OFF) !=
65 sizeof(struct mshim_mem_error)) {
66 pr_err(DRV_NAME ": MSHIM_MEM_ERROR_OFF pread failure.\n");
67 return;
68 }
69
70 /* Check if the current error count is different from the saved one. */
71 if (mem_error.sbe_count != priv->ce_count) {
72 dev_dbg(mci->dev, "ECC CE err on node %d\n", priv->node);
73 priv->ce_count = mem_error.sbe_count;
74 edac_mc_handle_ce(mci, 0, 0, 0, 0, 0, mci->ctl_name);
75 }
76}
77
78/*
79 * Initialize the 'csrows' table within the mci control structure with the
80 * addressing of memory.
81 */
82static int __devinit tile_edac_init_csrows(struct mem_ctl_info *mci)
83{
84 struct csrow_info *csrow = &mci->csrows[0];
85 struct tile_edac_priv *priv = mci->pvt_info;
86 struct mshim_mem_info mem_info;
87
88 if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&mem_info,
89 sizeof(struct mshim_mem_info), MSHIM_MEM_INFO_OFF) !=
90 sizeof(struct mshim_mem_info)) {
91 pr_err(DRV_NAME ": MSHIM_MEM_INFO_OFF pread failure.\n");
92 return -1;
93 }
94
95 if (mem_info.mem_ecc)
96 csrow->edac_mode = EDAC_SECDED;
97 else
98 csrow->edac_mode = EDAC_NONE;
99 switch (mem_info.mem_type) {
100 case DDR2:
101 csrow->mtype = MEM_DDR2;
102 break;
103
104 case DDR3:
105 csrow->mtype = MEM_DDR3;
106 break;
107
108 default:
109 return -1;
110 }
111
112 csrow->first_page = 0;
113 csrow->nr_pages = mem_info.mem_size >> PAGE_SHIFT;
114 csrow->last_page = csrow->first_page + csrow->nr_pages - 1;
115 csrow->grain = TILE_EDAC_ERROR_GRAIN;
116 csrow->dtype = DEV_UNKNOWN;
117
118 return 0;
119}
120
121static int __devinit tile_edac_mc_probe(struct platform_device *pdev)
122{
123 char hv_file[32];
124 int hv_devhdl;
125 struct mem_ctl_info *mci;
126 struct tile_edac_priv *priv;
127 int rc;
128
129 sprintf(hv_file, "mshim/%d", pdev->id);
130 hv_devhdl = hv_dev_open((HV_VirtAddr)hv_file, 0);
131 if (hv_devhdl < 0)
132 return -EINVAL;
133
134 /* A TILE MC has a single channel and one chip-select row. */
135 mci = edac_mc_alloc(sizeof(struct tile_edac_priv),
136 TILE_EDAC_NR_CSROWS, TILE_EDAC_NR_CHANS, pdev->id);
137 if (mci == NULL)
138 return -ENOMEM;
139 priv = mci->pvt_info;
140 priv->node = pdev->id;
141 priv->hv_devhdl = hv_devhdl;
142
143 mci->dev = &pdev->dev;
144 mci->mtype_cap = MEM_FLAG_DDR2;
145 mci->edac_ctl_cap = EDAC_FLAG_SECDED;
146
147 mci->mod_name = DRV_NAME;
148 mci->ctl_name = "TILEPro_Memory_Controller";
149 mci->dev_name = dev_name(&pdev->dev);
150 mci->edac_check = tile_edac_check;
151
152 /*
153 * Initialize the MC control structure 'csrows' table
154 * with the mapping and control information.
155 */
156 if (tile_edac_init_csrows(mci)) {
157 /* No csrows found. */
158 mci->edac_cap = EDAC_FLAG_NONE;
159 } else {
160 mci->edac_cap = EDAC_FLAG_SECDED;
161 }
162
163 platform_set_drvdata(pdev, mci);
164
165 /* Register with EDAC core */
166 rc = edac_mc_add_mc(mci);
167 if (rc) {
168 dev_err(&pdev->dev, "failed to register with EDAC core\n");
169 edac_mc_free(mci);
170 return rc;
171 }
172
173 return 0;
174}
175
176static int __devexit tile_edac_mc_remove(struct platform_device *pdev)
177{
178 struct mem_ctl_info *mci = platform_get_drvdata(pdev);
179
180 edac_mc_del_mc(&pdev->dev);
181 if (mci)
182 edac_mc_free(mci);
183 return 0;
184}
185
186static struct platform_driver tile_edac_mc_driver = {
187 .driver = {
188 .name = DRV_NAME,
189 .owner = THIS_MODULE,
190 },
191 .probe = tile_edac_mc_probe,
192 .remove = __devexit_p(tile_edac_mc_remove),
193};
194
195/*
196 * Driver init routine.
197 */
198static int __init tile_edac_init(void)
199{
200 char hv_file[32];
201 struct platform_device *pdev;
202 int i, err, num = 0;
203
204 /* Only support POLL mode. */
205 edac_op_state = EDAC_OPSTATE_POLL;
206
207 err = platform_driver_register(&tile_edac_mc_driver);
208 if (err)
209 return err;
210
211 for (i = 0; i < TILE_MAX_MSHIMS; i++) {
212 /*
213	 * Not all memory controllers are configured (for example, on
214	 * a simulator), so we register only those mshims that the
215	 * hypervisor reports as configured.
216 */
217 sprintf(hv_file, "mshim/%d", i);
218 if (hv_dev_open((HV_VirtAddr)hv_file, 0) < 0)
219 continue;
220
221 pdev = platform_device_register_simple(DRV_NAME, i, NULL, 0);
222 if (IS_ERR(pdev))
223 continue;
224 mshim_pdev[i] = pdev;
225 num++;
226 }
227
228 if (num == 0) {
229 platform_driver_unregister(&tile_edac_mc_driver);
230 return -ENODEV;
231 }
232 return 0;
233}
234
235/*
236 * Driver cleanup routine.
237 */
238static void __exit tile_edac_exit(void)
239{
240 int i;
241
242 for (i = 0; i < TILE_MAX_MSHIMS; i++) {
243 struct platform_device *pdev = mshim_pdev[i];
244 if (!pdev)
245 continue;
246
247 platform_set_drvdata(pdev, NULL);
248 platform_device_unregister(pdev);
249 }
250 platform_driver_unregister(&tile_edac_mc_driver);
251}
252
253module_init(tile_edac_init);
254module_exit(tile_edac_exit);
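The check callback in the driver above reduces to a polled-counter pattern: the hypervisor keeps a running single-bit-error count, the driver caches the last value it saw, and each poll it reports only when the count has moved. A minimal userspace model of that pattern, not part of the patch (names are hypothetical; hv_dev_pread() and edac_mc_handle_ce() are reduced to callbacks):

    struct demo_mc {
            unsigned int saved_ce_count;              /* last count we acted on */
            unsigned int (*read_hw_count)(void);      /* stands in for hv_dev_pread() */
            void (*report_ce)(void);                  /* stands in for edac_mc_handle_ce() */
    };

    static void demo_poll(struct demo_mc *mc)
    {
            unsigned int now = mc->read_hw_count();

            if (now != mc->saved_ce_count) {
                    mc->saved_ce_count = now;
                    mc->report_ce();                  /* one event per poll interval */
            }
    }

Errors counted this way typically surface through the EDAC core's sysfs counters (for example the ce_count attribute under the controller's mc node).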
diff --git a/drivers/net/tile/tilepro.c b/drivers/net/tile/tilepro.c
index 7cb301da7474..0825db6d883f 100644
--- a/drivers/net/tile/tilepro.c
+++ b/drivers/net/tile/tilepro.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved. 2 * Copyright 2011 Tilera Corporation. All Rights Reserved.
3 * 3 *
4 * This program is free software; you can redistribute it and/or 4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License 5 * modify it under the terms of the GNU General Public License
@@ -44,10 +44,6 @@
44#include <linux/tcp.h> 44#include <linux/tcp.h>
45 45
46 46
47/* There is no singlethread_cpu, so schedule work on the current cpu. */
48#define singlethread_cpu -1
49
50
51/* 47/*
52 * First, "tile_net_init_module()" initializes all four "devices" which 48 * First, "tile_net_init_module()" initializes all four "devices" which
53 * can be used by linux. 49 * can be used by linux.
@@ -73,15 +69,16 @@
73 * return, knowing we will be called again later. Otherwise, we 69 * return, knowing we will be called again later. Otherwise, we
74 * reenable the ingress interrupt, and call "napi_complete()". 70 * reenable the ingress interrupt, and call "napi_complete()".
75 * 71 *
72 * HACK: Since disabling the ingress interrupt is not reliable, we
73 * ignore the interrupt if the global "active" flag is false.
74 *
76 * 75 *
77 * NOTE: The use of "native_driver" ensures that EPP exists, and that 76 * NOTE: The use of "native_driver" ensures that EPP exists, and that
78 * "epp_sendv" is legal, and that "LIPP" is being used. 77 * we are using "LIPP" and "LEPP".
79 * 78 *
80 * NOTE: Failing to free completions for an arbitrarily long time 79 * NOTE: Failing to free completions for an arbitrarily long time
81 * (which is defined to be illegal) does in fact cause bizarre 80 * (which is defined to be illegal) does in fact cause bizarre
82 * problems. The "egress_timer" helps prevent this from happening. 81 * problems. The "egress_timer" helps prevent this from happening.
83 *
84 * NOTE: The egress code can be interrupted by the interrupt handler.
85 */ 82 */
86 83
87 84
@@ -142,6 +139,7 @@
142MODULE_AUTHOR("Tilera"); 139MODULE_AUTHOR("Tilera");
143MODULE_LICENSE("GPL"); 140MODULE_LICENSE("GPL");
144 141
142
145/* 143/*
146 * Queue of incoming packets for a specific cpu and device. 144 * Queue of incoming packets for a specific cpu and device.
147 * 145 *
@@ -177,7 +175,7 @@ struct tile_net_cpu {
177 struct tile_netio_queue queue; 175 struct tile_netio_queue queue;
178 /* Statistics. */ 176 /* Statistics. */
179 struct tile_net_stats_t stats; 177 struct tile_net_stats_t stats;
180 /* ISSUE: Is this needed? */ 178 /* True iff NAPI is enabled. */
181 bool napi_enabled; 179 bool napi_enabled;
182	/* True if this tile has successfully registered with the IPP. */ 180	/* True if this tile has successfully registered with the IPP. */
183 bool registered; 181 bool registered;
@@ -200,20 +198,20 @@ struct tile_net_cpu {
200struct tile_net_priv { 198struct tile_net_priv {
201 /* Our network device. */ 199 /* Our network device. */
202 struct net_device *dev; 200 struct net_device *dev;
203 /* The actual egress queue. */ 201 /* Pages making up the egress queue. */
204 lepp_queue_t *epp_queue; 202 struct page *eq_pages;
205 /* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail" */ 203 /* Address of the actual egress queue. */
206 spinlock_t cmd_lock; 204 lepp_queue_t *eq;
207 /* Protects "epp_queue->comp_head". */ 205 /* Protects "eq". */
208 spinlock_t comp_lock; 206 spinlock_t eq_lock;
209 /* The hypervisor handle for this interface. */ 207 /* The hypervisor handle for this interface. */
210 int hv_devhdl; 208 int hv_devhdl;
211 /* The intr bit mask that IDs this device. */ 209 /* The intr bit mask that IDs this device. */
212 u32 intr_id; 210 u32 intr_id;
213 /* True iff "tile_net_open_aux()" has succeeded. */ 211 /* True iff "tile_net_open_aux()" has succeeded. */
214 int partly_opened; 212 bool partly_opened;
215 /* True iff "tile_net_open_inner()" has succeeded. */ 213 /* True iff the device is "active". */
216 int fully_opened; 214 bool active;
217 /* Effective network cpus. */ 215 /* Effective network cpus. */
218 struct cpumask network_cpus_map; 216 struct cpumask network_cpus_map;
219 /* Number of network cpus. */ 217 /* Number of network cpus. */
@@ -228,6 +226,10 @@ struct tile_net_priv {
228 struct tile_net_cpu *cpu[NR_CPUS]; 226 struct tile_net_cpu *cpu[NR_CPUS];
229}; 227};
230 228
229/* Log2 of the number of small pages needed for the egress queue. */
230#define EQ_ORDER get_order(sizeof(lepp_queue_t))
231/* Size of the egress queue's pages. */
232#define EQ_SIZE (1 << (PAGE_SHIFT + EQ_ORDER))
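EQ_ORDER/EQ_SIZE above turn the byte size of lepp_queue_t into a whole number of pages: get_order() yields the log2 of the smallest power-of-two page count that covers the size. A userspace illustration of the arithmetic, assuming 4 KB pages and a made-up queue size (not the real sizeof(lepp_queue_t)):

    #include <stdio.h>

    #define DEMO_PAGE_SHIFT 12                      /* assume 4 KB pages */
    #define DEMO_PAGE_SIZE  (1u << DEMO_PAGE_SHIFT)

    static unsigned int demo_get_order(unsigned long size)
    {
            unsigned int order = 0;

            while ((DEMO_PAGE_SIZE << order) < size)
                    order++;
            return order;
    }

    int main(void)
    {
            unsigned long eq_bytes = 9000;          /* pretend sizeof(lepp_queue_t) */
            unsigned int order = demo_get_order(eq_bytes);

            /* 9000 bytes -> order 2 -> 4 pages -> 16384-byte "EQ_SIZE". */
            printf("order %u, EQ_SIZE %u\n", order, DEMO_PAGE_SIZE << order);
            return 0;
    }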
231 233
232/* 234/*
233 * The actual devices (xgbe0, xgbe1, gbe0, gbe1). 235 * The actual devices (xgbe0, xgbe1, gbe0, gbe1).
@@ -284,7 +286,11 @@ static void net_printk(char *fmt, ...)
284 */ 286 */
285static void dump_packet(unsigned char *data, unsigned long length, char *s) 287static void dump_packet(unsigned char *data, unsigned long length, char *s)
286{ 288{
289 int my_cpu = smp_processor_id();
290
287 unsigned long i; 291 unsigned long i;
292 char buf[128];
293
288 static unsigned int count; 294 static unsigned int count;
289 295
290 pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n", 296 pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n",
@@ -294,10 +300,12 @@ static void dump_packet(unsigned char *data, unsigned long length, char *s)
294 300
295 for (i = 0; i < length; i++) { 301 for (i = 0; i < length; i++) {
296 if ((i & 0xf) == 0) 302 if ((i & 0xf) == 0)
297 sprintf(buf, "%8.8lx:", i); 303 sprintf(buf, "[%02d] %8.8lx:", my_cpu, i);
298 sprintf(buf + strlen(buf), " %2.2x", data[i]); 304 sprintf(buf + strlen(buf), " %2.2x", data[i]);
299 if ((i & 0xf) == 0xf || i == length - 1) 305 if ((i & 0xf) == 0xf || i == length - 1) {
300 pr_info("%s\n", buf); 306 strcat(buf, "\n");
307 pr_info("%s", buf);
308 }
301 } 309 }
302} 310}
303#endif 311#endif
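The reworked dump_packet() above assembles each 16-byte line in a local buffer, tags it with the CPU number, and emits it with a single print so concurrent dumps from different tiles do not interleave mid-line. A userspace sketch of the same line format, with printf() standing in for pr_info():

    #include <stdio.h>
    #include <string.h>

    static void demo_dump(const unsigned char *data, unsigned long length, int cpu)
    {
            char buf[128];
            unsigned long i;

            for (i = 0; i < length; i++) {
                    if ((i & 0xf) == 0)
                            sprintf(buf, "[%02d] %8.8lx:", cpu, i);
                    sprintf(buf + strlen(buf), " %2.2x", data[i]);
                    if ((i & 0xf) == 0xf || i == length - 1)
                            printf("%s\n", buf);    /* one complete line per call */
            }
    }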
@@ -351,60 +359,109 @@ static void tile_net_provide_linux_buffer(struct tile_net_cpu *info,
351 359
352/* 360/*
353 * Provide a linux buffer for LIPP. 361 * Provide a linux buffer for LIPP.
362 *
363 * Note that the ACTUAL allocation for each buffer is a "struct sk_buff",
364 * plus a chunk of memory that includes not only the requested bytes, but
365 * also NET_SKB_PAD bytes of initial padding, and a "struct skb_shared_info".
366 *
367 * Note that "struct skb_shared_info" is 88 bytes with 64K pages and
368 * 268 bytes with 4K pages (since the frags[] array needs 18 entries).
369 *
370 * Without jumbo packets, the maximum packet size will be 1536 bytes,
371 * and we use 2 bytes (NET_IP_ALIGN) of padding. ISSUE: If we told
372 * the hardware to clip at 1518 bytes instead of 1536 bytes, then we
373 * could save an entire cache line, but in practice, we don't need it.
374 *
375 * Since CPAs are 38 bits, and we can only encode the high 31 bits in
376 * a "linux_buffer_t", the low 7 bits must be zero, and thus, we must
377 * align the actual "va" mod 128.
378 *
379 * We assume that the underlying "head" will be aligned mod 64. Note
380 * that in practice, we have seen "head" NOT aligned mod 128 even when
381 * using 2048 byte allocations, which is surprising.
382 *
383 * If "head" WAS always aligned mod 128, we could change LIPP to
384 * assume that the low SIX bits are zero, and the 7th bit is one, that
385 * is, align the actual "va" mod 128 plus 64, which would be "free".
386 *
387 * For now, the actual "head" pointer points at NET_SKB_PAD bytes of
388 * padding, plus 28 or 92 bytes of extra padding, plus the sk_buff
389 * pointer, plus the NET_IP_ALIGN padding, plus 126 or 1536 bytes for
390 * the actual packet, plus 62 bytes of empty padding, plus some
391 * padding and the "struct skb_shared_info".
392 *
393 * With 64K pages, a large buffer thus needs 32+92+4+2+1536+62+88
394 * bytes, or 1816 bytes, which fits comfortably into 2048 bytes.
395 *
396 * With 64K pages, a small buffer thus needs 32+92+4+2+126+88
397 * bytes, or 344 bytes, which means we are wasting 64+ bytes, and
398 * could presumably increase the size of small buffers.
399 *
400 * With 4K pages, a large buffer thus needs 32+92+4+2+1536+62+268
401 * bytes, or 1996 bytes, which fits comfortably into 2048 bytes.
402 *
403 * With 4K pages, a small buffer thus needs 32+92+4+2+126+268
404 * bytes, or 524 bytes, which is annoyingly wasteful.
405 *
406 * Maybe we should increase LIPP_SMALL_PACKET_SIZE to 192?
407 *
408 * ISSUE: Maybe we should increase "NET_SKB_PAD" to 64?
354 */ 409 */
355static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, 410static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
356 bool small) 411 bool small)
357{ 412{
358 /* ISSUE: What should we use here? */ 413#if TILE_NET_MTU <= 1536
414 /* Without "jumbo", 2 + 1536 should be sufficient. */
415 unsigned int large_size = NET_IP_ALIGN + 1536;
416#else
417 /* ISSUE: This has not been tested. */
359 unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100; 418 unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100;
419#endif
360 420
361 /* Round up to ensure to avoid "false sharing" with last cache line. */ 421 /* Avoid "false sharing" with last cache line. */
362 unsigned int buffer_size = 422 /* ISSUE: This is already done by "dev_alloc_skb()". */
423 unsigned int len =
363 (((small ? LIPP_SMALL_PACKET_SIZE : large_size) + 424 (((small ? LIPP_SMALL_PACKET_SIZE : large_size) +
364 CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE()); 425 CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE());
365 426
366 /* 427 unsigned int padding = 128 - NET_SKB_PAD;
367 * ISSUE: Since CPAs are 38 bits, and we can only encode the 428 unsigned int align;
368 * high 31 bits in a "linux_buffer_t", the low 7 bits must be
369 * zero, and thus, we must align the actual "va" mod 128.
370 */
371 const unsigned long align = 128;
372 429
373 struct sk_buff *skb; 430 struct sk_buff *skb;
374 void *va; 431 void *va;
375 432
376 struct sk_buff **skb_ptr; 433 struct sk_buff **skb_ptr;
377 434
378 /* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */ 435 /* Request 96 extra bytes for alignment purposes. */
379 /* and also "reserves" that many bytes. */ 436 skb = dev_alloc_skb(len + padding);
380 /* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */ 437 if (skb == NULL)
381 int len = sizeof(*skb_ptr) + align + buffer_size; 438 return false;
382
383 while (1) {
384
385 /* Allocate (or fail). */
386 skb = dev_alloc_skb(len);
387 if (skb == NULL)
388 return false;
389
390 /* Make room for a back-pointer to 'skb'. */
391 skb_reserve(skb, sizeof(*skb_ptr));
392 439
393 /* Make sure we are aligned. */ 440 /* Skip 32 or 96 bytes to align "data" mod 128. */
394 skb_reserve(skb, -(long)skb->data & (align - 1)); 441 align = -(long)skb->data & (128 - 1);
442 BUG_ON(align > padding);
443 skb_reserve(skb, align);
395 444
396 /* This address is given to IPP. */ 445 /* This address is given to IPP. */
397 va = skb->data; 446 va = skb->data;
398 447
399 if (small) 448 /* Buffers must not span a huge page. */
400 break; 449 BUG_ON(((((long)va & ~HPAGE_MASK) + len) & HPAGE_MASK) != 0);
401 450
402 /* ISSUE: This has never been observed! */ 451#ifdef TILE_NET_PARANOIA
403 /* Large buffers must not span a huge page. */ 452#if CHIP_HAS_CBOX_HOME_MAP()
404 if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0) 453 if (hash_default) {
405 break; 454 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va);
406 pr_err("Leaking unaligned linux buffer at %p.\n", va); 455 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
456 panic("Non-HFH ingress buffer! VA=%p Mode=%d PTE=%llx",
457 va, hv_pte_get_mode(pte), hv_pte_val(pte));
407 } 458 }
459#endif
460#endif
461
462 /* Invalidate the packet buffer. */
463 if (!hash_default)
464 __inv_buffer(va, len);
408 465
409 /* Skip two bytes to satisfy LIPP assumptions. */ 466 /* Skip two bytes to satisfy LIPP assumptions. */
410 /* Note that this aligns IP on a 16 byte boundary. */ 467 /* Note that this aligns IP on a 16 byte boundary. */
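The long comment above hinges on one constraint: a 38-bit client physical address has to fit in a 32-bit "linux_buffer_t", so the buffer must be aligned mod 128 (low 7 bits zero) and only bits 37..7 are stored, shifted up by one. The decode in tile_net_discard_aux() further down, "(buffer >> 1) << 7", is the inverse. A sketch of that packing, not part of the patch; treating the low bit as an opaque flag is an assumption here:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t demo_encode_buffer(uint64_t cpa, unsigned int flag)
    {
            assert((cpa & 0x7f) == 0);              /* must be aligned mod 128 */
            assert(cpa < (1ull << 38));             /* CPAs are 38 bits */
            return (uint32_t)((cpa >> 7) << 1) | (flag & 1);
    }

    static uint64_t demo_decode_buffer(uint32_t word)
    {
            return (uint64_t)(word >> 1) << 7;      /* recover the aligned CPA */
    }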
@@ -415,23 +472,9 @@ static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info,
415 skb_ptr = va - sizeof(*skb_ptr); 472 skb_ptr = va - sizeof(*skb_ptr);
416 *skb_ptr = skb; 473 *skb_ptr = skb;
417 474
418 /* Invalidate the packet buffer. */
419 if (!hash_default)
420 __inv_buffer(skb->data, buffer_size);
421
422 /* Make sure "skb_ptr" has been flushed. */ 475 /* Make sure "skb_ptr" has been flushed. */
423 __insn_mf(); 476 __insn_mf();
424 477
425#ifdef TILE_NET_PARANOIA
426#if CHIP_HAS_CBOX_HOME_MAP()
427 if (hash_default) {
428 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va);
429 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
430 panic("Non-coherent ingress buffer!");
431 }
432#endif
433#endif
434
435 /* Provide the new buffer. */ 478 /* Provide the new buffer. */
436 tile_net_provide_linux_buffer(info, va, small); 479 tile_net_provide_linux_buffer(info, va, small);
437 480
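The "skb_ptr" store above (and its mirror image in tile_net_discard_aux() below) is a back-pointer trick: the driver tucks the struct sk_buff pointer into the bytes just below the address handed to the shim, so the skb can be recovered from nothing but that address when the buffer comes back; the allocation path reserves sizeof(*skb_ptr) bytes in front of "va" for exactly this. In miniature, with a dummy type standing in for sk_buff:

    struct demo_skb { unsigned char data[256]; };

    /* Stash the owning "skb" immediately below the buffer address that
     * will be handed to the hardware. */
    static void demo_publish(struct demo_skb *skb, void *va)
    {
            struct demo_skb **skb_ptr = (struct demo_skb **)va - 1;
            *skb_ptr = skb;
    }

    /* Recover the owning "skb" from the buffer address alone. */
    static struct demo_skb *demo_recover(void *va)
    {
            return *((struct demo_skb **)va - 1);
    }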
@@ -469,48 +512,64 @@ oops:
469 * Grab some LEPP completions, and store them in "comps", of size 512 * Grab some LEPP completions, and store them in "comps", of size
470 * "comps_size", and return the number of completions which were 513 * "comps_size", and return the number of completions which were
471 * stored, so the caller can free them. 514 * stored, so the caller can free them.
472 *
473 * If "pending" is not NULL, it will be set to true if there might
474 * still be some pending completions caused by this tile, else false.
475 */ 515 */
476static unsigned int tile_net_lepp_grab_comps(struct net_device *dev, 516static unsigned int tile_net_lepp_grab_comps(lepp_queue_t *eq,
477 struct sk_buff *comps[], 517 struct sk_buff *comps[],
478 unsigned int comps_size, 518 unsigned int comps_size,
479 bool *pending) 519 unsigned int min_size)
480{ 520{
481 struct tile_net_priv *priv = netdev_priv(dev);
482
483 lepp_queue_t *eq = priv->epp_queue;
484
485 unsigned int n = 0; 521 unsigned int n = 0;
486 522
487 unsigned int comp_head; 523 unsigned int comp_head = eq->comp_head;
488 unsigned int comp_busy; 524 unsigned int comp_busy = eq->comp_busy;
489 unsigned int comp_tail;
490
491 spin_lock(&priv->comp_lock);
492
493 comp_head = eq->comp_head;
494 comp_busy = eq->comp_busy;
495 comp_tail = eq->comp_tail;
496 525
497 while (comp_head != comp_busy && n < comps_size) { 526 while (comp_head != comp_busy && n < comps_size) {
498 comps[n++] = eq->comps[comp_head]; 527 comps[n++] = eq->comps[comp_head];
499 LEPP_QINC(comp_head); 528 LEPP_QINC(comp_head);
500 } 529 }
501 530
502 if (pending != NULL) 531 if (n < min_size)
503 *pending = (comp_head != comp_tail); 532 return 0;
504 533
505 eq->comp_head = comp_head; 534 eq->comp_head = comp_head;
506 535
507 spin_unlock(&priv->comp_lock);
508
509 return n; 536 return n;
510} 537}
511 538
512 539
513/* 540/*
541 * Free some comps, and return true iff there are still some pending.
542 */
543static bool tile_net_lepp_free_comps(struct net_device *dev, bool all)
544{
545 struct tile_net_priv *priv = netdev_priv(dev);
546
547 lepp_queue_t *eq = priv->eq;
548
549 struct sk_buff *olds[64];
550 unsigned int wanted = 64;
551 unsigned int i, n;
552 bool pending;
553
554 spin_lock(&priv->eq_lock);
555
556 if (all)
557 eq->comp_busy = eq->comp_tail;
558
559 n = tile_net_lepp_grab_comps(eq, olds, wanted, 0);
560
561 pending = (eq->comp_head != eq->comp_tail);
562
563 spin_unlock(&priv->eq_lock);
564
565 for (i = 0; i < n; i++)
566 kfree_skb(olds[i]);
567
568 return pending;
569}
570
571
572/*
514 * Make sure the egress timer is scheduled. 573 * Make sure the egress timer is scheduled.
515 * 574 *
516 * Note that we use "schedule if not scheduled" logic instead of the more 575 * Note that we use "schedule if not scheduled" logic instead of the more
@@ -544,21 +603,11 @@ static void tile_net_handle_egress_timer(unsigned long arg)
544 struct tile_net_cpu *info = (struct tile_net_cpu *)arg; 603 struct tile_net_cpu *info = (struct tile_net_cpu *)arg;
545 struct net_device *dev = info->napi.dev; 604 struct net_device *dev = info->napi.dev;
546 605
547 struct sk_buff *olds[32];
548 unsigned int wanted = 32;
549 unsigned int i, nolds = 0;
550 bool pending;
551
552 /* The timer is no longer scheduled. */ 606 /* The timer is no longer scheduled. */
553 info->egress_timer_scheduled = false; 607 info->egress_timer_scheduled = false;
554 608
555 nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending); 609 /* Free comps, and reschedule timer if more are pending. */
556 610 if (tile_net_lepp_free_comps(dev, false))
557 for (i = 0; i < nolds; i++)
558 kfree_skb(olds[i]);
559
560 /* Reschedule timer if needed. */
561 if (pending)
562 tile_net_schedule_egress_timer(info); 611 tile_net_schedule_egress_timer(info);
563} 612}
564 613
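tile_net_lepp_grab_comps() and tile_net_lepp_free_comps() above walk a three-pointer ring: comp_head is the oldest completion not yet freed, comp_busy is the point up to which LEPP has finished, and comp_tail is where new commands are queued, so only [comp_head, comp_busy) may be harvested. A toy model of the harvesting step (ring size and names are made up; LEPP_QINC is assumed to be a power-of-two wrap):

    #define DEMO_COMPS 64
    #define DEMO_QINC(i) ((i) = ((i) + 1) & (DEMO_COMPS - 1))

    struct demo_eq {
            unsigned int comp_head, comp_busy, comp_tail;
            void *comps[DEMO_COMPS];
    };

    static unsigned int demo_grab_comps(struct demo_eq *eq, void *out[],
                                        unsigned int out_size, unsigned int min)
    {
            unsigned int head = eq->comp_head;
            unsigned int n = 0;

            /* [comp_head, comp_busy) is finished; [comp_busy, comp_tail)
             * is still owned by the shim. */
            while (head != eq->comp_busy && n < out_size) {
                    out[n++] = eq->comps[head];
                    DEMO_QINC(head);
            }
            if (n < min)
                    return 0;       /* not enough yet to be worth the freeing pass */
            eq->comp_head = head;
            return n;
    }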
@@ -636,8 +685,39 @@ static bool is_dup_ack(char *s1, char *s2, unsigned int len)
636 685
637 686
638 687
688static void tile_net_discard_aux(struct tile_net_cpu *info, int index)
689{
690 struct tile_netio_queue *queue = &info->queue;
691 netio_queue_impl_t *qsp = queue->__system_part;
692 netio_queue_user_impl_t *qup = &queue->__user_part;
693
694 int index2_aux = index + sizeof(netio_pkt_t);
695 int index2 =
696 ((index2_aux ==
697 qsp->__packet_receive_queue.__last_packet_plus_one) ?
698 0 : index2_aux);
699
700 netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index);
701
702 /* Extract the "linux_buffer_t". */
703 unsigned int buffer = pkt->__packet.word;
704
705 /* Convert "linux_buffer_t" to "va". */
706 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
707
708 /* Acquire the associated "skb". */
709 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
710 struct sk_buff *skb = *skb_ptr;
711
712 kfree_skb(skb);
713
714 /* Consume this packet. */
715 qup->__packet_receive_read = index2;
716}
717
718
639/* 719/*
640 * Like "tile_net_handle_packets()", but just discard packets. 720 * Like "tile_net_poll()", but just discard packets.
641 */ 721 */
642static void tile_net_discard_packets(struct net_device *dev) 722static void tile_net_discard_packets(struct net_device *dev)
643{ 723{
@@ -650,32 +730,8 @@ static void tile_net_discard_packets(struct net_device *dev)
650 730
651 while (qup->__packet_receive_read != 731 while (qup->__packet_receive_read !=
652 qsp->__packet_receive_queue.__packet_write) { 732 qsp->__packet_receive_queue.__packet_write) {
653
654 int index = qup->__packet_receive_read; 733 int index = qup->__packet_receive_read;
655 734 tile_net_discard_aux(info, index);
656 int index2_aux = index + sizeof(netio_pkt_t);
657 int index2 =
658 ((index2_aux ==
659 qsp->__packet_receive_queue.__last_packet_plus_one) ?
660 0 : index2_aux);
661
662 netio_pkt_t *pkt = (netio_pkt_t *)
663 ((unsigned long) &qsp[1] + index);
664
665 /* Extract the "linux_buffer_t". */
666 unsigned int buffer = pkt->__packet.word;
667
668 /* Convert "linux_buffer_t" to "va". */
669 void *va = __va((phys_addr_t)(buffer >> 1) << 7);
670
671 /* Acquire the associated "skb". */
672 struct sk_buff **skb_ptr = va - sizeof(*skb_ptr);
673 struct sk_buff *skb = *skb_ptr;
674
675 kfree_skb(skb);
676
677 /* Consume this packet. */
678 qup->__packet_receive_read = index2;
679 } 735 }
680} 736}
681 737
@@ -704,7 +760,8 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
704 760
705 netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt); 761 netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt);
706 762
707 /* Extract the packet size. */ 763 /* Extract the packet size. FIXME: Shouldn't the second line */
764 /* get subtracted? Mostly moot, since it should be "zero". */
708 unsigned long len = 765 unsigned long len =
709 (NETIO_PKT_CUSTOM_LENGTH(pkt) + 766 (NETIO_PKT_CUSTOM_LENGTH(pkt) +
710 NET_IP_ALIGN - NETIO_PACKET_PADDING); 767 NET_IP_ALIGN - NETIO_PACKET_PADDING);
@@ -722,15 +779,6 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
722 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ 779 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
723 unsigned char *buf = va + NET_IP_ALIGN; 780 unsigned char *buf = va + NET_IP_ALIGN;
724 781
725#ifdef IGNORE_DUP_ACKS
726
727 static int other;
728 static int final;
729 static int keep;
730 static int skip;
731
732#endif
733
734 /* Invalidate the packet buffer. */ 782 /* Invalidate the packet buffer. */
735 if (!hash_default) 783 if (!hash_default)
736 __inv_buffer(buf, len); 784 __inv_buffer(buf, len);
@@ -745,16 +793,8 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
745#ifdef TILE_NET_VERIFY_INGRESS 793#ifdef TILE_NET_VERIFY_INGRESS
746 if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) && 794 if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) &&
747 NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) { 795 NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) {
748 /* 796 /* Bug 6624: Includes UDP packets with a "zero" checksum. */
749 * FIXME: This complains about UDP packets
750 * with a "zero" checksum (bug 6624).
751 */
752#ifdef TILE_NET_PANIC_ON_BAD
753 dump_packet(buf, len, "rx");
754 panic("Bad L4 checksum.");
755#else
756 pr_warning("Bad L4 checksum on %d byte packet.\n", len); 797 pr_warning("Bad L4 checksum on %d byte packet.\n", len);
757#endif
758 } 798 }
759 if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) && 799 if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) &&
760 NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) { 800 NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) {
@@ -769,90 +809,29 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
769 } 809 }
770 break; 810 break;
771 case NETIO_PKT_STATUS_BAD: 811 case NETIO_PKT_STATUS_BAD:
772#ifdef TILE_NET_PANIC_ON_BAD 812 pr_warning("Unexpected BAD %ld byte packet.\n", len);
773 dump_packet(buf, len, "rx");
774 panic("Unexpected BAD packet.");
775#else
776 pr_warning("Unexpected BAD %d byte packet.\n", len);
777#endif
778 } 813 }
779#endif 814#endif
780 815
781 filter = 0; 816 filter = 0;
782 817
818 /* ISSUE: Filter TCP packets with "bad" checksums? */
819
783 if (!(dev->flags & IFF_UP)) { 820 if (!(dev->flags & IFF_UP)) {
784 /* Filter packets received before we're up. */ 821 /* Filter packets received before we're up. */
785 filter = 1; 822 filter = 1;
823 } else if (NETIO_PKT_STATUS_M(metadata, pkt) == NETIO_PKT_STATUS_BAD) {
824 /* Filter "truncated" packets. */
825 filter = 1;
786 } else if (!(dev->flags & IFF_PROMISC)) { 826 } else if (!(dev->flags & IFF_PROMISC)) {
787 /* 827 /* FIXME: Implement HW multicast filter. */
788 * FIXME: Implement HW multicast filter. 828 if (!is_multicast_ether_addr(buf)) {
789 */
790 if (is_unicast_ether_addr(buf)) {
791 /* Filter packets not for our address. */ 829 /* Filter packets not for our address. */
792 const u8 *mine = dev->dev_addr; 830 const u8 *mine = dev->dev_addr;
793 filter = compare_ether_addr(mine, buf); 831 filter = compare_ether_addr(mine, buf);
794 } 832 }
795 } 833 }
796 834
797#ifdef IGNORE_DUP_ACKS
798
799 if (len != 66) {
800 /* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */
801
802 other++;
803
804 } else if (index2 ==
805 qsp->__packet_receive_queue.__packet_write) {
806
807 final++;
808
809 } else {
810
811 netio_pkt_t *pkt2 = (netio_pkt_t *)
812 ((unsigned long) &qsp[1] + index2);
813
814 netio_pkt_metadata_t *metadata2 =
815 NETIO_PKT_METADATA(pkt2);
816
817 /* Extract the packet size. */
818 unsigned long len2 =
819 (NETIO_PKT_CUSTOM_LENGTH(pkt2) +
820 NET_IP_ALIGN - NETIO_PACKET_PADDING);
821
822 if (len2 == 66 &&
823 NETIO_PKT_FLOW_HASH_M(metadata, pkt) ==
824 NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) {
825
826 /* Extract the "linux_buffer_t". */
827 unsigned int buffer2 = pkt2->__packet.word;
828
829 /* Convert "linux_buffer_t" to "va". */
830 void *va2 =
831 __va((phys_addr_t)(buffer2 >> 1) << 7);
832
833 /* Extract the packet data pointer. */
834 /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */
835 unsigned char *buf2 = va2 + NET_IP_ALIGN;
836
837 /* Invalidate the packet buffer. */
838 if (!hash_default)
839 __inv_buffer(buf2, len2);
840
841 if (is_dup_ack(buf, buf2, len)) {
842 skip++;
843 filter = 1;
844 } else {
845 keep++;
846 }
847 }
848 }
849
850 if (net_ratelimit())
851 pr_info("Other %d Final %d Keep %d Skip %d.\n",
852 other, final, keep, skip);
853
854#endif
855
856 if (filter) { 835 if (filter) {
857 836
858 /* ISSUE: Update "drop" statistics? */ 837 /* ISSUE: Update "drop" statistics? */
@@ -877,10 +856,7 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
877 /* NOTE: This call also sets "skb->dev = dev". */ 856 /* NOTE: This call also sets "skb->dev = dev". */
878 skb->protocol = eth_type_trans(skb, dev); 857 skb->protocol = eth_type_trans(skb, dev);
879 858
880 /* ISSUE: Discard corrupt packets? */ 859 /* Avoid recomputing "good" TCP/UDP checksums. */
881 /* ISSUE: Discard packets with bad checksums? */
882
883 /* Avoid recomputing TCP/UDP checksums. */
884 if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) 860 if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt))
885 skb->ip_summed = CHECKSUM_UNNECESSARY; 861 skb->ip_summed = CHECKSUM_UNNECESSARY;
886 862
@@ -912,9 +888,14 @@ static bool tile_net_poll_aux(struct tile_net_cpu *info, int index)
912/* 888/*
913 * Handle some packets for the given device on the current CPU. 889 * Handle some packets for the given device on the current CPU.
914 * 890 *
915 * ISSUE: The "rotting packet" race condition occurs if a packet 891 * If "tile_net_stop()" is called on some other tile while this
916 * arrives after the queue appears to be empty, and before the 892 * function is running, we will return, hopefully before that
917 * hypervisor interrupt is re-enabled. 893 * other tile asks us to call "napi_disable()".
894 *
895 * The "rotting packet" race condition occurs if a packet arrives
896 * during the extremely narrow window between the queue appearing to
897 * be empty, and the ingress interrupt being re-enabled. This happens
898 * a LOT under heavy network load.
918 */ 899 */
919static int tile_net_poll(struct napi_struct *napi, int budget) 900static int tile_net_poll(struct napi_struct *napi, int budget)
920{ 901{
@@ -928,7 +909,7 @@ static int tile_net_poll(struct napi_struct *napi, int budget)
928 909
929 unsigned int work = 0; 910 unsigned int work = 0;
930 911
931 while (1) { 912 while (priv->active) {
932 int index = qup->__packet_receive_read; 913 int index = qup->__packet_receive_read;
933 if (index == qsp->__packet_receive_queue.__packet_write) 914 if (index == qsp->__packet_receive_queue.__packet_write)
934 break; 915 break;
@@ -941,19 +922,24 @@ static int tile_net_poll(struct napi_struct *napi, int budget)
941 922
942 napi_complete(&info->napi); 923 napi_complete(&info->napi);
943 924
944 /* Re-enable hypervisor interrupts. */ 925 if (!priv->active)
926 goto done;
927
928 /* Re-enable the ingress interrupt. */
945 enable_percpu_irq(priv->intr_id); 929 enable_percpu_irq(priv->intr_id);
946 930
947 /* HACK: Avoid the "rotting packet" problem. */ 931 /* HACK: Avoid the "rotting packet" problem (see above). */
948 if (qup->__packet_receive_read != 932 if (qup->__packet_receive_read !=
949 qsp->__packet_receive_queue.__packet_write) 933 qsp->__packet_receive_queue.__packet_write) {
950 napi_schedule(&info->napi); 934 /* ISSUE: Sometimes this returns zero, presumably */
951 935 /* because an interrupt was handled for this tile. */
952 /* ISSUE: Handle completions? */ 936 (void)napi_reschedule(&info->napi);
937 }
953 938
954done: 939done:
955 940
956 tile_net_provide_needed_buffers(info); 941 if (priv->active)
942 tile_net_provide_needed_buffers(info);
957 943
958 return work; 944 return work;
959} 945}
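The poll loop above closes the "rotting packet" window by re-checking the ingress queue after napi_complete() and enable_percpu_irq(), rescheduling itself if anything slipped in between, and it bails out as soon as the new "active" flag goes false. A compilable miniature of that control flow, with plain fields standing in for the NAPI and irq calls (a model, not the driver code):

    #include <stdbool.h>

    struct demo_q {
            unsigned int read, write;               /* ring indices */
            bool irq_enabled;
            bool napi_scheduled;
    };

    static bool demo_has_packets(struct demo_q *q)
    {
            return q->read != q->write;
    }

    static int demo_poll(struct demo_q *q, int budget)
    {
            int work = 0;

            while (work < budget && demo_has_packets(q)) {
                    q->read++;                      /* "process" one packet */
                    work++;
            }
            if (work >= budget)
                    return work;                    /* stay scheduled, poll again */

            q->napi_scheduled = false;              /* napi_complete() */
            q->irq_enabled = true;                  /* enable_percpu_irq() */

            /* Close the "rotting packet" window: a packet may have landed
             * after the queue looked empty but before the irq came back on. */
            if (demo_has_packets(q))
                    q->napi_scheduled = true;       /* napi_reschedule() */

            return work;
    }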
@@ -961,6 +947,12 @@ done:
961 947
962/* 948/*
963 * Handle an ingress interrupt for the given device on the current cpu. 949 * Handle an ingress interrupt for the given device on the current cpu.
950 *
951 * ISSUE: Sometimes this gets called after "disable_percpu_irq()" has
952 * been called! This is probably due to "pending hypervisor downcalls".
953 *
954 * ISSUE: Is there any race condition between the "napi_schedule()" here
955 * and the "napi_complete()" call above?
964 */ 956 */
965static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) 957static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr)
966{ 958{
@@ -969,9 +961,15 @@ static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr)
969 int my_cpu = smp_processor_id(); 961 int my_cpu = smp_processor_id();
970 struct tile_net_cpu *info = priv->cpu[my_cpu]; 962 struct tile_net_cpu *info = priv->cpu[my_cpu];
971 963
972 /* Disable hypervisor interrupt. */ 964 /* Disable the ingress interrupt. */
973 disable_percpu_irq(priv->intr_id); 965 disable_percpu_irq(priv->intr_id);
974 966
967 /* Ignore unwanted interrupts. */
968 if (!priv->active)
969 return IRQ_HANDLED;
970
971 /* ISSUE: Sometimes "info->napi_enabled" is false here. */
972
975 napi_schedule(&info->napi); 973 napi_schedule(&info->napi);
976 974
977 return IRQ_HANDLED; 975 return IRQ_HANDLED;
@@ -1005,8 +1003,7 @@ static int tile_net_open_aux(struct net_device *dev)
1005 */ 1003 */
1006 { 1004 {
1007 int epp_home = hv_lotar_to_cpu(epp_lotar); 1005 int epp_home = hv_lotar_to_cpu(epp_lotar);
1008 struct page *page = virt_to_page(priv->epp_queue); 1006 homecache_change_page_home(priv->eq_pages, EQ_ORDER, epp_home);
1009 homecache_change_page_home(page, 0, epp_home);
1010 } 1007 }
1011 1008
1012 /* 1009 /*
@@ -1015,9 +1012,9 @@ static int tile_net_open_aux(struct net_device *dev)
1015 { 1012 {
1016 netio_ipp_address_t ea = { 1013 netio_ipp_address_t ea = {
1017 .va = 0, 1014 .va = 0,
1018 .pa = __pa(priv->epp_queue), 1015 .pa = __pa(priv->eq),
1019 .pte = hv_pte(0), 1016 .pte = hv_pte(0),
1020 .size = PAGE_SIZE, 1017 .size = EQ_SIZE,
1021 }; 1018 };
1022 ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); 1019 ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar);
1023 ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); 1020 ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3);
@@ -1043,7 +1040,7 @@ static int tile_net_open_aux(struct net_device *dev)
1043 1040
1044 1041
1045/* 1042/*
1046 * Register with hypervisor on each CPU. 1043 * Register with hypervisor on the current CPU.
1047 * 1044 *
1048 * Strangely, this function does important things even if it "fails", 1045 * Strangely, this function does important things even if it "fails",
1049 * which is especially common if the link is not up yet. Hopefully 1046 * which is especially common if the link is not up yet. Hopefully
@@ -1092,7 +1089,8 @@ static void tile_net_register(void *dev_ptr)
1092 priv->cpu[my_cpu] = info; 1089 priv->cpu[my_cpu] = info;
1093 1090
1094 /* 1091 /*
1095 * Register ourselves with the IPP. 1092 * Register ourselves with LIPP. This does a lot of stuff,
1093 * including invoking the LIPP registration code.
1096 */ 1094 */
1097 ret = hv_dev_pwrite(priv->hv_devhdl, 0, 1095 ret = hv_dev_pwrite(priv->hv_devhdl, 0,
1098 (HV_VirtAddr)&config, 1096 (HV_VirtAddr)&config,
@@ -1101,8 +1099,11 @@ static void tile_net_register(void *dev_ptr)
1101 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", 1099 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n",
1102 ret); 1100 ret);
1103 if (ret < 0) { 1101 if (ret < 0) {
1104 printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF" 1102 if (ret != NETIO_LINK_DOWN) {
1105 " failure %d\n", ret); 1103 printk(KERN_DEBUG "hv_dev_pwrite "
1104 "NETIO_IPP_INPUT_REGISTER_OFF failure %d\n",
1105 ret);
1106 }
1106 info->link_down = (ret == NETIO_LINK_DOWN); 1107 info->link_down = (ret == NETIO_LINK_DOWN);
1107 return; 1108 return;
1108 } 1109 }
@@ -1145,15 +1146,47 @@ static void tile_net_register(void *dev_ptr)
1145 NETIO_IPP_GET_FASTIO_OFF); 1146 NETIO_IPP_GET_FASTIO_OFF);
1146 PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); 1147 PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret);
1147 1148
1148 netif_napi_add(dev, &info->napi, tile_net_poll, 64);
1149
1150 /* Now we are registered. */ 1149 /* Now we are registered. */
1151 info->registered = true; 1150 info->registered = true;
1152} 1151}
1153 1152
1154 1153
1155/* 1154/*
1156 * Unregister with hypervisor on each CPU. 1155 * Deregister with hypervisor on the current CPU.
1156 *
1157 * This simply discards all our credits, so no more packets will be
1158 * delivered to this tile. There may still be packets in our queue.
1159 *
1160 * Also, disable the ingress interrupt.
1161 */
1162static void tile_net_deregister(void *dev_ptr)
1163{
1164 struct net_device *dev = (struct net_device *)dev_ptr;
1165 struct tile_net_priv *priv = netdev_priv(dev);
1166 int my_cpu = smp_processor_id();
1167 struct tile_net_cpu *info = priv->cpu[my_cpu];
1168
1169 /* Disable the ingress interrupt. */
1170 disable_percpu_irq(priv->intr_id);
1171
1172 /* Do nothing else if not registered. */
1173 if (info == NULL || !info->registered)
1174 return;
1175
1176 {
1177 struct tile_netio_queue *queue = &info->queue;
1178 netio_queue_user_impl_t *qup = &queue->__user_part;
1179
1180 /* Discard all our credits. */
1181 __netio_fastio_return_credits(qup->__fastio_index, -1);
1182 }
1183}
1184
1185
1186/*
1187 * Unregister with hypervisor on the current CPU.
1188 *
1189 * Also, disable the ingress interrupt.
1157 */ 1190 */
1158static void tile_net_unregister(void *dev_ptr) 1191static void tile_net_unregister(void *dev_ptr)
1159{ 1192{
@@ -1162,35 +1195,23 @@ static void tile_net_unregister(void *dev_ptr)
1162 int my_cpu = smp_processor_id(); 1195 int my_cpu = smp_processor_id();
1163 struct tile_net_cpu *info = priv->cpu[my_cpu]; 1196 struct tile_net_cpu *info = priv->cpu[my_cpu];
1164 1197
1165 int ret = 0; 1198 int ret;
1166 int dummy = 0; 1199 int dummy = 0;
1167 1200
1168 /* Do nothing if never registered. */ 1201 /* Disable the ingress interrupt. */
1169 if (info == NULL) 1202 disable_percpu_irq(priv->intr_id);
1170 return;
1171 1203
1172 /* Do nothing if already unregistered. */ 1204 /* Do nothing else if not registered. */
1173 if (!info->registered) 1205 if (info == NULL || !info->registered)
1174 return; 1206 return;
1175 1207
1176 /* 1208 /* Unregister ourselves with LIPP/LEPP. */
1177 * Unregister ourselves with LIPP.
1178 */
1179 ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1209 ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1180 sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); 1210 sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF);
1181 PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n", 1211 if (ret < 0)
1182 ret); 1212 panic("Failed to unregister with LIPP/LEPP!\n");
1183 if (ret < 0) {
1184 /* FIXME: Just panic? */
1185 pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF"
1186 " failure %d\n", ret);
1187 }
1188 1213
1189 /* 1214 /* Discard all packets still in our NetIO queue. */
1190 * Discard all packets still in our NetIO queue. Hopefully,
1191 * once the unregister call is complete, there will be no
1192 * packets still in flight on the IDN.
1193 */
1194 tile_net_discard_packets(dev); 1215 tile_net_discard_packets(dev);
1195 1216
1196 /* Reset state. */ 1217 /* Reset state. */
@@ -1200,11 +1221,6 @@ static void tile_net_unregister(void *dev_ptr)
1200 /* Cancel egress timer. */ 1221 /* Cancel egress timer. */
1201 del_timer(&info->egress_timer); 1222 del_timer(&info->egress_timer);
1202 info->egress_timer_scheduled = false; 1223 info->egress_timer_scheduled = false;
1203
1204 netif_napi_del(&info->napi);
1205
1206 /* Now we are unregistered. */
1207 info->registered = false;
1208} 1224}
1209 1225
1210 1226
@@ -1212,18 +1228,28 @@ static void tile_net_unregister(void *dev_ptr)
1212 * Helper function for "tile_net_stop()". 1228 * Helper function for "tile_net_stop()".
1213 * 1229 *
1214 * Also used to handle registration failure in "tile_net_open_inner()", 1230 * Also used to handle registration failure in "tile_net_open_inner()",
1215 * when "fully_opened" is known to be false, and the various extra 1231 * when the various extra steps in "tile_net_stop()" are not necessary.
1216 * steps in "tile_net_stop()" are not necessary. ISSUE: It might be
1217 * simpler if we could just call "tile_net_stop()" anyway.
1218 */ 1232 */
1219static void tile_net_stop_aux(struct net_device *dev) 1233static void tile_net_stop_aux(struct net_device *dev)
1220{ 1234{
1221 struct tile_net_priv *priv = netdev_priv(dev); 1235 struct tile_net_priv *priv = netdev_priv(dev);
1236 int i;
1222 1237
1223 int dummy = 0; 1238 int dummy = 0;
1224 1239
1225 /* Unregister all tiles, so LIPP will stop delivering packets. */ 1240 /*
1241 * Unregister all tiles, so LIPP will stop delivering packets.
1242 * Also, delete all the "napi" objects (sequentially, to protect
1243 * "dev->napi_list").
1244 */
1226 on_each_cpu(tile_net_unregister, (void *)dev, 1); 1245 on_each_cpu(tile_net_unregister, (void *)dev, 1);
1246 for_each_online_cpu(i) {
1247 struct tile_net_cpu *info = priv->cpu[i];
1248 if (info != NULL && info->registered) {
1249 netif_napi_del(&info->napi);
1250 info->registered = false;
1251 }
1252 }
1227 1253
1228 /* Stop LIPP/LEPP. */ 1254 /* Stop LIPP/LEPP. */
1229 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1255 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
@@ -1235,18 +1261,15 @@ static void tile_net_stop_aux(struct net_device *dev)
1235 1261
1236 1262
1237/* 1263/*
1238 * Disable ingress interrupts for the given device on the current cpu. 1264 * Disable NAPI for the given device on the current cpu.
1239 */ 1265 */
1240static void tile_net_disable_intr(void *dev_ptr) 1266static void tile_net_stop_disable(void *dev_ptr)
1241{ 1267{
1242 struct net_device *dev = (struct net_device *)dev_ptr; 1268 struct net_device *dev = (struct net_device *)dev_ptr;
1243 struct tile_net_priv *priv = netdev_priv(dev); 1269 struct tile_net_priv *priv = netdev_priv(dev);
1244 int my_cpu = smp_processor_id(); 1270 int my_cpu = smp_processor_id();
1245 struct tile_net_cpu *info = priv->cpu[my_cpu]; 1271 struct tile_net_cpu *info = priv->cpu[my_cpu];
1246 1272
1247 /* Disable hypervisor interrupt. */
1248 disable_percpu_irq(priv->intr_id);
1249
1250 /* Disable NAPI if needed. */ 1273 /* Disable NAPI if needed. */
1251 if (info != NULL && info->napi_enabled) { 1274 if (info != NULL && info->napi_enabled) {
1252 napi_disable(&info->napi); 1275 napi_disable(&info->napi);
@@ -1256,21 +1279,24 @@ static void tile_net_disable_intr(void *dev_ptr)
1256 1279
1257 1280
1258/* 1281/*
1259 * Enable ingress interrupts for the given device on the current cpu. 1282 * Enable NAPI and the ingress interrupt for the given device
1283 * on the current cpu.
1284 *
1285 * ISSUE: Only do this for "network cpus"?
1260 */ 1286 */
1261static void tile_net_enable_intr(void *dev_ptr) 1287static void tile_net_open_enable(void *dev_ptr)
1262{ 1288{
1263 struct net_device *dev = (struct net_device *)dev_ptr; 1289 struct net_device *dev = (struct net_device *)dev_ptr;
1264 struct tile_net_priv *priv = netdev_priv(dev); 1290 struct tile_net_priv *priv = netdev_priv(dev);
1265 int my_cpu = smp_processor_id(); 1291 int my_cpu = smp_processor_id();
1266 struct tile_net_cpu *info = priv->cpu[my_cpu]; 1292 struct tile_net_cpu *info = priv->cpu[my_cpu];
1267 1293
1268 /* Enable hypervisor interrupt. */
1269 enable_percpu_irq(priv->intr_id);
1270
1271 /* Enable NAPI. */ 1294 /* Enable NAPI. */
1272 napi_enable(&info->napi); 1295 napi_enable(&info->napi);
1273 info->napi_enabled = true; 1296 info->napi_enabled = true;
1297
1298 /* Enable the ingress interrupt. */
1299 enable_percpu_irq(priv->intr_id);
1274} 1300}
1275 1301
1276 1302
@@ -1288,8 +1314,9 @@ static int tile_net_open_inner(struct net_device *dev)
1288 int my_cpu = smp_processor_id(); 1314 int my_cpu = smp_processor_id();
1289 struct tile_net_cpu *info; 1315 struct tile_net_cpu *info;
1290 struct tile_netio_queue *queue; 1316 struct tile_netio_queue *queue;
1291 unsigned int irq; 1317 int result = 0;
1292 int i; 1318 int i;
1319 int dummy = 0;
1293 1320
1294 /* 1321 /*
1295 * First try to register just on the local CPU, and handle any 1322 * First try to register just on the local CPU, and handle any
@@ -1307,42 +1334,52 @@ static int tile_net_open_inner(struct net_device *dev)
1307 /* 1334 /*
1308 * Now register everywhere else. If any registration fails, 1335 * Now register everywhere else. If any registration fails,
1309 * even for "link down" (which might not be possible), we 1336 * even for "link down" (which might not be possible), we
1310 * clean up using "tile_net_stop_aux()". 1337 * clean up using "tile_net_stop_aux()". Also, add all the
1338 * "napi" objects (sequentially, to protect "dev->napi_list").
1339 * ISSUE: Only use "netif_napi_add()" for "network cpus"?
1311 */ 1340 */
1312 smp_call_function(tile_net_register, (void *)dev, 1); 1341 smp_call_function(tile_net_register, (void *)dev, 1);
1313 for_each_online_cpu(i) { 1342 for_each_online_cpu(i) {
1314 if (!priv->cpu[i]->registered) { 1343 struct tile_net_cpu *info = priv->cpu[i];
1315 tile_net_stop_aux(dev); 1344 if (info->registered)
1316 return -EAGAIN; 1345 netif_napi_add(dev, &info->napi, tile_net_poll, 64);
1317 } 1346 else
1347 result = -EAGAIN;
1348 }
1349 if (result != 0) {
1350 tile_net_stop_aux(dev);
1351 return result;
1318 } 1352 }
1319 1353
1320 queue = &info->queue; 1354 queue = &info->queue;
1321 1355
1322 /* 1356 if (priv->intr_id == 0) {
1323 * Set the device intr bit mask. 1357 unsigned int irq;
1324 * The tile_net_register above sets per tile __intr_id.
1325 */
1326 priv->intr_id = queue->__system_part->__intr_id;
1327 BUG_ON(!priv->intr_id);
1328
1329 /*
1330 * Register the device interrupt handler.
1331 * The __ffs() function returns the index into the interrupt handler
1332 * table from the interrupt bit mask which should have one bit
1333 * and one bit only set.
1334 */
1335 irq = __ffs(priv->intr_id);
1336 tile_irq_activate(irq, TILE_IRQ_PERCPU);
1337 BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt,
1338 0, dev->name, (void *)dev) != 0);
1339 1358
1340 /* ISSUE: How could "priv->fully_opened" ever be "true" here? */ 1359 /*
1341 1360 * Acquire the irq allocated by the hypervisor. Every
1342 if (!priv->fully_opened) { 1361 * queue gets the same irq. The "__intr_id" field is
1362 * "1 << irq", so we use "__ffs()" to extract "irq".
1363 */
1364 priv->intr_id = queue->__system_part->__intr_id;
1365 BUG_ON(priv->intr_id == 0);
1366 irq = __ffs(priv->intr_id);
1343 1367
1344 int dummy = 0; 1368 /*
1369 * Register the ingress interrupt handler for this
1370 * device, permanently.
1371 *
1372 * We used to call "free_irq()" in "tile_net_stop()",
1373 * and then re-register the handler here every time,
1374 * but that caused DNP errors in "handle_IRQ_event()"
1375 * because "desc->action" was NULL. See bug 9143.
1376 */
1377 tile_irq_activate(irq, TILE_IRQ_PERCPU);
1378 BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt,
1379 0, dev->name, (void *)dev) != 0);
1380 }
1345 1381
1382 {
1346 /* Allocate initial buffers. */ 1383 /* Allocate initial buffers. */
1347 1384
1348 int max_buffers = 1385 int max_buffers =
@@ -1359,18 +1396,21 @@ static int tile_net_open_inner(struct net_device *dev)
1359 if (info->num_needed_small_buffers != 0 || 1396 if (info->num_needed_small_buffers != 0 ||
1360 info->num_needed_large_buffers != 0) 1397 info->num_needed_large_buffers != 0)
1361 panic("Insufficient memory for buffer stack!"); 1398 panic("Insufficient memory for buffer stack!");
1399 }
1362 1400
1363 /* Start LIPP/LEPP and activate "ingress" at the shim. */ 1401 /* We are about to be active. */
1364 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1402 priv->active = true;
1365 sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0)
1366 panic("Failed to activate the LIPP Shim!\n");
1367 1403
1368 priv->fully_opened = 1; 1404 /* Make sure "active" is visible to all tiles. */
1369 } 1405 mb();
1370 1406
1371 /* On each tile, enable the hypervisor to trigger interrupts. */ 1407 /* On each tile, enable NAPI and the ingress interrupt. */
1372 /* ISSUE: Do this before starting LIPP/LEPP? */ 1408 on_each_cpu(tile_net_open_enable, (void *)dev, 1);
1373 on_each_cpu(tile_net_enable_intr, (void *)dev, 1); 1409
1410 /* Start LIPP/LEPP and activate "ingress" at the shim. */
1411 if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy,
1412 sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0)
1413 panic("Failed to activate the LIPP Shim!\n");
1374 1414
1375 /* Start our transmit queue. */ 1415 /* Start our transmit queue. */
1376 netif_start_queue(dev); 1416 netif_start_queue(dev);
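The open path above publishes priv->active with a full memory barrier before starting LIPP/LEPP, and the interrupt, poll, and buffer-refill paths all test the flag so a late interrupt on another tile is ignored once the device is shutting down. The same handshake in miniature, using C11 atomics where the kernel code uses mb() and plain loads (a sketch, not the driver's implementation):

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_bool demo_active;

    static void demo_open(void)
    {
            /* Publish the flag before kicking the shim, as the mb() above does. */
            atomic_store_explicit(&demo_active, true, memory_order_seq_cst);
            /* ...start LIPP/LEPP here, knowing every tile sees active == true... */
    }

    static bool demo_ingress_irq(void)
    {
            /* Ignore interrupts that race with shutdown. */
            return atomic_load_explicit(&demo_active, memory_order_acquire);
    }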
@@ -1396,9 +1436,9 @@ static void tile_net_open_retry(struct work_struct *w)
1396 * ourselves to try again later; otherwise, tell Linux we now have 1436 * ourselves to try again later; otherwise, tell Linux we now have
1397 * a working link. ISSUE: What if the return value is negative? 1437 * a working link. ISSUE: What if the return value is negative?
1398 */ 1438 */
1399 if (tile_net_open_inner(priv->dev)) 1439 if (tile_net_open_inner(priv->dev) != 0)
1400 schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, 1440 schedule_delayed_work(&priv->retry_work,
1401 TILE_NET_RETRY_INTERVAL); 1441 TILE_NET_RETRY_INTERVAL);
1402 else 1442 else
1403 netif_carrier_on(priv->dev); 1443 netif_carrier_on(priv->dev);
1404} 1444}
@@ -1412,8 +1452,8 @@ static void tile_net_open_retry(struct work_struct *w)
  * The open entry point is called when a network interface is made
  * active by the system (IFF_UP). At this point all resources needed
  * for transmit and receive operations are allocated, the interrupt
- * handler is registered with the OS, the watchdog timer is started,
- * and the stack is notified that the interface is ready.
+ * handler is registered with the OS (if needed), the watchdog timer
+ * is started, and the stack is notified that the interface is ready.
  *
  * If the actual link is not available yet, then we tell Linux that
  * we have no carrier, and we keep checking until the link comes up.
@@ -1468,6 +1508,10 @@ static int tile_net_open(struct net_device *dev)
 #endif
 
                 priv->partly_opened = 1;
+
+        } else {
+                /* FIXME: Is this possible? */
+                /* printk("Already partly opened.\n"); */
         }
 
         /*
@@ -1487,57 +1531,17 @@ static int tile_net_open(struct net_device *dev)
          * and then remember to try again later.
          */
         netif_carrier_off(dev);
-        schedule_delayed_work_on(singlethread_cpu, &priv->retry_work,
-                                 TILE_NET_RETRY_INTERVAL);
+        schedule_delayed_work(&priv->retry_work, TILE_NET_RETRY_INTERVAL);
 
         return 0;
 }
 
 
-/*
- * Disables a network interface.
- *
- * Returns 0, this is not allowed to fail.
- *
- * The close entry point is called when an interface is de-activated
- * by the OS. The hardware is still under the drivers control, but
- * needs to be disabled. A global MAC reset is issued to stop the
- * hardware, and all transmit and receive resources are freed.
- *
- * ISSUE: Can this can be called while "tile_net_poll()" is running?
- */
-static int tile_net_stop(struct net_device *dev)
+static int tile_net_drain_lipp_buffers(struct tile_net_priv *priv)
 {
-        struct tile_net_priv *priv = netdev_priv(dev);
-
-        bool pending = true;
-
-        PDEBUG("tile_net_stop()\n");
-
-        /* ISSUE: Only needed if not yet fully open. */
-        cancel_delayed_work_sync(&priv->retry_work);
-
-        /* Can't transmit any more. */
-        netif_stop_queue(dev);
-
-        /*
-         * Disable hypervisor interrupts on each tile.
-         */
-        on_each_cpu(tile_net_disable_intr, (void *)dev, 1);
-
-        /*
-         * Unregister the interrupt handler.
-         * The __ffs() function returns the index into the interrupt handler
-         * table from the interrupt bit mask which should have one bit
-         * and one bit only set.
-         */
-        if (priv->intr_id)
-                free_irq(__ffs(priv->intr_id), dev);
-
-        /*
-         * Drain all the LIPP buffers.
-         */
+        int n = 0;
 
+        /* Drain all the LIPP buffers. */
         while (true) {
                 int buffer;
 
@@ -1560,43 +1564,105 @@ static int tile_net_stop(struct net_device *dev)
 
                         kfree_skb(skb);
                 }
+
+                n++;
         }
 
-        /* Stop LIPP/LEPP. */
-        tile_net_stop_aux(dev);
+        return n;
+}
 
 
-        priv->fully_opened = 0;
+/*
+ * Disables a network interface.
+ *
+ * Returns 0, this is not allowed to fail.
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS. The hardware is still under the drivers control, but
+ * needs to be disabled. A global MAC reset is issued to stop the
+ * hardware, and all transmit and receive resources are freed.
+ *
+ * ISSUE: How closely does "netif_running(dev)" mirror "priv->active"?
+ *
+ * Before we are called by "__dev_close()", "netif_running()" will
+ * have been cleared, so no NEW calls to "tile_net_poll()" will be
+ * made by "netpoll_poll_dev()".
+ *
+ * Often, this can cause some tiles to still have packets in their
+ * queues, so we must call "tile_net_discard_packets()" later.
+ *
+ * Note that some other tile may still be INSIDE "tile_net_poll()",
+ * and in fact, many will be, if there is heavy network load.
+ *
+ * Calling "on_each_cpu(tile_net_stop_disable, (void *)dev, 1)" when
+ * any tile is still "napi_schedule()"'d will induce a horrible crash
+ * when "msleep()" is called. This includes tiles which are inside
+ * "tile_net_poll()" which have not yet called "napi_complete()".
+ *
+ * So, we must first try to wait long enough for other tiles to finish
+ * with any current "tile_net_poll()" call, and, hopefully, to clear
+ * the "scheduled" flag. ISSUE: It is unclear what happens to tiles
+ * which have called "napi_schedule()" but which had not yet tried to
+ * call "tile_net_poll()", or which exhausted their budget inside
+ * "tile_net_poll()" just before this function was called.
+ */
+static int tile_net_stop(struct net_device *dev)
+{
+        struct tile_net_priv *priv = netdev_priv(dev);
+
+        PDEBUG("tile_net_stop()\n");
 
+        /* Start discarding packets. */
+        priv->active = false;
+
+        /* Make sure "active" is visible to all tiles. */
+        mb();
 
         /*
-         * XXX: ISSUE: It appears that, in practice anyway, by the
-         * time we get here, there are no pending completions.
+         * On each tile, make sure no NEW packets get delivered, and
+         * disable the ingress interrupt.
+         *
+         * Note that the ingress interrupt can fire AFTER this,
+         * presumably due to packets which were recently delivered,
+         * but it will have no effect.
          */
-        while (pending) {
+        on_each_cpu(tile_net_deregister, (void *)dev, 1);
 
-                struct sk_buff *olds[32];
-                unsigned int wanted = 32;
-                unsigned int i, nolds = 0;
+        /* Optimistically drain LIPP buffers. */
+        (void)tile_net_drain_lipp_buffers(priv);
 
-                nolds = tile_net_lepp_grab_comps(dev, olds,
-                                                 wanted, &pending);
+        /* ISSUE: Only needed if not yet fully open. */
+        cancel_delayed_work_sync(&priv->retry_work);
 
-                /* ISSUE: We have never actually seen this debug spew. */
-                if (nolds != 0)
-                        pr_info("During tile_net_stop(), grabbed %d comps.\n",
-                                nolds);
+        /* Can't transmit any more. */
+        netif_stop_queue(dev);
 
-                for (i = 0; i < nolds; i++)
-                        kfree_skb(olds[i]);
-        }
+        /* Disable NAPI on each tile. */
+        on_each_cpu(tile_net_stop_disable, (void *)dev, 1);
+
+        /*
+         * Drain any remaining LIPP buffers. NOTE: This "printk()"
+         * has never been observed, but in theory it could happen.
+         */
+        if (tile_net_drain_lipp_buffers(priv) != 0)
+                printk("Had to drain some extra LIPP buffers!\n");
 
+        /* Stop LIPP/LEPP. */
+        tile_net_stop_aux(dev);
+
+        /*
+         * ISSUE: It appears that, in practice anyway, by the time we
+         * get here, there are no pending completions, but just in case,
+         * we free (all of) them anyway.
+         */
+        while (tile_net_lepp_free_comps(dev, true))
+                /* loop */;
 
         /* Wipe the EPP queue. */
-        memset(priv->epp_queue, 0, sizeof(lepp_queue_t));
+        memset(priv->eq, 0, sizeof(lepp_queue_t));
 
         /* Evict the EPP queue. */
-        finv_buffer(priv->epp_queue, PAGE_SIZE);
+        finv_buffer(priv->eq, EQ_SIZE);
 
         return 0;
 }
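
The new tile_net_drain_lipp_buffers() above is a "drain and count" helper: keep asking for buffers until the shim reports none left, and return how many were reclaimed so the second pass in tile_net_stop() can warn if anything remained. A stand-alone C sketch of that shape, with a fake buffer pool standing in for the hypervisor read:

#include <stdbool.h>
#include <stdio.h>

/* Pretend three buffers are still outstanding at shutdown time. */
static int fake_pool = 3;

static bool pop_buffer(void)
{
        if (fake_pool == 0)
                return false;   /* nothing left to drain */
        fake_pool--;
        return true;
}

static int drain_buffers(void)
{
        int n = 0;

        while (pop_buffer())
                n++;            /* a real driver would free the buffer here */
        return n;
}

int main(void)
{
        printf("drained %d buffers\n", drain_buffers());       /* first pass: 3 */
        printf("drained %d buffers\n", drain_buffers());       /* second pass: 0 */
        return 0;
}
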
@@ -1620,7 +1686,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags,
         if (b_len != 0) {
 
                 if (!hash_default)
-                        finv_buffer_remote(b_data, b_len);
+                        finv_buffer_remote(b_data, b_len, 0);
 
                 cpa = __pa(b_data);
                 frags[n].cpa_lo = cpa;
@@ -1643,7 +1709,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags,
                 if (!hash_default) {
                         void *va = pfn_to_kaddr(pfn) + f->page_offset;
                         BUG_ON(PageHighMem(f->page));
-                        finv_buffer_remote(va, f->size);
+                        finv_buffer_remote(va, f->size, 0);
                 }
 
                 cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset;
@@ -1742,17 +1808,15 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
 
         unsigned long irqflags;
 
-        lepp_queue_t *eq = priv->epp_queue;
+        lepp_queue_t *eq = priv->eq;
 
-        struct sk_buff *olds[4];
-        unsigned int wanted = 4;
+        struct sk_buff *olds[8];
+        unsigned int wanted = 8;
         unsigned int i, nolds = 0;
 
         unsigned int cmd_head, cmd_tail, cmd_next;
         unsigned int comp_tail;
 
-        unsigned int free_slots;
-
 
         /* Paranoia. */
         BUG_ON(skb->protocol != htons(ETH_P_IP));
@@ -1780,34 +1844,32 @@ static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev)
 
         /* Enqueue the command. */
 
-        spin_lock_irqsave(&priv->cmd_lock, irqflags);
+        spin_lock_irqsave(&priv->eq_lock, irqflags);
 
         /*
          * Handle completions if needed to make room.
          * HACK: Spin until there is sufficient room.
          */
-        free_slots = lepp_num_free_comp_slots(eq);
-        if (free_slots < 1) {
-spin:
-                nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
-                                                  wanted - nolds, NULL);
-                if (lepp_num_free_comp_slots(eq) < 1)
-                        goto spin;
+        if (lepp_num_free_comp_slots(eq) == 0) {
+                nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0);
+                if (nolds == 0) {
+busy:
+                        spin_unlock_irqrestore(&priv->eq_lock, irqflags);
+                        return NETDEV_TX_BUSY;
+                }
         }
 
         cmd_head = eq->cmd_head;
         cmd_tail = eq->cmd_tail;
 
-        /* NOTE: The "gotos" below are untested. */
-
         /* Prepare to advance, detecting full queue. */
         cmd_next = cmd_tail + cmd_size;
         if (cmd_tail < cmd_head && cmd_next >= cmd_head)
-                goto spin;
+                goto busy;
         if (cmd_next > LEPP_CMD_LIMIT) {
                 cmd_next = 0;
                 if (cmd_next == cmd_head)
-                        goto spin;
+                        goto busy;
         }
 
         /* Copy the command. */
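
The full-queue test above treats the command area as a byte ring: advancing the tail must neither catch up with the head nor wrap to offset 0 while the head is still there, and when no completions can be reaped to make room the driver now returns NETDEV_TX_BUSY instead of spinning under the lock. A stand-alone C sketch of the same predicate, with made-up sizes:

#include <stdbool.h>
#include <stdio.h>

#define LIMIT 256u      /* made-up ring size in bytes */

/* "head" is where the consumer reads, "tail" is where the producer writes. */
static bool ring_would_overflow(unsigned int head, unsigned int tail,
                                unsigned int cmd_size)
{
        unsigned int next = tail + cmd_size;

        /* Advancing the tail may not reach or pass the head. */
        if (tail < head && next >= head)
                return true;
        /* Wrap to offset 0; if that lands on the head, the ring is full. */
        if (next > LIMIT && head == 0)
                return true;
        return false;
}

int main(void)
{
        /* A producer would return NETDEV_TX_BUSY (or similar) on overflow. */
        printf("%d\n", ring_would_overflow(64, 48, 32));        /* 1: would pass head */
        printf("%d\n", ring_would_overflow(0, 224, 64));        /* 1: wrap hits head */
        printf("%d\n", ring_would_overflow(128, 0, 32));        /* 0: plenty of room */
        return 0;
}
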
@@ -1823,14 +1885,18 @@ spin:
         eq->comp_tail = comp_tail;
 
         /* Flush before allowing LEPP to handle the command. */
+        /* ISSUE: Is this the optimal location for the flush? */
         __insn_mf();
 
         eq->cmd_tail = cmd_tail;
 
-        spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
-
+        /* NOTE: Using "4" here is more efficient than "0" or "2", */
+        /* and, strangely, more efficient than pre-checking the number */
+        /* of available completions, and comparing it to 4. */
         if (nolds == 0)
-                nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
+                nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 4);
+
+        spin_unlock_irqrestore(&priv->eq_lock, irqflags);
 
         /* Handle completions. */
         for (i = 0; i < nolds; i++)
@@ -1870,10 +1936,10 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
 
         unsigned int num_frags;
 
-        lepp_queue_t *eq = priv->epp_queue;
+        lepp_queue_t *eq = priv->eq;
 
-        struct sk_buff *olds[4];
-        unsigned int wanted = 4;
+        struct sk_buff *olds[8];
+        unsigned int wanted = 8;
         unsigned int i, nolds = 0;
 
         unsigned int cmd_size = sizeof(lepp_cmd_t);
@@ -1883,8 +1949,6 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
 
         lepp_cmd_t cmds[LEPP_MAX_FRAGS];
 
-        unsigned int free_slots;
-
 
         /*
          * This is paranoia, since we think that if the link doesn't come
@@ -1905,7 +1969,8 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
         if (hash_default) {
                 HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data);
                 if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3)
-                        panic("Non-coherent egress buffer!");
+                        panic("Non-HFH egress buffer! VA=%p Mode=%d PTE=%llx",
+                              data, hv_pte_get_mode(pte), hv_pte_val(pte));
         }
 #endif
 #endif
@@ -1958,37 +2023,35 @@ static int tile_net_tx(struct sk_buff *skb, struct net_device *dev)
 
         /* Enqueue the commands. */
 
-        spin_lock_irqsave(&priv->cmd_lock, irqflags);
+        spin_lock_irqsave(&priv->eq_lock, irqflags);
 
         /*
          * Handle completions if needed to make room.
          * HACK: Spin until there is sufficient room.
          */
-        free_slots = lepp_num_free_comp_slots(eq);
-        if (free_slots < 1) {
-spin:
-                nolds += tile_net_lepp_grab_comps(dev, olds + nolds,
-                                                  wanted - nolds, NULL);
-                if (lepp_num_free_comp_slots(eq) < 1)
-                        goto spin;
+        if (lepp_num_free_comp_slots(eq) == 0) {
+                nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 0);
+                if (nolds == 0) {
+busy:
+                        spin_unlock_irqrestore(&priv->eq_lock, irqflags);
+                        return NETDEV_TX_BUSY;
+                }
         }
 
         cmd_head = eq->cmd_head;
         cmd_tail = eq->cmd_tail;
 
-        /* NOTE: The "gotos" below are untested. */
-
         /* Copy the commands, or fail. */
         for (i = 0; i < num_frags; i++) {
 
                 /* Prepare to advance, detecting full queue. */
                 cmd_next = cmd_tail + cmd_size;
                 if (cmd_tail < cmd_head && cmd_next >= cmd_head)
-                        goto spin;
+                        goto busy;
                 if (cmd_next > LEPP_CMD_LIMIT) {
                         cmd_next = 0;
                         if (cmd_next == cmd_head)
-                                goto spin;
+                                goto busy;
                 }
 
                 /* Copy the command. */
@@ -2005,14 +2068,18 @@ spin:
         eq->comp_tail = comp_tail;
 
         /* Flush before allowing LEPP to handle the command. */
+        /* ISSUE: Is this the optimal location for the flush? */
         __insn_mf();
 
         eq->cmd_tail = cmd_tail;
 
-        spin_unlock_irqrestore(&priv->cmd_lock, irqflags);
-
+        /* NOTE: Using "4" here is more efficient than "0" or "2", */
+        /* and, strangely, more efficient than pre-checking the number */
+        /* of available completions, and comparing it to 4. */
         if (nolds == 0)
-                nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL);
+                nolds = tile_net_lepp_grab_comps(eq, olds, wanted, 4);
+
+        spin_unlock_irqrestore(&priv->eq_lock, irqflags);
 
         /* Handle completions. */
         for (i = 0; i < nolds; i++)
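
In both transmit paths the command bytes are written first, then __insn_mf() (a full memory fence on Tile) runs, and only then is eq->cmd_tail published, so the LEPP consumer can never observe the new tail before the command itself. A hedged C11 sketch of that producer-side ordering, using a release store as a portable analogue and a toy queue layout that is not the real lepp_queue_t:

#include <stdatomic.h>
#include <string.h>

struct toy_queue {
        unsigned char cmds[256];
        _Atomic unsigned int cmd_tail;  /* the consumer polls this */
};

static void toy_post(struct toy_queue *q, const void *cmd,
                     unsigned int size, unsigned int tail)
{
        /* 1. Fill in the command body at the old tail. */
        memcpy(&q->cmds[tail], cmd, size);

        /* 2. Release-store the new tail: the consumer must never see the
         *    tail advance before the command bytes are globally visible. */
        atomic_store_explicit(&q->cmd_tail, tail + size, memory_order_release);
}

A consumer would pair this with an acquire load of cmd_tail before reading the command bytes.
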
@@ -2261,7 +2328,6 @@ static struct net_device *tile_net_dev_init(const char *name)
         int ret;
         struct net_device *dev;
         struct tile_net_priv *priv;
-        struct page *page;
 
         /*
          * Allocate the device structure. This allocates "priv", calls
@@ -2285,23 +2351,21 @@ static struct net_device *tile_net_dev_init(const char *name)
 
         INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry);
 
-        spin_lock_init(&priv->cmd_lock);
-        spin_lock_init(&priv->comp_lock);
+        spin_lock_init(&priv->eq_lock);
 
-        /* Allocate "epp_queue". */
-        BUG_ON(get_order(sizeof(lepp_queue_t)) != 0);
-        page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0);
-        if (!page) {
+        /* Allocate "eq". */
+        priv->eq_pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, EQ_ORDER);
+        if (!priv->eq_pages) {
                 free_netdev(dev);
                 return NULL;
         }
-        priv->epp_queue = page_address(page);
+        priv->eq = page_address(priv->eq_pages);
 
         /* Register the network device. */
         ret = register_netdev(dev);
         if (ret) {
                 pr_err("register_netdev %s failed %d\n", dev->name, ret);
-                free_page((unsigned long)priv->epp_queue);
+                __free_pages(priv->eq_pages, EQ_ORDER);
                 free_netdev(dev);
                 return NULL;
         }
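
The hunk above switches the egress queue from a single page to a multi-page allocation: alloc_pages() with EQ_ORDER, page_address() for the kernel mapping, and a matching __free_pages() on every error path. A hedged kernel-style sketch of that pattern, with a made-up order value and helper names:

#include <linux/gfp.h>
#include <linux/mm.h>

#define MY_ORDER 2      /* made-up: order 2 means 2^2 = 4 contiguous pages */

static void *my_setup(struct page **pages_out)
{
        struct page *pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, MY_ORDER);

        if (!pages)
                return NULL;            /* caller unwinds, e.g. free_netdev() */

        *pages_out = pages;
        return page_address(pages);     /* kernel virtual address of the block */
}

static void my_teardown(struct page *pages)
{
        /* Must free with the same order that was used to allocate. */
        __free_pages(pages, MY_ORDER);
}
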
@@ -2310,7 +2374,7 @@ static struct net_device *tile_net_dev_init(const char *name)
         ret = tile_net_get_mac(dev);
         if (ret < 0) {
                 unregister_netdev(dev);
-                free_page((unsigned long)priv->epp_queue);
+                __free_pages(priv->eq_pages, EQ_ORDER);
                 free_netdev(dev);
                 return NULL;
         }
@@ -2321,6 +2385,9 @@ static struct net_device *tile_net_dev_init(const char *name)
 
 /*
  * Module cleanup.
+ *
+ * FIXME: If compiled as a module, this module cannot be "unloaded",
+ * because the "ingress interrupt handler" is registered permanently.
  */
 static void tile_net_cleanup(void)
 {
@@ -2331,8 +2398,8 @@ static void tile_net_cleanup(void)
                         struct net_device *dev = tile_net_devs[i];
                         struct tile_net_priv *priv = netdev_priv(dev);
                         unregister_netdev(dev);
-                        finv_buffer(priv->epp_queue, PAGE_SIZE);
-                        free_page((unsigned long)priv->epp_queue);
+                        finv_buffer(priv->eq, EQ_SIZE);
+                        __free_pages(priv->eq_pages, EQ_ORDER);
                         free_netdev(dev);
                 }
         }
@@ -2355,7 +2422,12 @@ static int tile_net_init_module(void)
 }
 
 
+module_init(tile_net_init_module);
+module_exit(tile_net_cleanup);
+
+
 #ifndef MODULE
+
 /*
  * The "network_cpus" boot argument specifies the cpus that are dedicated
  * to handle ingress packets.
@@ -2391,8 +2463,5 @@ static int __init network_cpus_setup(char *str)
         return 0;
 }
 __setup("network_cpus=", network_cpus_setup);
-#endif
-
 
-module_init(tile_net_init_module);
-module_exit(tile_net_cleanup);
+#endif
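
The "network_cpus=" handler above is a standard __setup() boot argument, which is why it lives inside #ifndef MODULE. A hedged sketch of the general shape, with a made-up parameter name and cpumask; cpulist_parse() is assumed to be available, and the handler mirrors the driver's convention of returning 0:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/cpumask.h>

static struct cpumask my_cpus;          /* illustrative; not the driver's map */

static int __init my_cpus_setup(char *str)
{
        if (cpulist_parse(str, &my_cpus) != 0)
                pr_warn("my_cpus=%s is malformed, ignoring\n", str);
        return 0;                       /* same convention as network_cpus_setup() */
}
__setup("my_cpus=", my_cpus_setup);
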