aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Metcalf <cmetcalf@tilera.com>2013-08-15 16:23:24 -0400
committerChris Metcalf <cmetcalf@tilera.com>2013-09-03 14:53:29 -0400
commitd7c9661115fd23b4dabb710b3080dd9919dfa891 (patch)
tree5eaeb8c4aab296f39d6aa896ec9408419ec17441
parentd6a0aa314c06743b702931cb468f400b7615c5c9 (diff)
tile: remove support for TILE64
This chip is no longer being actively developed for (it was superseded by the TILEPro64 in 2008), and in any case the existing compiler and toolchain in the community do not support it. It's unlikely that the kernel works with TILE64 at this point as the configuration has not been tested in years. The support is also awkward as it requires maintaining a significant number of ifdefs. So, just remove it altogether. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
-rw-r--r--arch/tile/Kconfig2
-rw-r--r--arch/tile/include/asm/atomic_32.h17
-rw-r--r--arch/tile/include/asm/barrier.h4
-rw-r--r--arch/tile/include/asm/elf.h1
-rw-r--r--arch/tile/include/asm/homecache.h3
-rw-r--r--arch/tile/include/asm/processor.h16
-rw-r--r--arch/tile/include/asm/smp.h2
-rw-r--r--arch/tile/include/asm/traps.h2
-rw-r--r--arch/tile/include/uapi/arch/Kbuild1
-rw-r--r--arch/tile/include/uapi/arch/chip.h4
-rw-r--r--arch/tile/include/uapi/arch/chip_tile64.h258
-rw-r--r--arch/tile/include/uapi/arch/spr_def_32.h2
-rw-r--r--arch/tile/kernel/intvec_32.S69
-rw-r--r--arch/tile/kernel/intvec_64.S2
-rw-r--r--arch/tile/kernel/irq.c4
-rw-r--r--arch/tile/kernel/process.c44
-rw-r--r--arch/tile/kernel/relocate_kernel_32.S2
-rw-r--r--arch/tile/kernel/relocate_kernel_64.S2
-rw-r--r--arch/tile/kernel/setup.c13
-rw-r--r--arch/tile/kernel/single_step.c2
-rw-r--r--arch/tile/lib/Makefile2
-rw-r--r--arch/tile/lib/atomic_32.c90
-rw-r--r--arch/tile/lib/memcpy_32.S61
-rw-r--r--arch/tile/lib/memcpy_tile64.c280
-rw-r--r--arch/tile/lib/memset_32.c105
-rw-r--r--arch/tile/mm/fault.c70
-rw-r--r--arch/tile/mm/homecache.c29
-rw-r--r--arch/tile/mm/init.c68
28 files changed, 34 insertions, 1121 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index b2be42524483..6e1ed55f6cfc 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -207,7 +207,7 @@ config SYSVIPC_COMPAT
207 def_bool y 207 def_bool y
208 depends on COMPAT && SYSVIPC 208 depends on COMPAT && SYSVIPC
209 209
210# We do not currently support disabling HIGHMEM on tile64 and tilepro. 210# We do not currently support disabling HIGHMEM on tilepro.
211config HIGHMEM 211config HIGHMEM
212 bool # "Support for more than 512 MB of RAM" 212 bool # "Support for more than 512 MB of RAM"
213 default !TILEGX 213 default !TILEGX
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index e7fb5cfb9597..96156f5ba640 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -252,21 +252,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
252 * Internal definitions only beyond this point. 252 * Internal definitions only beyond this point.
253 */ 253 */
254 254
255#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \
256 (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP))
257
258#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
259
260/* Number of entries in atomic_lock_ptr[]. */
261#define ATOMIC_HASH_L1_SHIFT 6
262#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
263
264/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
265#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
266#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
267
268#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
269
270/* 255/*
271 * Number of atomic locks in atomic_locks[]. Must be a power of two. 256 * Number of atomic locks in atomic_locks[]. Must be a power of two.
272 * There is no reason for more than PAGE_SIZE / 8 entries, since that 257 * There is no reason for more than PAGE_SIZE / 8 entries, since that
@@ -281,8 +266,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
281extern int atomic_locks[]; 266extern int atomic_locks[];
282#endif 267#endif
283 268
284#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
285
286/* 269/*
287 * All the code that may fault while holding an atomic lock must 270 * All the code that may fault while holding an atomic lock must
288 * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code 271 * place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
index 990a217a0b72..a9a73da5865d 100644
--- a/arch/tile/include/asm/barrier.h
+++ b/arch/tile/include/asm/barrier.h
@@ -77,7 +77,6 @@
77 77
78#define __sync() __insn_mf() 78#define __sync() __insn_mf()
79 79
80#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
81#include <hv/syscall_public.h> 80#include <hv/syscall_public.h>
82/* 81/*
83 * Issue an uncacheable load to each memory controller, then 82 * Issue an uncacheable load to each memory controller, then
@@ -96,7 +95,6 @@ static inline void __mb_incoherent(void)
96 "r20", "r21", "r22", "r23", "r24", 95 "r20", "r21", "r22", "r23", "r24",
97 "r25", "r26", "r27", "r28", "r29"); 96 "r25", "r26", "r27", "r28", "r29");
98} 97}
99#endif
100 98
101/* Fence to guarantee visibility of stores to incoherent memory. */ 99/* Fence to guarantee visibility of stores to incoherent memory. */
102static inline void 100static inline void
@@ -104,7 +102,6 @@ mb_incoherent(void)
104{ 102{
105 __insn_mf(); 103 __insn_mf();
106 104
107#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
108 { 105 {
109#if CHIP_HAS_TILE_WRITE_PENDING() 106#if CHIP_HAS_TILE_WRITE_PENDING()
110 const unsigned long WRITE_TIMEOUT_CYCLES = 400; 107 const unsigned long WRITE_TIMEOUT_CYCLES = 400;
@@ -116,7 +113,6 @@ mb_incoherent(void)
116#endif /* CHIP_HAS_TILE_WRITE_PENDING() */ 113#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
117 (void) __mb_incoherent(); 114 (void) __mb_incoherent();
118 } 115 }
119#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
120} 116}
121 117
122#define fast_wmb() __sync() 118#define fast_wmb() __sync()
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index e1da88e8aa9f..41d9878a9686 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t;
30#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t)) 30#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
31typedef elf_greg_t elf_gregset_t[ELF_NGREG]; 31typedef elf_greg_t elf_gregset_t[ELF_NGREG];
32 32
33#define EM_TILE64 187
34#define EM_TILEPRO 188 33#define EM_TILEPRO 188
35#define EM_TILEGX 191 34#define EM_TILEGX 191
36 35
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index 49d19dfc0630..7ddd1b8d6910 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -33,8 +33,7 @@ struct zone;
33 33
34/* 34/*
35 * Is this page immutable (unwritable) and thus able to be cached more 35 * Is this page immutable (unwritable) and thus able to be cached more
36 * widely than would otherwise be possible? On tile64 this means we 36 * widely than would otherwise be possible? This means we have "nc" set.
37 * mark the PTE to cache locally; on tilepro it means we have "nc" set.
38 */ 37 */
39#define PAGE_HOME_IMMUTABLE -2 38#define PAGE_HOME_IMMUTABLE -2
40 39
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 5aa54319d2ef..42323636c459 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -113,18 +113,14 @@ struct thread_struct {
113 unsigned long intctrl_0; 113 unsigned long intctrl_0;
114 /* Is this task currently doing a backtrace? */ 114 /* Is this task currently doing a backtrace? */
115 bool in_backtrace; 115 bool in_backtrace;
116#if CHIP_HAS_PROC_STATUS_SPR()
117 /* Any other miscellaneous processor state bits */ 116 /* Any other miscellaneous processor state bits */
118 unsigned long proc_status; 117 unsigned long proc_status;
119#endif
120#if !CHIP_HAS_FIXED_INTVEC_BASE() 118#if !CHIP_HAS_FIXED_INTVEC_BASE()
121 /* Interrupt base for PL0 interrupts */ 119 /* Interrupt base for PL0 interrupts */
122 unsigned long interrupt_vector_base; 120 unsigned long interrupt_vector_base;
123#endif 121#endif
124#if CHIP_HAS_TILE_RTF_HWM()
125 /* Tile cache retry fifo high-water mark */ 122 /* Tile cache retry fifo high-water mark */
126 unsigned long tile_rtf_hwm; 123 unsigned long tile_rtf_hwm;
127#endif
128#if CHIP_HAS_DSTREAM_PF() 124#if CHIP_HAS_DSTREAM_PF()
129 /* Data stream prefetch control */ 125 /* Data stream prefetch control */
130 unsigned long dstream_pf; 126 unsigned long dstream_pf;
@@ -137,12 +133,6 @@ struct thread_struct {
137 /* Async DMA TLB fault information */ 133 /* Async DMA TLB fault information */
138 struct async_tlb dma_async_tlb; 134 struct async_tlb dma_async_tlb;
139#endif 135#endif
140#if CHIP_HAS_SN_PROC()
141 /* Was static network processor when we were switched out? */
142 int sn_proc_running;
143 /* Async SNI TLB fault information */
144 struct async_tlb sn_async_tlb;
145#endif
146}; 136};
147 137
148#endif /* !__ASSEMBLY__ */ 138#endif /* !__ASSEMBLY__ */
@@ -286,7 +276,6 @@ extern char chip_model[64];
286/* Data on which physical memory controller corresponds to which NUMA node. */ 276/* Data on which physical memory controller corresponds to which NUMA node. */
287extern int node_controller[]; 277extern int node_controller[];
288 278
289#if CHIP_HAS_CBOX_HOME_MAP()
290/* Does the heap allocator return hash-for-home pages by default? */ 279/* Does the heap allocator return hash-for-home pages by default? */
291extern int hash_default; 280extern int hash_default;
292 281
@@ -296,11 +285,6 @@ extern int kstack_hash;
296/* Does MAP_ANONYMOUS return hash-for-home pages by default? */ 285/* Does MAP_ANONYMOUS return hash-for-home pages by default? */
297#define uheap_hash hash_default 286#define uheap_hash hash_default
298 287
299#else
300#define hash_default 0
301#define kstack_hash 0
302#define uheap_hash 0
303#endif
304 288
305/* Are we using huge pages in the TLB for kernel data? */ 289/* Are we using huge pages in the TLB for kernel data? */
306extern int kdata_huge; 290extern int kdata_huge;
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 1aa759aeb5b3..9a326b64f7ae 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -101,10 +101,8 @@ void print_disabled_cpus(void);
101extern struct cpumask cpu_lotar_map; 101extern struct cpumask cpu_lotar_map;
102#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map) 102#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
103 103
104#if CHIP_HAS_CBOX_HOME_MAP()
105/* Which processors are used for hash-for-home mapping */ 104/* Which processors are used for hash-for-home mapping */
106extern struct cpumask hash_for_home_map; 105extern struct cpumask hash_for_home_map;
107#endif
108 106
109/* Which cpus can have their cache flushed by hv_flush_remote(). */ 107/* Which cpus can have their cache flushed by hv_flush_remote(). */
110extern struct cpumask cpu_cacheable_map; 108extern struct cpumask cpu_cacheable_map;
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 5f172b2403a6..4b99a1c3aab2 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -21,7 +21,7 @@
21/* mm/fault.c */ 21/* mm/fault.c */
22void do_page_fault(struct pt_regs *, int fault_num, 22void do_page_fault(struct pt_regs *, int fault_num,
23 unsigned long address, unsigned long write); 23 unsigned long address, unsigned long write);
24#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() 24#if CHIP_HAS_TILE_DMA()
25void do_async_page_fault(struct pt_regs *); 25void do_async_page_fault(struct pt_regs *);
26#endif 26#endif
27 27
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
index 4ebc34f4768d..97dfbecec6b6 100644
--- a/arch/tile/include/uapi/arch/Kbuild
+++ b/arch/tile/include/uapi/arch/Kbuild
@@ -1,7 +1,6 @@
1# UAPI Header export list 1# UAPI Header export list
2header-y += abi.h 2header-y += abi.h
3header-y += chip.h 3header-y += chip.h
4header-y += chip_tile64.h
5header-y += chip_tilegx.h 4header-y += chip_tilegx.h
6header-y += chip_tilepro.h 5header-y += chip_tilepro.h
7header-y += icache.h 6header-y += icache.h
diff --git a/arch/tile/include/uapi/arch/chip.h b/arch/tile/include/uapi/arch/chip.h
index 926d3db0e91e..4c91f90b9369 100644
--- a/arch/tile/include/uapi/arch/chip.h
+++ b/arch/tile/include/uapi/arch/chip.h
@@ -12,9 +12,7 @@
12 * more details. 12 * more details.
13 */ 13 */
14 14
15#if __tile_chip__ == 0 15#if __tile_chip__ == 1
16#include <arch/chip_tile64.h>
17#elif __tile_chip__ == 1
18#include <arch/chip_tilepro.h> 16#include <arch/chip_tilepro.h>
19#elif defined(__tilegx__) 17#elif defined(__tilegx__)
20#include <arch/chip_tilegx.h> 18#include <arch/chip_tilegx.h>
diff --git a/arch/tile/include/uapi/arch/chip_tile64.h b/arch/tile/include/uapi/arch/chip_tile64.h
deleted file mode 100644
index 261aaba092d4..000000000000
--- a/arch/tile/include/uapi/arch/chip_tile64.h
+++ /dev/null
@@ -1,258 +0,0 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15/*
16 * @file
17 * Global header file.
18 * This header file specifies defines for TILE64.
19 */
20
21#ifndef __ARCH_CHIP_H__
22#define __ARCH_CHIP_H__
23
24/** Specify chip version.
25 * When possible, prefer the CHIP_xxx symbols below for future-proofing.
26 * This is intended for cross-compiling; native compilation should
27 * use the predefined __tile_chip__ symbol.
28 */
29#define TILE_CHIP 0
30
31/** Specify chip revision.
32 * This provides for the case of a respin of a particular chip type;
33 * the normal value for this symbol is "0".
34 * This is intended for cross-compiling; native compilation should
35 * use the predefined __tile_chip_rev__ symbol.
36 */
37#define TILE_CHIP_REV 0
38
39/** The name of this architecture. */
40#define CHIP_ARCH_NAME "tile64"
41
42/** The ELF e_machine type for binaries for this chip. */
43#define CHIP_ELF_TYPE() EM_TILE64
44
45/** The alternate ELF e_machine type for binaries for this chip. */
46#define CHIP_COMPAT_ELF_TYPE() 0x2506
47
48/** What is the native word size of the machine? */
49#define CHIP_WORD_SIZE() 32
50
51/** How many bits of a virtual address are used. Extra bits must be
52 * the sign extension of the low bits.
53 */
54#define CHIP_VA_WIDTH() 32
55
56/** How many bits are in a physical address? */
57#define CHIP_PA_WIDTH() 36
58
59/** Size of the L2 cache, in bytes. */
60#define CHIP_L2_CACHE_SIZE() 65536
61
62/** Log size of an L2 cache line in bytes. */
63#define CHIP_L2_LOG_LINE_SIZE() 6
64
65/** Size of an L2 cache line, in bytes. */
66#define CHIP_L2_LINE_SIZE() (1 << CHIP_L2_LOG_LINE_SIZE())
67
68/** Associativity of the L2 cache. */
69#define CHIP_L2_ASSOC() 2
70
71/** Size of the L1 data cache, in bytes. */
72#define CHIP_L1D_CACHE_SIZE() 8192
73
74/** Log size of an L1 data cache line in bytes. */
75#define CHIP_L1D_LOG_LINE_SIZE() 4
76
77/** Size of an L1 data cache line, in bytes. */
78#define CHIP_L1D_LINE_SIZE() (1 << CHIP_L1D_LOG_LINE_SIZE())
79
80/** Associativity of the L1 data cache. */
81#define CHIP_L1D_ASSOC() 2
82
83/** Size of the L1 instruction cache, in bytes. */
84#define CHIP_L1I_CACHE_SIZE() 8192
85
86/** Log size of an L1 instruction cache line in bytes. */
87#define CHIP_L1I_LOG_LINE_SIZE() 6
88
89/** Size of an L1 instruction cache line, in bytes. */
90#define CHIP_L1I_LINE_SIZE() (1 << CHIP_L1I_LOG_LINE_SIZE())
91
92/** Associativity of the L1 instruction cache. */
93#define CHIP_L1I_ASSOC() 1
94
95/** Stride with which flush instructions must be issued. */
96#define CHIP_FLUSH_STRIDE() CHIP_L2_LINE_SIZE()
97
98/** Stride with which inv instructions must be issued. */
99#define CHIP_INV_STRIDE() CHIP_L1D_LINE_SIZE()
100
101/** Stride with which finv instructions must be issued. */
102#define CHIP_FINV_STRIDE() CHIP_L1D_LINE_SIZE()
103
104/** Can the local cache coherently cache data that is homed elsewhere? */
105#define CHIP_HAS_COHERENT_LOCAL_CACHE() 0
106
107/** How many simultaneous outstanding victims can the L2 cache have? */
108#define CHIP_MAX_OUTSTANDING_VICTIMS() 2
109
110/** Does the TLB support the NC and NOALLOC bits? */
111#define CHIP_HAS_NC_AND_NOALLOC_BITS() 0
112
113/** Does the chip support hash-for-home caching? */
114#define CHIP_HAS_CBOX_HOME_MAP() 0
115
116/** Number of entries in the chip's home map tables. */
117/* #define CHIP_CBOX_HOME_MAP_SIZE() -- does not apply to chip 0 */
118
119/** Do uncacheable requests miss in the cache regardless of whether
120 * there is matching data? */
121#define CHIP_HAS_ENFORCED_UNCACHEABLE_REQUESTS() 0
122
123/** Does the mf instruction wait for victims? */
124#define CHIP_HAS_MF_WAITS_FOR_VICTIMS() 1
125
126/** Does the chip have an "inv" instruction that doesn't also flush? */
127#define CHIP_HAS_INV() 0
128
129/** Does the chip have a "wh64" instruction? */
130#define CHIP_HAS_WH64() 0
131
132/** Does this chip have a 'dword_align' instruction? */
133#define CHIP_HAS_DWORD_ALIGN() 0
134
135/** Number of performance counters. */
136#define CHIP_PERFORMANCE_COUNTERS() 2
137
138/** Does this chip have auxiliary performance counters? */
139#define CHIP_HAS_AUX_PERF_COUNTERS() 0
140
141/** Is the CBOX_MSR1 SPR supported? */
142#define CHIP_HAS_CBOX_MSR1() 0
143
144/** Is the TILE_RTF_HWM SPR supported? */
145#define CHIP_HAS_TILE_RTF_HWM() 0
146
147/** Is the TILE_WRITE_PENDING SPR supported? */
148#define CHIP_HAS_TILE_WRITE_PENDING() 0
149
150/** Is the PROC_STATUS SPR supported? */
151#define CHIP_HAS_PROC_STATUS_SPR() 0
152
153/** Is the DSTREAM_PF SPR supported? */
154#define CHIP_HAS_DSTREAM_PF() 0
155
156/** Log of the number of mshims we have. */
157#define CHIP_LOG_NUM_MSHIMS() 2
158
159/** Are the bases of the interrupt vector areas fixed? */
160#define CHIP_HAS_FIXED_INTVEC_BASE() 1
161
162/** Are the interrupt masks split up into 2 SPRs? */
163#define CHIP_HAS_SPLIT_INTR_MASK() 1
164
165/** Is the cycle count split up into 2 SPRs? */
166#define CHIP_HAS_SPLIT_CYCLE() 1
167
168/** Does the chip have a static network? */
169#define CHIP_HAS_SN() 1
170
171/** Does the chip have a static network processor? */
172#define CHIP_HAS_SN_PROC() 1
173
174/** Size of the L1 static network processor instruction cache, in bytes. */
175#define CHIP_L1SNI_CACHE_SIZE() 2048
176
177/** Does the chip have DMA support in each tile? */
178#define CHIP_HAS_TILE_DMA() 1
179
180/** Does the chip have the second revision of the directly accessible
181 * dynamic networks? This encapsulates a number of characteristics,
182 * including the absence of the catch-all, the absence of inline message
183 * tags, the absence of support for network context-switching, and so on.
184 */
185#define CHIP_HAS_REV1_XDN() 0
186
187/** Does the chip have cmpexch and similar (fetchadd, exch, etc.)? */
188#define CHIP_HAS_CMPEXCH() 0
189
190/** Does the chip have memory-mapped I/O support? */
191#define CHIP_HAS_MMIO() 0
192
193/** Does the chip have post-completion interrupts? */
194#define CHIP_HAS_POST_COMPLETION_INTERRUPTS() 0
195
196/** Does the chip have native single step support? */
197#define CHIP_HAS_SINGLE_STEP() 0
198
199#ifndef __OPEN_SOURCE__ /* features only relevant to hypervisor-level code */
200
201/** How many entries are present in the instruction TLB? */
202#define CHIP_ITLB_ENTRIES() 8
203
204/** How many entries are present in the data TLB? */
205#define CHIP_DTLB_ENTRIES() 16
206
207/** How many MAF entries does the XAUI shim have? */
208#define CHIP_XAUI_MAF_ENTRIES() 16
209
210/** Does the memory shim have a source-id table? */
211#define CHIP_HAS_MSHIM_SRCID_TABLE() 1
212
213/** Does the L1 instruction cache clear on reset? */
214#define CHIP_HAS_L1I_CLEAR_ON_RESET() 0
215
216/** Does the chip come out of reset with valid coordinates on all tiles?
217 * Note that if defined, this also implies that the upper left is 1,1.
218 */
219#define CHIP_HAS_VALID_TILE_COORD_RESET() 0
220
221/** Does the chip have unified packet formats? */
222#define CHIP_HAS_UNIFIED_PACKET_FORMATS() 0
223
224/** Does the chip support write reordering? */
225#define CHIP_HAS_WRITE_REORDERING() 0
226
227/** Does the chip support Y-X routing as well as X-Y? */
228#define CHIP_HAS_Y_X_ROUTING() 0
229
230/** Is INTCTRL_3 managed with the correct MPL? */
231#define CHIP_HAS_INTCTRL_3_STATUS_FIX() 0
232
233/** Is it possible to configure the chip to be big-endian? */
234#define CHIP_HAS_BIG_ENDIAN_CONFIG() 0
235
236/** Is the CACHE_RED_WAY_OVERRIDDEN SPR supported? */
237#define CHIP_HAS_CACHE_RED_WAY_OVERRIDDEN() 0
238
239/** Is the DIAG_TRACE_WAY SPR supported? */
240#define CHIP_HAS_DIAG_TRACE_WAY() 0
241
242/** Is the MEM_STRIPE_CONFIG SPR supported? */
243#define CHIP_HAS_MEM_STRIPE_CONFIG() 0
244
245/** Are the TLB_PERF SPRs supported? */
246#define CHIP_HAS_TLB_PERF() 0
247
248/** Is the VDN_SNOOP_SHIM_CTL SPR supported? */
249#define CHIP_HAS_VDN_SNOOP_SHIM_CTL() 0
250
251/** Does the chip support rev1 DMA packets? */
252#define CHIP_HAS_REV1_DMA_PACKETS() 0
253
254/** Does the chip have an IPI shim? */
255#define CHIP_HAS_IPI() 0
256
257#endif /* !__OPEN_SOURCE__ */
258#endif /* __ARCH_CHIP_H__ */
diff --git a/arch/tile/include/uapi/arch/spr_def_32.h b/arch/tile/include/uapi/arch/spr_def_32.h
index c689446e6284..78daa3146d25 100644
--- a/arch/tile/include/uapi/arch/spr_def_32.h
+++ b/arch/tile/include/uapi/arch/spr_def_32.h
@@ -200,8 +200,6 @@
200#define SPR_SIM_CONTROL 0x4e0c 200#define SPR_SIM_CONTROL 0x4e0c
201#define SPR_SNCTL 0x0805 201#define SPR_SNCTL 0x0805
202#define SPR_SNCTL__FRZFABRIC_MASK 0x1 202#define SPR_SNCTL__FRZFABRIC_MASK 0x1
203#define SPR_SNCTL__FRZPROC_MASK 0x2
204#define SPR_SNPC 0x080b
205#define SPR_SNSTATIC 0x080c 203#define SPR_SNSTATIC 0x080c
206#define SPR_SYSTEM_SAVE_0_0 0x4b00 204#define SPR_SYSTEM_SAVE_0_0 0x4b00
207#define SPR_SYSTEM_SAVE_0_1 0x4b01 205#define SPR_SYSTEM_SAVE_0_1 0x4b01
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index f084f1c7afde..088d5c141e68 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -32,12 +32,6 @@
32 32
33#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR) 33#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
34 34
35#if !CHIP_HAS_WH64()
36 /* By making this an empty macro, we can use wh64 in the code. */
37 .macro wh64 reg
38 .endm
39#endif
40
41 .macro push_reg reg, ptr=sp, delta=-4 35 .macro push_reg reg, ptr=sp, delta=-4
42 { 36 {
43 sw \ptr, \reg 37 sw \ptr, \reg
@@ -325,18 +319,14 @@ intvec_\vecname:
325 movei r3, -1 /* not used, but set for consistency */ 319 movei r3, -1 /* not used, but set for consistency */
326 } 320 }
327 .else 321 .else
328#if CHIP_HAS_AUX_PERF_COUNTERS()
329 .ifc \c_routine, op_handle_aux_perf_interrupt 322 .ifc \c_routine, op_handle_aux_perf_interrupt
330 { 323 {
331 mfspr r2, AUX_PERF_COUNT_STS 324 mfspr r2, AUX_PERF_COUNT_STS
332 movei r3, -1 /* not used, but set for consistency */ 325 movei r3, -1 /* not used, but set for consistency */
333 } 326 }
334 .else 327 .else
335#endif
336 movei r3, 0 328 movei r3, 0
337#if CHIP_HAS_AUX_PERF_COUNTERS()
338 .endif 329 .endif
339#endif
340 .endif 330 .endif
341 .endif 331 .endif
342 .endif 332 .endif
@@ -561,7 +551,6 @@ intvec_\vecname:
561 .endif 551 .endif
562 mtspr INTERRUPT_CRITICAL_SECTION, zero 552 mtspr INTERRUPT_CRITICAL_SECTION, zero
563 553
564#if CHIP_HAS_WH64()
565 /* 554 /*
566 * Prepare the first 256 stack bytes to be rapidly accessible 555 * Prepare the first 256 stack bytes to be rapidly accessible
567 * without having to fetch the background data. We don't really 556 * without having to fetch the background data. We don't really
@@ -582,7 +571,6 @@ intvec_\vecname:
582 addi r52, r52, -64 571 addi r52, r52, -64
583 } 572 }
584 wh64 r52 573 wh64 r52
585#endif
586 574
587#ifdef CONFIG_TRACE_IRQFLAGS 575#ifdef CONFIG_TRACE_IRQFLAGS
588 .ifnc \function,handle_nmi 576 .ifnc \function,handle_nmi
@@ -1533,12 +1521,10 @@ STD_ENTRY(_sys_clone)
1533 __HEAD 1521 __HEAD
1534 .align 64 1522 .align 64
1535 /* Align much later jump on the start of a cache line. */ 1523 /* Align much later jump on the start of a cache line. */
1536#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
1537 nop 1524 nop
1538#if PAGE_SIZE >= 0x10000 1525#if PAGE_SIZE >= 0x10000
1539 nop 1526 nop
1540#endif 1527#endif
1541#endif
1542ENTRY(sys_cmpxchg) 1528ENTRY(sys_cmpxchg)
1543 1529
1544 /* 1530 /*
@@ -1572,45 +1558,6 @@ ENTRY(sys_cmpxchg)
1572# error Code here assumes PAGE_OFFSET can be loaded with just hi16() 1558# error Code here assumes PAGE_OFFSET can be loaded with just hi16()
1573#endif 1559#endif
1574 1560
1575#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
1576 {
1577 /* Check for unaligned input. */
1578 bnz sp, .Lcmpxchg_badaddr
1579 mm r25, r0, zero, 3, PAGE_SHIFT-1
1580 }
1581 {
1582 crc32_32 r25, zero, r25
1583 moveli r21, lo16(atomic_lock_ptr)
1584 }
1585 {
1586 auli r21, r21, ha16(atomic_lock_ptr)
1587 auli r23, zero, hi16(PAGE_OFFSET) /* hugepage-aligned */
1588 }
1589 {
1590 shri r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
1591 slt_u r23, r0, r23
1592 lw r26, r0 /* see comment in the "#else" for the "lw r26". */
1593 }
1594 {
1595 s2a r21, r20, r21
1596 bbns r23, .Lcmpxchg_badaddr
1597 }
1598 {
1599 lw r21, r21
1600 seqi r23, TREG_SYSCALL_NR_NAME, __NR_FAST_cmpxchg64
1601 andi r25, r25, ATOMIC_HASH_L2_SIZE - 1
1602 }
1603 {
1604 /* Branch away at this point if we're doing a 64-bit cmpxchg. */
1605 bbs r23, .Lcmpxchg64
1606 andi r23, r0, 7 /* Precompute alignment for cmpxchg64. */
1607 }
1608 {
1609 s2a ATOMIC_LOCK_REG_NAME, r25, r21
1610 j .Lcmpxchg32_tns /* see comment in the #else for the jump. */
1611 }
1612
1613#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
1614 { 1561 {
1615 /* Check for unaligned input. */ 1562 /* Check for unaligned input. */
1616 bnz sp, .Lcmpxchg_badaddr 1563 bnz sp, .Lcmpxchg_badaddr
@@ -1635,12 +1582,9 @@ ENTRY(sys_cmpxchg)
1635 1582
1636 /* 1583 /*
1637 * Ensure that the TLB is loaded before we take out the lock. 1584 * Ensure that the TLB is loaded before we take out the lock.
1638 * On tilepro, this will start fetching the value all the way 1585 * This will start fetching the value all the way into our L1
1639 * into our L1 as well (and if it gets modified before we 1586 * as well (and if it gets modified before we grab the lock,
1640 * grab the lock, it will be invalidated from our cache 1587 * it will be invalidated from our cache before we reload it).
1641 * before we reload it). On tile64, we'll start fetching it
1642 * into our L1 if we're the home, and if we're not, we'll
1643 * still at least start fetching it into the home's L2.
1644 */ 1588 */
1645 lw r26, r0 1589 lw r26, r0
1646 } 1590 }
@@ -1683,8 +1627,6 @@ ENTRY(sys_cmpxchg)
1683 j .Lcmpxchg32_tns 1627 j .Lcmpxchg32_tns
1684 } 1628 }
1685 1629
1686#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
1687
1688/* Symbol for do_page_fault_ics() to use to compare against the PC. */ 1630/* Symbol for do_page_fault_ics() to use to compare against the PC. */
1689.global __sys_cmpxchg_grab_lock 1631.global __sys_cmpxchg_grab_lock
1690__sys_cmpxchg_grab_lock: 1632__sys_cmpxchg_grab_lock:
@@ -1822,9 +1764,6 @@ __sys_cmpxchg_grab_lock:
1822 .align 64 1764 .align 64
1823.Lcmpxchg64: 1765.Lcmpxchg64:
1824 { 1766 {
1825#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
1826 s2a ATOMIC_LOCK_REG_NAME, r25, r21
1827#endif
1828 bzt r23, .Lcmpxchg64_tns 1767 bzt r23, .Lcmpxchg64_tns
1829 } 1768 }
1830 j .Lcmpxchg_badaddr 1769 j .Lcmpxchg_badaddr
@@ -1959,10 +1898,8 @@ int_unalign:
1959 do_page_fault 1898 do_page_fault
1960 int_hand INT_SN_CPL, SN_CPL, bad_intr 1899 int_hand INT_SN_CPL, SN_CPL, bad_intr
1961 int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap 1900 int_hand INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
1962#if CHIP_HAS_AUX_PERF_COUNTERS()
1963 int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \ 1901 int_hand INT_AUX_PERF_COUNT, AUX_PERF_COUNT, \
1964 op_handle_aux_perf_interrupt, handle_nmi 1902 op_handle_aux_perf_interrupt, handle_nmi
1965#endif
1966 1903
1967 /* Synthetic interrupt delivered only by the simulator */ 1904 /* Synthetic interrupt delivered only by the simulator */
1968 int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint 1905 int_hand INT_BREAKPOINT, BREAKPOINT, do_breakpoint
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index c3a2335fa6a8..ec755d3f3734 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -511,12 +511,10 @@ intvec_\vecname:
511 .else 511 .else
512 .ifc \c_routine, op_handle_perf_interrupt 512 .ifc \c_routine, op_handle_perf_interrupt
513 mfspr r2, PERF_COUNT_STS 513 mfspr r2, PERF_COUNT_STS
514#if CHIP_HAS_AUX_PERF_COUNTERS()
515 .else 514 .else
516 .ifc \c_routine, op_handle_aux_perf_interrupt 515 .ifc \c_routine, op_handle_aux_perf_interrupt
517 mfspr r2, AUX_PERF_COUNT_STS 516 mfspr r2, AUX_PERF_COUNT_STS
518 .endif 517 .endif
519#endif
520 .endif 518 .endif
521 .endif 519 .endif
522 .endif 520 .endif
diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c
index 0e6c521b8a89..d8ba06058fd0 100644
--- a/arch/tile/kernel/irq.c
+++ b/arch/tile/kernel/irq.c
@@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(available_irqs_lock);
74 74
75/* 75/*
76 * The interrupt handling path, implemented in terms of HV interrupt 76 * The interrupt handling path, implemented in terms of HV interrupt
77 * emulation on TILE64 and TILEPro, and IPI hardware on TILE-Gx. 77 * emulation on TILEPro, and IPI hardware on TILE-Gx.
78 * Entered with interrupts disabled. 78 * Entered with interrupts disabled.
79 */ 79 */
80void tile_dev_intr(struct pt_regs *regs, int intnum) 80void tile_dev_intr(struct pt_regs *regs, int intnum)
@@ -235,7 +235,7 @@ void tile_irq_activate(unsigned int irq, int tile_irq_type)
235{ 235{
236 /* 236 /*
237 * We use handle_level_irq() by default because the pending 237 * We use handle_level_irq() by default because the pending
238 * interrupt vector (whether modeled by the HV on TILE64 and 238 * interrupt vector (whether modeled by the HV on
239 * TILEPro or implemented in hardware on TILE-Gx) has 239 * TILEPro or implemented in hardware on TILE-Gx) has
240 * level-style semantics for each bit. An interrupt fires 240 * level-style semantics for each bit. An interrupt fires
241 * whenever a bit is high, not just at edges. 241 * whenever a bit is high, not just at edges.
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 44cdc4aa59e8..16ed58948757 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -187,16 +187,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
187 memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb)); 187 memset(&p->thread.dma_async_tlb, 0, sizeof(struct async_tlb));
188#endif 188#endif
189 189
190#if CHIP_HAS_SN_PROC()
191 /* Likewise, the new thread is not running static processor code. */
192 p->thread.sn_proc_running = 0;
193 memset(&p->thread.sn_async_tlb, 0, sizeof(struct async_tlb));
194#endif
195
196#if CHIP_HAS_PROC_STATUS_SPR()
197 /* New thread has its miscellaneous processor state bits clear. */ 190 /* New thread has its miscellaneous processor state bits clear. */
198 p->thread.proc_status = 0; 191 p->thread.proc_status = 0;
199#endif
200 192
201#ifdef CONFIG_HARDWALL 193#ifdef CONFIG_HARDWALL
202 /* New thread does not own any networks. */ 194 /* New thread does not own any networks. */
@@ -378,15 +370,11 @@ static void save_arch_state(struct thread_struct *t)
378 t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2); 370 t->system_save[2] = __insn_mfspr(SPR_SYSTEM_SAVE_0_2);
379 t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3); 371 t->system_save[3] = __insn_mfspr(SPR_SYSTEM_SAVE_0_3);
380 t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS); 372 t->intctrl_0 = __insn_mfspr(SPR_INTCTRL_0_STATUS);
381#if CHIP_HAS_PROC_STATUS_SPR()
382 t->proc_status = __insn_mfspr(SPR_PROC_STATUS); 373 t->proc_status = __insn_mfspr(SPR_PROC_STATUS);
383#endif
384#if !CHIP_HAS_FIXED_INTVEC_BASE() 374#if !CHIP_HAS_FIXED_INTVEC_BASE()
385 t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0); 375 t->interrupt_vector_base = __insn_mfspr(SPR_INTERRUPT_VECTOR_BASE_0);
386#endif 376#endif
387#if CHIP_HAS_TILE_RTF_HWM()
388 t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM); 377 t->tile_rtf_hwm = __insn_mfspr(SPR_TILE_RTF_HWM);
389#endif
390#if CHIP_HAS_DSTREAM_PF() 378#if CHIP_HAS_DSTREAM_PF()
391 t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF); 379 t->dstream_pf = __insn_mfspr(SPR_DSTREAM_PF);
392#endif 380#endif
@@ -407,15 +395,11 @@ static void restore_arch_state(const struct thread_struct *t)
407 __insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]); 395 __insn_mtspr(SPR_SYSTEM_SAVE_0_2, t->system_save[2]);
408 __insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]); 396 __insn_mtspr(SPR_SYSTEM_SAVE_0_3, t->system_save[3]);
409 __insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0); 397 __insn_mtspr(SPR_INTCTRL_0_STATUS, t->intctrl_0);
410#if CHIP_HAS_PROC_STATUS_SPR()
411 __insn_mtspr(SPR_PROC_STATUS, t->proc_status); 398 __insn_mtspr(SPR_PROC_STATUS, t->proc_status);
412#endif
413#if !CHIP_HAS_FIXED_INTVEC_BASE() 399#if !CHIP_HAS_FIXED_INTVEC_BASE()
414 __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base); 400 __insn_mtspr(SPR_INTERRUPT_VECTOR_BASE_0, t->interrupt_vector_base);
415#endif 401#endif
416#if CHIP_HAS_TILE_RTF_HWM()
417 __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm); 402 __insn_mtspr(SPR_TILE_RTF_HWM, t->tile_rtf_hwm);
418#endif
419#if CHIP_HAS_DSTREAM_PF() 403#if CHIP_HAS_DSTREAM_PF()
420 __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf); 404 __insn_mtspr(SPR_DSTREAM_PF, t->dstream_pf);
421#endif 405#endif
@@ -424,26 +408,11 @@ static void restore_arch_state(const struct thread_struct *t)
424 408
425void _prepare_arch_switch(struct task_struct *next) 409void _prepare_arch_switch(struct task_struct *next)
426{ 410{
427#if CHIP_HAS_SN_PROC()
428 int snctl;
429#endif
430#if CHIP_HAS_TILE_DMA() 411#if CHIP_HAS_TILE_DMA()
431 struct tile_dma_state *dma = &current->thread.tile_dma_state; 412 struct tile_dma_state *dma = &current->thread.tile_dma_state;
432 if (dma->enabled) 413 if (dma->enabled)
433 save_tile_dma_state(dma); 414 save_tile_dma_state(dma);
434#endif 415#endif
435#if CHIP_HAS_SN_PROC()
436 /*
437 * Suspend the static network processor if it was running.
438 * We do not suspend the fabric itself, just like we don't
439 * try to suspend the UDN.
440 */
441 snctl = __insn_mfspr(SPR_SNCTL);
442 current->thread.sn_proc_running =
443 (snctl & SPR_SNCTL__FRZPROC_MASK) == 0;
444 if (current->thread.sn_proc_running)
445 __insn_mtspr(SPR_SNCTL, snctl | SPR_SNCTL__FRZPROC_MASK);
446#endif
447} 416}
448 417
449 418
@@ -471,17 +440,6 @@ struct task_struct *__sched _switch_to(struct task_struct *prev,
471 /* Restore other arch state. */ 440 /* Restore other arch state. */
472 restore_arch_state(&next->thread); 441 restore_arch_state(&next->thread);
473 442
474#if CHIP_HAS_SN_PROC()
475 /*
476 * Restart static network processor in the new process
477 * if it was running before.
478 */
479 if (next->thread.sn_proc_running) {
480 int snctl = __insn_mfspr(SPR_SNCTL);
481 __insn_mtspr(SPR_SNCTL, snctl & ~SPR_SNCTL__FRZPROC_MASK);
482 }
483#endif
484
485#ifdef CONFIG_HARDWALL 443#ifdef CONFIG_HARDWALL
486 /* Enable or disable access to the network registers appropriately. */ 444 /* Enable or disable access to the network registers appropriately. */
487 hardwall_switch_tasks(prev, next); 445 hardwall_switch_tasks(prev, next);
@@ -523,7 +481,7 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
523 schedule(); 481 schedule();
524 return 1; 482 return 1;
525 } 483 }
526#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() 484#if CHIP_HAS_TILE_DMA()
527 if (thread_info_flags & _TIF_ASYNC_TLB) { 485 if (thread_info_flags & _TIF_ASYNC_TLB) {
528 do_async_page_fault(regs); 486 do_async_page_fault(regs);
529 return 1; 487 return 1;
diff --git a/arch/tile/kernel/relocate_kernel_32.S b/arch/tile/kernel/relocate_kernel_32.S
index f7fd37b64a78..e44fbcf8cbd5 100644
--- a/arch/tile/kernel/relocate_kernel_32.S
+++ b/arch/tile/kernel/relocate_kernel_32.S
@@ -77,7 +77,6 @@ STD_ENTRY(relocate_new_kernel)
77 move r30, sp 77 move r30, sp
78 addi sp, sp, -8 78 addi sp, sp, -8
79 79
80#if CHIP_HAS_CBOX_HOME_MAP()
81 /* 80 /*
82 * On TILEPro, we need to flush all tiles' caches, since we may 81 * On TILEPro, we need to flush all tiles' caches, since we may
83 * have been doing hash-for-home caching there. Note that we 82 * have been doing hash-for-home caching there. Note that we
@@ -113,7 +112,6 @@ STD_ENTRY(relocate_new_kernel)
113 } 112 }
114 113
115 jalr r20 114 jalr r20
116#endif
117 115
118 /* r33 is destination pointer, default to zero */ 116 /* r33 is destination pointer, default to zero */
119 117
diff --git a/arch/tile/kernel/relocate_kernel_64.S b/arch/tile/kernel/relocate_kernel_64.S
index 02bc44621021..d9d8cf6176e8 100644
--- a/arch/tile/kernel/relocate_kernel_64.S
+++ b/arch/tile/kernel/relocate_kernel_64.S
@@ -78,7 +78,6 @@ STD_ENTRY(relocate_new_kernel)
78 move r30, sp 78 move r30, sp
79 addi sp, sp, -16 79 addi sp, sp, -16
80 80
81#if CHIP_HAS_CBOX_HOME_MAP()
82 /* 81 /*
83 * On TILE-GX, we need to flush all tiles' caches, since we may 82 * On TILE-GX, we need to flush all tiles' caches, since we may
84 * have been doing hash-for-home caching there. Note that we 83 * have been doing hash-for-home caching there. Note that we
@@ -116,7 +115,6 @@ STD_ENTRY(relocate_new_kernel)
116 shl16insli r20, r20, hw0(hv_flush_remote) 115 shl16insli r20, r20, hw0(hv_flush_remote)
117 116
118 jalr r20 117 jalr r20
119#endif
120 118
121 /* r33 is destination pointer, default to zero */ 119 /* r33 is destination pointer, default to zero */
122 120
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index b79c312ca3cb..128a2d0b8650 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -1046,9 +1046,6 @@ void __cpuinit setup_cpu(int boot)
1046 arch_local_irq_unmask(INT_DMATLB_MISS); 1046 arch_local_irq_unmask(INT_DMATLB_MISS);
1047 arch_local_irq_unmask(INT_DMATLB_ACCESS); 1047 arch_local_irq_unmask(INT_DMATLB_ACCESS);
1048#endif 1048#endif
1049#if CHIP_HAS_SN_PROC()
1050 arch_local_irq_unmask(INT_SNITLB_MISS);
1051#endif
1052#ifdef __tilegx__ 1049#ifdef __tilegx__
1053 arch_local_irq_unmask(INT_SINGLE_STEP_K); 1050 arch_local_irq_unmask(INT_SINGLE_STEP_K);
1054#endif 1051#endif
@@ -1063,10 +1060,6 @@ void __cpuinit setup_cpu(int boot)
1063 /* Static network is not restricted. */ 1060 /* Static network is not restricted. */
1064 __insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1); 1061 __insn_mtspr(SPR_MPL_SN_ACCESS_SET_0, 1);
1065#endif 1062#endif
1066#if CHIP_HAS_SN_PROC()
1067 __insn_mtspr(SPR_MPL_SN_NOTIFY_SET_0, 1);
1068 __insn_mtspr(SPR_MPL_SN_CPL_SET_0, 1);
1069#endif
1070 1063
1071 /* 1064 /*
1072 * Set the MPL for interrupt control 0 & 1 to the corresponding 1065 * Set the MPL for interrupt control 0 & 1 to the corresponding
@@ -1291,7 +1284,6 @@ static void __init validate_va(void)
1291struct cpumask __write_once cpu_lotar_map; 1284struct cpumask __write_once cpu_lotar_map;
1292EXPORT_SYMBOL(cpu_lotar_map); 1285EXPORT_SYMBOL(cpu_lotar_map);
1293 1286
1294#if CHIP_HAS_CBOX_HOME_MAP()
1295/* 1287/*
1296 * hash_for_home_map lists all the tiles that hash-for-home data 1288 * hash_for_home_map lists all the tiles that hash-for-home data
1297 * will be cached on. Note that this may includes tiles that are not 1289 * will be cached on. Note that this may includes tiles that are not
@@ -1301,7 +1293,6 @@ EXPORT_SYMBOL(cpu_lotar_map);
1301 */ 1293 */
1302struct cpumask hash_for_home_map; 1294struct cpumask hash_for_home_map;
1303EXPORT_SYMBOL(hash_for_home_map); 1295EXPORT_SYMBOL(hash_for_home_map);
1304#endif
1305 1296
1306/* 1297/*
1307 * cpu_cacheable_map lists all the cpus whose caches the hypervisor can 1298 * cpu_cacheable_map lists all the cpus whose caches the hypervisor can
@@ -1394,7 +1385,6 @@ static void __init setup_cpu_maps(void)
1394 cpu_lotar_map = *cpu_possible_mask; 1385 cpu_lotar_map = *cpu_possible_mask;
1395 } 1386 }
1396 1387
1397#if CHIP_HAS_CBOX_HOME_MAP()
1398 /* Retrieve set of CPUs used for hash-for-home caching */ 1388 /* Retrieve set of CPUs used for hash-for-home caching */
1399 rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE, 1389 rc = hv_inquire_tiles(HV_INQ_TILES_HFH_CACHE,
1400 (HV_VirtAddr) hash_for_home_map.bits, 1390 (HV_VirtAddr) hash_for_home_map.bits,
@@ -1402,9 +1392,6 @@ static void __init setup_cpu_maps(void)
1402 if (rc < 0) 1392 if (rc < 0)
1403 early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc); 1393 early_panic("hv_inquire_tiles(HFH_CACHE) failed: rc %d\n", rc);
1404 cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map); 1394 cpumask_or(&cpu_cacheable_map, cpu_possible_mask, &hash_for_home_map);
1405#else
1406 cpu_cacheable_map = *cpu_possible_mask;
1407#endif
1408} 1395}
1409 1396
1410 1397
diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c
index 5ef2e9eae5c5..de07fa7d1315 100644
--- a/arch/tile/kernel/single_step.c
+++ b/arch/tile/kernel/single_step.c
@@ -546,7 +546,6 @@ void single_step_once(struct pt_regs *regs)
546 } 546 }
547 break; 547 break;
548 548
549#if CHIP_HAS_WH64()
550 /* postincrement operations */ 549 /* postincrement operations */
551 case IMM_0_OPCODE_X1: 550 case IMM_0_OPCODE_X1:
552 switch (get_ImmOpcodeExtension_X1(bundle)) { 551 switch (get_ImmOpcodeExtension_X1(bundle)) {
@@ -581,7 +580,6 @@ void single_step_once(struct pt_regs *regs)
581 break; 580 break;
582 } 581 }
583 break; 582 break;
584#endif /* CHIP_HAS_WH64() */
585 } 583 }
586 584
587 if (state->update) { 585 if (state->update) {
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
index 9adfd76fbdd8..c4211cbb2021 100644
--- a/arch/tile/lib/Makefile
+++ b/arch/tile/lib/Makefile
@@ -7,7 +7,7 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o uaccess.o \
7 strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o 7 strchr_$(BITS).o strlen_$(BITS).o strnlen_$(BITS).o
8 8
9lib-$(CONFIG_TILEGX) += memcpy_user_64.o 9lib-$(CONFIG_TILEGX) += memcpy_user_64.o
10lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o memcpy_tile64.o 10lib-$(CONFIG_TILEPRO) += atomic_32.o atomic_asm_32.o
11lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o 11lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
12 12
13obj-$(CONFIG_MODULES) += exports.o 13obj-$(CONFIG_MODULES) += exports.o
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
index 42eacb1f737a..5d91d1860640 100644
--- a/arch/tile/lib/atomic_32.c
+++ b/arch/tile/lib/atomic_32.c
@@ -20,50 +20,12 @@
20#include <linux/atomic.h> 20#include <linux/atomic.h>
21#include <arch/chip.h> 21#include <arch/chip.h>
22 22
23/* See <asm/atomic_32.h> */
24#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
25
26/*
27 * A block of memory containing locks for atomic ops. Each instance of this
28 * struct will be homed on a different CPU.
29 */
30struct atomic_locks_on_cpu {
31 int lock[ATOMIC_HASH_L2_SIZE];
32} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));
33
34static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);
35
36/* The locks we'll use until __init_atomic_per_cpu is called. */
37static struct atomic_locks_on_cpu __initdata initial_atomic_locks;
38
39/* Hash into this vector to get a pointer to lock for the given atomic. */
40struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
41 __write_once = {
42 [0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks)
43};
44
45#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
46
47/* This page is remapped on startup to be hash-for-home. */ 23/* This page is remapped on startup to be hash-for-home. */
48int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss; 24int atomic_locks[PAGE_SIZE / sizeof(int)] __page_aligned_bss;
49 25
50#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
51
52int *__atomic_hashed_lock(volatile void *v) 26int *__atomic_hashed_lock(volatile void *v)
53{ 27{
54 /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */ 28 /* NOTE: this code must match "sys_cmpxchg" in kernel/intvec_32.S */
55#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
56 unsigned long i =
57 (unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
58 unsigned long n = __insn_crc32_32(0, i);
59
60 /* Grab high bits for L1 index. */
61 unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
62 /* Grab low bits for L2 index. */
63 unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);
64
65 return &atomic_lock_ptr[l1_index]->lock[l2_index];
66#else
67 /* 29 /*
68 * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index. 30 * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
69 * Using mm works here because atomic_locks is page aligned. 31 * Using mm works here because atomic_locks is page aligned.
@@ -72,26 +34,13 @@ int *__atomic_hashed_lock(volatile void *v)
72 (unsigned long)atomic_locks, 34 (unsigned long)atomic_locks,
73 2, (ATOMIC_HASH_SHIFT + 2) - 1); 35 2, (ATOMIC_HASH_SHIFT + 2) - 1);
74 return (int *)ptr; 36 return (int *)ptr;
75#endif
76} 37}
77 38
78#ifdef CONFIG_SMP 39#ifdef CONFIG_SMP
79/* Return whether the passed pointer is a valid atomic lock pointer. */ 40/* Return whether the passed pointer is a valid atomic lock pointer. */
80static int is_atomic_lock(int *p) 41static int is_atomic_lock(int *p)
81{ 42{
82#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
83 int i;
84 for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
85
86 if (p >= &atomic_lock_ptr[i]->lock[0] &&
87 p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) {
88 return 1;
89 }
90 }
91 return 0;
92#else
93 return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE]; 43 return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
94#endif
95} 44}
96 45
97void __atomic_fault_unlock(int *irqlock_word) 46void __atomic_fault_unlock(int *irqlock_word)
@@ -210,43 +159,6 @@ struct __get_user __atomic_bad_address(int __user *addr)
210 159
211void __init __init_atomic_per_cpu(void) 160void __init __init_atomic_per_cpu(void)
212{ 161{
213#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
214
215 unsigned int i;
216 int actual_cpu;
217
218 /*
219 * Before this is called from setup, we just have one lock for
220 * all atomic objects/operations. Here we replace the
221 * elements of atomic_lock_ptr so that they point at per_cpu
222 * integers. This seemingly over-complex approach stems from
223 * the fact that DEFINE_PER_CPU defines an entry for each cpu
224 * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1. But
225 * for efficient hashing of atomics to their locks we want a
226 * compile time constant power of 2 for the size of this
227 * table, so we use ATOMIC_HASH_SIZE.
228 *
229 * Here we populate atomic_lock_ptr from the per cpu
230 * atomic_lock_pool, interspersing by actual cpu so that
231 * subsequent elements are homed on consecutive cpus.
232 */
233
234 actual_cpu = cpumask_first(cpu_possible_mask);
235
236 for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
237 /*
238 * Preincrement to slightly bias against using cpu 0,
239 * which has plenty of stuff homed on it already.
240 */
241 actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
242 if (actual_cpu >= nr_cpu_ids)
243 actual_cpu = cpumask_first(cpu_possible_mask);
244
245 atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
246 }
247
248#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
249
250 /* Validate power-of-two and "bigger than cpus" assumption */ 162 /* Validate power-of-two and "bigger than cpus" assumption */
251 BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1)); 163 BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
252 BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids); 164 BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
@@ -270,6 +182,4 @@ void __init __init_atomic_per_cpu(void)
270 * That should not produce more indices than ATOMIC_HASH_SIZE. 182 * That should not produce more indices than ATOMIC_HASH_SIZE.
271 */ 183 */
272 BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); 184 BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
273
274#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
275} 185}
diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S
index 8ba7626cfeb1..a2771ae5da53 100644
--- a/arch/tile/lib/memcpy_32.S
+++ b/arch/tile/lib/memcpy_32.S
@@ -22,14 +22,6 @@
22 22
23#include <linux/linkage.h> 23#include <linux/linkage.h>
24 24
25/* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */
26#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
27#define memcpy __memcpy_asm
28#define __copy_to_user_inatomic __copy_to_user_inatomic_asm
29#define __copy_from_user_inatomic __copy_from_user_inatomic_asm
30#define __copy_from_user_zeroing __copy_from_user_zeroing_asm
31#endif
32
33#define IS_MEMCPY 0 25#define IS_MEMCPY 0
34#define IS_COPY_FROM_USER 1 26#define IS_COPY_FROM_USER 1
35#define IS_COPY_FROM_USER_ZEROING 2 27#define IS_COPY_FROM_USER_ZEROING 2
@@ -159,12 +151,9 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 }
159 151
160 { addi r3, r1, 60; andi r9, r9, -64 } 152 { addi r3, r1, 60; andi r9, r9, -64 }
161 153
162#if CHIP_HAS_WH64()
163 /* No need to prefetch dst, we'll just do the wh64 154 /* No need to prefetch dst, we'll just do the wh64
164 * right before we copy a line. 155 * right before we copy a line.
165 */ 156 */
166#endif
167
168EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 } 157EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 }
169 /* Intentionally stall for a few cycles to leave L2 cache alone. */ 158 /* Intentionally stall for a few cycles to leave L2 cache alone. */
170 { bnzt zero, .; move r27, lr } 159 { bnzt zero, .; move r27, lr }
@@ -172,21 +161,6 @@ EX: { lw r6, r3; addi r3, r3, 64 }
172 /* Intentionally stall for a few cycles to leave L2 cache alone. */ 161 /* Intentionally stall for a few cycles to leave L2 cache alone. */
173 { bnzt zero, . } 162 { bnzt zero, . }
174EX: { lw r7, r3; addi r3, r3, 64 } 163EX: { lw r7, r3; addi r3, r3, 64 }
175#if !CHIP_HAS_WH64()
176 /* Prefetch the dest */
177 /* Intentionally stall for a few cycles to leave L2 cache alone. */
178 { bnzt zero, . }
179 /* Use a real load to cause a TLB miss if necessary. We aren't using
180 * r28, so this should be fine.
181 */
182EX: { lw r28, r9; addi r9, r9, 64 }
183 /* Intentionally stall for a few cycles to leave L2 cache alone. */
184 { bnzt zero, . }
185 { prefetch r9; addi r9, r9, 64 }
186 /* Intentionally stall for a few cycles to leave L2 cache alone. */
187 { bnzt zero, . }
188 { prefetch r9; addi r9, r9, 64 }
189#endif
190 /* Intentionally stall for a few cycles to leave L2 cache alone. */ 164 /* Intentionally stall for a few cycles to leave L2 cache alone. */
191 { bz zero, .Lbig_loop2 } 165 { bz zero, .Lbig_loop2 }
192 166
@@ -287,13 +261,8 @@ EX: { lw r7, r3; addi r3, r3, 64 }
287 /* Fill second L1D line. */ 261 /* Fill second L1D line. */
288EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ 262EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */
289 263
290#if CHIP_HAS_WH64()
291 /* Prepare destination line for writing. */ 264 /* Prepare destination line for writing. */
292EX: { wh64 r9; addi r9, r9, 64 } 265EX: { wh64 r9; addi r9, r9, 64 }
293#else
294 /* Prefetch dest line */
295 { prefetch r9; addi r9, r9, 64 }
296#endif
297 /* Load seven words that are L1D hits to cover wh64 L2 usage. */ 266 /* Load seven words that are L1D hits to cover wh64 L2 usage. */
298 267
299 /* Load the three remaining words from the last L1D line, which 268 /* Load the three remaining words from the last L1D line, which
@@ -331,16 +300,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */
331EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ 300EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */
332EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ 301EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */
333EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ 302EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */
334#if CHIP_HAS_WH64()
335EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ 303EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */
336#else
337 /* Back up the r9 to a cache line we are already storing to
338 * if it gets past the end of the dest vector. Strictly speaking,
339 * we don't need to back up to the start of a cache line, but it's free
340 * and tidy, so why not?
341 */
342EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */
343#endif
344 /* Store second L1D line. */ 304 /* Store second L1D line. */
345EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ 305EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */
346EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ 306EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */
@@ -404,7 +364,6 @@ EX: { sb r0, r3; addi r0, r0, 1; addi r2, r2, -1 }
404 364
405.Ldest_is_word_aligned: 365.Ldest_is_word_aligned:
406 366
407#if CHIP_HAS_DWORD_ALIGN()
408EX: { andi r8, r0, 63; lwadd_na r6, r1, 4} 367EX: { andi r8, r0, 63; lwadd_na r6, r1, 4}
409 { slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned } 368 { slti_u r9, r2, 64; bz r8, .Ldest_is_L2_line_aligned }
410 369
@@ -512,26 +471,6 @@ EX: { swadd r0, r13, 4; addi r2, r2, -32 }
512 /* Move r1 back to the point where it corresponds to r0. */ 471 /* Move r1 back to the point where it corresponds to r0. */
513 { addi r1, r1, -4 } 472 { addi r1, r1, -4 }
514 473
515#else /* !CHIP_HAS_DWORD_ALIGN() */
516
517 /* Compute right/left shift counts and load initial source words. */
518 { andi r5, r1, -4; andi r3, r1, 3 }
519EX: { lw r6, r5; addi r5, r5, 4; shli r3, r3, 3 }
520EX: { lw r7, r5; addi r5, r5, 4; sub r4, zero, r3 }
521
522 /* Load and store one word at a time, using shifts and ORs
523 * to correct for the misaligned src.
524 */
525.Lcopy_unaligned_src_loop:
526 { shr r6, r6, r3; shl r8, r7, r4 }
527EX: { lw r7, r5; or r8, r8, r6; move r6, r7 }
528EX: { sw r0, r8; addi r0, r0, 4; addi r2, r2, -4 }
529 { addi r5, r5, 4; slti_u r8, r2, 8 }
530 { bzt r8, .Lcopy_unaligned_src_loop; addi r1, r1, 4 }
531
532 { bz r2, .Lcopy_unaligned_done }
533#endif /* !CHIP_HAS_DWORD_ALIGN() */
534
535 /* Fall through */ 474 /* Fall through */
536 475
537/* 476/*
diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c
deleted file mode 100644
index 0290c222847b..000000000000
--- a/arch/tile/lib/memcpy_tile64.c
+++ /dev/null
@@ -1,280 +0,0 @@
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/string.h>
16#include <linux/smp.h>
17#include <linux/module.h>
18#include <linux/uaccess.h>
19#include <asm/fixmap.h>
20#include <asm/kmap_types.h>
21#include <asm/tlbflush.h>
22#include <hv/hypervisor.h>
23#include <arch/chip.h>
24
25
26#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
27
28/* Defined in memcpy.S */
29extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
30extern unsigned long __copy_to_user_inatomic_asm(
31 void __user *to, const void *from, unsigned long n);
32extern unsigned long __copy_from_user_inatomic_asm(
33 void *to, const void __user *from, unsigned long n);
34extern unsigned long __copy_from_user_zeroing_asm(
35 void *to, const void __user *from, unsigned long n);
36
37typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
38
39/* Size above which to consider TLB games for performance */
40#define LARGE_COPY_CUTOFF 2048
41
42/* Communicate to the simulator what we are trying to do. */
43#define sim_allow_multiple_caching(b) \
44 __insn_mtspr(SPR_SIM_CONTROL, \
45 SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
46
47/*
48 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
49 *
50 * We set up our own source and destination PTEs that we fully control.
51 * This is the only way to guarantee that we don't race with another
52 * thread that is modifying the PTE; we can't afford to try the
53 * copy_{to,from}_user() technique of catching the interrupt, since
54 * we must run with interrupts disabled to avoid the risk of some
55 * other code seeing the incoherent data in our cache. (Recall that
56 * our cache is indexed by PA, so even if the other code doesn't use
57 * our kmap_atomic virtual addresses, they'll still hit in cache using
58 * the normal VAs that aren't supposed to hit in cache.)
59 */
60static void memcpy_multicache(void *dest, const void *source,
61 pte_t dst_pte, pte_t src_pte, int len)
62{
63 int idx;
64 unsigned long flags, newsrc, newdst;
65 pmd_t *pmdp;
66 pte_t *ptep;
67 int type0, type1;
68 int cpu = smp_processor_id();
69
70 /*
71 * Disable interrupts so that we don't recurse into memcpy()
72 * in an interrupt handler, nor accidentally reference
73 * the PA of the source from an interrupt routine. Also
74 * notify the simulator that we're playing games so we don't
75 * generate spurious coherency warnings.
76 */
77 local_irq_save(flags);
78 sim_allow_multiple_caching(1);
79
80 /* Set up the new dest mapping */
81 type0 = kmap_atomic_idx_push();
82 idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
83 newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
84 pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
85 ptep = pte_offset_kernel(pmdp, newdst);
86 if (pte_val(*ptep) != pte_val(dst_pte)) {
87 set_pte(ptep, dst_pte);
88 local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
89 }
90
91 /* Set up the new source mapping */
92 type1 = kmap_atomic_idx_push();
93 idx += (type0 - type1);
94 src_pte = hv_pte_set_nc(src_pte);
95 src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
96 newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
97 pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
98 ptep = pte_offset_kernel(pmdp, newsrc);
99 __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
100 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
101
102 /* Actually move the data. */
103 __memcpy_asm((void *)newdst, (const void *)newsrc, len);
104
105 /*
106 * Remap the source as locally-cached and not OLOC'ed so that
107 * we can inval without also invaling the remote cpu's cache.
108 * This also avoids known errata with inv'ing cacheable oloc data.
109 */
110 src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
111 src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
112 __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
113 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
114
115 /*
116 * Do the actual invalidation, covering the full L2 cache line
117 * at the end since __memcpy_asm() is somewhat aggressive.
118 */
119 __inv_buffer((void *)newsrc, len);
120
121 /*
122 * We're done: notify the simulator that all is back to normal,
123 * and re-enable interrupts and pre-emption.
124 */
125 kmap_atomic_idx_pop();
126 kmap_atomic_idx_pop();
127 sim_allow_multiple_caching(0);
128 local_irq_restore(flags);
129}
130
131/*
132 * Identify large copies from remotely-cached memory, and copy them
133 * via memcpy_multicache() if they look good, otherwise fall back
134 * to the particular kind of copying passed as the memcpy_t function.
135 */
136static unsigned long fast_copy(void *dest, const void *source, int len,
137 memcpy_t func)
138{
139 int cpu = get_cpu();
140 unsigned long retval;
141
142 /*
143 * Check if it's big enough to bother with. We may end up doing a
144 * small copy via TLB manipulation if we're near a page boundary,
145 * but presumably we'll make it up when we hit the second page.
146 */
147 while (len >= LARGE_COPY_CUTOFF) {
148 int copy_size, bytes_left_on_page;
149 pte_t *src_ptep, *dst_ptep;
150 pte_t src_pte, dst_pte;
151 struct page *src_page, *dst_page;
152
153 /* Is the source page oloc'ed to a remote cpu? */
154retry_source:
155 src_ptep = virt_to_pte(current->mm, (unsigned long)source);
156 if (src_ptep == NULL)
157 break;
158 src_pte = *src_ptep;
159 if (!hv_pte_get_present(src_pte) ||
160 !hv_pte_get_readable(src_pte) ||
161 hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
162 break;
163 if (get_remote_cache_cpu(src_pte) == cpu)
164 break;
165 src_page = pfn_to_page(pte_pfn(src_pte));
166 get_page(src_page);
167 if (pte_val(src_pte) != pte_val(*src_ptep)) {
168 put_page(src_page);
169 goto retry_source;
170 }
171 if (pte_huge(src_pte)) {
172 /* Adjust the PTE to correspond to a small page */
173 int pfn = pte_pfn(src_pte);
174 pfn += (((unsigned long)source & (HPAGE_SIZE-1))
175 >> PAGE_SHIFT);
176 src_pte = pfn_pte(pfn, src_pte);
177 src_pte = pte_mksmall(src_pte);
178 }
179
180 /* Is the destination page writable? */
181retry_dest:
182 dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
183 if (dst_ptep == NULL) {
184 put_page(src_page);
185 break;
186 }
187 dst_pte = *dst_ptep;
188 if (!hv_pte_get_present(dst_pte) ||
189 !hv_pte_get_writable(dst_pte)) {
190 put_page(src_page);
191 break;
192 }
193 dst_page = pfn_to_page(pte_pfn(dst_pte));
194 if (dst_page == src_page) {
195 /*
196 * Source and dest are on the same page; this
197 * potentially exposes us to incoherence if any
198 * part of src and dest overlap on a cache line.
199 * Just give up rather than trying to be precise.
200 */
201 put_page(src_page);
202 break;
203 }
204 get_page(dst_page);
205 if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
206 put_page(dst_page);
207 goto retry_dest;
208 }
209 if (pte_huge(dst_pte)) {
210 /* Adjust the PTE to correspond to a small page */
211 int pfn = pte_pfn(dst_pte);
212 pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
213 >> PAGE_SHIFT);
214 dst_pte = pfn_pte(pfn, dst_pte);
215 dst_pte = pte_mksmall(dst_pte);
216 }
217
218 /* All looks good: create a cachable PTE and copy from it */
219 copy_size = len;
220 bytes_left_on_page =
221 PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
222 if (copy_size > bytes_left_on_page)
223 copy_size = bytes_left_on_page;
224 bytes_left_on_page =
225 PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
226 if (copy_size > bytes_left_on_page)
227 copy_size = bytes_left_on_page;
228 memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
229
230 /* Release the pages */
231 put_page(dst_page);
232 put_page(src_page);
233
234 /* Continue on the next page */
235 dest += copy_size;
236 source += copy_size;
237 len -= copy_size;
238 }
239
240 retval = func(dest, source, len);
241 put_cpu();
242 return retval;
243}
244
245void *memcpy(void *to, const void *from, __kernel_size_t n)
246{
247 if (n < LARGE_COPY_CUTOFF)
248 return (void *)__memcpy_asm(to, from, n);
249 else
250 return (void *)fast_copy(to, from, n, __memcpy_asm);
251}
252
253unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
254 unsigned long n)
255{
256 if (n < LARGE_COPY_CUTOFF)
257 return __copy_to_user_inatomic_asm(to, from, n);
258 else
259 return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
260}
261
262unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
263 unsigned long n)
264{
265 if (n < LARGE_COPY_CUTOFF)
266 return __copy_from_user_inatomic_asm(to, from, n);
267 else
268 return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
269}
270
271unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
272 unsigned long n)
273{
274 if (n < LARGE_COPY_CUTOFF)
275 return __copy_from_user_zeroing_asm(to, from, n);
276 else
277 return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
278}
279
280#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c
index 9a7837d11f7d..2042bfe6595f 100644
--- a/arch/tile/lib/memset_32.c
+++ b/arch/tile/lib/memset_32.c
@@ -23,11 +23,7 @@ void *memset(void *s, int c, size_t n)
23 int n32; 23 int n32;
24 uint32_t v16, v32; 24 uint32_t v16, v32;
25 uint8_t *out8 = s; 25 uint8_t *out8 = s;
26#if !CHIP_HAS_WH64()
27 int ahead32;
28#else
29 int to_align32; 26 int to_align32;
30#endif
31 27
32 /* Experimentation shows that a trivial tight loop is a win up until 28 /* Experimentation shows that a trivial tight loop is a win up until
33 * around a size of 20, where writing a word at a time starts to win. 29 * around a size of 20, where writing a word at a time starts to win.
@@ -58,21 +54,6 @@ void *memset(void *s, int c, size_t n)
58 return s; 54 return s;
59 } 55 }
60 56
61#if !CHIP_HAS_WH64()
62 /* Use a spare issue slot to start prefetching the first cache
63 * line early. This instruction is free as the store can be buried
64 * in otherwise idle issue slots doing ALU ops.
65 */
66 __insn_prefetch(out8);
67
68 /* We prefetch the end so that a short memset that spans two cache
69 * lines gets some prefetching benefit. Again we believe this is free
70 * to issue.
71 */
72 __insn_prefetch(&out8[n - 1]);
73#endif /* !CHIP_HAS_WH64() */
74
75
76 /* Align 'out8'. We know n >= 3 so this won't write past the end. */ 57 /* Align 'out8'. We know n >= 3 so this won't write past the end. */
77 while (((uintptr_t) out8 & 3) != 0) { 58 while (((uintptr_t) out8 & 3) != 0) {
78 *out8++ = c; 59 *out8++ = c;
@@ -93,90 +74,6 @@ void *memset(void *s, int c, size_t n)
93 /* This must be at least 8 or the following loop doesn't work. */ 74 /* This must be at least 8 or the following loop doesn't work. */
94#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4) 75#define CACHE_LINE_SIZE_IN_WORDS (CHIP_L2_LINE_SIZE() / 4)
95 76
96#if !CHIP_HAS_WH64()
97
98 ahead32 = CACHE_LINE_SIZE_IN_WORDS;
99
100 /* We already prefetched the first and last cache lines, so
101 * we only need to do more prefetching if we are storing
102 * to more than two cache lines.
103 */
104 if (n32 > CACHE_LINE_SIZE_IN_WORDS * 2) {
105 int i;
106
107 /* Prefetch the next several cache lines.
108 * This is the setup code for the software-pipelined
109 * loop below.
110 */
111#define MAX_PREFETCH 5
112 ahead32 = n32 & -CACHE_LINE_SIZE_IN_WORDS;
113 if (ahead32 > MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS)
114 ahead32 = MAX_PREFETCH * CACHE_LINE_SIZE_IN_WORDS;
115
116 for (i = CACHE_LINE_SIZE_IN_WORDS;
117 i < ahead32; i += CACHE_LINE_SIZE_IN_WORDS)
118 __insn_prefetch(&out32[i]);
119 }
120
121 if (n32 > ahead32) {
122 while (1) {
123 int j;
124
125 /* Prefetch by reading one word several cache lines
126 * ahead. Since loads are non-blocking this will
127 * cause the full cache line to be read while we are
128 * finishing earlier cache lines. Using a store
129 * here causes microarchitectural performance
130 * problems where a victimizing store miss goes to
131 * the head of the retry FIFO and locks the pipe for
132 * a few cycles. So a few subsequent stores in this
133 * loop go into the retry FIFO, and then later
134 * stores see other stores to the same cache line
135 * are already in the retry FIFO and themselves go
136 * into the retry FIFO, filling it up and grinding
137 * to a halt waiting for the original miss to be
138 * satisfied.
139 */
140 __insn_prefetch(&out32[ahead32]);
141
142#if CACHE_LINE_SIZE_IN_WORDS % 4 != 0
143#error "Unhandled CACHE_LINE_SIZE_IN_WORDS"
144#endif
145
146 n32 -= CACHE_LINE_SIZE_IN_WORDS;
147
148 /* Save icache space by only partially unrolling
149 * this loop.
150 */
151 for (j = CACHE_LINE_SIZE_IN_WORDS / 4; j > 0; j--) {
152 *out32++ = v32;
153 *out32++ = v32;
154 *out32++ = v32;
155 *out32++ = v32;
156 }
157
158 /* To save compiled code size, reuse this loop even
159 * when we run out of prefetching to do by dropping
160 * ahead32 down.
161 */
162 if (n32 <= ahead32) {
163 /* Not even a full cache line left,
164 * so stop now.
165 */
166 if (n32 < CACHE_LINE_SIZE_IN_WORDS)
167 break;
168
169 /* Choose a small enough value that we don't
170 * prefetch past the end. There's no sense
171 * in touching cache lines we don't have to.
172 */
173 ahead32 = CACHE_LINE_SIZE_IN_WORDS - 1;
174 }
175 }
176 }
177
178#else /* CHIP_HAS_WH64() */
179
180 /* Determine how many words we need to emit before the 'out32' 77 /* Determine how many words we need to emit before the 'out32'
181 * pointer becomes aligned modulo the cache line size. 78 * pointer becomes aligned modulo the cache line size.
182 */ 79 */
@@ -233,8 +130,6 @@ void *memset(void *s, int c, size_t n)
233 n32 &= CACHE_LINE_SIZE_IN_WORDS - 1; 130 n32 &= CACHE_LINE_SIZE_IN_WORDS - 1;
234 } 131 }
235 132
236#endif /* CHIP_HAS_WH64() */
237
238 /* Now handle any leftover values. */ 133 /* Now handle any leftover values. */
239 if (n32 != 0) { 134 if (n32 != 0) {
240 do { 135 do {
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 39c48cbe0a96..111d5a9b76f1 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -466,28 +466,15 @@ good_area:
466 } 466 }
467 } 467 }
468 468
469#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
470 /*
471 * If this was an asynchronous fault,
472 * restart the appropriate engine.
473 */
474 switch (fault_num) {
475#if CHIP_HAS_TILE_DMA() 469#if CHIP_HAS_TILE_DMA()
470 /* If this was a DMA TLB fault, restart the DMA engine. */
471 switch (fault_num) {
476 case INT_DMATLB_MISS: 472 case INT_DMATLB_MISS:
477 case INT_DMATLB_MISS_DWNCL: 473 case INT_DMATLB_MISS_DWNCL:
478 case INT_DMATLB_ACCESS: 474 case INT_DMATLB_ACCESS:
479 case INT_DMATLB_ACCESS_DWNCL: 475 case INT_DMATLB_ACCESS_DWNCL:
480 __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK); 476 __insn_mtspr(SPR_DMA_CTR, SPR_DMA_CTR__REQUEST_MASK);
481 break; 477 break;
482#endif
483#if CHIP_HAS_SN_PROC()
484 case INT_SNITLB_MISS:
485 case INT_SNITLB_MISS_DWNCL:
486 __insn_mtspr(SPR_SNCTL,
487 __insn_mfspr(SPR_SNCTL) &
488 ~SPR_SNCTL__FRZPROC_MASK);
489 break;
490#endif
491 } 478 }
492#endif 479#endif
493 480
@@ -804,10 +791,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
804 case INT_DMATLB_MISS: 791 case INT_DMATLB_MISS:
805 case INT_DMATLB_MISS_DWNCL: 792 case INT_DMATLB_MISS_DWNCL:
806#endif 793#endif
807#if CHIP_HAS_SN_PROC()
808 case INT_SNITLB_MISS:
809 case INT_SNITLB_MISS_DWNCL:
810#endif
811 is_page_fault = 1; 794 is_page_fault = 1;
812 break; 795 break;
813 796
@@ -823,7 +806,7 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
823 panic("Bad fault number %d in do_page_fault", fault_num); 806 panic("Bad fault number %d in do_page_fault", fault_num);
824 } 807 }
825 808
826#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() 809#if CHIP_HAS_TILE_DMA()
827 if (!user_mode(regs)) { 810 if (!user_mode(regs)) {
828 struct async_tlb *async; 811 struct async_tlb *async;
829 switch (fault_num) { 812 switch (fault_num) {
@@ -835,12 +818,6 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
835 async = &current->thread.dma_async_tlb; 818 async = &current->thread.dma_async_tlb;
836 break; 819 break;
837#endif 820#endif
838#if CHIP_HAS_SN_PROC()
839 case INT_SNITLB_MISS:
840 case INT_SNITLB_MISS_DWNCL:
841 async = &current->thread.sn_async_tlb;
842 break;
843#endif
844 default: 821 default:
845 async = NULL; 822 async = NULL;
846 } 823 }
@@ -873,14 +850,22 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
873} 850}
874 851
875 852
876#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() 853#if CHIP_HAS_TILE_DMA()
877/* 854/*
878 * Check an async_tlb structure to see if a deferred fault is waiting, 855 * This routine effectively re-issues asynchronous page faults
879 * and if so pass it to the page-fault code. 856 * when we are returning to user space.
880 */ 857 */
881static void handle_async_page_fault(struct pt_regs *regs, 858void do_async_page_fault(struct pt_regs *regs)
882 struct async_tlb *async)
883{ 859{
860 struct async_tlb *async = &current->thread.dma_async_tlb;
861
862 /*
863 * Clear thread flag early. If we re-interrupt while processing
864 * code here, we will reset it and recall this routine before
865 * returning to user space.
866 */
867 clear_thread_flag(TIF_ASYNC_TLB);
868
884 if (async->fault_num) { 869 if (async->fault_num) {
885 /* 870 /*
886 * Clear async->fault_num before calling the page-fault 871 * Clear async->fault_num before calling the page-fault
@@ -894,28 +879,7 @@ static void handle_async_page_fault(struct pt_regs *regs,
894 async->address, async->is_write); 879 async->address, async->is_write);
895 } 880 }
896} 881}
897 882#endif /* CHIP_HAS_TILE_DMA() */
898/*
899 * This routine effectively re-issues asynchronous page faults
900 * when we are returning to user space.
901 */
902void do_async_page_fault(struct pt_regs *regs)
903{
904 /*
905 * Clear thread flag early. If we re-interrupt while processing
906 * code here, we will reset it and recall this routine before
907 * returning to user space.
908 */
909 clear_thread_flag(TIF_ASYNC_TLB);
910
911#if CHIP_HAS_TILE_DMA()
912 handle_async_page_fault(regs, &current->thread.dma_async_tlb);
913#endif
914#if CHIP_HAS_SN_PROC()
915 handle_async_page_fault(regs, &current->thread.sn_async_tlb);
916#endif
917}
918#endif /* CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC() */
919 883
920 884
921void vmalloc_sync_all(void) 885void vmalloc_sync_all(void)
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index e3ee55b0327a..004ba568d93f 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -43,12 +43,9 @@
43#include "migrate.h" 43#include "migrate.h"
44 44
45 45
46#if CHIP_HAS_COHERENT_LOCAL_CACHE()
47
48/* 46/*
49 * The noallocl2 option suppresses all use of the L2 cache to cache 47 * The noallocl2 option suppresses all use of the L2 cache to cache
50 * locally from a remote home. There's no point in using it if we 48 * locally from a remote home.
51 * don't have coherent local caching, though.
52 */ 49 */
53static int __write_once noallocl2; 50static int __write_once noallocl2;
54static int __init set_noallocl2(char *str) 51static int __init set_noallocl2(char *str)
@@ -58,12 +55,6 @@ static int __init set_noallocl2(char *str)
58} 55}
59early_param("noallocl2", set_noallocl2); 56early_param("noallocl2", set_noallocl2);
60 57
61#else
62
63#define noallocl2 0
64
65#endif
66
67 58
68/* 59/*
69 * Update the irq_stat for cpus that we are going to interrupt 60 * Update the irq_stat for cpus that we are going to interrupt
@@ -265,10 +256,8 @@ static int pte_to_home(pte_t pte)
265 return PAGE_HOME_INCOHERENT; 256 return PAGE_HOME_INCOHERENT;
266 case HV_PTE_MODE_UNCACHED: 257 case HV_PTE_MODE_UNCACHED:
267 return PAGE_HOME_UNCACHED; 258 return PAGE_HOME_UNCACHED;
268#if CHIP_HAS_CBOX_HOME_MAP()
269 case HV_PTE_MODE_CACHE_HASH_L3: 259 case HV_PTE_MODE_CACHE_HASH_L3:
270 return PAGE_HOME_HASH; 260 return PAGE_HOME_HASH;
271#endif
272 } 261 }
273 panic("Bad PTE %#llx\n", pte.val); 262 panic("Bad PTE %#llx\n", pte.val);
274} 263}
@@ -325,20 +314,16 @@ pte_t pte_set_home(pte_t pte, int home)
325 HV_PTE_MODE_CACHE_NO_L3); 314 HV_PTE_MODE_CACHE_NO_L3);
326 } 315 }
327 } else 316 } else
328#if CHIP_HAS_CBOX_HOME_MAP()
329 if (hash_default) 317 if (hash_default)
330 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); 318 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
331 else 319 else
332#endif
333 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3); 320 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_NO_L3);
334 pte = hv_pte_set_nc(pte); 321 pte = hv_pte_set_nc(pte);
335 break; 322 break;
336 323
337#if CHIP_HAS_CBOX_HOME_MAP()
338 case PAGE_HOME_HASH: 324 case PAGE_HOME_HASH:
339 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3); 325 pte = hv_pte_set_mode(pte, HV_PTE_MODE_CACHE_HASH_L3);
340 break; 326 break;
341#endif
342 327
343 default: 328 default:
344 BUG_ON(home < 0 || home >= NR_CPUS || 329 BUG_ON(home < 0 || home >= NR_CPUS ||
@@ -348,7 +333,6 @@ pte_t pte_set_home(pte_t pte, int home)
348 break; 333 break;
349 } 334 }
350 335
351#if CHIP_HAS_NC_AND_NOALLOC_BITS()
352 if (noallocl2) 336 if (noallocl2)
353 pte = hv_pte_set_no_alloc_l2(pte); 337 pte = hv_pte_set_no_alloc_l2(pte);
354 338
@@ -357,7 +341,6 @@ pte_t pte_set_home(pte_t pte, int home)
357 hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) { 341 hv_pte_get_mode(pte) == HV_PTE_MODE_CACHE_NO_L3) {
358 pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED); 342 pte = hv_pte_set_mode(pte, HV_PTE_MODE_UNCACHED);
359 } 343 }
360#endif
361 344
362 /* Checking this case here gives a better panic than from the hv. */ 345 /* Checking this case here gives a better panic than from the hv. */
363 BUG_ON(hv_pte_get_mode(pte) == 0); 346 BUG_ON(hv_pte_get_mode(pte) == 0);
@@ -373,16 +356,10 @@ EXPORT_SYMBOL(pte_set_home);
373 * so they're not suitable for anything but infrequent use. 356 * so they're not suitable for anything but infrequent use.
374 */ 357 */
375 358
376#if CHIP_HAS_CBOX_HOME_MAP()
377static inline int initial_page_home(void) { return PAGE_HOME_HASH; }
378#else
379static inline int initial_page_home(void) { return 0; }
380#endif
381
382int page_home(struct page *page) 359int page_home(struct page *page)
383{ 360{
384 if (PageHighMem(page)) { 361 if (PageHighMem(page)) {
385 return initial_page_home(); 362 return PAGE_HOME_HASH;
386 } else { 363 } else {
387 unsigned long kva = (unsigned long)page_address(page); 364 unsigned long kva = (unsigned long)page_address(page);
388 return pte_to_home(*virt_to_kpte(kva)); 365 return pte_to_home(*virt_to_kpte(kva));
@@ -438,7 +415,7 @@ struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
438void __homecache_free_pages(struct page *page, unsigned int order) 415void __homecache_free_pages(struct page *page, unsigned int order)
439{ 416{
440 if (put_page_testzero(page)) { 417 if (put_page_testzero(page)) {
441 homecache_change_page_home(page, order, initial_page_home()); 418 homecache_change_page_home(page, order, PAGE_HOME_HASH);
442 if (order == 0) { 419 if (order == 0) {
443 free_hot_cold_page(page, 0); 420 free_hot_cold_page(page, 0);
444 } else { 421 } else {
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index c8f58c12866d..22e41cf5a2a9 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -106,10 +106,8 @@ pte_t *get_prealloc_pte(unsigned long pfn)
106 */ 106 */
107static int initial_heap_home(void) 107static int initial_heap_home(void)
108{ 108{
109#if CHIP_HAS_CBOX_HOME_MAP()
110 if (hash_default) 109 if (hash_default)
111 return PAGE_HOME_HASH; 110 return PAGE_HOME_HASH;
112#endif
113 return smp_processor_id(); 111 return smp_processor_id();
114} 112}
115 113
@@ -190,14 +188,11 @@ static void __init page_table_range_init(unsigned long start,
190} 188}
191 189
192 190
193#if CHIP_HAS_CBOX_HOME_MAP()
194
195static int __initdata ktext_hash = 1; /* .text pages */ 191static int __initdata ktext_hash = 1; /* .text pages */
196static int __initdata kdata_hash = 1; /* .data and .bss pages */ 192static int __initdata kdata_hash = 1; /* .data and .bss pages */
197int __write_once hash_default = 1; /* kernel allocator pages */ 193int __write_once hash_default = 1; /* kernel allocator pages */
198EXPORT_SYMBOL(hash_default); 194EXPORT_SYMBOL(hash_default);
199int __write_once kstack_hash = 1; /* if no homecaching, use h4h */ 195int __write_once kstack_hash = 1; /* if no homecaching, use h4h */
200#endif /* CHIP_HAS_CBOX_HOME_MAP */
201 196
202/* 197/*
203 * CPUs to use to for striping the pages of kernel data. If hash-for-home 198 * CPUs to use to for striping the pages of kernel data. If hash-for-home
@@ -215,14 +210,12 @@ int __write_once kdata_huge; /* if no homecaching, small pages */
215static pgprot_t __init construct_pgprot(pgprot_t prot, int home) 210static pgprot_t __init construct_pgprot(pgprot_t prot, int home)
216{ 211{
217 prot = pte_set_home(prot, home); 212 prot = pte_set_home(prot, home);
218#if CHIP_HAS_CBOX_HOME_MAP()
219 if (home == PAGE_HOME_IMMUTABLE) { 213 if (home == PAGE_HOME_IMMUTABLE) {
220 if (ktext_hash) 214 if (ktext_hash)
221 prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3); 215 prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_HASH_L3);
222 else 216 else
223 prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3); 217 prot = hv_pte_set_mode(prot, HV_PTE_MODE_CACHE_NO_L3);
224 } 218 }
225#endif
226 return prot; 219 return prot;
227} 220}
228 221
@@ -236,20 +229,15 @@ static pgprot_t __init init_pgprot(ulong address)
236 unsigned long page; 229 unsigned long page;
237 enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET }; 230 enum { CODE_DELTA = MEM_SV_START - PAGE_OFFSET };
238 231
239#if CHIP_HAS_CBOX_HOME_MAP()
240 /* For kdata=huge, everything is just hash-for-home. */ 232 /* For kdata=huge, everything is just hash-for-home. */
241 if (kdata_huge) 233 if (kdata_huge)
242 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); 234 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
243#endif
244 235
245 /* We map the aliased pages of permanent text inaccessible. */ 236 /* We map the aliased pages of permanent text inaccessible. */
246 if (address < (ulong) _sinittext - CODE_DELTA) 237 if (address < (ulong) _sinittext - CODE_DELTA)
247 return PAGE_NONE; 238 return PAGE_NONE;
248 239
249 /* 240 /* We map read-only data non-coherent for performance. */
250 * We map read-only data non-coherent for performance. We could
251 * use neighborhood caching on TILE64, but it's not clear it's a win.
252 */
253 if ((address >= (ulong) __start_rodata && 241 if ((address >= (ulong) __start_rodata &&
254 address < (ulong) __end_rodata) || 242 address < (ulong) __end_rodata) ||
255 address == (ulong) empty_zero_page) { 243 address == (ulong) empty_zero_page) {
@@ -257,12 +245,10 @@ static pgprot_t __init init_pgprot(ulong address)
257 } 245 }
258 246
259#ifndef __tilegx__ 247#ifndef __tilegx__
260#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
261 /* Force the atomic_locks[] array page to be hash-for-home. */ 248 /* Force the atomic_locks[] array page to be hash-for-home. */
262 if (address == (ulong) atomic_locks) 249 if (address == (ulong) atomic_locks)
263 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); 250 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
264#endif 251#endif
265#endif
266 252
267 /* 253 /*
268 * Everything else that isn't data or bss is heap, so mark it 254 * Everything else that isn't data or bss is heap, so mark it
@@ -280,11 +266,9 @@ static pgprot_t __init init_pgprot(ulong address)
280 if (address >= (ulong) _end || address < (ulong) _einitdata) 266 if (address >= (ulong) _end || address < (ulong) _einitdata)
281 return construct_pgprot(PAGE_KERNEL, initial_heap_home()); 267 return construct_pgprot(PAGE_KERNEL, initial_heap_home());
282 268
283#if CHIP_HAS_CBOX_HOME_MAP()
284 /* Use hash-for-home if requested for data/bss. */ 269 /* Use hash-for-home if requested for data/bss. */
285 if (kdata_hash) 270 if (kdata_hash)
286 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH); 271 return construct_pgprot(PAGE_KERNEL, PAGE_HOME_HASH);
287#endif
288 272
289 /* 273 /*
290 * Make the w1data homed like heap to start with, to avoid 274 * Make the w1data homed like heap to start with, to avoid
@@ -311,11 +295,9 @@ static pgprot_t __init init_pgprot(ulong address)
311 if (page == (ulong)empty_zero_page) 295 if (page == (ulong)empty_zero_page)
312 continue; 296 continue;
313#ifndef __tilegx__ 297#ifndef __tilegx__
314#if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
315 if (page == (ulong)atomic_locks) 298 if (page == (ulong)atomic_locks)
316 continue; 299 continue;
317#endif 300#endif
318#endif
319 cpu = cpumask_next(cpu, &kdata_mask); 301 cpu = cpumask_next(cpu, &kdata_mask);
320 if (cpu == NR_CPUS) 302 if (cpu == NR_CPUS)
321 cpu = cpumask_first(&kdata_mask); 303 cpu = cpumask_first(&kdata_mask);
@@ -358,7 +340,7 @@ static int __init setup_ktext(char *str)
358 340
359 ktext_arg_seen = 1; 341 ktext_arg_seen = 1;
360 342
361 /* Default setting on Tile64: use a huge page */ 343 /* Default setting: use a huge page */
362 if (strcmp(str, "huge") == 0) 344 if (strcmp(str, "huge") == 0)
363 pr_info("ktext: using one huge locally cached page\n"); 345 pr_info("ktext: using one huge locally cached page\n");
364 346
@@ -404,10 +386,8 @@ static inline pgprot_t ktext_set_nocache(pgprot_t prot)
404{ 386{
405 if (!ktext_nocache) 387 if (!ktext_nocache)
406 prot = hv_pte_set_nc(prot); 388 prot = hv_pte_set_nc(prot);
407#if CHIP_HAS_NC_AND_NOALLOC_BITS()
408 else 389 else
409 prot = hv_pte_set_no_alloc_l2(prot); 390 prot = hv_pte_set_no_alloc_l2(prot);
410#endif
411 return prot; 391 return prot;
412} 392}
413 393
@@ -440,7 +420,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
440 struct cpumask kstripe_mask; 420 struct cpumask kstripe_mask;
441 int rc, i; 421 int rc, i;
442 422
443#if CHIP_HAS_CBOX_HOME_MAP()
444 if (ktext_arg_seen && ktext_hash) { 423 if (ktext_arg_seen && ktext_hash) {
445 pr_warning("warning: \"ktext\" boot argument ignored" 424 pr_warning("warning: \"ktext\" boot argument ignored"
446 " if \"kcache_hash\" sets up text hash-for-home\n"); 425 " if \"kcache_hash\" sets up text hash-for-home\n");
@@ -457,7 +436,6 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
457 " kcache_hash=all or =allbutstack\n"); 436 " kcache_hash=all or =allbutstack\n");
458 kdata_huge = 0; 437 kdata_huge = 0;
459 } 438 }
460#endif
461 439
462 /* 440 /*
463 * Set up a mask for cpus to use for kernel striping. 441 * Set up a mask for cpus to use for kernel striping.
@@ -585,13 +563,11 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
585 } else { 563 } else {
586 pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC); 564 pte_t pteval = pfn_pte(0, PAGE_KERNEL_EXEC);
587 pteval = pte_mkhuge(pteval); 565 pteval = pte_mkhuge(pteval);
588#if CHIP_HAS_CBOX_HOME_MAP()
589 if (ktext_hash) { 566 if (ktext_hash) {
590 pteval = hv_pte_set_mode(pteval, 567 pteval = hv_pte_set_mode(pteval,
591 HV_PTE_MODE_CACHE_HASH_L3); 568 HV_PTE_MODE_CACHE_HASH_L3);
592 pteval = ktext_set_nocache(pteval); 569 pteval = ktext_set_nocache(pteval);
593 } else 570 } else
594#endif /* CHIP_HAS_CBOX_HOME_MAP() */
595 if (cpumask_weight(&ktext_mask) == 1) { 571 if (cpumask_weight(&ktext_mask) == 1) {
596 pteval = set_remote_cache_cpu(pteval, 572 pteval = set_remote_cache_cpu(pteval,
597 cpumask_first(&ktext_mask)); 573 cpumask_first(&ktext_mask));
@@ -938,26 +914,6 @@ void __init pgtable_cache_init(void)
938 panic("pgtable_cache_init(): Cannot create pgd cache"); 914 panic("pgtable_cache_init(): Cannot create pgd cache");
939} 915}
940 916
941#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
942/*
943 * The __w1data area holds data that is only written during initialization,
944 * and is read-only and thus freely cacheable thereafter. Fix the page
945 * table entries that cover that region accordingly.
946 */
947static void mark_w1data_ro(void)
948{
949 /* Loop over page table entries */
950 unsigned long addr = (unsigned long)__w1data_begin;
951 BUG_ON((addr & (PAGE_SIZE-1)) != 0);
952 for (; addr <= (unsigned long)__w1data_end - 1; addr += PAGE_SIZE) {
953 unsigned long pfn = kaddr_to_pfn((void *)addr);
954 pte_t *ptep = virt_to_kpte(addr);
955 BUG_ON(pte_huge(*ptep)); /* not relevant for kdata_huge */
956 set_pte_at(&init_mm, addr, ptep, pfn_pte(pfn, PAGE_KERNEL_RO));
957 }
958}
959#endif
960
961#ifdef CONFIG_DEBUG_PAGEALLOC 917#ifdef CONFIG_DEBUG_PAGEALLOC
962static long __write_once initfree; 918static long __write_once initfree;
963#else 919#else
@@ -1026,10 +982,7 @@ void free_initmem(void)
1026 /* 982 /*
1027 * Evict the dirty initdata on the boot cpu, evict the w1data 983 * Evict the dirty initdata on the boot cpu, evict the w1data
1028 * wherever it's homed, and evict all the init code everywhere. 984 * wherever it's homed, and evict all the init code everywhere.
1029 * We are guaranteed that no one will touch the init pages any 985 * We are guaranteed that no one will touch the init pages any more.
1030 * more, and although other cpus may be touching the w1data,
1031 * we only actually change the caching on tile64, which won't
1032 * be keeping local copies in the other tiles' caches anyway.
1033 */ 986 */
1034 homecache_evict(&cpu_cacheable_map); 987 homecache_evict(&cpu_cacheable_map);
1035 988
@@ -1045,21 +998,6 @@ void free_initmem(void)
1045 free_init_pages("unused kernel text", 998 free_init_pages("unused kernel text",
1046 (unsigned long)_sinittext - text_delta, 999 (unsigned long)_sinittext - text_delta,
1047 (unsigned long)_einittext - text_delta); 1000 (unsigned long)_einittext - text_delta);
1048
1049#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
1050 /*
1051 * Upgrade the .w1data section to globally cached.
1052 * We don't do this on tilepro, since the cache architecture
1053 * pretty much makes it irrelevant, and in any case we end
1054 * up having racing issues with other tiles that may touch
1055 * the data after we flush the cache but before we update
1056 * the PTEs and flush the TLBs, causing sharer shootdowns
1057 * later. Even though this is to clean data, it seems like
1058 * an unnecessary complication.
1059 */
1060 mark_w1data_ro();
1061#endif
1062
1063 /* Do a global TLB flush so everyone sees the changes. */ 1001 /* Do a global TLB flush so everyone sees the changes. */
1064 flush_tlb_all(); 1002 flush_tlb_all();
1065} 1003}