aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arc/include/asm/arcregs.h80
-rw-r--r--arch/arc/include/asm/cache.h54
-rw-r--r--arch/arc/include/asm/cachectl.h28
-rw-r--r--arch/arc/include/asm/cacheflush.h67
-rw-r--r--arch/arc/mm/cache_arc700.c725
5 files changed, 954 insertions, 0 deletions
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 5131bb3d4fcd..c6e28053fb70 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -58,6 +58,33 @@
58#define TIMER_CTRL_IE (1 << 0) /* Interupt when Count reachs limit */ 58#define TIMER_CTRL_IE (1 << 0) /* Interupt when Count reachs limit */
59#define TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */ 59#define TIMER_CTRL_NH (1 << 1) /* Count only when CPU NOT halted */
60 60
/* Auxiliary registers controlling the Instruction cache */
#define ARC_REG_IC_BCR		0x77	/* Build Configuration Register */
#define ARC_REG_IC_IVIC		0x10	/* written to invalidate entire I$ */
#define ARC_REG_IC_CTRL		0x11	/* control register */
#define ARC_REG_IC_IVIL		0x19	/* written to invalidate one line */
#if (CONFIG_ARC_MMU_VER > 2)
#define ARC_REG_IC_PTAG		0x1E	/* "tag" bits for I$ line ops */
#endif

/* IC_CTRL bit which, while set, keeps the I-cache disabled */
#define IC_CTRL_CACHE_DISABLE	0x1

/* Auxiliary registers controlling the Data cache */
#define ARC_REG_DC_BCR		0x72	/* Build Configuration Register */
#define ARC_REG_DC_IVDC		0x47	/* written to invalidate entire D$ */
#define ARC_REG_DC_CTRL		0x48	/* control/status register */
#define ARC_REG_DC_IVDL		0x4A	/* written to invalidate one line */
#define ARC_REG_DC_FLSH		0x4B	/* written to flush entire D$ */
#define ARC_REG_DC_FLDL		0x4C	/* written to flush one line */
#if (CONFIG_ARC_MMU_VER > 2)
#define ARC_REG_DC_PTAG		0x5C	/* "tag" bits for D$ line ops */
#endif

/* Bits in DC_CTRL */
#define DC_CTRL_INV_MODE_FLUSH	0x40	/* invalidate writes back dirty lines */
#define DC_CTRL_FLUSH_STATUS	0x100	/* polled while a flush is pending */
87
61/* 88/*
62 * Floating Pt Registers 89 * Floating Pt Registers
63 * Status regs are read-only (build-time) so need not be saved/restored 90 * Status regs are read-only (build-time) so need not be saved/restored
@@ -132,6 +159,31 @@
132 159
133#endif 160#endif
134 161
/*
 * READ_BCR - read auxiliary register @reg and store its raw 32-bit value
 * into the object @into (typically a bit-field struct describing the BCR).
 *
 * @into must be exactly as large as an unsigned int; otherwise a call to
 * the never-defined bogus_undefined() is emitted so the build fails at
 * link time.
 */
#define READ_BCR(reg, into)				\
{							\
	unsigned int __bcr_raw = read_aux_reg(reg);	\
							\
	if (sizeof(__bcr_raw) != sizeof(into)) {	\
		extern void bogus_undefined(void);	\
		bogus_undefined();			\
	} else {					\
		into = *((__typeof__(into) *)&__bcr_raw);	\
	}						\
}
173
/*
 * WRITE_BCR - write the raw 32-bit contents of the object @into to
 * auxiliary register @reg.
 *
 * @into is an object (e.g. a bit-field struct), not a pointer, so its
 * address must be taken before reinterpreting it as an unsigned int.
 * @into must be exactly as large as an unsigned int; otherwise a call to
 * the never-defined bogus_undefined() is emitted so the build fails at
 * link time.
 */
#define WRITE_BCR(reg, into)				\
{							\
	unsigned int tmp;				\
	if (sizeof(tmp) == sizeof(into)) {		\
		tmp = (*(unsigned int *)&(into));	\
		write_aux_reg(reg, tmp);		\
	} else {					\
		extern void bogus_undefined(void);	\
		bogus_undefined();			\
	}						\
}
185
186
135#ifdef CONFIG_ARC_FPU_SAVE_RESTORE 187#ifdef CONFIG_ARC_FPU_SAVE_RESTORE
136/* These DPFP regs need to be saved/restored across ctx-sw */ 188/* These DPFP regs need to be saved/restored across ctx-sw */
137struct arc_fpu { 189struct arc_fpu {
@@ -141,6 +193,34 @@ struct arc_fpu {
141}; 193};
142#endif 194#endif
143 195
196/*
197 ***************************************************************
198 * Build Configuration Registers, with encoded hardware config
199 */
200
/*
 * Bit-field view of a cache Build Configuration Register.
 *  @ver:      cache version
 *  @config:   encoded configuration (decoded into associativity)
 *  @sz:       encoded cache size
 *  @line_len: encoded line length
 *  @pad:      remaining bits, not interpreted
 * Field order is mirrored for big endian so each field maps to the same
 * register bits.
 */
struct bcr_cache {
#ifdef CONFIG_CPU_BIG_ENDIAN
	unsigned int pad:12;
	unsigned int line_len:4;
	unsigned int sz:4;
	unsigned int config:4;
	unsigned int ver:8;
#else
	unsigned int ver:8;
	unsigned int config:4;
	unsigned int sz:4;
	unsigned int line_len:4;
	unsigned int pad:12;
#endif
};
208
209/*
210 *******************************************************************
211 * Generic structures to hold build configuration used at runtime
212 */
213
/* Decoded geometry of one cache (I$ or D$), kept for use at runtime */
struct cpuinfo_arc_cache {
	unsigned int has_aliasing;
	unsigned int sz;	/* total cache size */
	unsigned int line_len;	/* cache line length */
	unsigned int assoc;	/* number of ways */
	unsigned int ver;	/* version field copied from the BCR */
};

/* Per-cpu decoded hardware configuration */
struct cpuinfo_arc {
	struct cpuinfo_arc_cache icache;
	struct cpuinfo_arc_cache dcache;
};

/* Indexed by cpu id */
extern struct cpuinfo_arc cpuinfo_arc700[];
223
144#endif /* __ASEMBLY__ */ 224#endif /* __ASEMBLY__ */
145 225
146#endif /* __KERNEL__ */ 226#endif /* __KERNEL__ */
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h
index 30c72a4d2d9f..6632273861fd 100644
--- a/arch/arc/include/asm/cache.h
+++ b/arch/arc/include/asm/cache.h
@@ -18,4 +18,58 @@
18 18
19#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) 19#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
20 20
/* Number of ways assumed for each cache at compile time */
#define ARC_ICACHE_WAYS	2
#define ARC_DCACHE_WAYS	4

/* Helpers: both caches use the L1 line size */
#define ARC_ICACHE_LINE_LEN	L1_CACHE_BYTES
#define ARC_DCACHE_LINE_LEN	L1_CACHE_BYTES

/* Masks which clear the offset-within-line bits of an address */
#define ICACHE_LINE_MASK	(~(ARC_ICACHE_LINE_LEN - 1))
#define DCACHE_LINE_MASK	(~(ARC_DCACHE_LINE_LEN - 1))

#if ARC_ICACHE_LINE_LEN != ARC_DCACHE_LINE_LEN
#error "Need to fix some code as I/D cache lines not same"
#else
/*
 * Non-zero (the offset within the line) when @p is not line aligned.
 * The argument is parenthesized so that expression arguments such as
 * (a | b) are cast and masked as a whole.
 */
#define is_not_cache_aligned(p)	((unsigned long)(p) & (~DCACHE_LINE_MASK))
#endif
36
37#ifndef __ASSEMBLY__
38
/*
 * Uncached access macros
 *
 * arc_read_uncached_32 - load a 32-bit word from @ptr using the ".di"
 * form of the load instruction and evaluate to the loaded value.
 */
#define arc_read_uncached_32(ptr)			\
({							\
	unsigned int __val;				\
							\
	__asm__ __volatile__(				\
	"	ld.di %0, [%1]	\n"			\
	: "=r"(__val)					\
	: "r"(ptr));					\
							\
	__val;						\
})
49
/*
 * arc_write_uncached_32 - store the 32-bit value @data to @ptr using the
 * ".di" form of the store instruction.
 */
#define arc_write_uncached_32(ptr, data)		\
({							\
	__asm__ __volatile__(				\
	"	st.di %0, [%1]	\n"			\
	: /* no outputs */				\
	: "r"(data), "r"(ptr));				\
})
57
58/* used to give SHMLBA a value to avoid Cache Aliasing */
59extern unsigned int ARC_shmlba;
60
61#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
62
63/*
64 * ARC700 doesn't cache any access in top 256M.
65 * Ideal for wiring memory mapped peripherals as we don't need to do
66 * explicit uncached accesses (LD.di/ST.di) hence more portable drivers
67 */
68#define ARC_UNCACHED_ADDR_SPACE 0xc0000000
69
70extern void arc_cache_init(void);
71extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
72extern void __init read_decode_cache_bcr(void);
73#endif
74
21#endif /* _ASM_CACHE_H */ 75#endif /* _ASM_CACHE_H */
diff --git a/arch/arc/include/asm/cachectl.h b/arch/arc/include/asm/cachectl.h
new file mode 100644
index 000000000000..51c73f0255b3
--- /dev/null
+++ b/arch/arc/include/asm/cachectl.h
@@ -0,0 +1,28 @@
1/*
2 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#ifndef __ARC_ASM_CACHECTL_H
10#define __ARC_ASM_CACHECTL_H
11
/*
 * ARC ABI flags defined for Android's finegrained cacheflush requirements
 */
#define CF_I_INV		0x0002	/* invalidate I-cache */
#define CF_D_FLUSH		0x0010	/* flush D-cache */
#define CF_D_FLUSH_INV		0x0020	/* flush and invalidate D-cache */

#define CF_DEFAULT		(CF_I_INV | CF_D_FLUSH)

/*
 * Standard flags expected by cacheflush system call users
 */
#define ICACHE			CF_I_INV
#define DCACHE			CF_D_FLUSH
#define BCACHE			(ICACHE | DCACHE)
27
28#endif
diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
new file mode 100644
index 000000000000..97ee96f26505
--- /dev/null
+++ b/arch/arc/include/asm/cacheflush.h
@@ -0,0 +1,67 @@
1/*
2 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
9 * -flush_cache_dup_mm (fork)
10 * -likewise for flush_cache_mm (exit/execve)
11 * -likewise for flush_cache_{range,page} (munmap, exit, COW-break)
12 *
13 * vineetg: April 2008
14 * -Added a critical CacheLine flush to copy_to_user_page( ) which
15 * was causing gdbserver to not setup breakpoints consistently
16 */
17
18#ifndef _ASM_CACHEFLUSH_H
19#define _ASM_CACHEFLUSH_H
20
21#include <linux/mm.h>
22
/* Invalidate the entire I-cache and flush+invalidate the entire D-cache */
extern void flush_cache_all(void);

/* Make I-cache and D-cache coherent after code has been written */
extern void flush_icache_range(unsigned long start, unsigned long end);
extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
extern void flush_icache_range_vaddr(unsigned long paddr,
				     unsigned long u_vaddr, int len);

/* Tell generic code that this arch provides its own flush_dcache_page() */
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1

extern void flush_dcache_page(struct page *page);

/* D-cache maintenance over the range [start, start + sz) */
extern void dma_cache_wback_inv(unsigned long start, unsigned long sz);
extern void dma_cache_inv(unsigned long start, unsigned long sz);
extern void dma_cache_wback(unsigned long start, unsigned long sz);
37
/* No locking is needed around D-cache flushing of a mapping: both are NOPs */
#define flush_dcache_mmap_lock(mapping)		do { } while (0)
#define flush_dcache_mmap_unlock(mapping)	do { } while (0)

/* TBD: optimize this; for now every vmap/vunmap flushes all caches */
#define flush_cache_vmap(start, end)		flush_cache_all()
#define flush_cache_vunmap(start, end)		flush_cache_all()
44
/*
 * VM callbacks when entire/range of user-space V-P mappings are
 * torn-down/get-invalidated
 *
 * Currently we don't support D$ aliasing configs for our VIPT caches
 * NOPS for VIPT Cache with non-aliasing D$ configurations only
 *
 * Each NOP expands to an empty statement (same form as the
 * flush_dcache_mmap_lock() macros in this file) rather than to nothing,
 * so it is always a well-formed single statement at the call site.
 */
#define flush_cache_dup_mm(mm)			do { } while (0) /* called on fork */
#define flush_cache_mm(mm)			do { } while (0) /* called on munmap/exit */
#define flush_cache_range(mm, u_vstart, u_vend)	do { } while (0)
#define flush_cache_page(vma, u_vaddr, pfn)	do { } while (0) /* PF handling/COW-break */
56
/*
 * copy_to_user_page - copy @len bytes from @src to @dst and, when @vma is
 * an executable mapping, make the I-cache/D-cache coherent for the bytes
 * just written, using both the kernel address @dst and the user virtual
 * address @vaddr.
 *
 * @vma is parenthesized so that any pointer expression may be passed.
 */
#define copy_to_user_page(vma, page, vaddr, dst, src, len)		\
do {									\
	memcpy(dst, src, len);						\
	if ((vma)->vm_flags & VM_EXEC)					\
		flush_icache_range_vaddr((unsigned long)(dst), vaddr, len);\
} while (0)
63
/*
 * copy_from_user_page - plain copy of @len bytes from @src to @dst.
 * No trailing semicolon: the caller supplies it, so the macro can be used
 * as a single statement, including as the body of an if/else.
 */
#define copy_from_user_page(vma, page, vaddr, dst, src, len)		\
	memcpy(dst, src, len)

67#endif
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
new file mode 100644
index 000000000000..670f65bb2ef1
--- /dev/null
+++ b/arch/arc/mm/cache_arc700.c
@@ -0,0 +1,725 @@
1/*
2 * ARC700 VIPT Cache Management
3 *
4 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 *
10 * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
11 * -flush_cache_dup_mm (fork)
12 * -likewise for flush_cache_mm (exit/execve)
13 * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break)
14 *
15 * vineetg: Apr 2011
16 * -Now that MMU can support larger pg sz (16K), the determiniation of
17 * aliasing shd not be based on assumption of 8k pg
18 *
19 * vineetg: Mar 2011
20 * -optimised version of flush_icache_range( ) for making I/D coherent
21 * when vaddr is available (agnostic of num of aliases)
22 *
23 * vineetg: Mar 2011
24 * -Added documentation about I-cache aliasing on ARC700 and the way it
25 * was handled up until MMU V2.
26 * -Spotted a three year old bug when killing the 4 aliases, which needs
27 * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
28 * instead of paddr | {0x00, 0x01, 0x10, 0x11}
29 * (Rajesh you owe me one now)
30 *
31 * vineetg: Dec 2010
32 * -Off-by-one error when computing num_of_lines to flush
33 * This broke signal handling with bionic which uses synthetic sigret stub
34 *
35 * vineetg: Mar 2010
36 * -GCC can't generate ZOL for core cache flush loops.
37 * Conv them into iterations based as opposed to while (start < end) types
38 *
39 * Vineetg: July 2009
40 * -In I-cache flush routine we used to chk for aliasing for every line INV.
41 * Instead now we setup routines per cache geometry and invoke them
42 * via function pointers.
43 *
44 * Vineetg: Jan 2009
45 * -Cache Line flush routines used to flush an extra line beyond end addr
46 * because check was while (end >= start) instead of (end > start)
47 * =Some call sites had to work around by doing -1, -4 etc to end param
48 * =Some callers didnt care. This was spec bad in case of INV routines
49 * which would discard valid data (cause of the horrible ext2 bug
50 * in ARC IDE driver)
51 *
52 * vineetg: June 11th 2008: Fixed flush_icache_range( )
53 * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need
54 * to be flushed, which it was not doing.
55 * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API,
56 * however ARC cache maintenance OPs require PHY addr. Thus need to do
57 * vmalloc_to_phy.
58 * -Also added optimisation there, that for range > PAGE SIZE we flush the
59 * entire cache in one shot rather than line by line. For e.g. a module
60 * with Code sz 600k, old code flushed 600k worth of cache (line-by-line),
61 * while cache is only 16 or 32k.
62 */
63
64#include <linux/module.h>
65#include <linux/mm.h>
66#include <linux/sched.h>
67#include <linux/cache.h>
68#include <linux/mmu_context.h>
69#include <linux/syscalls.h>
70#include <linux/uaccess.h>
71#include <asm/cacheflush.h>
72#include <asm/cachectl.h>
73#include <asm/setup.h>
74
75
#ifdef CONFIG_ARC_HAS_ICACHE
/* Per-line I$ invalidate helpers, one per supported number of aliases */
static void __ic_line_inv_no_alias(unsigned long start, int num_lines);
static void __ic_line_inv_2_alias(unsigned long start, int num_lines);
static void __ic_line_inv_4_alias(unsigned long start, int num_lines);

/*
 * Pointer to whichever helper above matches the I$ geometry found at
 * boot; chosen once in arc_cache_init().
 */
static void (*___flush_icache_rtn)(unsigned long, int);
#endif
84
85/*
86 * Read the Cache Build Confuration Registers, Decode them and save into
87 * the cpuinfo structure for later use.
88 * No Validation done here, simply read/convert the BCRs
89 */
90void __init read_decode_cache_bcr(void)
91{
92 struct bcr_cache ibcr, dbcr;
93 struct cpuinfo_arc_cache *p_ic, *p_dc;
94 unsigned int cpu = smp_processor_id();
95
96 p_ic = &cpuinfo_arc700[cpu].icache;
97 READ_BCR(ARC_REG_IC_BCR, ibcr);
98
99 if (ibcr.config == 0x3)
100 p_ic->assoc = 2;
101 p_ic->line_len = 8 << ibcr.line_len;
102 p_ic->sz = 0x200 << ibcr.sz;
103 p_ic->ver = ibcr.ver;
104
105 p_dc = &cpuinfo_arc700[cpu].dcache;
106 READ_BCR(ARC_REG_DC_BCR, dbcr);
107
108 if (dbcr.config == 0x2)
109 p_dc->assoc = 4;
110 p_dc->line_len = 16 << dbcr.line_len;
111 p_dc->sz = 0x200 << dbcr.sz;
112 p_dc->ver = dbcr.ver;
113}
114
115/*
116 * 1. Validate the Cache Geomtery (compile time config matches hardware)
117 * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn)
118 * (aliasing D-cache configurations are not supported YET)
119 * 3. Enable the Caches, setup default flush mode for D-Cache
120 * 3. Calculate the SHMLBA used by user space
121 */
122void __init arc_cache_init(void)
123{
124 unsigned int temp;
125#ifdef CONFIG_ARC_CACHE
126 unsigned int cpu = smp_processor_id();
127#endif
128#ifdef CONFIG_ARC_HAS_ICACHE
129 struct cpuinfo_arc_cache *ic;
130#endif
131#ifdef CONFIG_ARC_HAS_DCACHE
132 struct cpuinfo_arc_cache *dc;
133#endif
134 int way_pg_ratio = way_pg_ratio;
135
136#ifdef CONFIG_ARC_HAS_ICACHE
137 ic = &cpuinfo_arc700[cpu].icache;
138
139 /*
140 * if Cache way size is <= page size then no aliasing exhibited
141 * otherwise ratio determines num of aliases.
142 * e.g. 32K I$, 2 way set assoc, 8k pg size
143 * way-sz = 32k/2 = 16k
144 * way-pg-ratio = 16k/8k = 2, so 2 aliases possible
145 * (meaning 1 line could be in 2 possible locations).
146 */
147 way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
148 switch (way_pg_ratio) {
149 case 0:
150 case 1:
151 ___flush_icache_rtn = __ic_line_inv_no_alias;
152 break;
153 case 2:
154 ___flush_icache_rtn = __ic_line_inv_2_alias;
155 break;
156 case 4:
157 ___flush_icache_rtn = __ic_line_inv_4_alias;
158 break;
159 default:
160 panic("Unsupported I-Cache Sz\n");
161 }
162#endif
163
164 /* Enable/disable I-Cache */
165 temp = read_aux_reg(ARC_REG_IC_CTRL);
166
167#ifdef CONFIG_ARC_HAS_ICACHE
168 temp &= ~IC_CTRL_CACHE_DISABLE;
169#else
170 temp |= IC_CTRL_CACHE_DISABLE;
171#endif
172
173 write_aux_reg(ARC_REG_IC_CTRL, temp);
174
175#ifdef CONFIG_ARC_HAS_DCACHE
176 dc = &cpuinfo_arc700[cpu].dcache;
177
178 /* check for D-Cache aliasing */
179 if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE)
180 panic("D$ aliasing not handled right now\n");
181#endif
182
183 /* Set the default Invalidate Mode to "simpy discard dirty lines"
184 * as this is more frequent then flush before invalidate
185 * Ofcourse we toggle this default behviour when desired
186 */
187 temp = read_aux_reg(ARC_REG_DC_CTRL);
188 temp &= ~DC_CTRL_INV_MODE_FLUSH;
189
190#ifdef CONFIG_ARC_HAS_DCACHE
191 /* Enable D-Cache: Clear Bit 0 */
192 write_aux_reg(ARC_REG_DC_CTRL, temp & ~IC_CTRL_CACHE_DISABLE);
193#else
194 /* Flush D cache */
195 write_aux_reg(ARC_REG_DC_FLSH, 0x1);
196 /* Disable D cache */
197 write_aux_reg(ARC_REG_DC_CTRL, temp | IC_CTRL_CACHE_DISABLE);
198#endif
199
200 return;
201}
202
/*
 * Cache operation selectors, usable as bit flags:
 * flush-n-inv has both the INV and the FLUSH bit set.
 */
#define OP_INV		0x1
#define OP_FLUSH	0x2
#define OP_FLUSH_N_INV	(OP_INV | OP_FLUSH)
206
207#ifdef CONFIG_ARC_HAS_DCACHE
208
209/***************************************************************
210 * Machine specific helpers for Entire D-Cache or Per Line ops
211 */
212
213static inline void wait_for_flush(void)
214{
215 while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS)
216 ;
217}
218
219/*
220 * Operation on Entire D-Cache
221 * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
222 * Note that constant propagation ensures all the checks are gone
223 * in generated code
224 */
225static inline void __dc_entire_op(const int cacheop)
226{
227 unsigned long flags, tmp = tmp;
228 int aux;
229
230 local_irq_save(flags);
231
232 if (cacheop == OP_FLUSH_N_INV) {
233 /* Dcache provides 2 cmd: FLUSH or INV
234 * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
235 * flush-n-inv is achieved by INV cmd but with IM=1
236 * Default INV sub-mode is DISCARD, which needs to be toggled
237 */
238 tmp = read_aux_reg(ARC_REG_DC_CTRL);
239 write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
240 }
241
242 if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */
243 aux = ARC_REG_DC_IVDC;
244 else
245 aux = ARC_REG_DC_FLSH;
246
247 write_aux_reg(aux, 0x1);
248
249 if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */
250 wait_for_flush();
251
252 /* Switch back the DISCARD ONLY Invalidate mode */
253 if (cacheop == OP_FLUSH_N_INV)
254 write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
255
256 local_irq_restore(flags);
257}
258
259/*
260 * Per Line Operation on D-Cache
261 * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete
262 * It's sole purpose is to help gcc generate ZOL
263 */
264static inline void __dc_line_loop(unsigned long start, unsigned long sz,
265 int aux_reg)
266{
267 int num_lines, slack;
268
269 /* Ensure we properly floor/ceil the non-line aligned/sized requests
270 * and have @start - aligned to cache line and integral @num_lines.
271 * This however can be avoided for page sized since:
272 * -@start will be cache-line aligned already (being page aligned)
273 * -@sz will be integral multiple of line size (being page sized).
274 */
275 if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
276 slack = start & ~DCACHE_LINE_MASK;
277 sz += slack;
278 start -= slack;
279 }
280
281 num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN);
282
283 while (num_lines-- > 0) {
284#if (CONFIG_ARC_MMU_VER > 2)
285 /*
286 * Just as for I$, in MMU v3, D$ ops also require
287 * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops
288 * But we pass phy addr for both. This works since Linux
289 * doesn't support aliasing configs for D$, yet.
290 * Thus paddr is enough to provide both tag and index.
291 */
292 write_aux_reg(ARC_REG_DC_PTAG, start);
293#endif
294 write_aux_reg(aux_reg, start);
295 start += ARC_DCACHE_LINE_LEN;
296 }
297}
298
299/*
300 * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
301 */
302static inline void __dc_line_op(unsigned long start, unsigned long sz,
303 const int cacheop)
304{
305 unsigned long flags, tmp = tmp;
306 int aux;
307
308 local_irq_save(flags);
309
310 if (cacheop == OP_FLUSH_N_INV) {
311 /*
312 * Dcache provides 2 cmd: FLUSH or INV
313 * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
314 * flush-n-inv is achieved by INV cmd but with IM=1
315 * Default INV sub-mode is DISCARD, which needs to be toggled
316 */
317 tmp = read_aux_reg(ARC_REG_DC_CTRL);
318 write_aux_reg(ARC_REG_DC_CTRL, tmp | DC_CTRL_INV_MODE_FLUSH);
319 }
320
321 if (cacheop & OP_INV) /* Inv / flush-n-inv use same cmd reg */
322 aux = ARC_REG_DC_IVDL;
323 else
324 aux = ARC_REG_DC_FLDL;
325
326 __dc_line_loop(start, sz, aux);
327
328 if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */
329 wait_for_flush();
330
331 /* Switch back the DISCARD ONLY Invalidate mode */
332 if (cacheop == OP_FLUSH_N_INV)
333 write_aux_reg(ARC_REG_DC_CTRL, tmp & ~DC_CTRL_INV_MODE_FLUSH);
334
335 local_irq_restore(flags);
336}
337
338#else
339
/* No D-cache configured: the D$ operations compile away to nothing */
#define __dc_entire_op(cacheop)
#define __dc_line_op(start, sz, cacheop)
342
343#endif /* CONFIG_ARC_HAS_DCACHE */
344
345
346#ifdef CONFIG_ARC_HAS_ICACHE
347
348/*
349 * I-Cache Aliasing in ARC700 VIPT caches
350 *
351 * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
352 * to "index" into SET of cache-line and paddr from MMU to match the TAG
353 * in the WAYS of SET.
354 *
355 * However the CDU interface (to flush/inv) lines from software, only takes
356 * paddr (to have simpler hardware interface). For simpler cases, using paddr
357 * alone suffices.
358 * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
359 * way_sz = cache_sz / num_ways = 16k/2 = 8k
360 * num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
361 * Ignoring the bottom 5 bits corresp to the off within a 32b cacheline,
362 * bits req for calc set-index = bits 12:5 (0 based). Since this range fits
363 * inside the bottom 13 bits of paddr, which are same for vaddr and paddr
364 * (with 8k pg sz), paddr alone can be safely used by CDU to unambiguously
365 * locate a cache-line.
366 *
367 * However for a difft sized cache, say 32k I$, above math yields need
368 * for 14 bits of vaddr to locate a cache line, which can't be provided by
369 * paddr, since the bit 13 (0 based) might differ between the two.
370 *
371 * This lack of extra bits needed for correct line addressing, defines the
372 * classical problem of Cache aliasing with VIPT architectures
373 * num_aliases = 1 << extra_bits
374 * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
375 * 2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
376 * 2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
377 *
378 * ------------------
379 * MMU v1/v2 (Fixed Page Size 8k)
380 * ------------------
381 * The solution was to provide CDU with these additional vaddr bits. These
382 * would be bits [x:13], x would depend on cache-geom.
383 * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
384 * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
385 * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
386 * represent the offset within cache-line. The adv of using this "clumsy"
387 * interface for additional info was no new reg was needed in CDU.
388 *
389 * 17:13 represented the max num of bits passable, actual bits needed were
390 * fewer, based on the num-of-aliases possible.
391 * -for 2 alias possibility, only bit 13 needed (32K cache)
392 * -for 4 alias possibility, bits 14:13 needed (64K cache)
393 *
394 * Since vaddr was not available for all instances of I$ flush req by core
395 * kernel, the only safe way (non-optimal though) was to kill all possible
396 * lines which could represent an alias (even if they didnt represent one
397 * in execution).
398 * e.g. for 64K I$, 4 aliases possible, so we did
399 * flush start
400 * flush start | 0x01
401 * flush start | 0x2
402 * flush start | 0x3
403 *
404 * The penalty was invoking the operation itself, since tag match is anyways
405 * paddr based, a line which didn't represent an alias would not match the
406 * paddr, hence wont be killed
407 *
408 * Note that aliasing concerns are independent of line-sz for a given cache
409 * geometry (size + set_assoc) because the extra bits required by line-sz are
410 * reduced from the set calc.
411 * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
412 * 32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
413 * 64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
414 *
415 * ------------------
416 * MMU v3
417 * ------------------
418 * This ver of MMU supports var page sizes (1k-16k) - Linux will support
419 * 8k (default), 16k and 4k.
420 * However from hardware perspective, smaller page sizes aggravate aliasing
421 * meaning more vaddr bits needed to disambiguate the cache-line-op ;
422 * the existing scheme of piggybacking won't work for certain configurations.
423 * Two new registers IC_PTAG and DC_PTAG introduced.
424 * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
425 */
426
427/***********************************************************
428 * Machine specific helpers for per line I-Cache invalidate.
429 * 3 routines to account for 1, 2, 4 aliases possible
430 */
431
432static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
433{
434 while (num_lines-- > 0) {
435#if (CONFIG_ARC_MMU_VER > 2)
436 write_aux_reg(ARC_REG_IC_PTAG, start);
437#endif
438 write_aux_reg(ARC_REG_IC_IVIL, start);
439 start += ARC_ICACHE_LINE_LEN;
440 }
441}
442
443static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
444{
445 while (num_lines-- > 0) {
446
447#if (CONFIG_ARC_MMU_VER > 2)
448 /*
449 * MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
450 * reg to pass the "tag" bits and existing IVIL reg only looks
451 * at bits relevant for "index" (details above)
452 * Programming Notes:
453 * -when writing tag to PTAG reg, bit chopping can be avoided,
454 * CDU ignores non-tag bits.
455 * -Ideally "index" must be computed from vaddr, but it is not
456 * avail in these rtns. So to be safe, we kill the lines in all
457 * possible indexes corresp to num of aliases possible for
458 * given cache config.
459 */
460 write_aux_reg(ARC_REG_IC_PTAG, start);
461 write_aux_reg(ARC_REG_IC_IVIL,
462 start & ~(0x1 << PAGE_SHIFT));
463 write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
464#else
465 write_aux_reg(ARC_REG_IC_IVIL, start);
466 write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
467#endif
468 start += ARC_ICACHE_LINE_LEN;
469 }
470}
471
472static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
473{
474 while (num_lines-- > 0) {
475
476#if (CONFIG_ARC_MMU_VER > 2)
477 write_aux_reg(ARC_REG_IC_PTAG, start);
478
479 write_aux_reg(ARC_REG_IC_IVIL,
480 start & ~(0x3 << PAGE_SHIFT));
481 write_aux_reg(ARC_REG_IC_IVIL,
482 start & ~(0x2 << PAGE_SHIFT));
483 write_aux_reg(ARC_REG_IC_IVIL,
484 start & ~(0x1 << PAGE_SHIFT));
485 write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT));
486#else
487 write_aux_reg(ARC_REG_IC_IVIL, start);
488 write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
489 write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
490 write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
491#endif
492 start += ARC_ICACHE_LINE_LEN;
493 }
494}
495
496static void __ic_line_inv(unsigned long start, unsigned long sz)
497{
498 unsigned long flags;
499 int num_lines, slack;
500
501 /*
502 * Ensure we properly floor/ceil the non-line aligned/sized requests
503 * and have @start - aligned to cache line, and integral @num_lines
504 * However page sized flushes can be compile time optimised.
505 * -@start will be cache-line aligned already (being page aligned)
506 * -@sz will be integral multiple of line size (being page sized).
507 */
508 if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
509 slack = start & ~ICACHE_LINE_MASK;
510 sz += slack;
511 start -= slack;
512 }
513
514 num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
515
516 local_irq_save(flags);
517 (*___flush_icache_rtn) (start, num_lines);
518 local_irq_restore(flags);
519}
520
521/* Unlike routines above, having vaddr for flush op (along with paddr),
522 * prevents the need to speculatively kill the lines in multiple sets
523 * based on ratio of way_sz : pg_sz
524 */
525static void __ic_line_inv_vaddr(unsigned long phy_start,
526 unsigned long vaddr, unsigned long sz)
527{
528 unsigned long flags;
529 int num_lines, slack;
530 unsigned int addr;
531
532 slack = phy_start & ~ICACHE_LINE_MASK;
533 sz += slack;
534 phy_start -= slack;
535 num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
536
537#if (CONFIG_ARC_MMU_VER > 2)
538 vaddr &= ~ICACHE_LINE_MASK;
539 addr = phy_start;
540#else
541 /* bits 17:13 of vaddr go as bits 4:0 of paddr */
542 addr = phy_start | ((vaddr >> 13) & 0x1F);
543#endif
544
545 local_irq_save(flags);
546 while (num_lines-- > 0) {
547#if (CONFIG_ARC_MMU_VER > 2)
548 /* tag comes from phy addr */
549 write_aux_reg(ARC_REG_IC_PTAG, addr);
550
551 /* index bits come from vaddr */
552 write_aux_reg(ARC_REG_IC_IVIL, vaddr);
553 vaddr += ARC_ICACHE_LINE_LEN;
554#else
555 /* this paddr contains vaddrs bits as needed */
556 write_aux_reg(ARC_REG_IC_IVIL, addr);
557#endif
558 addr += ARC_ICACHE_LINE_LEN;
559 }
560 local_irq_restore(flags);
561}
562
563#else
564
/* No I-cache configured: the I$ line operations compile away to nothing */
#define __ic_line_inv(start, sz)
#define __ic_line_inv_vaddr(pstart, vstart, sz)
567
568#endif /* CONFIG_ARC_HAS_ICACHE */
569
570
571/***********************************************************
572 * Exported APIs
573 */
574
575/* TBD: use pg_arch_1 to optimize this */
576void flush_dcache_page(struct page *page)
577{
578 __dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
579}
580EXPORT_SYMBOL(flush_dcache_page);
581
582
583void dma_cache_wback_inv(unsigned long start, unsigned long sz)
584{
585 __dc_line_op(start, sz, OP_FLUSH_N_INV);
586}
587EXPORT_SYMBOL(dma_cache_wback_inv);
588
589void dma_cache_inv(unsigned long start, unsigned long sz)
590{
591 __dc_line_op(start, sz, OP_INV);
592}
593EXPORT_SYMBOL(dma_cache_inv);
594
595void dma_cache_wback(unsigned long start, unsigned long sz)
596{
597 __dc_line_op(start, sz, OP_FLUSH);
598}
599EXPORT_SYMBOL(dma_cache_wback);
600
601/*
602 * This is API for making I/D Caches consistent when modifying code
603 * (loadable modules, kprobes, etc)
604 * This is called on insmod, with kernel virtual address for CODE of
605 * the module. ARC cache maintenance ops require PHY address thus we
606 * need to convert vmalloc addr to PHY addr
607 */
608void flush_icache_range(unsigned long kstart, unsigned long kend)
609{
610 unsigned int tot_sz, off, sz;
611 unsigned long phy, pfn;
612 unsigned long flags;
613
614 /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
615
616 /* This is not the right API for user virtual address */
617 if (kstart < TASK_SIZE) {
618 BUG_ON("Flush icache range for user virtual addr space");
619 return;
620 }
621
622 /* Shortcut for bigger flush ranges.
623 * Here we don't care if this was kernel virtual or phy addr
624 */
625 tot_sz = kend - kstart;
626 if (tot_sz > PAGE_SIZE) {
627 flush_cache_all();
628 return;
629 }
630
631 /* Case: Kernel Phy addr (0x8000_0000 onwards) */
632 if (likely(kstart > PAGE_OFFSET)) {
633 __ic_line_inv(kstart, kend - kstart);
634 __dc_line_op(kstart, kend - kstart, OP_FLUSH);
635 return;
636 }
637
638 /*
639 * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff)
640 * (1) ARC Cache Maintenance ops only take Phy addr, hence special
641 * handling of kernel vaddr.
642 *
643 * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already),
644 * it still needs to handle a 2 page scenario, where the range
645 * straddles across 2 virtual pages and hence need for loop
646 */
647 while (tot_sz > 0) {
648 off = kstart % PAGE_SIZE;
649 pfn = vmalloc_to_pfn((void *)kstart);
650 phy = (pfn << PAGE_SHIFT) + off;
651 sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
652 local_irq_save(flags);
653 __dc_line_op(phy, sz, OP_FLUSH);
654 __ic_line_inv(phy, sz);
655 local_irq_restore(flags);
656 kstart += sz;
657 tot_sz -= sz;
658 }
659}
660
661/*
662 * Optimised ver of flush_icache_range() with spec callers: ptrace/signals
663 * where vaddr is also available. This allows passing both vaddr and paddr
664 * bits to CDU for cache flush, short-circuting the current pessimistic algo
665 * which kills all possible aliases.
666 * An added adv of knowing that vaddr is user-vaddr avoids various checks
667 * and handling for k-vaddr, k-paddr as done in orig ver above
668 */
669void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
670 int len)
671{
672 __ic_line_inv_vaddr(paddr, u_vaddr, len);
673 __dc_line_op(paddr, len, OP_FLUSH);
674}
675
676/*
677 * XXX: This also needs to be optim using pg_arch_1
678 * This is called when a page-cache page is about to be mapped into a
679 * user process' address space. It offers an opportunity for a
680 * port to ensure d-cache/i-cache coherency if necessary.
681 */
682void flush_icache_page(struct vm_area_struct *vma, struct page *page)
683{
684 if (!(vma->vm_flags & VM_EXEC))
685 return;
686
687 __ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
688}
689
690void flush_icache_all(void)
691{
692 unsigned long flags;
693
694 local_irq_save(flags);
695
696 write_aux_reg(ARC_REG_IC_IVIC, 1);
697
698 /* lr will not complete till the icache inv operation is not over */
699 read_aux_reg(ARC_REG_IC_CTRL);
700 local_irq_restore(flags);
701}
702
703noinline void flush_cache_all(void)
704{
705 unsigned long flags;
706
707 local_irq_save(flags);
708
709 flush_icache_all();
710 __dc_entire_op(OP_FLUSH_N_INV);
711
712 local_irq_restore(flags);
713
714}
715
716/**********************************************************************
717 * Explicit Cache flush request from user space via syscall
718 * Needed for JITs which generate code on the fly
719 */
720SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
721{
722 /* TBD: optimize this */
723 flush_cache_all();
724 return 0;
725}