1 files changed, 57 insertions, 212 deletions
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index 88d617d84234..c854cf95f706 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -72,16 +72,6 @@
 #include <asm/cachectl.h>
 #include <asm/setup.h>
-#ifdef CONFIG_ARC_HAS_ICACHE
-static void __ic_line_inv_no_alias(unsigned long, int);
-static void __ic_line_inv_2_alias(unsigned long, int);
-static void __ic_line_inv_4_alias(unsigned long, int);
-/* Holds the ptr to flush routine, dependign on size due to aliasing issues */
-static void (*___flush_icache_rtn) (unsigned long, int);
-#endif
 char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
 {
        int n = 0;
@@ -109,7 +99,7 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len)
 * the cpuinfo structure for later use.
 * No Validation done here, simply read/convert the BCRs
 */
-void __init read_decode_cache_bcr(void)
+void __cpuinit read_decode_cache_bcr(void)
 {
        struct bcr_cache ibcr, dbcr;
        struct cpuinfo_arc_cache *p_ic, *p_dc;
@@ -141,7 +131,7 @@ void __init read_decode_cache_bcr(void)
 * 3. Enable the Caches, setup default flush mode for D-Cache
 * 3. Calculate the SHMLBA used by user space
 */
-void __init arc_cache_init(void)
+void __cpuinit arc_cache_init(void)
 {
        unsigned int temp;
        unsigned int cpu = smp_processor_id();
@@ -171,30 +161,6 @@ void __init arc_cache_init(void)
        }
 #endif
-        /*
-         * if Cache way size is <= page size then no aliasing exhibited
-         * otherwise ratio determines num of aliases.
-         * e.g. 32K I$, 2 way set assoc, 8k pg size
-         *       way-sz = 32k/2 = 16k
-         *       way-pg-ratio = 16k/8k = 2, so 2 aliases possible
-         *       (meaning 1 line could be in 2 possible locations).
-         */
-        way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
-        switch (way_pg_ratio) {
-        case 0:
-        case 1:
-                ___flush_icache_rtn = __ic_line_inv_no_alias;
-                break;
-        case 2:
-                ___flush_icache_rtn = __ic_line_inv_2_alias;
-                break;
-        case 4:
-                ___flush_icache_rtn = __ic_line_inv_4_alias;
-                break;
-        default:
-                panic("Unsupported I-Cache Sz\n");
-        }
 #endif
        /* Enable/disable I-Cache */
@@ -391,75 +357,38 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
 /*
 *              I-Cache Aliasing in ARC700 VIPT caches
 *
- * For fetching code from I$, ARC700 uses vaddr (embedded in program code)
+ * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
- * to "index" into SET of cache-line and paddr from MMU to match the TAG
+ * The orig Cache Management Module "CDU" only required paddr to invalidate a
- * in the WAYS of SET.
+ * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
+ * In fact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
+ * the exact same line.
 *
- * However the CDU iterface (to flush/inv) lines from software, only takes
+ * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
- * paddr (to have simpler hardware interface). For simpler cases, using paddr
+ * paddr alone could not be used to correctly index the cache.
- * alone suffices.
- * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
- *      way_sz = cache_sz / num_ways = 16k/2 = 8k
- *      num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
- *   Ignoring the bottom 5 bits corresp to the off within a 32b cacheline,
- *   bits req for calc set-index = bits 12:5 (0 based). Since this range fits
- *   inside the bottom 13 bits of paddr, which are same for vaddr and paddr
- *   (with 8k pg sz), paddr alone can be safely used by CDU to unambigously
- *   locate a cache-line.
- *
- * However for a difft sized cache, say 32k I$, above math yields need
- * for 14 bits of vaddr to locate a cache line, which can't be provided by
- * paddr, since the bit 13 (0 based) might differ between the two.
- *
- * This lack of extra bits needed for correct line addressing, defines the
- * classical problem of Cache aliasing with VIPT architectures
- * num_aliases = 1 << extra_bits
- * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
- *      2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
- *      2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
 *
 * ------------------
 * MMU v1/v2 (Fixed Page Size 8k)
 * ------------------
 * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geom.
+ * would be bits [x:13], x would depend on cache-geometry, 13 comes from
+ * standard page size of 8k.
 * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
 * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
 * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
 * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU.
+ * interface for additional info was no new reg was needed in CDU programming
+ * model.
 *
 * 17:13 represented the max num of bits passable, actual bits needed were
 * fewer, based on the num-of-aliases possible.
 * -for 2 alias possibility, only bit 13 needed (32K cache)
 * -for 4 alias possibility, bits 14:13 needed (64K cache)
 *
- * Since vaddr was not available for all instances of I$ flush req by core
- * kernel, the only safe way (non-optimal though) was to kill all possible
- * lines which could represent an alias (even if they didnt represent one
- * in execution).
- * e.g. for 64K I$, 4 aliases possible, so we did
- *      flush start
- *      flush start | 0x01
- *      flush start | 0x2
- *      flush start | 0x3
- *
- * The penalty was invoking the operation itself, since tag match is anyways
- * paddr based, a line which didn't represent an alias would not match the
- * paddr, hence wont be killed
- *
- * Note that aliasing concerns are independent of line-sz for a given cache
- * geometry (size + set_assoc) because the extra bits required by line-sz are
- * reduced from the set calc.
- * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
- *  32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
- *  64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
- *
 * ------------------
 * MMU v3
 * ------------------
- * This ver of MMU supports var page sizes (1k-16k) - Linux will support
+ * This ver of MMU supports variable page sizes (1k-16k): although Linux will
- * 8k (default), 16k and 4k.
+ * only support 8k (default), 16k and 4k.
 * However from hardware perspective, smaller page sizes aggrevate aliasing
 * meaning more vaddr bits needed to disambiguate the cache-line-op ;
 * the existing scheme of piggybacking won't work for certain configurations.
@@ -468,115 +397,29 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
 */
 /***********************************************************
- * Machine specific helpers for per line I-Cache invalidate.
+ * Machine specific helper for per line I-Cache invalidate.
- * 3 routines to accpunt for 1, 2, 4 aliases possible
 */
+static void __ic_line_inv_vaddr(unsigned long phy_start, unsigned long vaddr,
-static void __ic_line_inv_no_alias(unsigned long start, int num_lines)
+                                unsigned long sz)
-{
-        while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-                write_aux_reg(ARC_REG_IC_PTAG, start);
-#endif
-                write_aux_reg(ARC_REG_IC_IVIL, start);
-                start += ARC_ICACHE_LINE_LEN;
-        }
-}
-static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
-{
-        while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-                /*
-                 *  MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
-                 * reg to pass the "tag" bits and existing IVIL reg only looks
-                 * at bits relevant for "index" (details above)
-                 * Programming Notes:
-                 * -when writing tag to PTAG reg, bit chopping can be avoided,
-                 *  CDU ignores non-tag bits.
-                 * -Ideally "index" must be computed from vaddr, but it is not
-                 *  avail in these rtns. So to be safe, we kill the lines in all
-                 *  possible indexes corresp to num of aliases possible for
-                 *  given cache config.
-                 */
-                write_aux_reg(ARC_REG_IC_PTAG, start);
-                write_aux_reg(ARC_REG_IC_IVIL,
-                                  start & ~(0x1 << PAGE_SHIFT));
-                write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT));
-#else
-                write_aux_reg(ARC_REG_IC_IVIL, start);
-                write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
-#endif
-                start += ARC_ICACHE_LINE_LEN;
-        }
-}
-static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
-{
-        while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
-                write_aux_reg(ARC_REG_IC_PTAG, start);
-                write_aux_reg(ARC_REG_IC_IVIL,
-                                  start & ~(0x3 << PAGE_SHIFT));
-                write_aux_reg(ARC_REG_IC_IVIL,
-                                  start & ~(0x2 << PAGE_SHIFT));
-                write_aux_reg(ARC_REG_IC_IVIL,
-                                  start & ~(0x1 << PAGE_SHIFT));
-                write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT));
-#else
-                write_aux_reg(ARC_REG_IC_IVIL, start);
-                write_aux_reg(ARC_REG_IC_IVIL, start | 0x01);
-                write_aux_reg(ARC_REG_IC_IVIL, start | 0x02);
-                write_aux_reg(ARC_REG_IC_IVIL, start | 0x03);
-#endif
-                start += ARC_ICACHE_LINE_LEN;
-        }
-}
-static void __ic_line_inv(unsigned long start, unsigned long sz)
 {
        unsigned long flags;
        int num_lines, slack;
+        unsigned int addr;
        /*
-         * Ensure we properly floor/ceil the non-line aligned/sized requests
+         * Ensure we properly floor/ceil the non-line aligned/sized requests:
-         * and have @start - aligned to cache line, and integral @num_lines
         * However page sized flushes can be compile time optimised.
-         *  -@start will be cache-line aligned already (being page aligned)
+         *  -@phy_start will be cache-line aligned already (being page aligned)
         *  -@sz will be integral multiple of line size (being page sized).
         */
        if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
-                slack = start & ~ICACHE_LINE_MASK;
+                slack = phy_start & ~ICACHE_LINE_MASK;
                sz += slack;
-                start -= slack;
+                phy_start -= slack;
        }
        num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
-        local_irq_save(flags);
-        (*___flush_icache_rtn) (start, num_lines);
-        local_irq_restore(flags);
-}
-/* Unlike routines above, having vaddr for flush op (along with paddr),
- * prevents the need to speculatively kill the lines in multiple sets
- * based on ratio of way_sz : pg_sz
- */
-static void __ic_line_inv_vaddr(unsigned long phy_start,
-                                         unsigned long vaddr, unsigned long sz)
-{
-        unsigned long flags;
-        int num_lines, slack;
-        unsigned int addr;
-        slack = phy_start & ~ICACHE_LINE_MASK;
-        sz += slack;
-        phy_start -= slack;
-        num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
 #if (CONFIG_ARC_MMU_VER > 2)
        vaddr &= ~ICACHE_LINE_MASK;
        addr = phy_start;
@@ -595,7 +438,7 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
                write_aux_reg(ARC_REG_IC_IVIL, vaddr);
                vaddr += ARC_ICACHE_LINE_LEN;
 #else
-                /* this paddr contains vaddrs bits as needed */
+                /* paddr contains stuffed vaddrs bits */
                write_aux_reg(ARC_REG_IC_IVIL, addr);
 #endif
                addr += ARC_ICACHE_LINE_LEN;
@@ -605,7 +448,6 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
 #else
-#define __ic_line_inv(start, sz)
 #define __ic_line_inv_vaddr(pstart, vstart, sz)
 #endif /* CONFIG_ARC_HAS_ICACHE */
@@ -615,10 +457,10 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
 * Exported APIs
 */
-/* TBD: use pg_arch_1 to optimize this */
 void flush_dcache_page(struct page *page)
 {
-        __dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);
+        /* Make a note that dcache is not yet flushed for this page */
+        set_bit(PG_arch_1, &page->flags);
 }
 EXPORT_SYMBOL(flush_dcache_page);
@@ -642,8 +484,8 @@ void dma_cache_wback(unsigned long start, unsigned long sz)
 EXPORT_SYMBOL(dma_cache_wback);
 /*
- * This is API for making I/D Caches consistent when modifying code
+ * This is API for making I/D Caches consistent when modifying
- * (loadable modules, kprobes,  etc)
+ * kernel code (loadable modules, kprobes, kgdb...)
 * This is called on insmod, with kernel virtual address for CODE of
 * the module. ARC cache maintenance ops require PHY address thus we
 * need to convert vmalloc addr to PHY addr
@@ -652,7 +494,6 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
 {
        unsigned int tot_sz, off, sz;
        unsigned long phy, pfn;
-        unsigned long flags;
        /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */
@@ -673,8 +514,13 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
        /* Case: Kernel Phy addr (0x8000_0000 onwards) */
        if (likely(kstart > PAGE_OFFSET)) {
-                __ic_line_inv(kstart, kend - kstart);
+                /*
-                __dc_line_op(kstart, kend - kstart, OP_FLUSH);
+                 * The 2nd arg despite being paddr will be used to index icache
+                 * This is OK since no alternate virtual mappings will exist
+                 * given the callers for this case: kprobe/kgdb in built-in
+                 * kernel code only.
+                 */
+                __sync_icache_dcache(kstart, kstart, kend - kstart);
                return;
        }
@@ -692,42 +538,41 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
                pfn = vmalloc_to_pfn((void *)kstart);
                phy = (pfn << PAGE_SHIFT) + off;
                sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
-                local_irq_save(flags);
+                __sync_icache_dcache(phy, kstart, sz);
-                __dc_line_op(phy, sz, OP_FLUSH);
-                __ic_line_inv(phy, sz);
-                local_irq_restore(flags);
                kstart += sz;
                tot_sz -= sz;
        }
 }
 /*
- * Optimised ver of flush_icache_range() with spec callers: ptrace/signals
+ * General purpose helper to make I and D cache lines consistent.
- * where vaddr is also available. This allows passing both vaddr and paddr
+ * @paddr is phy addr of region
- * bits to CDU for cache flush, short-circuting the current pessimistic algo
+ * @vaddr is typically user or kernel vaddr (vmalloc)
- * which kills all possible aliases.
+ *    However in one instance, flush_icache_range() by kprobe (for a breakpt in
- * An added adv of knowing that vaddr is user-vaddr avoids various checks
+ *    builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
- * and handling for k-vaddr, k-paddr as done in orig ver above
+ *    use a paddr to index the cache (despite VIPT). This is fine since a
+ *    built-in kernel page will not have any virtual mappings (not even kernel).
+ *    A kprobe on a loadable module is different as it will have kvaddr.
 */
-void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,
+void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
-                              int len)
 {
-        __ic_line_inv_vaddr(paddr, u_vaddr, len);
+        unsigned long flags;
+        local_irq_save(flags);
+        __ic_line_inv_vaddr(paddr, vaddr, len);
        __dc_line_op(paddr, len, OP_FLUSH);
+        local_irq_restore(flags);
 }
-/*
+/* wrapper to compile time eliminate alignment checks in flush loop */
- * XXX: This also needs to be optim using pg_arch_1
+void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
- * This is called when a page-cache page is about to be mapped into a
- * user process' address space.  It offers an opportunity for a
- * port to ensure d-cache/i-cache coherency if necessary.
- */
-void flush_icache_page(struct vm_area_struct *vma, struct page *page)
 {
-        if (!(vma->vm_flags & VM_EXEC))
+        __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
-                return;
+}
-        __ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);
+void __flush_dcache_page(unsigned long paddr)
+{
+        __dc_line_op(paddr, PAGE_SIZE, OP_FLUSH_N_INV);
 }
 void flush_icache_all(void)

diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 88d617d84234..c854cf95f706 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c
@@ -72,16 +72,6 @@
72	#include <asm/cachectl.h>	72	#include <asm/cachectl.h>
73	#include <asm/setup.h>	73	#include <asm/setup.h>
74		74
75
76	#ifdef CONFIG_ARC_HAS_ICACHE
77	static void __ic_line_inv_no_alias(unsigned long, int);
78	static void __ic_line_inv_2_alias(unsigned long, int);
79	static void __ic_line_inv_4_alias(unsigned long, int);
80
81	/* Holds the ptr to flush routine, dependign on size due to aliasing issues */
82	static void (*___flush_icache_rtn) (unsigned long, int);
83	#endif
84
85	char arc_cache_mumbojumbo(int cpu_id, char buf, int len)	75	char arc_cache_mumbojumbo(int cpu_id, char buf, int len)
86	{	76	{
87	int n = 0;	77	int n = 0;
@@ -109,7 +99,7 @@ char arc_cache_mumbojumbo(int cpu_id, char buf, int len)
109	* the cpuinfo structure for later use.	99	* the cpuinfo structure for later use.
110	* No Validation done here, simply read/convert the BCRs	100	* No Validation done here, simply read/convert the BCRs
111	*/	101	*/
112	void __init read_decode_cache_bcr(void)	102	void __cpuinit read_decode_cache_bcr(void)
113	{	103	{
114	struct bcr_cache ibcr, dbcr;	104	struct bcr_cache ibcr, dbcr;
115	struct cpuinfo_arc_cache p_ic, p_dc;	105	struct cpuinfo_arc_cache p_ic, p_dc;
@@ -141,7 +131,7 @@ void __init read_decode_cache_bcr(void)
141	* 3. Enable the Caches, setup default flush mode for D-Cache	131	* 3. Enable the Caches, setup default flush mode for D-Cache
142	* 3. Calculate the SHMLBA used by user space	132	* 3. Calculate the SHMLBA used by user space
143	*/	133	*/
144	void __init arc_cache_init(void)	134	void __cpuinit arc_cache_init(void)
145	{	135	{
146	unsigned int temp;	136	unsigned int temp;
147	unsigned int cpu = smp_processor_id();	137	unsigned int cpu = smp_processor_id();
@@ -171,30 +161,6 @@ void __init arc_cache_init(void)
171		161
172	}	162	}
173	#endif	163	#endif
174
175	/*
176	* if Cache way size is <= page size then no aliasing exhibited
177	* otherwise ratio determines num of aliases.
178	* e.g. 32K I$, 2 way set assoc, 8k pg size
179	* way-sz = 32k/2 = 16k
180	* way-pg-ratio = 16k/8k = 2, so 2 aliases possible
181	* (meaning 1 line could be in 2 possible locations).
182	*/
183	way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE;
184	switch (way_pg_ratio) {
185	case 0:
186	case 1:
187	___flush_icache_rtn = __ic_line_inv_no_alias;
188	break;
189	case 2:
190	___flush_icache_rtn = __ic_line_inv_2_alias;
191	break;
192	case 4:
193	___flush_icache_rtn = __ic_line_inv_4_alias;
194	break;
195	default:
196	panic("Unsupported I-Cache Sz\n");
197	}
198	#endif	164	#endif
199		165
200	/* Enable/disable I-Cache */	166	/* Enable/disable I-Cache */
@@ -391,75 +357,38 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
391	/*	357	/*
392	* I-Cache Aliasing in ARC700 VIPT caches	358	* I-Cache Aliasing in ARC700 VIPT caches
393	*	359	*
394	* For fetching code from I$, ARC700 uses vaddr (embedded in program code)	360	* ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
395	* to "index" into SET of cache-line and paddr from MMU to match the TAG	361	* The orig Cache Management Module "CDU" only required paddr to invalidate a
396	* in the WAYS of SET.	362	* certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
		363	* In fact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
		364	* the exact same line.
397	*	365	*
398	* However the CDU iterface (to flush/inv) lines from software, only takes	366	* However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
399	* paddr (to have simpler hardware interface). For simpler cases, using paddr	367	* paddr alone could not be used to correctly index the cache.
400	* alone suffices.
401	* e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size):
402	* way_sz = cache_sz / num_ways = 16k/2 = 8k
403	* num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits
404	* Ignoring the bottom 5 bits corresp to the off within a 32b cacheline,
405	* bits req for calc set-index = bits 12:5 (0 based). Since this range fits
406	* inside the bottom 13 bits of paddr, which are same for vaddr and paddr
407	* (with 8k pg sz), paddr alone can be safely used by CDU to unambigously
408	* locate a cache-line.
409	*
410	* However for a difft sized cache, say 32k I$, above math yields need
411	* for 14 bits of vaddr to locate a cache line, which can't be provided by
412	* paddr, since the bit 13 (0 based) might differ between the two.
413	*
414	* This lack of extra bits needed for correct line addressing, defines the
415	* classical problem of Cache aliasing with VIPT architectures
416	* num_aliases = 1 << extra_bits
417	* e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases
418	* 2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases
419	* 2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases
420	*	368	*
421	* ------------------	369	* ------------------
422	* MMU v1/v2 (Fixed Page Size 8k)	370	* MMU v1/v2 (Fixed Page Size 8k)
423	* ------------------	371	* ------------------
424	* The solution was to provide CDU with these additonal vaddr bits. These	372	* The solution was to provide CDU with these additonal vaddr bits. These
425	* would be bits [x:13], x would depend on cache-geom.	373	* would be bits [x:13], x would depend on cache-geometry, 13 comes from
		374	* standard page size of 8k.
426	* H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits	375	* H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
427	* of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the	376	* of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
428	* orig 5 bits of paddr were anyways ignored by CDU line ops, as they	377	* orig 5 bits of paddr were anyways ignored by CDU line ops, as they
429	* represent the offset within cache-line. The adv of using this "clumsy"	378	* represent the offset within cache-line. The adv of using this "clumsy"
430	* interface for additional info was no new reg was needed in CDU.	379	* interface for additional info was no new reg was needed in CDU programming
		380	* model.
431	*	381	*
432	* 17:13 represented the max num of bits passable, actual bits needed were	382	* 17:13 represented the max num of bits passable, actual bits needed were
433	* fewer, based on the num-of-aliases possible.	383	* fewer, based on the num-of-aliases possible.
434	* -for 2 alias possibility, only bit 13 needed (32K cache)	384	* -for 2 alias possibility, only bit 13 needed (32K cache)
435	* -for 4 alias possibility, bits 14:13 needed (64K cache)	385	* -for 4 alias possibility, bits 14:13 needed (64K cache)
436	*	386	*
437	* Since vaddr was not available for all instances of I$ flush req by core
438	* kernel, the only safe way (non-optimal though) was to kill all possible
439	* lines which could represent an alias (even if they didnt represent one
440	* in execution).
441	* e.g. for 64K I$, 4 aliases possible, so we did
442	* flush start
443	* flush start \| 0x01
444	* flush start \| 0x2
445	* flush start \| 0x3
446	*
447	* The penalty was invoking the operation itself, since tag match is anyways
448	* paddr based, a line which didn't represent an alias would not match the
449	* paddr, hence wont be killed
450	*
451	* Note that aliasing concerns are independent of line-sz for a given cache
452	* geometry (size + set_assoc) because the extra bits required by line-sz are
453	* reduced from the set calc.
454	* e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above
455	* 32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit
456	* 64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit
457	*
458	* ------------------	387	* ------------------
459	* MMU v3	388	* MMU v3
460	* ------------------	389	* ------------------
461	* This ver of MMU supports var page sizes (1k-16k) - Linux will support	390	* This ver of MMU supports variable page sizes (1k-16k): although Linux will
462	* 8k (default), 16k and 4k.	391	* only support 8k (default), 16k and 4k.
463	* However from hardware perspective, smaller page sizes aggrevate aliasing	392	* However from hardware perspective, smaller page sizes aggrevate aliasing
464	* meaning more vaddr bits needed to disambiguate the cache-line-op ;	393	* meaning more vaddr bits needed to disambiguate the cache-line-op ;
465	* the existing scheme of piggybacking won't work for certain configurations.	394	* the existing scheme of piggybacking won't work for certain configurations.
@@ -468,115 +397,29 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz,
468	*/	397	*/
469		398
470	/***********************************************************	399	/***********************************************************
471	* Machine specific helpers for per line I-Cache invalidate.	400	* Machine specific helper for per line I-Cache invalidate.
472	* 3 routines to accpunt for 1, 2, 4 aliases possible
473	*/	401	*/
474		402	static void __ic_line_inv_vaddr(unsigned long phy_start, unsigned long vaddr,
475	static void __ic_line_inv_no_alias(unsigned long start, int num_lines)	403	unsigned long sz)
476	{
477	while (num_lines-- > 0) {
478	#if (CONFIG_ARC_MMU_VER > 2)
479	write_aux_reg(ARC_REG_IC_PTAG, start);
480	#endif
481	write_aux_reg(ARC_REG_IC_IVIL, start);
482	start += ARC_ICACHE_LINE_LEN;
483	}
484	}
485
486	static void __ic_line_inv_2_alias(unsigned long start, int num_lines)
487	{
488	while (num_lines-- > 0) {
489
490	#if (CONFIG_ARC_MMU_VER > 2)
491	/*
492	* MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG
493	* reg to pass the "tag" bits and existing IVIL reg only looks
494	* at bits relevant for "index" (details above)
495	* Programming Notes:
496	* -when writing tag to PTAG reg, bit chopping can be avoided,
497	* CDU ignores non-tag bits.
498	* -Ideally "index" must be computed from vaddr, but it is not
499	* avail in these rtns. So to be safe, we kill the lines in all
500	* possible indexes corresp to num of aliases possible for
501	* given cache config.
502	*/
503	write_aux_reg(ARC_REG_IC_PTAG, start);
504	write_aux_reg(ARC_REG_IC_IVIL,
505	start & ~(0x1 << PAGE_SHIFT));
506	write_aux_reg(ARC_REG_IC_IVIL, start \| (0x1 << PAGE_SHIFT));
507	#else
508	write_aux_reg(ARC_REG_IC_IVIL, start);
509	write_aux_reg(ARC_REG_IC_IVIL, start \| 0x01);
510	#endif
511	start += ARC_ICACHE_LINE_LEN;
512	}
513	}
514
515	static void __ic_line_inv_4_alias(unsigned long start, int num_lines)
516	{
517	while (num_lines-- > 0) {
518
519	#if (CONFIG_ARC_MMU_VER > 2)
520	write_aux_reg(ARC_REG_IC_PTAG, start);
521
522	write_aux_reg(ARC_REG_IC_IVIL,
523	start & ~(0x3 << PAGE_SHIFT));
524	write_aux_reg(ARC_REG_IC_IVIL,
525	start & ~(0x2 << PAGE_SHIFT));
526	write_aux_reg(ARC_REG_IC_IVIL,
527	start & ~(0x1 << PAGE_SHIFT));
528	write_aux_reg(ARC_REG_IC_IVIL, start \| (0x3 << PAGE_SHIFT));
529	#else
530	write_aux_reg(ARC_REG_IC_IVIL, start);
531	write_aux_reg(ARC_REG_IC_IVIL, start \| 0x01);
532	write_aux_reg(ARC_REG_IC_IVIL, start \| 0x02);
533	write_aux_reg(ARC_REG_IC_IVIL, start \| 0x03);
534	#endif
535	start += ARC_ICACHE_LINE_LEN;
536	}
537	}
538
539	static void __ic_line_inv(unsigned long start, unsigned long sz)
540	{	404	{
541	unsigned long flags;	405	unsigned long flags;
542	int num_lines, slack;	406	int num_lines, slack;
		407	unsigned int addr;
543		408
544	/*	409	/*
545	* Ensure we properly floor/ceil the non-line aligned/sized requests	410	* Ensure we properly floor/ceil the non-line aligned/sized requests:
546	* and have @start - aligned to cache line, and integral @num_lines
547	* However page sized flushes can be compile time optimised.	411	* However page sized flushes can be compile time optimised.
548	* -@start will be cache-line aligned already (being page aligned)	412	* -@phy_start will be cache-line aligned already (being page aligned)
549	* -@sz will be integral multiple of line size (being page sized).	413	* -@sz will be integral multiple of line size (being page sized).
550	*/	414	*/
551	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {	415	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
552	slack = start & ~ICACHE_LINE_MASK;	416	slack = phy_start & ~ICACHE_LINE_MASK;
553	sz += slack;	417	sz += slack;
554	start -= slack;	418	phy_start -= slack;
555	}	419	}
556		420
557	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);	421	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
558		422
559	local_irq_save(flags);
560	(*___flush_icache_rtn) (start, num_lines);
561	local_irq_restore(flags);
562	}
563
564	/* Unlike routines above, having vaddr for flush op (along with paddr),
565	* prevents the need to speculatively kill the lines in multiple sets
566	* based on ratio of way_sz : pg_sz
567	*/
568	static void __ic_line_inv_vaddr(unsigned long phy_start,
569	unsigned long vaddr, unsigned long sz)
570	{
571	unsigned long flags;
572	int num_lines, slack;
573	unsigned int addr;
574
575	slack = phy_start & ~ICACHE_LINE_MASK;
576	sz += slack;
577	phy_start -= slack;
578	num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN);
579
580	#if (CONFIG_ARC_MMU_VER > 2)	423	#if (CONFIG_ARC_MMU_VER > 2)
581	vaddr &= ~ICACHE_LINE_MASK;	424	vaddr &= ~ICACHE_LINE_MASK;
582	addr = phy_start;	425	addr = phy_start;
@@ -595,7 +438,7 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
595	write_aux_reg(ARC_REG_IC_IVIL, vaddr);	438	write_aux_reg(ARC_REG_IC_IVIL, vaddr);
596	vaddr += ARC_ICACHE_LINE_LEN;	439	vaddr += ARC_ICACHE_LINE_LEN;
597	#else	440	#else
598	/* this paddr contains vaddrs bits as needed */	441	/* paddr contains stuffed vaddrs bits */
599	write_aux_reg(ARC_REG_IC_IVIL, addr);	442	write_aux_reg(ARC_REG_IC_IVIL, addr);
600	#endif	443	#endif
601	addr += ARC_ICACHE_LINE_LEN;	444	addr += ARC_ICACHE_LINE_LEN;
@@ -605,7 +448,6 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
605		448
606	#else	449	#else
607		450
608	#define __ic_line_inv(start, sz)
609	#define __ic_line_inv_vaddr(pstart, vstart, sz)	451	#define __ic_line_inv_vaddr(pstart, vstart, sz)
610		452
611	#endif /* CONFIG_ARC_HAS_ICACHE */	453	#endif /* CONFIG_ARC_HAS_ICACHE */
@@ -615,10 +457,10 @@ static void __ic_line_inv_vaddr(unsigned long phy_start,
615	* Exported APIs	457	* Exported APIs
616	*/	458	*/
617		459
618	/* TBD: use pg_arch_1 to optimize this */
619	void flush_dcache_page(struct page *page)	460	void flush_dcache_page(struct page *page)
620	{	461	{
621	__dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH);	462	/* Make a note that dcache is not yet flushed for this page */
		463	set_bit(PG_arch_1, &page->flags);
622	}	464	}
623	EXPORT_SYMBOL(flush_dcache_page);	465	EXPORT_SYMBOL(flush_dcache_page);
624		466
@@ -642,8 +484,8 @@ void dma_cache_wback(unsigned long start, unsigned long sz)
642	EXPORT_SYMBOL(dma_cache_wback);	484	EXPORT_SYMBOL(dma_cache_wback);
643		485
644	/*	486	/*
645	* This is API for making I/D Caches consistent when modifying code	487	* This is API for making I/D Caches consistent when modifying
646	* (loadable modules, kprobes, etc)	488	* kernel code (loadable modules, kprobes, kgdb...)
647	* This is called on insmod, with kernel virtual address for CODE of	489	* This is called on insmod, with kernel virtual address for CODE of
648	* the module. ARC cache maintenance ops require PHY address thus we	490	* the module. ARC cache maintenance ops require PHY address thus we
649	* need to convert vmalloc addr to PHY addr	491	* need to convert vmalloc addr to PHY addr
@@ -652,7 +494,6 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
652	{	494	{
653	unsigned int tot_sz, off, sz;	495	unsigned int tot_sz, off, sz;
654	unsigned long phy, pfn;	496	unsigned long phy, pfn;
655	unsigned long flags;
656		497
657	/* printk("Kernel Cache Coherency: %lx to %lx\n",kstart, kend); */	498	/* printk("Kernel Cache Coherency: %lx to %lx\n",kstart, kend); */
658		499
@@ -673,8 +514,13 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
673		514
674	/* Case: Kernel Phy addr (0x8000_0000 onwards) */	515	/* Case: Kernel Phy addr (0x8000_0000 onwards) */
675	if (likely(kstart > PAGE_OFFSET)) {	516	if (likely(kstart > PAGE_OFFSET)) {
676	__ic_line_inv(kstart, kend - kstart);	517	/*
677	__dc_line_op(kstart, kend - kstart, OP_FLUSH);	518	* The 2nd arg despite being paddr will be used to index icache
		519	* This is OK since no alternate virtual mappings will exist
		520	* given the callers for this case: kprobe/kgdb in built-in
		521	* kernel code only.
		522	*/
		523	__sync_icache_dcache(kstart, kstart, kend - kstart);
678	return;	524	return;
679	}	525	}
680		526
@@ -692,42 +538,41 @@ void flush_icache_range(unsigned long kstart, unsigned long kend)
692	pfn = vmalloc_to_pfn((void *)kstart);	538	pfn = vmalloc_to_pfn((void *)kstart);
693	phy = (pfn << PAGE_SHIFT) + off;	539	phy = (pfn << PAGE_SHIFT) + off;
694	sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);	540	sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off);
695	local_irq_save(flags);	541	__sync_icache_dcache(phy, kstart, sz);
696	__dc_line_op(phy, sz, OP_FLUSH);
697	__ic_line_inv(phy, sz);
698	local_irq_restore(flags);
699	kstart += sz;	542	kstart += sz;
700	tot_sz -= sz;	543	tot_sz -= sz;
701	}	544	}
702	}	545	}
703		546
704	/*	547	/*
705	* Optimised ver of flush_icache_range() with spec callers: ptrace/signals	548	* General purpose helper to make I and D cache lines consistent.
706	* where vaddr is also available. This allows passing both vaddr and paddr	549	* @paddr is phy addr of region
707	* bits to CDU for cache flush, short-circuting the current pessimistic algo	550	* @vaddr is typically user or kernel vaddr (vmalloc)
708	* which kills all possible aliases.	551	* However in one instance, flush_icache_range() by kprobe (for a breakpt in
709	* An added adv of knowing that vaddr is user-vaddr avoids various checks	552	* builtin kernel code) @vaddr will be paddr only, meaning CDU operation will
710	* and handling for k-vaddr, k-paddr as done in orig ver above	553	* use a paddr to index the cache (despite VIPT). This is fine since a
		554	* built-in kernel page will not have any virtual mappings (not even kernel).
		555	* A kprobe on a loadable module is different as it will have a kvaddr.
711	*/	556	*/
712	void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr,	557	void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
713	int len)
714	{	558	{
715	__ic_line_inv_vaddr(paddr, u_vaddr, len);	559	unsigned long flags;
		560
		561	local_irq_save(flags);
		562	__ic_line_inv_vaddr(paddr, vaddr, len);
716	__dc_line_op(paddr, len, OP_FLUSH);	563	__dc_line_op(paddr, len, OP_FLUSH);
		564	local_irq_restore(flags);
717	}	565	}
718		566
719	/*	567	/* wrapper to compile time eliminate alignment checks in flush loop */
720	* XXX: This also needs to be optim using pg_arch_1	568	void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
721	* This is called when a page-cache page is about to be mapped into a
722	* user process' address space. It offers an opportunity for a
723	* port to ensure d-cache/i-cache coherency if necessary.
724	*/
725	void flush_icache_page(struct vm_area_struct vma, struct page page)
726	{	569	{
727	if (!(vma->vm_flags & VM_EXEC))	570	__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
728	return;	571	}
729		572
730	__ic_line_inv((unsigned long)page_address(page), PAGE_SIZE);	573	void __flush_dcache_page(unsigned long paddr)
		574	{
		575	__dc_line_op(paddr, PAGE_SIZE, OP_FLUSH_N_INV);
731	}	576	}
732		577
733	void flush_icache_all(void)	578	void flush_icache_all(void)