author		Vineet Gupta <vgupta@synopsys.com>	2013-09-05 05:15:51 -0400
committer	Vineet Gupta <vgupta@synopsys.com>	2013-11-06 00:11:38 -0500
commit		d4599baf5c773660f32ee6bc35c1afab009a52d9 (patch)
tree		e7538e616f56ceade60bd63833df965afbcc50b0 /arch/arc
parent		bd12976c366486ea90aebd83f1cf2863ee47c76a (diff)
ARC: cacheflush optim - PTAG can be loop invariant if V-P is const
Line op needs vaddr (indexing) and paddr (tag match). For page sized
flushes (V-P const), each line op will need a different index, but the
tag bits will remain constant, hence paddr can be set up once outside
the loop.

This improves select LMBench numbers for the aliasing dcache config,
where we have more "preventive" cache flushing.

Processor, Processes - times in microseconds - smaller is better
------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
3.11-rc7- Linux 3.11.0-   80 4.66 8.88 69.7 112. 268. 8.60 28.0 3489 13.K 27.K  # Non alias ARC700
3.11-rc7- Linux 3.11.0-   80 4.64 8.51 68.6 98.5 271. 8.58 28.1 4160 15.K 32.K  # Aliasing
3.11-rc7- Linux 3.11.0-   80 4.64 8.51 69.8 99.4 270. 8.73 27.5 3880 15.K 31.K  # PTAG loop Inv

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
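To make the hoisting concrete for readers who don't know the ARC cache aux
registers, here is a minimal user-space sketch, not the kernel code:
write_ptag()/write_index() are hypothetical stand-ins for write_aux_reg()
on the PTAG and index/command registers, and PAGE_SZ/LINE_SZ mirror
PAGE_SIZE/L1_CACHE_BYTES. It only illustrates that the tag write becomes
loop invariant for a full-page (V-P const) op:

/*
 * Sketch only: mock "register writes" are counted instead of touching
 * real ARC auxiliary registers.
 */
#include <stdio.h>

#define PAGE_SZ		4096UL		/* stand-in for PAGE_SIZE */
#define LINE_SZ		64UL		/* stand-in for L1_CACHE_BYTES */

static unsigned long ptag_writes, index_writes;

static void write_ptag(unsigned long paddr)  { (void)paddr; ptag_writes++; }
static void write_index(unsigned long vaddr) { (void)vaddr; index_writes++; }

static void line_loop(unsigned long paddr, unsigned long vaddr, int full_page_op)
{
	unsigned long num_lines = PAGE_SZ / LINE_SZ;

	if (full_page_op)
		write_ptag(paddr);		/* tag bits constant for the page: set once */

	while (num_lines-- > 0) {
		if (!full_page_op) {
			write_ptag(paddr);	/* tag may differ per line */
			paddr += LINE_SZ;
		}
		write_index(vaddr);		/* index changes on every line */
		vaddr += LINE_SZ;
	}
}

int main(void)
{
	line_loop(0x80000000UL, 0x20000000UL, 1);
	printf("full page op : %lu tag writes, %lu index writes\n",
	       ptag_writes, index_writes);

	ptag_writes = index_writes = 0;
	line_loop(0x80000000UL, 0x20000000UL, 0);
	printf("generic op   : %lu tag writes, %lu index writes\n",
	       ptag_writes, index_writes);
	return 0;
}

Run as-is it reports 1 tag write versus 64 for a 4K page with 64-byte
lines, which is roughly the per-page register traffic the patch saves in
the MMUv3 path below.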
Diffstat (limited to 'arch/arc')
-rw-r--r--	arch/arc/mm/cache_arc700.c	14
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c
index a152f3263ac0..6b58c1de7577 100644
--- a/arch/arc/mm/cache_arc700.c
+++ b/arch/arc/mm/cache_arc700.c
@@ -250,6 +250,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
 {
 	unsigned int aux_cmd, aux_tag;
 	int num_lines;
+	const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 
 	if (cacheop == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
@@ -267,7 +268,7 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
 	 * -@paddr will be cache-line aligned already (being page aligned)
 	 * -@sz will be integral multiple of line size (being page sized).
 	 */
-	if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) {
+	if (!full_page_op) {
 		sz += paddr & ~CACHE_LINE_MASK;
 		paddr &= CACHE_LINE_MASK;
 		vaddr &= CACHE_LINE_MASK;
@@ -278,19 +279,26 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
 #if (CONFIG_ARC_MMU_VER <= 2)
 	/* MMUv2 and before: paddr contains stuffed vaddrs bits */
 	paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
+#else
+	/* if V-P const for loop, PTAG can be written once outside loop */
+	if (full_page_op)
+		write_aux_reg(ARC_REG_DC_PTAG, paddr);
 #endif
 
 	while (num_lines-- > 0) {
 #if (CONFIG_ARC_MMU_VER > 2)
 		/* MMUv3, cache ops require paddr seperately */
-		write_aux_reg(ARC_REG_DC_PTAG, paddr);
+		if (!full_page_op) {
+			write_aux_reg(aux_tag, paddr);
+			paddr += L1_CACHE_BYTES;
+		}
 
 		write_aux_reg(aux_cmd, vaddr);
 		vaddr += L1_CACHE_BYTES;
 #else
 		write_aux_reg(aux, paddr);
-#endif
 		paddr += L1_CACHE_BYTES;
+#endif
 	}
 }
 