diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-11-19 06:41:09 -0500 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-12-14 08:35:13 -0500 |
commit | 3d1074349b22c9653e746282564136c87668c2b8 (patch) | |
tree | b86a8b3cfa5b63e218df8da66417a4ec5f229b0e /arch/arm/mm | |
parent | 0eb948dd7f7c3cec37440c16a6c738c8e75efcda (diff) |
ARM: cache-l2x0: make better use of background cache handling
There's no point in having the hardware support background operations
if we issue a cache operation and then wait for it to complete
before calculating the address of the next operation. We gain no
advantage from the cache controller stalling the bus until completion.
What we should be doing is using the 'wait' time productively by
calculating the address of the next operation, and only then waiting
for the previous operation to complete. This means that cache
operations can occur in parallel with the CPU calculating the next
address.
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 34 |
1 files changed, 23 insertions, 11 deletions
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c1b7bfff47f4..ec85dda1e733 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c | |||
@@ -28,18 +28,18 @@ | |||
28 | static void __iomem *l2x0_base; | 28 | static void __iomem *l2x0_base; |
29 | static DEFINE_SPINLOCK(l2x0_lock); | 29 | static DEFINE_SPINLOCK(l2x0_lock); |
30 | 30 | ||
31 | static inline void sync_writel(unsigned long val, unsigned long reg, | 31 | static inline void cache_wait(void __iomem *reg, unsigned long mask) |
32 | unsigned long complete_mask) | ||
33 | { | 32 | { |
34 | writel(val, l2x0_base + reg); | ||
35 | /* wait for the operation to complete */ | 33 | /* wait for the operation to complete */ |
36 | while (readl(l2x0_base + reg) & complete_mask) | 34 | while (readl(reg) & mask) |
37 | ; | 35 | ; |
38 | } | 36 | } |
39 | 37 | ||
40 | static inline void cache_sync(void) | 38 | static inline void cache_sync(void) |
41 | { | 39 | { |
42 | sync_writel(0, L2X0_CACHE_SYNC, 1); | 40 | void __iomem *base = l2x0_base; |
41 | writel(0, base + L2X0_CACHE_SYNC); | ||
42 | cache_wait(base + L2X0_CACHE_SYNC, 1); | ||
43 | } | 43 | } |
44 | 44 | ||
45 | static inline void l2x0_inv_all(void) | 45 | static inline void l2x0_inv_all(void) |
@@ -48,32 +48,37 @@ static inline void l2x0_inv_all(void) | |||
48 | 48 | ||
49 | /* invalidate all ways */ | 49 | /* invalidate all ways */ |
50 | spin_lock_irqsave(&l2x0_lock, flags); | 50 | spin_lock_irqsave(&l2x0_lock, flags); |
51 | sync_writel(0xff, L2X0_INV_WAY, 0xff); | 51 | writel(0xff, l2x0_base + L2X0_INV_WAY); |
52 | cache_wait(l2x0_base + L2X0_INV_WAY, 0xff); | ||
52 | cache_sync(); | 53 | cache_sync(); |
53 | spin_unlock_irqrestore(&l2x0_lock, flags); | 54 | spin_unlock_irqrestore(&l2x0_lock, flags); |
54 | } | 55 | } |
55 | 56 | ||
56 | static void l2x0_inv_range(unsigned long start, unsigned long end) | 57 | static void l2x0_inv_range(unsigned long start, unsigned long end) |
57 | { | 58 | { |
59 | void __iomem *base = l2x0_base; | ||
58 | unsigned long flags; | 60 | unsigned long flags; |
59 | 61 | ||
60 | spin_lock_irqsave(&l2x0_lock, flags); | 62 | spin_lock_irqsave(&l2x0_lock, flags); |
61 | if (start & (CACHE_LINE_SIZE - 1)) { | 63 | if (start & (CACHE_LINE_SIZE - 1)) { |
62 | start &= ~(CACHE_LINE_SIZE - 1); | 64 | start &= ~(CACHE_LINE_SIZE - 1); |
63 | sync_writel(start, L2X0_CLEAN_INV_LINE_PA, 1); | 65 | cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); |
66 | writel(start, base + L2X0_CLEAN_INV_LINE_PA); | ||
64 | start += CACHE_LINE_SIZE; | 67 | start += CACHE_LINE_SIZE; |
65 | } | 68 | } |
66 | 69 | ||
67 | if (end & (CACHE_LINE_SIZE - 1)) { | 70 | if (end & (CACHE_LINE_SIZE - 1)) { |
68 | end &= ~(CACHE_LINE_SIZE - 1); | 71 | end &= ~(CACHE_LINE_SIZE - 1); |
69 | sync_writel(end, L2X0_CLEAN_INV_LINE_PA, 1); | 72 | cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); |
73 | writel(end, base + L2X0_CLEAN_INV_LINE_PA); | ||
70 | } | 74 | } |
71 | 75 | ||
72 | while (start < end) { | 76 | while (start < end) { |
73 | unsigned long blk_end = start + min(end - start, 4096UL); | 77 | unsigned long blk_end = start + min(end - start, 4096UL); |
74 | 78 | ||
75 | while (start < blk_end) { | 79 | while (start < blk_end) { |
76 | sync_writel(start, L2X0_INV_LINE_PA, 1); | 80 | cache_wait(base + L2X0_INV_LINE_PA, 1); |
81 | writel(start, base + L2X0_INV_LINE_PA); | ||
77 | start += CACHE_LINE_SIZE; | 82 | start += CACHE_LINE_SIZE; |
78 | } | 83 | } |
79 | 84 | ||
@@ -82,12 +87,14 @@ static void l2x0_inv_range(unsigned long start, unsigned long end) | |||
82 | spin_lock_irqsave(&l2x0_lock, flags); | 87 | spin_lock_irqsave(&l2x0_lock, flags); |
83 | } | 88 | } |
84 | } | 89 | } |
90 | cache_wait(base + L2X0_INV_LINE_PA, 1); | ||
85 | cache_sync(); | 91 | cache_sync(); |
86 | spin_unlock_irqrestore(&l2x0_lock, flags); | 92 | spin_unlock_irqrestore(&l2x0_lock, flags); |
87 | } | 93 | } |
88 | 94 | ||
89 | static void l2x0_clean_range(unsigned long start, unsigned long end) | 95 | static void l2x0_clean_range(unsigned long start, unsigned long end) |
90 | { | 96 | { |
97 | void __iomem *base = l2x0_base; | ||
91 | unsigned long flags; | 98 | unsigned long flags; |
92 | 99 | ||
93 | spin_lock_irqsave(&l2x0_lock, flags); | 100 | spin_lock_irqsave(&l2x0_lock, flags); |
@@ -96,7 +103,8 @@ static void l2x0_clean_range(unsigned long start, unsigned long end) | |||
96 | unsigned long blk_end = start + min(end - start, 4096UL); | 103 | unsigned long blk_end = start + min(end - start, 4096UL); |
97 | 104 | ||
98 | while (start < blk_end) { | 105 | while (start < blk_end) { |
99 | sync_writel(start, L2X0_CLEAN_LINE_PA, 1); | 106 | cache_wait(base + L2X0_CLEAN_LINE_PA, 1); |
107 | writel(start, base + L2X0_CLEAN_LINE_PA); | ||
100 | start += CACHE_LINE_SIZE; | 108 | start += CACHE_LINE_SIZE; |
101 | } | 109 | } |
102 | 110 | ||
@@ -105,12 +113,14 @@ static void l2x0_clean_range(unsigned long start, unsigned long end) | |||
105 | spin_lock_irqsave(&l2x0_lock, flags); | 113 | spin_lock_irqsave(&l2x0_lock, flags); |
106 | } | 114 | } |
107 | } | 115 | } |
116 | cache_wait(base + L2X0_CLEAN_LINE_PA, 1); | ||
108 | cache_sync(); | 117 | cache_sync(); |
109 | spin_unlock_irqrestore(&l2x0_lock, flags); | 118 | spin_unlock_irqrestore(&l2x0_lock, flags); |
110 | } | 119 | } |
111 | 120 | ||
112 | static void l2x0_flush_range(unsigned long start, unsigned long end) | 121 | static void l2x0_flush_range(unsigned long start, unsigned long end) |
113 | { | 122 | { |
123 | void __iomem *base = l2x0_base; | ||
114 | unsigned long flags; | 124 | unsigned long flags; |
115 | 125 | ||
116 | spin_lock_irqsave(&l2x0_lock, flags); | 126 | spin_lock_irqsave(&l2x0_lock, flags); |
@@ -119,7 +129,8 @@ static void l2x0_flush_range(unsigned long start, unsigned long end) | |||
119 | unsigned long blk_end = start + min(end - start, 4096UL); | 129 | unsigned long blk_end = start + min(end - start, 4096UL); |
120 | 130 | ||
121 | while (start < blk_end) { | 131 | while (start < blk_end) { |
122 | sync_writel(start, L2X0_CLEAN_INV_LINE_PA, 1); | 132 | cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); |
133 | writel(start, base + L2X0_CLEAN_INV_LINE_PA); | ||
123 | start += CACHE_LINE_SIZE; | 134 | start += CACHE_LINE_SIZE; |
124 | } | 135 | } |
125 | 136 | ||
@@ -128,6 +139,7 @@ static void l2x0_flush_range(unsigned long start, unsigned long end) | |||
128 | spin_lock_irqsave(&l2x0_lock, flags); | 139 | spin_lock_irqsave(&l2x0_lock, flags); |
129 | } | 140 | } |
130 | } | 141 | } |
142 | cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); | ||
131 | cache_sync(); | 143 | cache_sync(); |
132 | spin_unlock_irqrestore(&l2x0_lock, flags); | 144 | spin_unlock_irqrestore(&l2x0_lock, flags); |
133 | } | 145 | } |