aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Arnd Bergmann <arnd@arndb.de> 2015-01-28 11:55:31 -0500
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2015-02-06 15:16:40 -0500
commit: 1d88967900b87f94435581dad4ae319686c6ce10 (patch)
tree: 7fc8d85ca097dfdb6a49276061ffb1466723ef0c
parent: 20e783e39e55c2615fb61d1b3d139ee9edcf6772 (diff)
ARM: 8297/1: cache-l2x0: optimize aurora range operations
The aurora_inv_range(), aurora_clean_range() and aurora_flush_range() functions are highly redundant, both in source and in object code, and they are harder to understand than necessary.

By moving the range loop into the aurora_pa_range() function, they become trivial wrappers, and the object code starts to look like what one would expect for an optimal implementation.

Further optimization may be possible by using the per-CPU "virtual" registers to avoid the spinlocks in most cases.

(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of DMA traffic by reading data from a SD card)

Reviewed-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r-- arch/arm/mm/cache-l2x0.c 68
1 files changed, 22 insertions, 46 deletions
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 404c598da27d..5ea2d6d417f7 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1256,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
1256 * noninclusive, while the hardware cache range operations use 1256 * noninclusive, while the hardware cache range operations use
1257 * inclusive start and end addresses. 1257 * inclusive start and end addresses.
1258 */ 1258 */
1259static unsigned long calc_range_end(unsigned long start, unsigned long end) 1259static unsigned long aurora_range_end(unsigned long start, unsigned long end)
1260{ 1260{
1261 /* 1261 /*
1262 * Limit the number of cache lines processed at once, 1262 * Limit the number of cache lines processed at once,
@@ -1275,26 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
1275 return end; 1275 return end;
1276} 1276}
1277 1277
1278/*
1279 * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
1280 * and range operations only do a TLB lookup on the start address.
1281 */
1282static void aurora_pa_range(unsigned long start, unsigned long end, 1278static void aurora_pa_range(unsigned long start, unsigned long end,
1283 unsigned long offset) 1279 unsigned long offset)
1284{ 1280{
1285 void __iomem *base = l2x0_base; 1281 void __iomem *base = l2x0_base;
1282 unsigned long range_end;
1286 unsigned long flags; 1283 unsigned long flags;
1287 1284
1288 raw_spin_lock_irqsave(&l2x0_lock, flags);
1289 writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
1290 writel_relaxed(end, base + offset);
1291 raw_spin_unlock_irqrestore(&l2x0_lock, flags);
1292
1293 writel_relaxed(0, base + AURORA_SYNC_REG);
1294}
1295
1296static void aurora_inv_range(unsigned long start, unsigned long end)
1297{
1298 /* 1285 /*
1299 * round start and end adresses up to cache line size 1286 * round start and end adresses up to cache line size
1300 */ 1287 */
@@ -1302,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end)
1302 end = ALIGN(end, CACHE_LINE_SIZE); 1289 end = ALIGN(end, CACHE_LINE_SIZE);
1303 1290
1304 /* 1291 /*
1305 * Invalidate all full cache lines between 'start' and 'end'. 1292 * perform operation on all full cache lines between 'start' and 'end'
1306 */ 1293 */
1307 while (start < end) { 1294 while (start < end) {
1308 unsigned long range_end = calc_range_end(start, end); 1295 range_end = aurora_range_end(start, end);
1309 aurora_pa_range(start, range_end - CACHE_LINE_SIZE, 1296
1310 AURORA_INVAL_RANGE_REG); 1297 raw_spin_lock_irqsave(&l2x0_lock, flags);
1298 writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
1299 writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
1300 raw_spin_unlock_irqrestore(&l2x0_lock, flags);
1301
1302 writel_relaxed(0, base + AURORA_SYNC_REG);
1311 start = range_end; 1303 start = range_end;
1312 } 1304 }
1313} 1305}
1306static void aurora_inv_range(unsigned long start, unsigned long end)
1307{
1308 aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
1309}
1314 1310
1315static void aurora_clean_range(unsigned long start, unsigned long end) 1311static void aurora_clean_range(unsigned long start, unsigned long end)
1316{ 1312{
@@ -1318,36 +1314,16 @@ static void aurora_clean_range(unsigned long start, unsigned long end)
1318 * If L2 is forced to WT, the L2 will always be clean and we 1314 * If L2 is forced to WT, the L2 will always be clean and we
1319 * don't need to do anything here. 1315 * don't need to do anything here.
1320 */ 1316 */
1321 if (!l2_wt_override) { 1317 if (!l2_wt_override)
1322 start &= ~(CACHE_LINE_SIZE - 1); 1318 aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
1323 end = ALIGN(end, CACHE_LINE_SIZE);
1324 while (start != end) {
1325 unsigned long range_end = calc_range_end(start, end);
1326 aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
1327 AURORA_CLEAN_RANGE_REG);
1328 start = range_end;
1329 }
1330 }
1331} 1319}
1332 1320
1333static void aurora_flush_range(unsigned long start, unsigned long end) 1321static void aurora_flush_range(unsigned long start, unsigned long end)
1334{ 1322{
1335 start &= ~(CACHE_LINE_SIZE - 1); 1323 if (l2_wt_override)
1336 end = ALIGN(end, CACHE_LINE_SIZE); 1324 aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
1337 while (start != end) { 1325 else
1338 unsigned long range_end = calc_range_end(start, end); 1326 aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
1339 /*
1340 * If L2 is forced to WT, the L2 will always be clean and we
1341 * just need to invalidate.
1342 */
1343 if (l2_wt_override)
1344 aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
1345 AURORA_INVAL_RANGE_REG);
1346 else
1347 aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
1348 AURORA_FLUSH_RANGE_REG);
1349 start = range_end;
1350 }
1351} 1327}
1352 1328
1353static void aurora_flush_all(void) 1329static void aurora_flush_all(void)