| author | Arnd Bergmann <arnd@arndb.de> | 2015-01-28 11:55:31 -0500 |
| --- | --- | --- |
| committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2015-02-06 15:16:40 -0500 |
| commit | 1d88967900b87f94435581dad4ae319686c6ce10 (patch) | |
| tree | 7fc8d85ca097dfdb6a49276061ffb1466723ef0c | |
| parent | 20e783e39e55c2615fb61d1b3d139ee9edcf6772 (diff) | |
ARM: 8297/1: cache-l2x0: optimize aurora range operations
The aurora_inv_range(), aurora_clean_range() and aurora_flush_range()
functions are highly redundant, both in source and in object code, and
they are harder to understand than necessary.
By moving the range loop into the aurora_pa_range() function, the three
functions become trivial wrappers, and the object code starts to look
like what one would expect from an optimal implementation.
Further optimization may be possible by using the per-CPU "virtual"
registers to avoid the spinlocks in most cases.
(Boot tested on Armada 370 RD and Armada XP GP, plus a little bit of
DMA traffic by reading data from an SD card.)
Reviewed-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 68
1 file changed, 22 insertions(+), 46 deletions(-)
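To make the chunking behaviour concrete before reading the diff, here is a small user-space model of the range walk that the patch centralizes in aurora_pa_range(). It is a sketch, not driver code: the CACHE_LINE_SIZE, PAGE_SIZE, and MAX_RANGE_SIZE values are assumed for illustration, printf() stands in for the spinlock-protected register writes, and the body of aurora_range_end() is paraphrased from the comments visible in the patch (a per-operation cap plus the same-page constraint), not copied from the source.

```c
/*
 * User-space model of the chunked range walk. Constants are assumed
 * for illustration; the real driver gets MAX_RANGE_SIZE from
 * asm/hardware/cache-aurora-l2.h and PAGE_SIZE from the kernel.
 */
#include <stdio.h>

#define CACHE_LINE_SIZE	32UL
#define PAGE_SIZE	4096UL
#define MAX_RANGE_SIZE	1024UL
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

/* Approximates aurora_range_end(): cap the number of cache lines
 * handled per operation and stop at the next page boundary. */
static unsigned long aurora_range_end(unsigned long start, unsigned long end)
{
	if (end > start + MAX_RANGE_SIZE)
		end = start + MAX_RANGE_SIZE;
	if (end > ALIGN(start + 1, PAGE_SIZE))
		end = ALIGN(start + 1, PAGE_SIZE);
	return end;
}

/* Mirrors the shape of the patched aurora_pa_range() loop; printf()
 * stands in for the locked writes to the base/offset range registers. */
static void aurora_pa_range(unsigned long start, unsigned long end)
{
	unsigned long range_end;

	start &= ~(CACHE_LINE_SIZE - 1);	/* round down to a line */
	end = ALIGN(end, CACHE_LINE_SIZE);	/* round up to a line   */

	while (start < end) {
		range_end = aurora_range_end(start, end);
		printf("range op on [%#lx, %#lx] (inclusive)\n",
		       start, range_end - CACHE_LINE_SIZE);
		start = range_end;
	}
}

int main(void)
{
	/* A range that crosses a page boundary is split in two. */
	aurora_pa_range(0xff0, 0x1100);
	return 0;
}
```

Running it on a range that crosses a page boundary shows the walk being split into two hardware operations, which is the loop the patch now performs in exactly one place.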
```diff
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 404c598da27d..5ea2d6d417f7 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1256,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = {
  * noninclusive, while the hardware cache range operations use
  * inclusive start and end addresses.
  */
-static unsigned long calc_range_end(unsigned long start, unsigned long end)
+static unsigned long aurora_range_end(unsigned long start, unsigned long end)
 {
 	/*
 	 * Limit the number of cache lines processed at once,
@@ -1275,26 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end)
 	return end;
 }
 
-/*
- * Make sure 'start' and 'end' reference the same page, as L2 is PIPT
- * and range operations only do a TLB lookup on the start address.
- */
 static void aurora_pa_range(unsigned long start, unsigned long end,
 			    unsigned long offset)
 {
 	void __iomem *base = l2x0_base;
+	unsigned long range_end;
 	unsigned long flags;
 
-	raw_spin_lock_irqsave(&l2x0_lock, flags);
-	writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
-	writel_relaxed(end, base + offset);
-	raw_spin_unlock_irqrestore(&l2x0_lock, flags);
-
-	writel_relaxed(0, base + AURORA_SYNC_REG);
-}
-
-static void aurora_inv_range(unsigned long start, unsigned long end)
-{
 	/*
 	 * round start and end adresses up to cache line size
 	 */
@@ -1302,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end)
 	end = ALIGN(end, CACHE_LINE_SIZE);
 
 	/*
-	 * Invalidate all full cache lines between 'start' and 'end'.
+	 * perform operation on all full cache lines between 'start' and 'end'
 	 */
 	while (start < end) {
-		unsigned long range_end = calc_range_end(start, end);
-		aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-				AURORA_INVAL_RANGE_REG);
+		range_end = aurora_range_end(start, end);
+
+		raw_spin_lock_irqsave(&l2x0_lock, flags);
+		writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG);
+		writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset);
+		raw_spin_unlock_irqrestore(&l2x0_lock, flags);
+
+		writel_relaxed(0, base + AURORA_SYNC_REG);
 		start = range_end;
 	}
 }
+static void aurora_inv_range(unsigned long start, unsigned long end)
+{
+	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+}
 
 static void aurora_clean_range(unsigned long start, unsigned long end)
 {
@@ -1318,36 +1314,16 @@ static void aurora_clean_range(unsigned long start, unsigned long end)
 	 * If L2 is forced to WT, the L2 will always be clean and we
 	 * don't need to do anything here.
 	 */
-	if (!l2_wt_override) {
-		start &= ~(CACHE_LINE_SIZE - 1);
-		end = ALIGN(end, CACHE_LINE_SIZE);
-		while (start != end) {
-			unsigned long range_end = calc_range_end(start, end);
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_CLEAN_RANGE_REG);
-			start = range_end;
-		}
-	}
+	if (!l2_wt_override)
+		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
 }
 
 static void aurora_flush_range(unsigned long start, unsigned long end)
 {
-	start &= ~(CACHE_LINE_SIZE - 1);
-	end = ALIGN(end, CACHE_LINE_SIZE);
-	while (start != end) {
-		unsigned long range_end = calc_range_end(start, end);
-		/*
-		 * If L2 is forced to WT, the L2 will always be clean and we
-		 * just need to invalidate.
-		 */
-		if (l2_wt_override)
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_INVAL_RANGE_REG);
-		else
-			aurora_pa_range(start, range_end - CACHE_LINE_SIZE,
-					AURORA_FLUSH_RANGE_REG);
-		start = range_end;
-	}
+	if (l2_wt_override)
+		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
+	else
+		aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
 }
 
 static void aurora_flush_all(void)
```
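For comparison with the pre-patch code, here is a condensed user-space rendering of the post-patch call structure: the three maintenance entry points reduce to one-line wrappers that select a register offset and defer to the common helper. This is a sketch under stated assumptions, not the driver: the enum values and the printf() body are placeholders (the real offsets live in asm/hardware/cache-aurora-l2.h), and l2_wt_override is modelled as a plain flag.

```c
/*
 * Post-patch call structure, modelled in user space. The enum values
 * are placeholders for the real register offsets, and printf() stands
 * in for the locked MMIO sequence in aurora_pa_range().
 */
#include <stdio.h>

enum aurora_range_reg {
	AURORA_INVAL_RANGE_REG,	/* invalidate lines in range */
	AURORA_CLEAN_RANGE_REG,	/* write back lines in range */
	AURORA_FLUSH_RANGE_REG,	/* clean + invalidate        */
};

static int l2_wt_override;	/* nonzero when L2 is forced to WT */

static void aurora_pa_range(unsigned long start, unsigned long end,
			    enum aurora_range_reg reg)
{
	printf("op %d on [%#lx, %#lx)\n", reg, start, end);
}

/* Each entry point is now a trivial wrapper around the helper. */
static void aurora_inv_range(unsigned long start, unsigned long end)
{
	aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
}

static void aurora_clean_range(unsigned long start, unsigned long end)
{
	/* If L2 is forced to WT it is always clean: nothing to do. */
	if (!l2_wt_override)
		aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG);
}

static void aurora_flush_range(unsigned long start, unsigned long end)
{
	/* A forced-WT L2 is always clean, so flush is just invalidate. */
	if (l2_wt_override)
		aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG);
	else
		aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG);
}

int main(void)
{
	aurora_inv_range(0x1000, 0x2000);
	aurora_clean_range(0x1000, 0x2000);
	aurora_flush_range(0x1000, 0x2000);

	l2_wt_override = 1;	/* flush now degrades to invalidate */
	aurora_flush_range(0x1000, 0x2000);
	return 0;
}
```

The write-through special cases survive the refactoring unchanged: clean becomes a no-op and flush degrades to invalidate, but each policy decision now appears exactly once.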