diff options
| author | Arnd Bergmann <arnd@arndb.de> | 2015-01-28 11:55:31 -0500 |
|---|---|---|
| committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2015-02-06 15:16:40 -0500 |
| commit | 1d88967900b87f94435581dad4ae319686c6ce10 (patch) | |
| tree | 7fc8d85ca097dfdb6a49276061ffb1466723ef0c | |
| parent | 20e783e39e55c2615fb61d1b3d139ee9edcf6772 (diff) | |
ARM: 8297/1: cache-l2x0: optimize aurora range operations
The aurora_inv_range(), aurora_clean_range() and aurora_flush_range()
functions are highly redundant, both in source and in object code, and
they are harder to understand than necessary.
By moving the range loop into the aurora_pa_range() function, they
become trivial wrappers, and the object code starts looking like what
one would expect for an optimal implementation.
Further optimization may be possible by using the per-CPU "virtual"
registers to avoid the spinlocks in most cases.
(on Armada 370 RD and Armada XP GP, boot tested, plus a little bit of
DMA traffic by reading data from a SD card)
Reviewed-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Tested-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
| -rw-r--r-- | arch/arm/mm/cache-l2x0.c | 68 |
1 files changed, 22 insertions, 46 deletions
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 404c598da27d..5ea2d6d417f7 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c | |||
| @@ -1256,7 +1256,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = { | |||
| 1256 | * noninclusive, while the hardware cache range operations use | 1256 | * noninclusive, while the hardware cache range operations use |
| 1257 | * inclusive start and end addresses. | 1257 | * inclusive start and end addresses. |
| 1258 | */ | 1258 | */ |
| 1259 | static unsigned long calc_range_end(unsigned long start, unsigned long end) | 1259 | static unsigned long aurora_range_end(unsigned long start, unsigned long end) |
| 1260 | { | 1260 | { |
| 1261 | /* | 1261 | /* |
| 1262 | * Limit the number of cache lines processed at once, | 1262 | * Limit the number of cache lines processed at once, |
| @@ -1275,26 +1275,13 @@ static unsigned long calc_range_end(unsigned long start, unsigned long end) | |||
| 1275 | return end; | 1275 | return end; |
| 1276 | } | 1276 | } |
| 1277 | 1277 | ||
| 1278 | /* | ||
| 1279 | * Make sure 'start' and 'end' reference the same page, as L2 is PIPT | ||
| 1280 | * and range operations only do a TLB lookup on the start address. | ||
| 1281 | */ | ||
| 1282 | static void aurora_pa_range(unsigned long start, unsigned long end, | 1278 | static void aurora_pa_range(unsigned long start, unsigned long end, |
| 1283 | unsigned long offset) | 1279 | unsigned long offset) |
| 1284 | { | 1280 | { |
| 1285 | void __iomem *base = l2x0_base; | 1281 | void __iomem *base = l2x0_base; |
| 1282 | unsigned long range_end; | ||
| 1286 | unsigned long flags; | 1283 | unsigned long flags; |
| 1287 | 1284 | ||
| 1288 | raw_spin_lock_irqsave(&l2x0_lock, flags); | ||
| 1289 | writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG); | ||
| 1290 | writel_relaxed(end, base + offset); | ||
| 1291 | raw_spin_unlock_irqrestore(&l2x0_lock, flags); | ||
| 1292 | |||
| 1293 | writel_relaxed(0, base + AURORA_SYNC_REG); | ||
| 1294 | } | ||
| 1295 | |||
| 1296 | static void aurora_inv_range(unsigned long start, unsigned long end) | ||
| 1297 | { | ||
| 1298 | /* | 1285 | /* |
| 1299 | * round start and end adresses up to cache line size | 1286 | * round start and end adresses up to cache line size |
| 1300 | */ | 1287 | */ |
| @@ -1302,15 +1289,24 @@ static void aurora_inv_range(unsigned long start, unsigned long end) | |||
| 1302 | end = ALIGN(end, CACHE_LINE_SIZE); | 1289 | end = ALIGN(end, CACHE_LINE_SIZE); |
| 1303 | 1290 | ||
| 1304 | /* | 1291 | /* |
| 1305 | * Invalidate all full cache lines between 'start' and 'end'. | 1292 | * perform operation on all full cache lines between 'start' and 'end' |
| 1306 | */ | 1293 | */ |
| 1307 | while (start < end) { | 1294 | while (start < end) { |
| 1308 | unsigned long range_end = calc_range_end(start, end); | 1295 | range_end = aurora_range_end(start, end); |
| 1309 | aurora_pa_range(start, range_end - CACHE_LINE_SIZE, | 1296 | |
| 1310 | AURORA_INVAL_RANGE_REG); | 1297 | raw_spin_lock_irqsave(&l2x0_lock, flags); |
| 1298 | writel_relaxed(start, base + AURORA_RANGE_BASE_ADDR_REG); | ||
| 1299 | writel_relaxed(range_end - CACHE_LINE_SIZE, base + offset); | ||
| 1300 | raw_spin_unlock_irqrestore(&l2x0_lock, flags); | ||
| 1301 | |||
| 1302 | writel_relaxed(0, base + AURORA_SYNC_REG); | ||
| 1311 | start = range_end; | 1303 | start = range_end; |
| 1312 | } | 1304 | } |
| 1313 | } | 1305 | } |
| 1306 | static void aurora_inv_range(unsigned long start, unsigned long end) | ||
| 1307 | { | ||
| 1308 | aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG); | ||
| 1309 | } | ||
| 1314 | 1310 | ||
| 1315 | static void aurora_clean_range(unsigned long start, unsigned long end) | 1311 | static void aurora_clean_range(unsigned long start, unsigned long end) |
| 1316 | { | 1312 | { |
| @@ -1318,36 +1314,16 @@ static void aurora_clean_range(unsigned long start, unsigned long end) | |||
| 1318 | * If L2 is forced to WT, the L2 will always be clean and we | 1314 | * If L2 is forced to WT, the L2 will always be clean and we |
| 1319 | * don't need to do anything here. | 1315 | * don't need to do anything here. |
| 1320 | */ | 1316 | */ |
| 1321 | if (!l2_wt_override) { | 1317 | if (!l2_wt_override) |
| 1322 | start &= ~(CACHE_LINE_SIZE - 1); | 1318 | aurora_pa_range(start, end, AURORA_CLEAN_RANGE_REG); |
| 1323 | end = ALIGN(end, CACHE_LINE_SIZE); | ||
| 1324 | while (start != end) { | ||
| 1325 | unsigned long range_end = calc_range_end(start, end); | ||
| 1326 | aurora_pa_range(start, range_end - CACHE_LINE_SIZE, | ||
| 1327 | AURORA_CLEAN_RANGE_REG); | ||
| 1328 | start = range_end; | ||
| 1329 | } | ||
| 1330 | } | ||
| 1331 | } | 1319 | } |
| 1332 | 1320 | ||
| 1333 | static void aurora_flush_range(unsigned long start, unsigned long end) | 1321 | static void aurora_flush_range(unsigned long start, unsigned long end) |
| 1334 | { | 1322 | { |
| 1335 | start &= ~(CACHE_LINE_SIZE - 1); | 1323 | if (l2_wt_override) |
| 1336 | end = ALIGN(end, CACHE_LINE_SIZE); | 1324 | aurora_pa_range(start, end, AURORA_INVAL_RANGE_REG); |
| 1337 | while (start != end) { | 1325 | else |
| 1338 | unsigned long range_end = calc_range_end(start, end); | 1326 | aurora_pa_range(start, end, AURORA_FLUSH_RANGE_REG); |
| 1339 | /* | ||
| 1340 | * If L2 is forced to WT, the L2 will always be clean and we | ||
| 1341 | * just need to invalidate. | ||
| 1342 | */ | ||
| 1343 | if (l2_wt_override) | ||
| 1344 | aurora_pa_range(start, range_end - CACHE_LINE_SIZE, | ||
| 1345 | AURORA_INVAL_RANGE_REG); | ||
| 1346 | else | ||
| 1347 | aurora_pa_range(start, range_end - CACHE_LINE_SIZE, | ||
| 1348 | AURORA_FLUSH_RANGE_REG); | ||
| 1349 | start = range_end; | ||
| 1350 | } | ||
| 1351 | } | 1327 | } |
| 1352 | 1328 | ||
| 1353 | static void aurora_flush_all(void) | 1329 | static void aurora_flush_all(void) |
