diff options
| -rw-r--r-- | arch/arm/mach-mvebu/coherency.c | 62 | ||||
| -rw-r--r-- | arch/arm/mach-mvebu/coherency_ll.S | 77 |
2 files changed, 106 insertions, 33 deletions
diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index d5a975b6a590..477202fd39cc 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c | |||
| @@ -29,8 +29,10 @@ | |||
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include <linux/mbus.h> | 30 | #include <linux/mbus.h> |
| 31 | #include <linux/clk.h> | 31 | #include <linux/clk.h> |
| 32 | #include <linux/pci.h> | ||
| 32 | #include <asm/smp_plat.h> | 33 | #include <asm/smp_plat.h> |
| 33 | #include <asm/cacheflush.h> | 34 | #include <asm/cacheflush.h> |
| 35 | #include <asm/mach/map.h> | ||
| 34 | #include "armada-370-xp.h" | 36 | #include "armada-370-xp.h" |
| 35 | #include "coherency.h" | 37 | #include "coherency.h" |
| 36 | #include "mvebu-soc-id.h" | 38 | #include "mvebu-soc-id.h" |
| @@ -274,8 +276,8 @@ static struct dma_map_ops mvebu_hwcc_dma_ops = { | |||
| 274 | .set_dma_mask = arm_dma_set_mask, | 276 | .set_dma_mask = arm_dma_set_mask, |
| 275 | }; | 277 | }; |
| 276 | 278 | ||
| 277 | static int mvebu_hwcc_platform_notifier(struct notifier_block *nb, | 279 | static int mvebu_hwcc_notifier(struct notifier_block *nb, |
| 278 | unsigned long event, void *__dev) | 280 | unsigned long event, void *__dev) |
| 279 | { | 281 | { |
| 280 | struct device *dev = __dev; | 282 | struct device *dev = __dev; |
| 281 | 283 | ||
| @@ -286,8 +288,8 @@ static int mvebu_hwcc_platform_notifier(struct notifier_block *nb, | |||
| 286 | return NOTIFY_OK; | 288 | return NOTIFY_OK; |
| 287 | } | 289 | } |
| 288 | 290 | ||
| 289 | static struct notifier_block mvebu_hwcc_platform_nb = { | 291 | static struct notifier_block mvebu_hwcc_nb = { |
| 290 | .notifier_call = mvebu_hwcc_platform_notifier, | 292 | .notifier_call = mvebu_hwcc_notifier, |
| 291 | }; | 293 | }; |
| 292 | 294 | ||
| 293 | static void __init armada_370_coherency_init(struct device_node *np) | 295 | static void __init armada_370_coherency_init(struct device_node *np) |
| @@ -308,9 +310,47 @@ static void __init armada_370_coherency_init(struct device_node *np) | |||
| 308 | set_cpu_coherent(); | 310 | set_cpu_coherent(); |
| 309 | } | 311 | } |
| 310 | 312 | ||
| 313 | /* | ||
| 314 | * This ioremap hook is used on Armada 375/38x to ensure that PCIe | ||
| 315 | * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This | ||
| 316 | * is needed as a workaround for a deadlock issue between the PCIe | ||
| 317 | * interface and the cache controller. | ||
| 318 | */ | ||
| 319 | static void __iomem * | ||
| 320 | armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, | ||
| 321 | unsigned int mtype, void *caller) | ||
| 322 | { | ||
| 323 | struct resource pcie_mem; | ||
| 324 | |||
| 325 | mvebu_mbus_get_pcie_mem_aperture(&pcie_mem); | ||
| 326 | |||
| 327 | if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end) | ||
| 328 | mtype = MT_UNCACHED; | ||
| 329 | |||
| 330 | return __arm_ioremap_caller(phys_addr, size, mtype, caller); | ||
| 331 | } | ||
| 332 | |||
| 311 | static void __init armada_375_380_coherency_init(struct device_node *np) | 333 | static void __init armada_375_380_coherency_init(struct device_node *np) |
| 312 | { | 334 | { |
| 335 | struct device_node *cache_dn; | ||
| 336 | |||
| 313 | coherency_cpu_base = of_iomap(np, 0); | 337 | coherency_cpu_base = of_iomap(np, 0); |
| 338 | arch_ioremap_caller = armada_pcie_wa_ioremap_caller; | ||
| 339 | |||
| 340 | /* | ||
| 341 | * Add the PL310 property "arm,io-coherent". This makes sure the | ||
| 342 | * outer sync operation is not used, which allows us to | ||
| 343 | * work around the system erratum that causes deadlocks when | ||
| 344 | * doing PCIe in an SMP situation on Armada 375 and Armada | ||
| 345 | * 38x. | ||
| 346 | */ | ||
| 347 | for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") { | ||
| 348 | struct property *p; | ||
| 349 | |||
| 350 | p = kzalloc(sizeof(*p), GFP_KERNEL); | ||
| 351 | p->name = kstrdup("arm,io-coherent", GFP_KERNEL); | ||
| 352 | of_add_property(cache_dn, p); | ||
| 353 | } | ||
| 314 | } | 354 | } |
| 315 | 355 | ||
| 316 | static int coherency_type(void) | 356 | static int coherency_type(void) |
| @@ -375,9 +415,21 @@ static int __init coherency_late_init(void) | |||
| 375 | } | 415 | } |
| 376 | 416 | ||
| 377 | bus_register_notifier(&platform_bus_type, | 417 | bus_register_notifier(&platform_bus_type, |
| 378 | &mvebu_hwcc_platform_nb); | 418 | &mvebu_hwcc_nb); |
| 379 | 419 | ||
| 380 | return 0; | 420 | return 0; |
| 381 | } | 421 | } |
| 382 | 422 | ||
| 383 | postcore_initcall(coherency_late_init); | 423 | postcore_initcall(coherency_late_init); |
| 424 | |||
| 425 | #if IS_ENABLED(CONFIG_PCI) | ||
| 426 | static int __init coherency_pci_init(void) | ||
| 427 | { | ||
| 428 | if (coherency_available()) | ||
| 429 | bus_register_notifier(&pci_bus_type, | ||
| 430 | &mvebu_hwcc_nb); | ||
| 431 | return 0; | ||
| 432 | } | ||
| 433 | |||
| 434 | arch_initcall(coherency_pci_init); | ||
| 435 | #endif | ||
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 6828f9f157b0..510c29e079ca 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S | |||
| @@ -24,52 +24,69 @@ | |||
| 24 | #include <asm/cp15.h> | 24 | #include <asm/cp15.h> |
| 25 | 25 | ||
| 26 | .text | 26 | .text |
| 27 | /* Returns with the coherency address in r1 (r0 is untouched)*/ | 27 | /* Returns the coherency base address in r1 (r0 is untouched) */ |
| 28 | ENTRY(ll_get_coherency_base) | 28 | ENTRY(ll_get_coherency_base) |
| 29 | mrc p15, 0, r1, c1, c0, 0 | 29 | mrc p15, 0, r1, c1, c0, 0 |
| 30 | tst r1, #CR_M @ Check MMU bit enabled | 30 | tst r1, #CR_M @ Check MMU bit enabled |
| 31 | bne 1f | 31 | bne 1f |
| 32 | 32 | ||
| 33 | /* use physical address of the coherency register */ | 33 | /* |
| 34 | * MMU is disabled, use the physical address of the coherency | ||
| 35 | * base address. | ||
| 36 | */ | ||
| 34 | adr r1, 3f | 37 | adr r1, 3f |
| 35 | ldr r3, [r1] | 38 | ldr r3, [r1] |
| 36 | ldr r1, [r1, r3] | 39 | ldr r1, [r1, r3] |
| 37 | b 2f | 40 | b 2f |
| 38 | 1: | 41 | 1: |
| 39 | /* use virtual address of the coherency register */ | 42 | /* |
| 43 | * MMU is enabled, use the virtual address of the coherency | ||
| 44 | * base address. | ||
| 45 | */ | ||
| 40 | ldr r1, =coherency_base | 46 | ldr r1, =coherency_base |
| 41 | ldr r1, [r1] | 47 | ldr r1, [r1] |
| 42 | 2: | 48 | 2: |
| 43 | mov pc, lr | 49 | mov pc, lr |
| 44 | ENDPROC(ll_get_coherency_base) | 50 | ENDPROC(ll_get_coherency_base) |
| 45 | 51 | ||
| 46 | /* Returns with the CPU ID in r3 (r0 is untouched)*/ | 52 | /* |
| 47 | ENTRY(ll_get_cpuid) | 53 | * Returns the coherency CPU mask in r3 (r0 is untouched). This |
| 54 | * coherency CPU mask can be used with the coherency fabric | ||
| 55 | * configuration and control registers. Note that the mask is already | ||
| 56 | * endian-swapped as appropriate so that the calling functions do not | ||
| 57 | * have to care about endianness issues while accessing the coherency | ||
| 58 | * fabric registers. | ||
| 59 | */ | ||
| 60 | ENTRY(ll_get_coherency_cpumask) | ||
| 48 | mrc 15, 0, r3, cr0, cr0, 5 | 61 | mrc 15, 0, r3, cr0, cr0, 5 |
| 49 | and r3, r3, #15 | 62 | and r3, r3, #15 |
| 50 | mov r2, #(1 << 24) | 63 | mov r2, #(1 << 24) |
| 51 | lsl r3, r2, r3 | 64 | lsl r3, r2, r3 |
| 52 | ARM_BE8(rev r1, r1) | 65 | ARM_BE8(rev r3, r3) |
| 53 | mov pc, lr | 66 | mov pc, lr |
| 54 | ENDPROC(ll_get_cpuid) | 67 | ENDPROC(ll_get_coherency_cpumask) |
| 55 | 68 | ||
| 56 | /* ll_add_cpu_to_smp_group, ll_enable_coherency and | 69 | /* |
| 57 | * ll_disable_coherency use strex/ldrex whereas MMU can be off. The | 70 | * ll_add_cpu_to_smp_group(), ll_enable_coherency() and |
| 58 | * Armada XP SoC has an exclusive monitor that can track transactions | 71 | * ll_disable_coherency() use the strex/ldrex instructions while the |
| 59 | * to Device and/or SO and as such also when MMU is disabled the | 72 | * MMU can be disabled. The Armada XP SoC has an exclusive monitor |
| 60 | * exclusive transactions will be functional | 73 | * that tracks transactions to Device and/or SO memory and thanks to |
| 74 | * that, exclusive transactions are functional even when the MMU is | ||
| 75 | * disabled. | ||
| 61 | */ | 76 | */ |
| 62 | 77 | ||
| 63 | ENTRY(ll_add_cpu_to_smp_group) | 78 | ENTRY(ll_add_cpu_to_smp_group) |
| 64 | /* | 79 | /* |
| 65 | * r0 being untouched in ll_get_coherency_base and | 80 | * As r0 is not modified by ll_get_coherency_base() and |
| 66 | * ll_get_cpuid, we can use it to save lr modifing it with the | 81 | * ll_get_coherency_cpumask(), we use it to temporarily save lr |
| 67 | * following bl | 82 | * and avoid it being modified by the branch and link |
| 83 | * calls. This function is used very early in the secondary | ||
| 84 | * CPU boot, and no stack is available at this point. | ||
| 68 | */ | 85 | */ |
| 69 | mov r0, lr | 86 | mov r0, lr |
| 70 | bl ll_get_coherency_base | 87 | bl ll_get_coherency_base |
| 71 | bl ll_get_cpuid | 88 | bl ll_get_coherency_cpumask |
| 72 | mov lr, r0 | 89 | mov lr, r0 |
| 73 | add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET | 90 | add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET |
| 74 | 1: | 91 | 1: |
| 75 | ldrex r2, [r0] | 92 | ldrex r2, [r0] |
| @@ -82,13 +99,15 @@ ENDPROC(ll_add_cpu_to_smp_group) | |||
| 82 | 99 | ||
| 83 | ENTRY(ll_enable_coherency) | 100 | ENTRY(ll_enable_coherency) |
| 84 | /* | 101 | /* |
| 85 | * r0 being untouched in ll_get_coherency_base and | 102 | * As r0 is not modified by ll_get_coherency_base() and |
| 86 | * ll_get_cpuid, we can use it to save lr modifing it with the | 103 | * ll_get_coherency_cpumask(), we use it to temporarily save lr |
| 87 | * following bl | 104 | * and avoid it being modified by the branch and link |
| 105 | * calls. This function is used very early in the secondary | ||
| 106 | * CPU boot, and no stack is available at this point. | ||
| 88 | */ | 107 | */ |
| 89 | mov r0, lr | 108 | mov r0, lr |
| 90 | bl ll_get_coherency_base | 109 | bl ll_get_coherency_base |
| 91 | bl ll_get_cpuid | 110 | bl ll_get_coherency_cpumask |
| 92 | mov lr, r0 | 111 | mov lr, r0 |
| 93 | add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET | 112 | add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET |
| 94 | 1: | 113 | 1: |
| @@ -104,14 +123,16 @@ ENDPROC(ll_enable_coherency) | |||
| 104 | 123 | ||
| 105 | ENTRY(ll_disable_coherency) | 124 | ENTRY(ll_disable_coherency) |
| 106 | /* | 125 | /* |
| 107 | * r0 being untouched in ll_get_coherency_base and | 126 | * As r0 is not modified by ll_get_coherency_base() and |
| 108 | * ll_get_cpuid, we can use it to save lr modifing it with the | 127 | * ll_get_coherency_cpumask(), we use it to temporarily save lr |
| 109 | * following bl | 128 | * and avoid it being modified by the branch and link |
| 129 | * calls. This function is used very early in the secondary | ||
| 130 | * CPU boot, and no stack is available at this point. | ||
| 110 | */ | 131 | */ |
| 111 | mov r0, lr | 132 | mov r0, lr |
| 112 | bl ll_get_coherency_base | 133 | bl ll_get_coherency_base |
| 113 | bl ll_get_cpuid | 134 | bl ll_get_coherency_cpumask |
| 114 | mov lr, r0 | 135 | mov lr, r0 |
| 115 | add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET | 136 | add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET |
| 116 | 1: | 137 | 1: |
| 117 | ldrex r2, [r0] | 138 | ldrex r2, [r0] |
