diff options
author | Arnd Bergmann <arnd@arndb.de> | 2014-05-23 12:50:19 -0400 |
---|---|---|
committer | Arnd Bergmann <arnd@arndb.de> | 2014-05-23 15:56:29 -0400 |
commit | 37f5f4e17399ffb572af3834b8045c5391a5bf0b (patch) | |
tree | 8f27a03601a41901e9d6f0c32260169b0167909b | |
parent | 650052b141fd5640176e54dd443e5ff13ed2b77f (diff) | |
parent | 07ae144be1b2ac45f893bc1ed3fe1a49f7128e46 (diff) |
Merge tag 'mvebu-soc-3.16-2' of git://git.infradead.org/linux-mvebu into next/soc
Merge "mvebu SoC changes for v3.16 (incremental #2)" from Jason Cooper <jason@lakedaemon.net>:
- mvebu
- fix coherency on big-endian in -next
- hardware IO coherency
- L2/PCIe deadlock workaround
- small coherency cleanups
* tag 'mvebu-soc-3.16-2' of git://git.infradead.org/linux-mvebu:
ARM: mvebu: rename ll_get_cpuid() to ll_get_coherency_cpumask()
ARM: mvebu: improve comments in coherency_ll.S
ARM: mvebu: fix indentation of assembly instructions in coherency_ll.S
ARM: mvebu: fix big endian booting after coherency code rework
ARM: mvebu: coherency: fix registration of PCI bus notifier when !PCI
ARM: mvebu: implement L2/PCIe deadlock workaround
ARM: mvebu: use hardware I/O coherency also for PCI devices
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
-rw-r--r-- | arch/arm/mach-mvebu/coherency.c | 62 | ||||
-rw-r--r-- | arch/arm/mach-mvebu/coherency_ll.S | 77 |
2 files changed, 106 insertions, 33 deletions
diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index d5a975b6a590..477202fd39cc 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c | |||
@@ -29,8 +29,10 @@ | |||
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/mbus.h> | 30 | #include <linux/mbus.h> |
31 | #include <linux/clk.h> | 31 | #include <linux/clk.h> |
32 | #include <linux/pci.h> | ||
32 | #include <asm/smp_plat.h> | 33 | #include <asm/smp_plat.h> |
33 | #include <asm/cacheflush.h> | 34 | #include <asm/cacheflush.h> |
35 | #include <asm/mach/map.h> | ||
34 | #include "armada-370-xp.h" | 36 | #include "armada-370-xp.h" |
35 | #include "coherency.h" | 37 | #include "coherency.h" |
36 | #include "mvebu-soc-id.h" | 38 | #include "mvebu-soc-id.h" |
@@ -274,8 +276,8 @@ static struct dma_map_ops mvebu_hwcc_dma_ops = { | |||
274 | .set_dma_mask = arm_dma_set_mask, | 276 | .set_dma_mask = arm_dma_set_mask, |
275 | }; | 277 | }; |
276 | 278 | ||
277 | static int mvebu_hwcc_platform_notifier(struct notifier_block *nb, | 279 | static int mvebu_hwcc_notifier(struct notifier_block *nb, |
278 | unsigned long event, void *__dev) | 280 | unsigned long event, void *__dev) |
279 | { | 281 | { |
280 | struct device *dev = __dev; | 282 | struct device *dev = __dev; |
281 | 283 | ||
@@ -286,8 +288,8 @@ static int mvebu_hwcc_platform_notifier(struct notifier_block *nb, | |||
286 | return NOTIFY_OK; | 288 | return NOTIFY_OK; |
287 | } | 289 | } |
288 | 290 | ||
289 | static struct notifier_block mvebu_hwcc_platform_nb = { | 291 | static struct notifier_block mvebu_hwcc_nb = { |
290 | .notifier_call = mvebu_hwcc_platform_notifier, | 292 | .notifier_call = mvebu_hwcc_notifier, |
291 | }; | 293 | }; |
292 | 294 | ||
293 | static void __init armada_370_coherency_init(struct device_node *np) | 295 | static void __init armada_370_coherency_init(struct device_node *np) |
@@ -308,9 +310,47 @@ static void __init armada_370_coherency_init(struct device_node *np) | |||
308 | set_cpu_coherent(); | 310 | set_cpu_coherent(); |
309 | } | 311 | } |
310 | 312 | ||
313 | /* | ||
314 | * This ioremap hook is used on Armada 375/38x to ensure that PCIe | ||
315 | * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This | ||
316 | * is needed as a workaround for a deadlock issue between the PCIe | ||
317 | * interface and the cache controller. | ||
318 | */ | ||
319 | static void __iomem * | ||
320 | armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size, | ||
321 | unsigned int mtype, void *caller) | ||
322 | { | ||
323 | struct resource pcie_mem; | ||
324 | |||
325 | mvebu_mbus_get_pcie_mem_aperture(&pcie_mem); | ||
326 | |||
327 | if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end) | ||
328 | mtype = MT_UNCACHED; | ||
329 | |||
330 | return __arm_ioremap_caller(phys_addr, size, mtype, caller); | ||
331 | } | ||
332 | |||
311 | static void __init armada_375_380_coherency_init(struct device_node *np) | 333 | static void __init armada_375_380_coherency_init(struct device_node *np) |
312 | { | 334 | { |
335 | struct device_node *cache_dn; | ||
336 | |||
313 | coherency_cpu_base = of_iomap(np, 0); | 337 | coherency_cpu_base = of_iomap(np, 0); |
338 | arch_ioremap_caller = armada_pcie_wa_ioremap_caller; | ||
339 | |||
340 | /* | ||
341 | * Add the PL310 property "arm,io-coherent". This makes sure the | ||
342 | * outer sync operation is not used, which allows to | ||
343 | * workaround the system erratum that causes deadlocks when | ||
344 | * doing PCIe in an SMP situation on Armada 375 and Armada | ||
345 | * 38x. | ||
346 | */ | ||
347 | for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") { | ||
348 | struct property *p; | ||
349 | |||
350 | p = kzalloc(sizeof(*p), GFP_KERNEL); | ||
351 | p->name = kstrdup("arm,io-coherent", GFP_KERNEL); | ||
352 | of_add_property(cache_dn, p); | ||
353 | } | ||
314 | } | 354 | } |
315 | 355 | ||
316 | static int coherency_type(void) | 356 | static int coherency_type(void) |
@@ -375,9 +415,21 @@ static int __init coherency_late_init(void) | |||
375 | } | 415 | } |
376 | 416 | ||
377 | bus_register_notifier(&platform_bus_type, | 417 | bus_register_notifier(&platform_bus_type, |
378 | &mvebu_hwcc_platform_nb); | 418 | &mvebu_hwcc_nb); |
379 | 419 | ||
380 | return 0; | 420 | return 0; |
381 | } | 421 | } |
382 | 422 | ||
383 | postcore_initcall(coherency_late_init); | 423 | postcore_initcall(coherency_late_init); |
424 | |||
425 | #if IS_ENABLED(CONFIG_PCI) | ||
426 | static int __init coherency_pci_init(void) | ||
427 | { | ||
428 | if (coherency_available()) | ||
429 | bus_register_notifier(&pci_bus_type, | ||
430 | &mvebu_hwcc_nb); | ||
431 | return 0; | ||
432 | } | ||
433 | |||
434 | arch_initcall(coherency_pci_init); | ||
435 | #endif | ||
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 6828f9f157b0..510c29e079ca 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S | |||
@@ -24,52 +24,69 @@ | |||
24 | #include <asm/cp15.h> | 24 | #include <asm/cp15.h> |
25 | 25 | ||
26 | .text | 26 | .text |
27 | /* Returns with the coherency address in r1 (r0 is untouched)*/ | 27 | /* Returns the coherency base address in r1 (r0 is untouched) */ |
28 | ENTRY(ll_get_coherency_base) | 28 | ENTRY(ll_get_coherency_base) |
29 | mrc p15, 0, r1, c1, c0, 0 | 29 | mrc p15, 0, r1, c1, c0, 0 |
30 | tst r1, #CR_M @ Check MMU bit enabled | 30 | tst r1, #CR_M @ Check MMU bit enabled |
31 | bne 1f | 31 | bne 1f |
32 | 32 | ||
33 | /* use physical address of the coherency register */ | 33 | /* |
34 | * MMU is disabled, use the physical address of the coherency | ||
35 | * base address. | ||
36 | */ | ||
34 | adr r1, 3f | 37 | adr r1, 3f |
35 | ldr r3, [r1] | 38 | ldr r3, [r1] |
36 | ldr r1, [r1, r3] | 39 | ldr r1, [r1, r3] |
37 | b 2f | 40 | b 2f |
38 | 1: | 41 | 1: |
39 | /* use virtual address of the coherency register */ | 42 | /* |
43 | * MMU is enabled, use the virtual address of the coherency | ||
44 | * base address. | ||
45 | */ | ||
40 | ldr r1, =coherency_base | 46 | ldr r1, =coherency_base |
41 | ldr r1, [r1] | 47 | ldr r1, [r1] |
42 | 2: | 48 | 2: |
43 | mov pc, lr | 49 | mov pc, lr |
44 | ENDPROC(ll_get_coherency_base) | 50 | ENDPROC(ll_get_coherency_base) |
45 | 51 | ||
46 | /* Returns with the CPU ID in r3 (r0 is untouched)*/ | 52 | /* |
47 | ENTRY(ll_get_cpuid) | 53 | * Returns the coherency CPU mask in r3 (r0 is untouched). This |
54 | * coherency CPU mask can be used with the coherency fabric | ||
55 | * configuration and control registers. Note that the mask is already | ||
56 | * endian-swapped as appropriate so that the calling functions do not | ||
57 | * have to care about endianness issues while accessing the coherency | ||
58 | * fabric registers. | ||
59 | */ | ||
60 | ENTRY(ll_get_coherency_cpumask) | ||
48 | mrc 15, 0, r3, cr0, cr0, 5 | 61 | mrc 15, 0, r3, cr0, cr0, 5 |
49 | and r3, r3, #15 | 62 | and r3, r3, #15 |
50 | mov r2, #(1 << 24) | 63 | mov r2, #(1 << 24) |
51 | lsl r3, r2, r3 | 64 | lsl r3, r2, r3 |
52 | ARM_BE8(rev r1, r1) | 65 | ARM_BE8(rev r3, r3) |
53 | mov pc, lr | 66 | mov pc, lr |
54 | ENDPROC(ll_get_cpuid) | 67 | ENDPROC(ll_get_coherency_cpumask) |
55 | 68 | ||
56 | /* ll_add_cpu_to_smp_group, ll_enable_coherency and | 69 | /* |
57 | * ll_disable_coherency use strex/ldrex whereas MMU can be off. The | 70 | * ll_add_cpu_to_smp_group(), ll_enable_coherency() and |
58 | * Armada XP SoC has an exclusive monitor that can track transactions | 71 | * ll_disable_coherency() use the strex/ldrex instructions while the |
59 | * to Device and/or SO and as such also when MMU is disabled the | 72 | * MMU can be disabled. The Armada XP SoC has an exclusive monitor |
60 | * exclusive transactions will be functional | 73 | * that tracks transactions to Device and/or SO memory and thanks to |
74 | * that, exclusive transactions are functional even when the MMU is | ||
75 | * disabled. | ||
61 | */ | 76 | */ |
62 | 77 | ||
63 | ENTRY(ll_add_cpu_to_smp_group) | 78 | ENTRY(ll_add_cpu_to_smp_group) |
64 | /* | 79 | /* |
65 | * r0 being untouched in ll_get_coherency_base and | 80 | * As r0 is not modified by ll_get_coherency_base() and |
66 | * ll_get_cpuid, we can use it to save lr modifing it with the | 81 | * ll_get_coherency_cpumask(), we use it to temporarily save lr |
67 | * following bl | 82 | * and avoid it being modified by the branch and link |
83 | * calls. This function is used very early in the secondary | ||
84 | * CPU boot, and no stack is available at this point. | ||
68 | */ | 85 | */ |
69 | mov r0, lr | 86 | mov r0, lr |
70 | bl ll_get_coherency_base | 87 | bl ll_get_coherency_base |
71 | bl ll_get_cpuid | 88 | bl ll_get_coherency_cpumask |
72 | mov lr, r0 | 89 | mov lr, r0 |
73 | add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET | 90 | add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET |
74 | 1: | 91 | 1: |
75 | ldrex r2, [r0] | 92 | ldrex r2, [r0] |
@@ -82,13 +99,15 @@ ENDPROC(ll_add_cpu_to_smp_group) | |||
82 | 99 | ||
83 | ENTRY(ll_enable_coherency) | 100 | ENTRY(ll_enable_coherency) |
84 | /* | 101 | /* |
85 | * r0 being untouched in ll_get_coherency_base and | 102 | * As r0 is not modified by ll_get_coherency_base() and |
86 | * ll_get_cpuid, we can use it to save lr modifing it with the | 103 | * ll_get_coherency_cpumask(), we use it to temporarily save lr |
87 | * following bl | 104 | * and avoid it being modified by the branch and link |
105 | * calls. This function is used very early in the secondary | ||
106 | * CPU boot, and no stack is available at this point. | ||
88 | */ | 107 | */ |
89 | mov r0, lr | 108 | mov r0, lr |
90 | bl ll_get_coherency_base | 109 | bl ll_get_coherency_base |
91 | bl ll_get_cpuid | 110 | bl ll_get_coherency_cpumask |
92 | mov lr, r0 | 111 | mov lr, r0 |
93 | add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET | 112 | add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET |
94 | 1: | 113 | 1: |
@@ -104,14 +123,16 @@ ENDPROC(ll_enable_coherency) | |||
104 | 123 | ||
105 | ENTRY(ll_disable_coherency) | 124 | ENTRY(ll_disable_coherency) |
106 | /* | 125 | /* |
107 | * r0 being untouched in ll_get_coherency_base and | 126 | * As r0 is not modified by ll_get_coherency_base() and |
108 | * ll_get_cpuid, we can use it to save lr modifing it with the | 127 | * ll_get_coherency_cpumask(), we use it to temporarily save lr |
109 | * following bl | 128 | * and avoid it being modified by the branch and link |
129 | * calls. This function is used very early in the secondary | ||
130 | * CPU boot, and no stack is available at this point. | ||
110 | */ | 131 | */ |
111 | mov r0, lr | 132 | mov r0, lr |
112 | bl ll_get_coherency_base | 133 | bl ll_get_coherency_base |
113 | bl ll_get_cpuid | 134 | bl ll_get_coherency_cpumask |
114 | mov lr, r0 | 135 | mov lr, r0 |
115 | add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET | 136 | add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET |
116 | 1: | 137 | 1: |
117 | ldrex r2, [r0] | 138 | ldrex r2, [r0] |