about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Mikulas Patocka <mpatocka@redhat.com> 2019-04-27 18:09:53 -0400
committer Helge Deller <deller@gmx.de> 2019-05-03 17:47:41 -0400
commit b37d1c1898b288c69f3dc9267bc2c41af06f4a4b (patch)
tree d041b4796035a447687ae8b1bc7d904c39f8729b
parent d19a12906e5e558c0f6b6cfece7b7caf1012ef95 (diff)
parisc: Use per-pagetable spinlock
PA-RISC uses a global spinlock to protect pagetable updates in the TLB
fault handlers. When multiple cores are taking TLB faults simultaneously,
the cache line containing the spinlock becomes a bottleneck.

This patch embeds the spinlock in the top level page directory, so that
every process has its own lock. It improves performance by 30% when doing
parallel compilations.

At least on the N class systems, only one PxTLB inter processor broadcast
can be active at any one time on the Merced bus. If a Merced bus is found,
this patch serializes the TLB flushes with the pa_tlb_flush_lock spinlock.

v1: Initial patch by Mikulas
v2: Added Merced detection by Helge
v3: Revised TLB serialization by Dave & Helge

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: John David Anglin <dave.anglin@bell.net>
Signed-off-by: Helge Deller <deller@gmx.de>
-rw-r--r-- arch/parisc/include/asm/hardware.h 2
-rw-r--r-- arch/parisc/include/asm/pgalloc.h 1
-rw-r--r-- arch/parisc/include/asm/pgtable.h 69
-rw-r--r-- arch/parisc/include/asm/tlbflush.h 24
-rw-r--r-- arch/parisc/kernel/cache.c 15
-rw-r--r-- arch/parisc/kernel/drivers.c 25
-rw-r--r-- arch/parisc/kernel/entry.S 8
-rw-r--r-- arch/parisc/kernel/inventory.c 7
-rw-r--r-- arch/parisc/kernel/setup.c 6
9 files changed, 108 insertions, 49 deletions
diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h
index d6e1ed145031..9d3d7737c58b 100644
--- a/arch/parisc/include/asm/hardware.h
+++ b/arch/parisc/include/asm/hardware.h
@@ -120,7 +120,7 @@ extern void get_pci_node_path(struct pci_dev *dev, struct hardware_path *path);
120extern void init_parisc_bus(void); 120extern void init_parisc_bus(void);
121extern struct device *hwpath_to_device(struct hardware_path *modpath); 121extern struct device *hwpath_to_device(struct hardware_path *modpath);
122extern void device_to_hwpath(struct device *dev, struct hardware_path *path); 122extern void device_to_hwpath(struct device *dev, struct hardware_path *path);
123 123extern int machine_has_merced_bus(void);
124 124
125/* inventory.c: */ 125/* inventory.c: */
126extern void do_memory_inventory(void); 126extern void do_memory_inventory(void);
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index d05c678c77c4..ea75cc966dae 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -41,6 +41,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
41 __pgd_val_set(*pgd, PxD_FLAG_ATTACHED); 41 __pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
42#endif 42#endif
43 } 43 }
44 spin_lock_init(pgd_spinlock(actual_pgd));
44 return actual_pgd; 45 return actual_pgd;
45} 46}
46 47
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index c7bb74e22436..a39b079e73f2 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -17,7 +17,7 @@
17#include <asm/processor.h> 17#include <asm/processor.h>
18#include <asm/cache.h> 18#include <asm/cache.h>
19 19
20extern spinlock_t pa_tlb_lock; 20static inline spinlock_t *pgd_spinlock(pgd_t *);
21 21
22/* 22/*
23 * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel 23 * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -34,16 +34,46 @@ extern spinlock_t pa_tlb_lock;
34 */ 34 */
35#define kern_addr_valid(addr) (1) 35#define kern_addr_valid(addr) (1)
36 36
37/* Purge data and instruction TLB entries. Must be called holding 37/* This is for the serialization of PxTLB broadcasts. At least on the N class
38 * the pa_tlb_lock. The TLB purge instructions are slow on SMP 38 * systems, only one PxTLB inter processor broadcast can be active at any one
39 * machines since the purge must be broadcast to all CPUs. 39 * time on the Merced bus.
40
41 * PTE updates are protected by locks in the PMD.
42 */
43extern spinlock_t pa_tlb_flush_lock;
44extern spinlock_t pa_swapper_pg_lock;
45#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
46extern int pa_serialize_tlb_flushes;
47#else
48#define pa_serialize_tlb_flushes (0)
49#endif
50
51#define purge_tlb_start(flags) do { \
52 if (pa_serialize_tlb_flushes) \
53 spin_lock_irqsave(&pa_tlb_flush_lock, flags); \
54 else \
55 local_irq_save(flags); \
56 } while (0)
57#define purge_tlb_end(flags) do { \
58 if (pa_serialize_tlb_flushes) \
59 spin_unlock_irqrestore(&pa_tlb_flush_lock, flags); \
60 else \
61 local_irq_restore(flags); \
62 } while (0)
63
64/* Purge data and instruction TLB entries. The TLB purge instructions
65 * are slow on SMP machines since the purge must be broadcast to all CPUs.
40 */ 66 */
41 67
42static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) 68static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
43{ 69{
70 unsigned long flags;
71
72 purge_tlb_start(flags);
44 mtsp(mm->context, 1); 73 mtsp(mm->context, 1);
45 pdtlb(addr); 74 pdtlb(addr);
46 pitlb(addr); 75 pitlb(addr);
76 purge_tlb_end(flags);
47} 77}
48 78
49/* Certain architectures need to do special things when PTEs 79/* Certain architectures need to do special things when PTEs
@@ -59,11 +89,11 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
59 do { \ 89 do { \
60 pte_t old_pte; \ 90 pte_t old_pte; \
61 unsigned long flags; \ 91 unsigned long flags; \
62 spin_lock_irqsave(&pa_tlb_lock, flags); \ 92 spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
63 old_pte = *ptep; \ 93 old_pte = *ptep; \
64 set_pte(ptep, pteval); \ 94 set_pte(ptep, pteval); \
65 purge_tlb_entries(mm, addr); \ 95 purge_tlb_entries(mm, addr); \
66 spin_unlock_irqrestore(&pa_tlb_lock, flags); \ 96 spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
67 } while (0) 97 } while (0)
68 98
69#endif /* !__ASSEMBLY__ */ 99#endif /* !__ASSEMBLY__ */
@@ -88,10 +118,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
88#if CONFIG_PGTABLE_LEVELS == 3 118#if CONFIG_PGTABLE_LEVELS == 3
89#define PGD_ORDER 1 /* Number of pages per pgd */ 119#define PGD_ORDER 1 /* Number of pages per pgd */
90#define PMD_ORDER 1 /* Number of pages per pmd */ 120#define PMD_ORDER 1 /* Number of pages per pmd */
91#define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ 121#define PGD_ALLOC_ORDER (2 + 1) /* first pgd contains pmd */
92#else 122#else
93#define PGD_ORDER 1 /* Number of pages per pgd */ 123#define PGD_ORDER 1 /* Number of pages per pgd */
94#define PGD_ALLOC_ORDER PGD_ORDER 124#define PGD_ALLOC_ORDER (PGD_ORDER + 1)
95#endif 125#endif
96 126
97/* Definitions for 3rd level (we use PLD here for Page Lower directory 127/* Definitions for 3rd level (we use PLD here for Page Lower directory
@@ -459,6 +489,15 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
459#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) 489#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
460#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) 490#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
461 491
492
493static inline spinlock_t *pgd_spinlock(pgd_t *pgd)
494{
495 if (unlikely(pgd == swapper_pg_dir))
496 return &pa_swapper_pg_lock;
497 return (spinlock_t *)((char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1)));
498}
499
500
462static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) 501static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
463{ 502{
464 pte_t pte; 503 pte_t pte;
@@ -467,15 +506,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
467 if (!pte_young(*ptep)) 506 if (!pte_young(*ptep))
468 return 0; 507 return 0;
469 508
470 spin_lock_irqsave(&pa_tlb_lock, flags); 509 spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags);
471 pte = *ptep; 510 pte = *ptep;
472 if (!pte_young(pte)) { 511 if (!pte_young(pte)) {
473 spin_unlock_irqrestore(&pa_tlb_lock, flags); 512 spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
474 return 0; 513 return 0;
475 } 514 }
476 set_pte(ptep, pte_mkold(pte)); 515 set_pte(ptep, pte_mkold(pte));
477 purge_tlb_entries(vma->vm_mm, addr); 516 purge_tlb_entries(vma->vm_mm, addr);
478 spin_unlock_irqrestore(&pa_tlb_lock, flags); 517 spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
479 return 1; 518 return 1;
480} 519}
481 520
@@ -485,11 +524,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
485 pte_t old_pte; 524 pte_t old_pte;
486 unsigned long flags; 525 unsigned long flags;
487 526
488 spin_lock_irqsave(&pa_tlb_lock, flags); 527 spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
489 old_pte = *ptep; 528 old_pte = *ptep;
490 set_pte(ptep, __pte(0)); 529 set_pte(ptep, __pte(0));
491 purge_tlb_entries(mm, addr); 530 purge_tlb_entries(mm, addr);
492 spin_unlock_irqrestore(&pa_tlb_lock, flags); 531 spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
493 532
494 return old_pte; 533 return old_pte;
495} 534}
@@ -497,10 +536,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
497static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 536static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
498{ 537{
499 unsigned long flags; 538 unsigned long flags;
500 spin_lock_irqsave(&pa_tlb_lock, flags); 539 spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
501 set_pte(ptep, pte_wrprotect(*ptep)); 540 set_pte(ptep, pte_wrprotect(*ptep));
502 purge_tlb_entries(mm, addr); 541 purge_tlb_entries(mm, addr);
503 spin_unlock_irqrestore(&pa_tlb_lock, flags); 542 spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
504} 543}
505 544
506#define pte_same(A,B) (pte_val(A) == pte_val(B)) 545#define pte_same(A,B) (pte_val(A) == pte_val(B))
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index 6804374efa66..c5ded01d45be 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -8,21 +8,6 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <asm/mmu_context.h> 9#include <asm/mmu_context.h>
10 10
11
12/* This is for the serialisation of PxTLB broadcasts. At least on the
13 * N class systems, only one PxTLB inter processor broadcast can be
14 * active at any one time on the Merced bus. This tlb purge
15 * synchronisation is fairly lightweight and harmless so we activate
16 * it on all systems not just the N class.
17
18 * It is also used to ensure PTE updates are atomic and consistent
19 * with the TLB.
20 */
21extern spinlock_t pa_tlb_lock;
22
23#define purge_tlb_start(flags) spin_lock_irqsave(&pa_tlb_lock, flags)
24#define purge_tlb_end(flags) spin_unlock_irqrestore(&pa_tlb_lock, flags)
25
26extern void flush_tlb_all(void); 11extern void flush_tlb_all(void);
27extern void flush_tlb_all_local(void *); 12extern void flush_tlb_all_local(void *);
28 13
@@ -79,13 +64,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
79static inline void flush_tlb_page(struct vm_area_struct *vma, 64static inline void flush_tlb_page(struct vm_area_struct *vma,
80 unsigned long addr) 65 unsigned long addr)
81{ 66{
82 unsigned long flags, sid; 67 purge_tlb_entries(vma->vm_mm, addr);
83
84 sid = vma->vm_mm->context;
85 purge_tlb_start(flags);
86 mtsp(sid, 1);
87 pdtlb(addr);
88 pitlb(addr);
89 purge_tlb_end(flags);
90} 68}
91#endif 69#endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 804880efa11e..0338561968a4 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -40,12 +40,19 @@ void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
40void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); 40void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
41 41
42 42
43/* On some machines (e.g. ones with the Merced bus), there can be 43/* On some machines (i.e., ones with the Merced bus), there can be
44 * only a single PxTLB broadcast at a time; this must be guaranteed 44 * only a single PxTLB broadcast at a time; this must be guaranteed
45 * by software. We put a spinlock around all TLB flushes to 45 * by software. We need a spinlock around all TLB flushes to ensure
46 * ensure this. 46 * this.
47 */ 47 */
48DEFINE_SPINLOCK(pa_tlb_lock); 48DEFINE_SPINLOCK(pa_tlb_flush_lock);
49
50/* Swapper page setup lock. */
51DEFINE_SPINLOCK(pa_swapper_pg_lock);
52
53#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
54int pa_serialize_tlb_flushes __read_mostly;
55#endif
49 56
50struct pdc_cache_info cache_info __read_mostly; 57struct pdc_cache_info cache_info __read_mostly;
51#ifndef CONFIG_PA20 58#ifndef CONFIG_PA20
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 5eb979d04b90..15e7b3be7b6b 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -38,6 +38,7 @@
38#include <asm/io.h> 38#include <asm/io.h>
39#include <asm/pdc.h> 39#include <asm/pdc.h>
40#include <asm/parisc-device.h> 40#include <asm/parisc-device.h>
41#include <asm/ropes.h>
41 42
42/* See comments in include/asm-parisc/pci.h */ 43/* See comments in include/asm-parisc/pci.h */
43const struct dma_map_ops *hppa_dma_ops __read_mostly; 44const struct dma_map_ops *hppa_dma_ops __read_mostly;
@@ -257,6 +258,30 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa)
257 return ret ? d.dev : NULL; 258 return ret ? d.dev : NULL;
258} 259}
259 260
261static int __init is_IKE_device(struct device *dev, void *data)
262{
263 struct parisc_device *pdev = to_parisc_device(dev);
264
265 if (!check_dev(dev))
266 return 0;
267 if (pdev->id.hw_type != HPHW_BCPORT)
268 return 0;
269 if (IS_IKE(pdev) ||
270 (pdev->id.hversion == REO_MERCED_PORT) ||
271 (pdev->id.hversion == REOG_MERCED_PORT)) {
272 return 1;
273 }
274 return 0;
275}
276
277int __init machine_has_merced_bus(void)
278{
279 int ret;
280
281 ret = for_each_padev(is_IKE_device, NULL);
282 return ret ? 1 : 0;
283}
284
260/** 285/**
261 * find_pa_parent_type - Find a parent of a specific type 286 * find_pa_parent_type - Find a parent of a specific type
262 * @dev: The device to start searching from 287 * @dev: The device to start searching from
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 5796524a3137..a1fc04570ade 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -50,12 +50,8 @@
50 50
51 .import pa_tlb_lock,data 51 .import pa_tlb_lock,data
52 .macro load_pa_tlb_lock reg 52 .macro load_pa_tlb_lock reg
53#if __PA_LDCW_ALIGNMENT > 4 53 mfctl %cr25,\reg
54 load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg 54 addil L%(PAGE_SIZE << (PGD_ALLOC_ORDER - 1)),\reg
55 depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg
56#else
57 load32 PA(pa_tlb_lock), \reg
58#endif
59 .endm 55 .endm
60 56
61 /* space_to_prot macro creates a prot id from a space id */ 57 /* space_to_prot macro creates a prot id from a space id */
diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c
index 35d05fdd7483..6f2d611347a1 100644
--- a/arch/parisc/kernel/inventory.c
+++ b/arch/parisc/kernel/inventory.c
@@ -31,6 +31,7 @@
31#include <asm/processor.h> 31#include <asm/processor.h>
32#include <asm/page.h> 32#include <asm/page.h>
33#include <asm/parisc-device.h> 33#include <asm/parisc-device.h>
34#include <asm/tlbflush.h>
34 35
35/* 36/*
36** Debug options 37** Debug options
@@ -638,4 +639,10 @@ void __init do_device_inventory(void)
638 } 639 }
639 printk(KERN_INFO "Found devices:\n"); 640 printk(KERN_INFO "Found devices:\n");
640 print_parisc_devices(); 641 print_parisc_devices();
642
643#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
644 pa_serialize_tlb_flushes = machine_has_merced_bus();
645 if (pa_serialize_tlb_flushes)
646 pr_info("Merced bus found: Enable PxTLB serialization.\n");
647#endif
641} 648}
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index d908058d05c1..e05cb2a5c16d 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -343,6 +343,12 @@ static int __init parisc_init(void)
343 boot_cpu_data.cpu_hz / 1000000, 343 boot_cpu_data.cpu_hz / 1000000,
344 boot_cpu_data.cpu_hz % 1000000 ); 344 boot_cpu_data.cpu_hz % 1000000 );
345 345
346#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
347 /* Don't serialize TLB flushes if we run on one CPU only. */
348 if (num_online_cpus() == 1)
349 pa_serialize_tlb_flushes = 0;
350#endif
351
346 apply_alternatives_all(); 352 apply_alternatives_all();
347 parisc_setup_cache_timing(); 353 parisc_setup_cache_timing();
348 354