author     John David Anglin <dave.anglin@bell.net>   2015-07-01 17:18:37 -0400
committer  Helge Deller <deller@gmx.de>               2015-07-10 15:47:47 -0400
commit     01ab60570427caa24b9debc369e452e86cd9beb4 (patch)
tree       473a38189494252e70a98a17bc342015ee0c681f /arch/parisc/include
parent     cb908ed3495496b9973a2b9ed1a60f43933fdf01 (diff)
parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results
The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000):

 swap_free: Unused swap offset entry 00000004
 BUG: Bad page map in process mysqld  pte:00000100 pmd:019bbec5
 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464
 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1
 Backtrace:
  [<0000000040173eb0>] show_stack+0x20/0x38
  [<0000000040444424>] dump_stack+0x9c/0x110
  [<00000000402a0d38>] print_bad_pte+0x1a8/0x278
  [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770
  [<00000000402a4090>] zap_page_range+0xf0/0x198
  [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0

Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults.

In looking at the kernel code, I found the following problems:

1) The pte_clear define didn't take the TLB lock when clearing a pte.
2) We didn't test the pte present bit inside the lock in the exception support.
3) The pte and tlb locks needed to be merged in order to ensure consistency between the page table and the TLB. This also has the effect of serializing TLB broadcasts on SMP systems.

The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs.

I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect.

Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
Cc: stable@vger.kernel.org # v3.18+
Signed-off-by: Helge Deller <deller@gmx.de>
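As a quick orientation for the diff below, here is a minimal standalone sketch (plain userspace C with pthreads, not the kernel code itself) of the locking rule the patch establishes: every PTE update and its matching TLB purge happen under the single pa_tlb_lock, and a purge is issued only when the old PTE could actually have been inserted into the TLB (present and accessed bits both set). The bit values and the simulated_tlb_purge() helper are illustrative stand-ins, not parisc definitions.

/*
 * Userspace model of the merged PTE/TLB locking discipline.
 * Bit positions and the purge helper are illustrative only.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define _PAGE_PRESENT  0x001	/* illustrative bit positions */
#define _PAGE_ACCESSED 0x100

typedef uint64_t pte_t;

static pthread_mutex_t pa_tlb_lock = PTHREAD_MUTEX_INITIALIZER;	/* models the merged PTE/TLB lock */
static pte_t page_table[1];					/* one fake PTE */
static unsigned long purges;					/* counts simulated pdtlb/pitlb broadcasts */

/* The old PTE can be live in the TLB only if it was present and accessed. */
static int pte_inserted(pte_t pte)
{
	return (pte & (_PAGE_PRESENT | _PAGE_ACCESSED)) ==
	       (_PAGE_PRESENT | _PAGE_ACCESSED);
}

static void simulated_tlb_purge(void)
{
	purges++;	/* stands in for the slow pdtlb/pitlb broadcast */
}

/* Mirrors the new set_pte_at(): read the old value, store the new one,
 * purge conditionally -- all without dropping the lock in between. */
static void model_set_pte_at(pte_t *ptep, pte_t pteval)
{
	pthread_mutex_lock(&pa_tlb_lock);
	pte_t old_pte = *ptep;
	*ptep = pteval;
	if (pte_inserted(old_pte))
		simulated_tlb_purge();
	pthread_mutex_unlock(&pa_tlb_lock);
}

int main(void)
{
	model_set_pte_at(&page_table[0], _PAGE_PRESENT | _PAGE_ACCESSED);	/* map: nothing to purge */
	model_set_pte_at(&page_table[0], 0);					/* clear: old PTE was live, purge */
	model_set_pte_at(&page_table[0], 0);					/* clear again: already gone, no purge */
	printf("simulated TLB purges: %lu (expected 1)\n", purges);
	return 0;
}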
Diffstat (limited to 'arch/parisc/include')
 -rw-r--r--  arch/parisc/include/asm/pgtable.h   | 55
 -rw-r--r--  arch/parisc/include/asm/tlbflush.h  | 53
 2 files changed, 66 insertions, 42 deletions
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 0a183756d6ec..f93c4a4e6580 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -16,7 +16,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-extern spinlock_t pa_dbit_lock;
+extern spinlock_t pa_tlb_lock;
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock;
  */
 #define kern_addr_valid(addr)	(1)
 
+/* Purge data and instruction TLB entries.  Must be called holding
+ * the pa_tlb_lock.  The TLB purge instructions are slow on SMP
+ * machines since the purge must be broadcast to all CPUs.
+ */
+
+static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+{
+	mtsp(mm->context, 1);
+	pdtlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
+}
+
 /* Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
@@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock;
 		*(pteptr) = (pteval);				\
 	} while(0)
 
-extern void purge_tlb_entries(struct mm_struct *, unsigned long);
+#define pte_inserted(x)						\
+	((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED))		\
+	 == (_PAGE_PRESENT|_PAGE_ACCESSED))
 
-#define set_pte_at(mm, addr, ptep, pteval)			\
-	do {							\
-		unsigned long flags;				\
-		spin_lock_irqsave(&pa_dbit_lock, flags);	\
-		set_pte(ptep, pteval);				\
-		purge_tlb_entries(mm, addr);			\
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);	\
+#define set_pte_at(mm, addr, ptep, pteval)			\
+	do {							\
+		pte_t old_pte;					\
+		unsigned long flags;				\
+		spin_lock_irqsave(&pa_tlb_lock, flags);		\
+		old_pte = *ptep;				\
+		set_pte(ptep, pteval);				\
+		if (pte_inserted(old_pte))			\
+			purge_tlb_entries(mm, addr);		\
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);	\
 	} while (0)
 
 #endif /* !__ASSEMBLY__ */
@@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page;
 
 #define pte_none(x)	(pte_val(x) == 0)
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm,addr,xp)	do { pte_val(*(xp)) = 0; } while (0)
+#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0))
 
 #define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
 #define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
@@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
 	if (!pte_young(*ptep))
 		return 0;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	pte = *ptep;
 	if (!pte_young(pte)) {
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);
 		return 0;
 	}
 	set_pte(ptep, pte_mkold(pte));
 	purge_tlb_entries(vma->vm_mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 	return 1;
 }
 
@@ -453,11 +471,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 	pte_t old_pte;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	old_pte = *ptep;
-	pte_clear(mm,addr,ptep);
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	set_pte(ptep, __pte(0));
+	if (pte_inserted(old_pte))
+		purge_tlb_entries(mm, addr);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 
 	return old_pte;
 }
@@ -465,10 +484,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	set_pte(ptep, pte_wrprotect(*ptep));
 	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 }
 
 #define pte_same(A,B)	(pte_val(A) == pte_val(B))
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index 9d086a599fa0..e84b96478193 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -13,6 +13,9 @@
  * active at any one time on the Merced bus.  This tlb purge
  * synchronisation is fairly lightweight and harmless so we activate
  * it on all systems not just the N class.
+
+ * It is also used to ensure PTE updates are atomic and consistent
+ * with the TLB.
  */
 extern spinlock_t pa_tlb_lock;
 
@@ -24,20 +27,24 @@ extern void flush_tlb_all_local(void *);
 
 #define smp_flush_tlb_all()	flush_tlb_all()
 
+int __flush_tlb_range(unsigned long sid,
+	unsigned long start, unsigned long end);
+
+#define flush_tlb_range(vma, start, end) \
+	__flush_tlb_range((vma)->vm_mm->context, start, end)
+
+#define flush_tlb_kernel_range(start, end) \
+	__flush_tlb_range(0, start, end)
+
 /*
  * flush_tlb_mm()
  *
- * XXX This code is NOT valid for HP-UX compatibility processes,
- * (although it will probably work 99% of the time). HP-UX
- * processes are free to play with the space id's and save them
- * over long periods of time, etc. so we have to preserve the
- * space and just flush the entire tlb. We need to check the
- * personality in order to do that, but the personality is not
- * currently being set correctly.
- *
- * Of course, Linux processes could do the same thing, but
- * we don't support that (and the compilers, dynamic linker,
- * etc. do not do that).
+ * The code to switch to a new context is NOT valid for processes
+ * which play with the space id's.  Thus, we have to preserve the
+ * space and just flush the entire tlb.  However, the compilers,
+ * dynamic linker, etc, do not manipulate space id's, so there
+ * could be a significant performance benefit in switching contexts
+ * and not flushing the whole tlb.
  */
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
@@ -45,10 +52,18 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 	BUG_ON(mm == &init_mm); /* Should never happen */
 
 #if 1 || defined(CONFIG_SMP)
+	/* Except for very small threads, flushing the whole TLB is
+	 * faster than using __flush_tlb_range.  The pdtlb and pitlb
+	 * instructions are very slow because of the TLB broadcast.
+	 * It might be faster to do local range flushes on all CPUs
+	 * on PA 2.0 systems.
+	 */
 	flush_tlb_all();
 #else
 	/* FIXME: currently broken, causing space id and protection ids
 	 * to go out of sync, resulting in faults on userspace accesses.
+	 * This approach needs further investigation since running many
+	 * small applications (e.g., GCC testsuite) is faster on HP-UX.
 	 */
 	if (mm) {
 		if (mm->context != 0)
@@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 {
 	unsigned long flags, sid;
 
-	/* For one page, it's not worth testing the split_tlb variable */
-
-	mb();
 	sid = vma->vm_mm->context;
 	purge_tlb_start(flags);
 	mtsp(sid, 1);
 	pdtlb(addr);
-	pitlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
 	purge_tlb_end(flags);
 }
-
-void __flush_tlb_range(unsigned long sid,
-	unsigned long start, unsigned long end);
-
-#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end)
-
-#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end)
-
 #endif
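A rough back-of-the-envelope check of the flush_tlb_mm() reasoning in the comment above: the ~209 cycles/page figure is from the commit message (rp3440); the 8 MB stack size and 4 KB base page size below are assumptions chosen only for illustration.

/* Illustrative cost estimate for a range flush over one thread stack. */
#include <stdio.h>

int main(void)
{
	const unsigned long cycles_per_page = 209;	/* measured figure quoted in the commit message */
	const unsigned long stack_bytes = 8UL << 20;	/* assumed 8 MB stack */
	const unsigned long page_bytes = 4096;		/* assumed 4 KB base page size */

	unsigned long pages = stack_bytes / page_bytes;
	printf("%lu pages * %lu cycles/page = %lu cycles per range flush\n",
	       pages, cycles_per_page, pages * cycles_per_page);
	return 0;
}

At roughly 428,000 cycles before counting instruction-TLB purges on split-TLB machines, a full flush_tlb_all() is the cheaper choice for anything but very small mappings, which is the tradeoff the new comment in flush_tlb_mm() describes.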