author	Steven Capper <steve.capper@linaro.org>	2013-10-14 04:49:10 -0400
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2013-10-29 07:06:15 -0400
commit	a3a9ea656d19251326cdeaaa0b5adbfac41ddacf (patch)
tree	b13aa05569b56683e8fda81e565ebad53d6f94a3 /arch/arm
parent	92871b94a5f9892e324c31960678387922c75049 (diff)
ARM: 7858/1: mm: make UACCESS_WITH_MEMCPY huge page aware
The memory pinning code in uaccess_with_memcpy.c does not check for HugeTLB or THP pmds, and will enter an infinite loop should a __copy_to_user or __clear_user occur against a huge page.

This patch adds detection code for huge pages to pin_page_for_write. As this code can be executed in a fast path it refers to the actual pmds rather than the vma. If a HugeTLB or THP is found (they have the same pmd representation on ARM), the page table spinlock is taken to prevent modification whilst the page is pinned.

On ARM, huge pages are only represented as pmds, thus no huge pud checks are performed. (For huge puds one would lock the page table in a similar manner as in the pmd case.)

Two helper functions are introduced; pmd_thp_or_huge will check whether or not a page is huge or transparent huge (which have the same pmd layout on ARM), and pmd_hugewillfault will detect whether or not a page fault will occur on write to the page.

Running the following test (with the chunking from read_zero removed):

 $ dd if=/dev/zero of=/dev/null bs=10M count=1024

Gave: 2.3 GB/s backed by normal pages, 2.9 GB/s backed by huge pages, 5.1 GB/s backed by huge pages, with page mask=HPAGE_MASK.

After some discussion, it was decided not to adopt the HPAGE_MASK, as this would have a significant detrimental effect on the overall system latency due to page_table_lock being held for too long. This could be revisited if split huge page locks are adopted.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
Reviewed-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
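The path under discussion is easy to exercise from user space: reading /dev/zero performs a clear_user on the destination buffer, so a huge-page-backed buffer drives __clear_user against huge pmds. The following is a minimal, illustrative reproducer (not part of the patch); it assumes huge pages have been reserved beforehand (e.g. via /proc/sys/vm/nr_hugepages) and a 2 MB huge page size. Enabling transparent huge pages system-wide (via /sys/kernel/mm/transparent_hugepage/enabled) is another way to get huge-page-backed buffers for the dd test quoted above.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define LEN (2UL * 1024 * 1024)	/* one 2 MB huge page (assumed size) */

int main(void)
{
	/* A destination buffer backed by a HugeTLB page. */
	void *buf = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (buf == MAP_FAILED) {
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}

	int fd = open("/dev/zero", O_RDONLY);
	if (fd < 0) {
		perror("open(/dev/zero)");
		return 1;
	}

	/* read_zero clears the huge-page-backed destination in the
	 * kernel; with UACCESS_WITH_MEMCPY this goes through
	 * pin_page_for_write. */
	ssize_t n = read(fd, buf, LEN);
	printf("read %zd bytes into huge-page buffer\n", n);

	close(fd);
	munmap(buf, LEN);
	return 0;
}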
Diffstat (limited to 'arch/arm')
-rw-r--r--  arch/arm/include/asm/pgtable-2level.h |  7
-rw-r--r--  arch/arm/include/asm/pgtable-3level.h |  3
-rw-r--r--  arch/arm/lib/uaccess_with_memcpy.c    | 41
3 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index f97ee02386ee..86a659a19526 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -181,6 +181,13 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 
 #define set_pte_ext(ptep,pte,ext) cpu_set_pte_ext(ptep,pte,ext)
 
+/*
+ * We don't have huge page support for short descriptors, for the moment
+ * define empty stubs for use by pin_page_for_write.
+ */
+#define pmd_hugewillfault(pmd)	(0)
+#define pmd_thp_or_huge(pmd)	(0)
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_PGTABLE_2LEVEL_H */
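Because both stubs expand to the constant 0, the compiler statically eliminates the huge-page branch in pin_page_for_write on 2-level (short-descriptor) builds, so no huge-page handling code is emitted there. A minimal sketch of that effect, with a hypothetical pin() standing in for pin_page_for_write:

#include <stdio.h>

/* Stand-ins mirroring the stubs above; pin() is illustrative only,
 * not the kernel function. */
#define pmd_thp_or_huge(pmd)	(0)
#define pmd_hugewillfault(pmd)	(0)

static int pin(unsigned long pmd)
{
	if (pmd_thp_or_huge(pmd)) {
		/* Statically dead on 2-level builds: the compiler
		 * drops this branch entirely. */
		return 1;
	}
	return 0;
}

int main(void)
{
	printf("%d\n", pin(0x12345));	/* always 0 on 2-level */
	return 0;
}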
diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index 5689c18c85f5..39c54cfa03e9 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -206,6 +206,9 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		(!(pmd_val(pmd) & PMD_SECT_RDONLY))
 
+#define pmd_hugewillfault(pmd)	(!pmd_young(pmd) || !pmd_write(pmd))
+#define pmd_thp_or_huge(pmd)	(pmd_huge(pmd) || pmd_trans_huge(pmd))
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
 #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
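pmd_hugewillfault captures whether a write to the huge page would still fault: either the entry is not young (the access flag is clear, so the first touch faults to set it) or it is not writable (a write faults for COW/dirty tracking). A small user-space model of that predicate, assuming the ARMv7 LPAE block-descriptor bit positions (AF at bit 10, AP[2]/read-only at bit 7); the names mirror the kernel macros but this is illustrative code, not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

#define PMD_SECT_RDONLY	(1ULL << 7)	/* AP[2]: write permission removed */
#define PMD_SECT_AF	(1ULL << 10)	/* access flag: clear => access faults */

static int model_hugewillfault(uint64_t pmdval)
{
	int young = !!(pmdval & PMD_SECT_AF);	/* models pmd_young() */
	int write = !(pmdval & PMD_SECT_RDONLY);	/* models pmd_write() */
	return !young || !write;	/* a write to this page would fault */
}

int main(void)
{
	printf("%d\n", model_hugewillfault(PMD_SECT_AF));	/* 0: young and writable */
	printf("%d\n", model_hugewillfault(PMD_SECT_AF | PMD_SECT_RDONLY));	/* 1: read-only */
	printf("%d\n", model_hugewillfault(0));	/* 1: not yet accessed */
	return 0;
}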
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index 025f742dd4df..3e58d710013c 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -18,6 +18,7 @@
 #include <linux/hardirq.h> /* for in_atomic() */
 #include <linux/gfp.h>
 #include <linux/highmem.h>
+#include <linux/hugetlb.h>
 #include <asm/current.h>
 #include <asm/page.h>
 
@@ -40,7 +41,35 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
 		return 0;
 
 	pmd = pmd_offset(pud, addr);
-	if (unlikely(pmd_none(*pmd) || pmd_bad(*pmd)))
+	if (unlikely(pmd_none(*pmd)))
+		return 0;
+
+	/*
+	 * A pmd can be bad if it refers to a HugeTLB or THP page.
+	 *
+	 * Both THP and HugeTLB pages have the same pmd layout
+	 * and should not be manipulated by the pte functions.
+	 *
+	 * Lock the page table for the destination and check
+	 * to see that it's still huge and whether or not we will
+	 * need to fault on write, or if we have a splitting THP.
+	 */
+	if (unlikely(pmd_thp_or_huge(*pmd))) {
+		ptl = &current->mm->page_table_lock;
+		spin_lock(ptl);
+		if (unlikely(!pmd_thp_or_huge(*pmd)
+			|| pmd_hugewillfault(*pmd)
+			|| pmd_trans_splitting(*pmd))) {
+			spin_unlock(ptl);
+			return 0;
+		}
+
+		*ptep = NULL;
+		*ptlp = ptl;
+		return 1;
+	}
+
+	if (unlikely(pmd_bad(*pmd)))
 		return 0;
 
 	pte = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
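The huge-page branch above is a check/lock/re-check pattern: the pmd is tested without the lock on the fast path, then re-validated under page_table_lock, because a concurrent THP split or permission change could alter it between the two reads. A generic user-space sketch of the same pattern, with a pthread mutex standing in for page_table_lock (illustrative names only, not kernel API; real kernel code also has memory-ordering concerns this sketch glosses over):

#include <pthread.h>
#include <stdbool.h>

/* 'entry_is_huge' models a pmd another thread may downgrade;
 * 'table_lock' models mm->page_table_lock. */
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
static volatile int entry_is_huge = 1;

/* Returns true with table_lock held iff the entry is still huge;
 * mirrors the unlocked-check / lock / re-check shape above. */
static bool pin_if_huge(void)
{
	if (!entry_is_huge)	/* unlocked fast-path check */
		return false;

	pthread_mutex_lock(&table_lock);
	if (!entry_is_huge) {	/* re-check under the lock */
		pthread_mutex_unlock(&table_lock);
		return false;
	}
	return true;	/* caller "unpins" by unlocking */
}

int main(void)
{
	if (pin_if_huge())
		pthread_mutex_unlock(&table_lock);
	return 0;
}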
@@ -94,7 +123,10 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
 		from += tocopy;
 		n -= tocopy;
 
-		pte_unmap_unlock(pte, ptl);
+		if (pte)
+			pte_unmap_unlock(pte, ptl);
+		else
+			spin_unlock(ptl);
 	}
 	if (!atomic)
 		up_read(&current->mm->mmap_sem);
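The callers use pte == NULL as the tag for which lock was taken: pin_page_for_write stores NULL into *ptep in the huge case, so the release site can tell a pte-mapped pin (pte lock, released via pte_unmap_unlock) from a huge-page pin (page_table_lock, released with a plain spin_unlock). The same three-line release is repeated in __clear_user_memset below; as a sketch, it could be factored into a shared helper (hypothetical name, not part of this patch):

/* Hypothetical helper, not in the patch: releases whatever
 * pin_page_for_write took, keyed on the pte == NULL convention. */
static inline void unpin_page(pte_t *pte, spinlock_t *ptl)
{
	if (pte)
		pte_unmap_unlock(pte, ptl);	/* pte-mapped page */
	else
		spin_unlock(ptl);	/* huge page: page_table_lock */
}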
@@ -147,7 +179,10 @@ __clear_user_memset(void __user *addr, unsigned long n)
 		addr += tocopy;
 		n -= tocopy;
 
-		pte_unmap_unlock(pte, ptl);
+		if (pte)
+			pte_unmap_unlock(pte, ptl);
+		else
+			spin_unlock(ptl);
 	}
 	up_read(&current->mm->mmap_sem);
 
153 188