author     Chris Metcalf <cmetcalf@tilera.com>    2012-04-01 14:04:21 -0400
committer  Chris Metcalf <cmetcalf@tilera.com>    2012-05-25 12:48:27 -0400
commit     621b19551507c8fd9d721f4038509c5bb155a983 (patch)
tree       62d8d5e7a783364940153b4523fcfba821cee241 /arch/tile/mm
parent     d9ed9faac283a3be73f0e11a2ef49ee55aece4db (diff)
arch/tile: support multiple huge page sizes dynamically
This change adds support for a new "super" bit in the PTE, using the new
arch_make_huge_pte() method. The Tilera hypervisor sees the bit set at a
given level of the page table and gangs together 4, 16, or 64 consecutive
pages from that level of the hierarchy to create a larger TLB entry.

One extra "super" page size can be specified at each of the three levels
of the page table hierarchy on tilegx, using the "hugepagesz" argument on
the boot command line. A new hypervisor API is added to allow Linux to
tell the hypervisor how many PTEs to gang together at each level of the
page table.

To allow pre-allocating huge pages larger than the buddy allocator can
handle, this change modifies the Tilera bootmem support to put all of
memory on tilegx platforms into bootmem.

As part of this change I eliminate the vestigial CONFIG_HIGHPTE support,
which never worked anyway, and eliminate the hv_page_size() API in favor
of the standard vma_kernel_pagesize() API.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
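For orientation, the sketch below mirrors the size-to-(level, shift) mapping that the new __setup_hugepagesz() in this patch performs: a requested size must be a power of four times the base page size, and the resulting shift tells the hypervisor how many PTEs to gang together at that level. This is an illustrative stand-alone C program, not kernel code; pick_level() and the EX_PAGE_SHIFT/EX_PMD_SHIFT/EX_PUD_SHIFT values are assumed placeholders, not the shifts of any real tile configuration.

/* Illustrative sketch only; mirrors the level/shift selection in
 * __setup_hugepagesz() in the hugetlbpage.c diff below.
 * All EX_* shifts are assumed values. */
#include <stdio.h>

#define EX_PAGE_SHIFT 16	/* assume 64 KB base pages */
#define EX_PMD_SHIFT  24	/* assumed, not a real tilegx value */
#define EX_PUD_SHIFT  34	/* assumed, not a real tilegx value */

static int pick_level(unsigned long ps, int *level, int *shift)
{
	int log_ps = __builtin_ctzl(ps);

	/* Reject sizes that are not a power of four times the base page. */
	if ((1UL << log_ps) != ps || (log_ps & 1) != 0)
		return -1;

	if (log_ps >= EX_PUD_SHIFT) {
		*level = 0;			/* top (PGD/PUD) level */
		*shift = log_ps - EX_PUD_SHIFT;
	} else if (log_ps >= EX_PMD_SHIFT) {
		*level = 1;			/* middle (PMD) level */
		*shift = log_ps - EX_PMD_SHIFT;
	} else if (log_ps > EX_PAGE_SHIFT) {
		*level = 2;			/* leaf (PTE) level */
		*shift = log_ps - EX_PAGE_SHIFT;
	} else {
		return -1;
	}
	return 0;
}

int main(void)
{
	int level, shift;
	unsigned long ps = 1UL << 22;	/* 4 MB: 64 base pages of 64 KB */

	if (pick_level(ps, &level, &shift) == 0)
		printf("%lu bytes -> level %d, shift %d (%d PTEs ganged)\n",
		       ps, level, shift, 1 << shift);
	return 0;
}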
Diffstat (limited to 'arch/tile/mm')
-rw-r--r--   arch/tile/mm/fault.c          2
-rw-r--r--   arch/tile/mm/homecache.c      1
-rw-r--r--   arch/tile/mm/hugetlbpage.c    285
-rw-r--r--   arch/tile/mm/init.c           4
-rw-r--r--   arch/tile/mm/pgtable.c        13
5 files changed, 226 insertions, 79 deletions
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 22e58f51ed23..54f18fc25ed0 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -187,7 +187,7 @@ static pgd_t *get_current_pgd(void)
 	HV_Context ctx = hv_inquire_context();
 	unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT;
 	struct page *pgd_page = pfn_to_page(pgd_pfn);
-	BUG_ON(PageHighMem(pgd_page));	/* oops, HIGHPTE? */
+	BUG_ON(PageHighMem(pgd_page));
 	return (pgd_t *) __va(ctx.page_table);
 }
 
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 499f73770b05..dbcbdf7b8aa8 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -30,6 +30,7 @@
 #include <linux/cache.h>
 #include <linux/smp.h>
 #include <linux/module.h>
+#include <linux/hugetlb.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 42cfcba4e1ef..812e2d037972 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -27,85 +27,161 @@
 #include <linux/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+
+/*
+ * Provide an additional huge page size (in addition to the regular default
+ * huge page size) if no "hugepagesz" arguments are specified.
+ * Note that it must be smaller than the default huge page size so
+ * that it's possible to allocate them on demand from the buddy allocator.
+ * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
+ * or not define it at all.
+ */
+#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)
+
+/* "Extra" page-size multipliers, one per level of the page table. */
+int huge_shift[HUGE_SHIFT_ENTRIES] = {
+#ifdef ADDITIONAL_HUGE_SIZE
+#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
+	[HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
+#endif
+};
+
+/*
+ * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
+ * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
+ * It locks the user pagetable, and bumps up the mm->nr_ptes field,
+ * but otherwise allocate the page table using the kernel versions.
+ */
+static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long address)
+{
+	pte_t *new;
+
+	if (pmd_none(*pmd)) {
+		new = pte_alloc_one_kernel(mm, address);
+		if (!new)
+			return NULL;
+
+		smp_wmb(); /* See comment in __pte_alloc */
+
+		spin_lock(&mm->page_table_lock);
+		if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
+			mm->nr_ptes++;
+			pmd_populate_kernel(mm, pmd, new);
+			new = NULL;
+		} else
+			VM_BUG_ON(pmd_trans_splitting(*pmd));
+		spin_unlock(&mm->page_table_lock);
+		if (new)
+			pte_free_kernel(mm, new);
+	}
+
+	return pte_offset_kernel(pmd, address);
+}
+#endif
 
 pte_t *huge_pte_alloc(struct mm_struct *mm,
 		      unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pte_t *pte = NULL;
 
-	/* We do not yet support multiple huge page sizes. */
-	BUG_ON(sz != PMD_SIZE);
+	addr &= -sz;	/* Mask off any low bits in the address. */
 
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
-	if (pud)
-		pte = (pte_t *) pmd_alloc(mm, pud, addr);
-	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 
-	return pte;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	if (sz >= PGDIR_SIZE) {
+		BUG_ON(sz != PGDIR_SIZE &&
+		       sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
+		return (pte_t *)pud;
+	} else {
+		pmd_t *pmd = pmd_alloc(mm, pud, addr);
+		if (sz >= PMD_SIZE) {
+			BUG_ON(sz != PMD_SIZE &&
+			       sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
+			return (pte_t *)pmd;
+		}
+		else {
+			if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
+				panic("Unexpected page size %#lx\n", sz);
+			return pte_alloc_hugetlb(mm, pmd, addr);
+		}
+	}
+#else
+	BUG_ON(sz != PMD_SIZE);
+	return (pte_t *) pmd_alloc(mm, pud, addr);
+#endif
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud))
-			pmd = pmd_offset(pud, addr);
-	}
-	return (pte_t *) pmd;
-}
-
-#ifdef HUGETLB_TEST
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write)
-{
-	unsigned long start = address;
-	int length = 1;
-	int nr;
-	struct page *page;
-	struct vm_area_struct *vma;
-
-	vma = find_vma(mm, addr);
-	if (!vma || !is_vm_hugetlb_page(vma))
-		return ERR_PTR(-EINVAL);
-
-	pte = huge_pte_offset(mm, address);
-
-	/* hugetlb should be locked, and hence, prefaulted */
-	WARN_ON(!pte || pte_none(*pte));
-
-	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
-
-	WARN_ON(!PageHead(page));
-
-	return page;
-}
-
-int pmd_huge(pmd_t pmd)
-{
-	return 0;
-}
-
-int pud_huge(pud_t pud)
-{
-	return 0;
-}
-
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-			     pmd_t *pmd, int write)
-{
+static pte_t *get_pte(pte_t *base, int index, int level)
+{
+	pte_t *ptep = base + index;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	if (!pte_present(*ptep) && huge_shift[level] != 0) {
+		unsigned long mask = -1UL << huge_shift[level];
+		pte_t *super_ptep = base + (index & mask);
+		pte_t pte = *super_ptep;
+		if (pte_present(pte) && pte_super(pte))
+			ptep = super_ptep;
+	}
+#endif
+	return ptep;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	pte_t *pte;
+#endif
+
+	/* Get the top-level page table entry. */
+	pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
+	if (!pgd_present(*pgd))
+		return NULL;
+
+	/* We don't have four levels. */
+	pud = pud_offset(pgd, addr);
+#ifndef __PAGETABLE_PUD_FOLDED
+# error support fourth page table level
+#endif
+
+	/* Check for an L0 huge PTE, if we have three levels. */
+#ifndef __PAGETABLE_PMD_FOLDED
+	if (pud_huge(*pud))
+		return (pte_t *)pud;
+
+	pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
+			       pmd_index(addr), 1);
+	if (!pmd_present(*pmd))
+		return NULL;
+#else
+	pmd = pmd_offset(pud, addr);
+#endif
+
+	/* Check for an L1 huge PTE. */
+	if (pmd_huge(*pmd))
+		return (pte_t *)pmd;
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	/* Check for an L2 huge PTE. */
+	pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
+	if (!pte_present(*pte))
+		return NULL;
+	if (pte_super(*pte))
+		return pte;
+#endif
+
 	return NULL;
 }
 
-#else
-
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write)
 {
@@ -149,8 +225,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-#endif
-
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 		unsigned long addr, unsigned long len,
@@ -322,21 +396,102 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	return hugetlb_get_unmapped_area_topdown(file, addr, len,
 			pgoff, flags);
 }
+#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
-static __init int setup_hugepagesz(char *opt)
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static __init int __setup_hugepagesz(unsigned long ps)
 {
-	unsigned long ps = memparse(opt, &opt);
-	if (ps == PMD_SIZE) {
-		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-	} else if (ps == PUD_SIZE) {
-		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	int log_ps = __builtin_ctzl(ps);
+	int level, base_shift;
+
+	if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
+		pr_warn("Not enabling %ld byte huge pages;"
+			" must be a power of four.\n", ps);
+		return -EINVAL;
+	}
+
+	if (ps > 64*1024*1024*1024UL) {
+		pr_warn("Not enabling %ld MB huge pages;"
+			" largest legal value is 64 GB .\n", ps >> 20);
+		return -EINVAL;
+	} else if (ps >= PUD_SIZE) {
+		static long hv_jpage_size;
+		if (hv_jpage_size == 0)
+			hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
+		if (hv_jpage_size != PUD_SIZE) {
+			pr_warn("Not enabling >= %ld MB huge pages:"
+				" hypervisor reports size %ld\n",
+				PUD_SIZE >> 20, hv_jpage_size);
+			return -EINVAL;
+		}
+		level = 0;
+		base_shift = PUD_SHIFT;
+	} else if (ps >= PMD_SIZE) {
+		level = 1;
+		base_shift = PMD_SHIFT;
+	} else if (ps > PAGE_SIZE) {
+		level = 2;
+		base_shift = PAGE_SHIFT;
 	} else {
-		pr_err("hugepagesz: Unsupported page size %lu M\n",
-			ps >> 20);
-		return 0;
+		pr_err("hugepagesz: huge page size %ld too small\n", ps);
+		return -EINVAL;
 	}
-	return 1;
+
+	if (log_ps != base_shift) {
+		int shift_val = log_ps - base_shift;
+		if (huge_shift[level] != 0) {
+			int old_shift = base_shift + huge_shift[level];
+			pr_warn("Not enabling %ld MB huge pages;"
+				" already have size %ld MB.\n",
+				ps >> 20, (1UL << old_shift) >> 20);
+			return -EINVAL;
+		}
+		if (hv_set_pte_super_shift(level, shift_val) != 0) {
+			pr_warn("Not enabling %ld MB huge pages;"
+				" no hypervisor support.\n", ps >> 20);
+			return -EINVAL;
+		}
+		printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
+		huge_shift[level] = shift_val;
+	}
+
+	hugetlb_add_hstate(log_ps - PAGE_SHIFT);
+
+	return 0;
+}
+
+static bool saw_hugepagesz;
+
+static __init int setup_hugepagesz(char *opt)
+{
+	if (!saw_hugepagesz) {
+		saw_hugepagesz = true;
+		memset(huge_shift, 0, sizeof(huge_shift));
+	}
+	return __setup_hugepagesz(memparse(opt, NULL));
 }
 __setup("hugepagesz=", setup_hugepagesz);
 
-#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+#ifdef ADDITIONAL_HUGE_SIZE
+/*
+ * Provide an additional huge page size if no "hugepagesz" args are given.
+ * In that case, all the cores have properly set up their hv super_shift
+ * already, but we need to notify the hugetlb code to enable the
+ * new huge page size from the Linux point of view.
+ */
+static __init int add_default_hugepagesz(void)
+{
+	if (!saw_hugepagesz) {
+		BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
+			     ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
+		BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
+			     ADDITIONAL_HUGE_SIZE);
+		BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
+		hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
+	}
+	return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif
+
+#endif /* CONFIG_HUGETLB_SUPER_PAGES */
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index c04fbfd93fc5..630dd2ce2afe 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -698,6 +698,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 #endif /* CONFIG_HIGHMEM */
 
 
+#ifndef CONFIG_64BIT
 static void __init init_free_pfn_range(unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
@@ -770,6 +771,7 @@ static void __init set_non_bootmem_pages_init(void)
 		init_free_pfn_range(start, end);
 	}
 }
+#endif
 
 /*
  * paging_init() sets up the page tables - note that all of lowmem is
@@ -858,8 +860,10 @@ void __init mem_init(void)
 	/* this will put all bootmem onto the freelists */
 	totalram_pages += free_all_bootmem();
 
+#ifndef CONFIG_64BIT
 	/* count all remaining LOWMEM and give all HIGHMEM to page allocator */
 	set_non_bootmem_pages_init();
+#endif
 
 	codesize = (unsigned long)&_etext - (unsigned long)&_text;
 	datasize = (unsigned long)&_end - (unsigned long)&_sdata;
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 3d7074347e6d..345edfed9fcd 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -132,15 +132,6 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
 }
 
-#if defined(CONFIG_HIGHPTE)
-pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
-{
-	pte_t *pte = kmap_atomic(pmd_page(*dir)) +
-		(pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK;
-	return &pte[pte_index(address)];
-}
-#endif
-
 /**
  * shatter_huge_page() - ensure a given address is mapped by a small page.
  *
@@ -296,10 +287,6 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
 	struct page *p;
 	int i;
 
-#ifdef CONFIG_HIGHPTE
-	flags |= __GFP_HIGHMEM;
-#endif
-
 	p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
 	if (p == NULL)
 		return NULL;