author     Chris Metcalf <cmetcalf@tilera.com>  2012-04-01 14:04:21 -0400
committer  Chris Metcalf <cmetcalf@tilera.com>  2012-05-25 12:48:27 -0400
commit     621b19551507c8fd9d721f4038509c5bb155a983 (patch)
tree       62d8d5e7a783364940153b4523fcfba821cee241 /arch/tile
parent     d9ed9faac283a3be73f0e11a2ef49ee55aece4db (diff)
arch/tile: support multiple huge page sizes dynamically
This change adds support for a new "super" bit in the PTE, using the new arch_make_huge_pte() method. The Tilera hypervisor sees the bit set at a given level of the page table and gangs together 4, 16, or 64 consecutive pages from that level of the hierarchy to create a larger TLB entry.

One extra "super" page size can be specified at each of the three levels of the page table hierarchy on tilegx, using the "hugepagesz" argument on the boot command line. A new hypervisor API is added to allow Linux to tell the hypervisor how many PTEs to gang together at each level of the page table.

To allow pre-allocating huge pages larger than the buddy allocator can handle, this change modifies the Tilera bootmem support to put all of memory on tilegx platforms into bootmem.

As part of this change I eliminate the vestigial CONFIG_HIGHPTE support, which never worked anyway, and eliminate the hv_page_size() API in favor of the standard vma_kernel_pagesize() API.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
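[Editorial illustration, not part of the commit: the standalone user-space sketch below mirrors the size checks the new "hugepagesz" handling performs, showing how a requested huge page size maps to a page-table level and a log2 gang count for hv_set_pte_super_shift(). The EX_* shift constants are assumed values for a 64KB-page tilegx build, not the authoritative kernel definitions.]

/*
 * Illustrative sketch: map a requested huge page size (given as log2 of
 * bytes) to a page-table level and the log2_count that would be passed to
 * hv_set_pte_super_shift().  Shift constants are assumptions for this
 * example only.
 */
#include <stdio.h>

#define EX_PAGE_SHIFT 16   /* 64 KB base pages (assumed) */
#define EX_PMD_SHIFT  24   /* 16 MB default huge pages (assumed) */
#define EX_PUD_SHIFT  32   /* 4 GB "jumbo" span (assumed) */

/* Return the log2 gang count for the chosen level, or -1 if rejected. */
static int super_shift_for(int size_log2, int *level)
{
	int base;

	if (size_log2 >= EX_PUD_SHIFT) {        /* L0 entry: jumbo pages */
		*level = 0;
		base = EX_PUD_SHIFT;
	} else if (size_log2 >= EX_PMD_SHIFT) { /* L1 entry: large pages */
		*level = 1;
		base = EX_PMD_SHIFT;
	} else {                                /* L2 entry: small pages */
		*level = 2;
		base = EX_PAGE_SHIFT;
	}
	/* The hardware gangs powers of four, so the shift must be even. */
	return ((size_log2 - base) & 1) ? -1 : size_log2 - base;
}

int main(void)
{
	int sizes_log2[] = { 18, 20, 28, 36 };  /* 256KB, 1MB, 256MB, 64GB */
	unsigned int i;

	for (i = 0; i < sizeof(sizes_log2) / sizeof(sizes_log2[0]); i++) {
		int level, shift = super_shift_for(sizes_log2[i], &level);
		if (shift < 0)
			printf("2^%d bytes: rejected (not a power of four of the base)\n",
			       sizes_log2[i]);
		else
			printf("2^%d bytes: level %d, log2_count %d (gangs %d PTEs)\n",
			       sizes_log2[i], level, shift, 1 << shift);
	}
	return 0;
}

Sizes that are not a power-of-four multiple of the level's base size are rejected, matching the hypervisor restriction described above.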
Diffstat (limited to 'arch/tile')
-rw-r--r-- | arch/tile/Kconfig                  |   8
-rw-r--r-- | arch/tile/include/asm/hugetlb.h    |  21
-rw-r--r-- | arch/tile/include/asm/page.h       |   5
-rw-r--r-- | arch/tile/include/asm/pgtable.h    |  12
-rw-r--r-- | arch/tile/include/asm/tlbflush.h   |  17
-rw-r--r-- | arch/tile/include/hv/hypervisor.h  |  70
-rw-r--r-- | arch/tile/kernel/hvglue.lds        |   3
-rw-r--r-- | arch/tile/kernel/proc.c            |   1
-rw-r--r-- | arch/tile/kernel/setup.c           | 161
-rw-r--r-- | arch/tile/kernel/tlb.c             |  11
-rw-r--r-- | arch/tile/mm/fault.c               |   2
-rw-r--r-- | arch/tile/mm/homecache.c           |   1
-rw-r--r-- | arch/tile/mm/hugetlbpage.c         | 285
-rw-r--r-- | arch/tile/mm/init.c                |   4
-rw-r--r-- | arch/tile/mm/pgtable.c             |  13
15 files changed, 456 insertions, 158 deletions
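[Editorial illustration, not part of the commit: once a size has been enabled with "hugepagesz=" on the boot command line, user space consumes it through a hugetlbfs mount whose "pagesize=" option matches. The mount point, file name, and 16 MB size in this hedged sketch are assumptions for illustration only.]

/*
 * Hypothetical usage: map one 16 MB huge page from a hugetlbfs mount
 * created with, e.g., "mount -t hugetlbfs -o pagesize=16M none /mnt/huge-16M".
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 16UL * 1024 * 1024;   /* one 16 MB huge page (assumed) */
	int fd = open("/mnt/huge-16M/example", O_CREAT | O_RDWR, 0600);
	char *p;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return 1;
	}
	p[0] = 1;                          /* touch the page to fault it in */
	munmap(p, len);
	close(fd);
	unlink("/mnt/huge-16M/example");
	return 0;
}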
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 38c3957e0b40..cc5664286a1c 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -47,6 +47,14 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
+# Support for additional huge page sizes besides HPAGE_SIZE.
+# The software support is currently only present in the TILE-Gx
+# hypervisor.  TILEPro in any case does not support page sizes
+# larger than the default HPAGE_SIZE.
+config HUGETLB_SUPER_PAGES
+	depends on HUGETLB_PAGE && TILEGX
+	def_bool y
+
 config GENERIC_CLOCKEVENTS
 	def_bool y
 
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index d396d1805163..b2042380a5aa 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -106,4 +106,25 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+				       struct page *page, int writable)
+{
+	size_t pagesize = huge_page_size(hstate_vma(vma));
+	if (pagesize != PUD_SIZE && pagesize != PMD_SIZE)
+		entry = pte_mksuper(entry);
+	return entry;
+}
+#define arch_make_huge_pte arch_make_huge_pte
+
+/* Sizes to scale up page size for PTEs with HV_PTE_SUPER bit. */
+enum {
+	HUGE_SHIFT_PGDIR = 0,
+	HUGE_SHIFT_PMD = 1,
+	HUGE_SHIFT_PAGE = 2,
+	HUGE_SHIFT_ENTRIES
+};
+extern int huge_shift[HUGE_SHIFT_ENTRIES];
+#endif
+
 #endif /* _ASM_TILE_HUGETLB_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index c750943f961e..9d9131e5c552 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -87,8 +87,7 @@ typedef HV_PTE pgprot_t;
 /*
  * User L2 page tables are managed as one L2 page table per page,
  * because we use the page allocator for them.  This keeps the allocation
- * simple and makes it potentially useful to implement HIGHPTE at some point.
- * However, it's also inefficient, since L2 page tables are much smaller
+ * simple, but it's also inefficient, since L2 page tables are much smaller
  * than pages (currently 2KB vs 64KB).  So we should revisit this.
  */
 typedef struct page *pgtable_t;
@@ -137,7 +136,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
 
 #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
 
-#define HUGE_MAX_HSTATE 2
+#define HUGE_MAX_HSTATE 6
 
 #ifdef CONFIG_HUGETLB_PAGE
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 319f4826d972..73b1a4c9ad03 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -71,6 +71,7 @@ extern void set_page_homes(void);
 
 #define _PAGE_PRESENT HV_PTE_PRESENT
 #define _PAGE_HUGE_PAGE HV_PTE_PAGE
+#define _PAGE_SUPER_PAGE HV_PTE_SUPER
 #define _PAGE_READABLE HV_PTE_READABLE
 #define _PAGE_WRITABLE HV_PTE_WRITABLE
 #define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE
@@ -87,6 +88,7 @@ extern void set_page_homes(void);
 #define _PAGE_ALL (\
 	_PAGE_PRESENT | \
 	_PAGE_HUGE_PAGE | \
+	_PAGE_SUPER_PAGE | \
 	_PAGE_READABLE | \
 	_PAGE_WRITABLE | \
 	_PAGE_EXECUTABLE | \
@@ -197,6 +199,7 @@ static inline void __pte_clear(pte_t *ptep)
 #define pte_write hv_pte_get_writable
 #define pte_exec hv_pte_get_executable
 #define pte_huge hv_pte_get_page
+#define pte_super hv_pte_get_super
 #define pte_rdprotect hv_pte_clear_readable
 #define pte_exprotect hv_pte_clear_executable
 #define pte_mkclean hv_pte_clear_dirty
@@ -209,6 +212,7 @@ static inline void __pte_clear(pte_t *ptep)
 #define pte_mkyoung hv_pte_set_accessed
 #define pte_mkwrite hv_pte_set_writable
 #define pte_mkhuge hv_pte_set_page
+#define pte_mksuper hv_pte_set_super
 
 #define pte_special(pte) 0
 #define pte_mkspecial(pte) (pte)
@@ -338,13 +342,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-#if defined(CONFIG_HIGHPTE)
-extern pte_t *pte_offset_map(pmd_t *, unsigned long address);
-#define pte_unmap(pte) kunmap_atomic(pte)
-#else
 #define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
 #define pte_unmap(pte) do { } while (0)
-#endif
 
 /* Clear a non-executable kernel PTE and flush it from the TLB. */
 #define kpte_clear_flush(ptep, vaddr) \
@@ -537,7 +536,8 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
 /* Support /proc/NN/pgtable API. */
 struct seq_file;
 int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm,
-			   unsigned long vaddr, pte_t *ptep, void **datap);
+			   unsigned long vaddr, unsigned long pagesize,
+			   pte_t *ptep, void **datap);
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
index 96199d214fb8..dcf91b25a1e5 100644
--- a/arch/tile/include/asm/tlbflush.h
+++ b/arch/tile/include/asm/tlbflush.h
@@ -38,16 +38,11 @@ DECLARE_PER_CPU(int, current_asid);
 /* The hypervisor tells us what ASIDs are available to us. */
 extern int min_asid, max_asid;
 
-static inline unsigned long hv_page_size(const struct vm_area_struct *vma)
-{
-	return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE;
-}
-
 /* Pass as vma pointer for non-executable mapping, if no vma available. */
-#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL)
+#define FLUSH_NONEXEC ((struct vm_area_struct *)-1UL)
 
 /* Flush a single user page on this cpu. */
-static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 					unsigned long addr,
 					unsigned long page_size)
 {
@@ -60,7 +55,7 @@ static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
 }
 
 /* Flush range of user pages on this cpu. */
-static inline void local_flush_tlb_pages(const struct vm_area_struct *vma,
+static inline void local_flush_tlb_pages(struct vm_area_struct *vma,
 					 unsigned long addr,
 					 unsigned long page_size,
 					 unsigned long len)
@@ -117,10 +112,10 @@ extern void flush_tlb_all(void);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void flush_tlb_current_task(void);
 extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(const struct vm_area_struct *, unsigned long);
-extern void flush_tlb_page_mm(const struct vm_area_struct *,
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+extern void flush_tlb_page_mm(struct vm_area_struct *,
 			      struct mm_struct *, unsigned long);
-extern void flush_tlb_range(const struct vm_area_struct *,
+extern void flush_tlb_range(struct vm_area_struct *,
 			    unsigned long start, unsigned long end);
 
 #define flush_tlb() flush_tlb_current_task()
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index f27871775b7a..85e5cab4c2f0 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -66,6 +66,22 @@
 #define HV_DEFAULT_PAGE_SIZE_LARGE \
   (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE)
 
+#if CHIP_VA_WIDTH() > 32
+
+/** The log2 of the initial size of jumbo pages, in bytes.
+ * See HV_DEFAULT_PAGE_SIZE_JUMBO.
+ */
+#define HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO 32
+
+/** The initial size of jumbo pages, in bytes. This value should
+ * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO).
+ * It may also be modified when installing a new context.
+ */
+#define HV_DEFAULT_PAGE_SIZE_JUMBO \
+  (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO)
+
+#endif
+
 /** The log2 of the granularity at which page tables must be aligned;
  *  in other words, the CPA for a page table must have this many zero
  *  bits at the bottom of the address.
@@ -284,8 +300,11 @@
 #define HV_DISPATCH_GET_IPI_PTE 56
 #endif
 
+/** hv_set_pte_super_shift */
+#define HV_DISPATCH_SET_PTE_SUPER_SHIFT 57
+
 /** One more than the largest dispatch value */
-#define _HV_DISPATCH_END 57
+#define _HV_DISPATCH_END 58
 
 
 #ifndef __ASSEMBLER__
@@ -413,6 +432,11 @@ typedef enum {
    */
   HV_SYSCONF_VALID_PAGE_SIZES = 7,
 
+  /** The size of jumbo pages, in bytes.
+   * If no jumbo pages are available, zero will be returned.
+   */
+  HV_SYSCONF_PAGE_SIZE_JUMBO = 8,
+
 } HV_SysconfQuery;
 
 /** Offset to subtract from returned Kelvin temperature to get degrees
@@ -695,6 +719,29 @@ int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid,
 
 #ifndef __ASSEMBLER__
 
+
+/** Set the number of pages ganged together by HV_PTE_SUPER at a
+ * particular level of the page table.
+ *
+ * The current TILE-Gx hardware only supports powers of four
+ * (i.e. log2_count must be a multiple of two), and the requested
+ * "super" page size must be less than the span of the next level in
+ * the page table.  The largest size that can be requested is 64GB.
+ *
+ * The shift value is initially "0" for all page table levels,
+ * indicating that the HV_PTE_SUPER bit is effectively ignored.
+ *
+ * If you change the count from one non-zero value to another, the
+ * hypervisor will flush the entire TLB and TSB to avoid confusion.
+ *
+ * @param level Page table level (0, 1, or 2)
+ * @param log2_count Base-2 log of the number of pages to gang together,
+ * i.e. how much to shift left the base page size for the super page size.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_set_pte_super_shift(int level, int log2_count);
+
+
 /** Value returned from hv_inquire_context(). */
 typedef struct
 {
@@ -1891,8 +1938,9 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
 #define HV_PTE_INDEX_USER            10  /**< Page is user-accessible */
 #define HV_PTE_INDEX_ACCESSED        11  /**< Page has been accessed */
 #define HV_PTE_INDEX_DIRTY           12  /**< Page has been written */
-                                         /* Bits 13-15 are reserved for
+                                         /* Bits 13-14 are reserved for
                                             future use. */
+#define HV_PTE_INDEX_SUPER           15  /**< Pages ganged together for TLB */
 #define HV_PTE_INDEX_MODE            16  /**< Page mode; see HV_PTE_MODE_xxx */
 #define HV_PTE_MODE_BITS              3  /**< Number of bits in mode */
 #define HV_PTE_INDEX_CLIENT2         19  /**< Page client state 2 */
@@ -1987,7 +2035,10 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
 
 /** Does this PTE map a page?
  *
- * If this bit is set in the level-1 page table, the entry should be
+ * If this bit is set in a level-0 page table, the entry should be
+ * interpreted as a level-2 page table entry mapping a jumbo page.
+ *
+ * If this bit is set in a level-1 page table, the entry should be
  * interpreted as a level-2 page table entry mapping a large page.
  *
  * This bit should not be modified by the client while PRESENT is set, as
@@ -1997,6 +2048,18 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
  */
 #define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE)
 
+/** Does this PTE implicitly reference multiple pages?
+ *
+ * If this bit is set in the page table (either in the level-2 page table,
+ * or in a higher level page table in conjunction with the PAGE bit)
+ * then the PTE specifies a range of contiguous pages, not a single page.
+ * The hv_set_pte_super_shift() allows you to specify the count for
+ * each level of the page table.
+ *
+ * Note: this bit is not supported on TILEPro systems.
+ */
+#define HV_PTE_SUPER (__HV_PTE_ONE << HV_PTE_INDEX_SUPER)
+
 /** Is this a global (non-ASID) mapping?
  *
  * If this bit is set, the translations established by this PTE will
@@ -2215,6 +2278,7 @@ hv_pte_clear_##name(HV_PTE pte) \
  */
 _HV_BIT(present, PRESENT)
 _HV_BIT(page, PAGE)
+_HV_BIT(super, SUPER)
 _HV_BIT(client0, CLIENT0)
 _HV_BIT(client1, CLIENT1)
 _HV_BIT(client2, CLIENT2)
diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds
index 2b7cd0a659a9..d44c5a67a1ed 100644
--- a/arch/tile/kernel/hvglue.lds
+++ b/arch/tile/kernel/hvglue.lds
@@ -55,4 +55,5 @@ hv_store_mapping = TEXT_OFFSET + 0x106a0;
 hv_inquire_realpa = TEXT_OFFSET + 0x106c0;
 hv_flush_all = TEXT_OFFSET + 0x106e0;
 hv_get_ipi_pte = TEXT_OFFSET + 0x10700;
-hv_glue_internals = TEXT_OFFSET + 0x10720;
+hv_set_pte_super_shift = TEXT_OFFSET + 0x10720;
+hv_glue_internals = TEXT_OFFSET + 0x10740;
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 446a7f52cc11..dafc447b5125 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -22,6 +22,7 @@
 #include <linux/proc_fs.h>
 #include <linux/sysctl.h>
 #include <linux/hardirq.h>
+#include <linux/hugetlb.h>
 #include <linux/mman.h>
 #include <asm/unaligned.h>
 #include <asm/pgtable.h>
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 32948e21113a..445c220eae51 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -28,6 +28,7 @@
 #include <linux/highmem.h>
 #include <linux/smp.h>
 #include <linux/timex.h>
+#include <linux/hugetlb.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
@@ -49,9 +50,6 @@ char chip_model[64] __write_once;
 struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-/* We only create bootmem data on node 0. */
-static bootmem_data_t __initdata node0_bdata;
-
 /* Information on the NUMA nodes that we compute early */
 unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
 unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
@@ -518,37 +516,96 @@ static void __init setup_memory(void)
 #endif
 }
 
-static void __init setup_bootmem_allocator(void)
+/*
+ * On 32-bit machines, we only put bootmem on the low controller,
+ * since PAs > 4GB can't be used in bootmem.  In principle one could
+ * imagine, e.g., multiple 1 GB controllers all of which could support
+ * bootmem, but in practice using controllers this small isn't a
+ * particularly interesting scenario, so we just keep it simple and
+ * use only the first controller for bootmem on 32-bit machines.
+ */
+static inline int node_has_bootmem(int nid)
 {
-	unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn;
+#ifdef CONFIG_64BIT
+	return 1;
+#else
+	return nid == 0;
+#endif
+}
 
-	/* Provide a node 0 bdata. */
-	NODE_DATA(0)->bdata = &node0_bdata;
+static inline unsigned long alloc_bootmem_pfn(int nid,
+					      unsigned long size,
+					      unsigned long goal)
+{
+	void *kva = __alloc_bootmem_node(NODE_DATA(nid), size,
+					 PAGE_SIZE, goal);
+	unsigned long pfn = kaddr_to_pfn(kva);
+	BUG_ON(goal && PFN_PHYS(pfn) != goal);
+	return pfn;
+}
 
-#ifdef CONFIG_PCI
-	/* Don't let boot memory alias the PCI region. */
-	last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn);
+static void __init setup_bootmem_allocator_node(int i)
+{
+	unsigned long start, end, mapsize, mapstart;
+
+	if (node_has_bootmem(i)) {
+		NODE_DATA(i)->bdata = &bootmem_node_data[i];
+	} else {
+		/* Share controller zero's bdata for now. */
+		NODE_DATA(i)->bdata = &bootmem_node_data[0];
+		return;
+	}
+
+	/* Skip up to after the bss in node 0. */
+	start = (i == 0) ? min_low_pfn : node_start_pfn[i];
+
+	/* Only lowmem, if we're a HIGHMEM build. */
+#ifdef CONFIG_HIGHMEM
+	end = node_lowmem_end_pfn[i];
 #else
-	last_alloc_pfn = max_low_pfn;
+	end = node_end_pfn[i];
 #endif
 
-	/*
-	 * Initialize the boot-time allocator (with low memory only):
-	 * The first argument says where to put the bitmap, and the
-	 * second says where the end of allocatable memory is.
-	 */
-	bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn);
+	/* No memory here. */
+	if (end == start)
+		return;
+
+	/* Figure out where the bootmem bitmap is located. */
+	mapsize = bootmem_bootmap_pages(end - start);
+	if (i == 0) {
+		/* Use some space right before the heap on node 0. */
+		mapstart = start;
+		start += mapsize;
+	} else {
+		/* Allocate bitmap on node 0 to avoid page table issues. */
+		mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0);
+	}
 
+	/* Initialize a node. */
+	init_bootmem_node(NODE_DATA(i), mapstart, start, end);
+
+	/* Free all the space back into the allocator. */
+	free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
+
+#if defined(CONFIG_PCI)
 	/*
-	 * Let the bootmem allocator use all the space we've given it
-	 * except for its own bitmap.
+	 * Throw away any memory aliased by the PCI region.  FIXME: this
+	 * is a temporary hack to work around bug 10502, and needs to be
+	 * fixed properly.
 	 */
-	first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size);
-	if (first_alloc_pfn >= last_alloc_pfn)
-		early_panic("Not enough memory on controller 0 for bootmem\n");
+	if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
+		reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
+				PFN_PHYS(pci_reserve_end_pfn -
+					 pci_reserve_start_pfn),
+				BOOTMEM_EXCLUSIVE);
+#endif
+}
 
-	free_bootmem(PFN_PHYS(first_alloc_pfn),
-		     PFN_PHYS(last_alloc_pfn - first_alloc_pfn));
+static void __init setup_bootmem_allocator(void)
+{
+	int i;
+	for (i = 0; i < MAX_NUMNODES; ++i)
+		setup_bootmem_allocator_node(i);
 
 #ifdef CONFIG_KEXEC
 	if (crashk_res.start != crashk_res.end)
@@ -579,14 +636,6 @@ static int __init percpu_size(void)
 	return size;
 }
 
-static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
-{
-	void *kva = __alloc_bootmem(size, PAGE_SIZE, goal);
-	unsigned long pfn = kaddr_to_pfn(kva);
-	BUG_ON(goal && PFN_PHYS(pfn) != goal);
-	return pfn;
-}
-
 static void __init zone_sizes_init(void)
 {
 	unsigned long zones_size[MAX_NR_ZONES] = { 0 };
@@ -624,21 +673,22 @@ static void __init zone_sizes_init(void)
 	 * though, there'll be no lowmem, so we just alloc_bootmem
 	 * the memmap.  There will be no percpu memory either.
 	 */
-		if (__pfn_to_highbits(start) == 0) {
-			/* In low PAs, allocate via bootmem. */
+		if (i != 0 && cpu_isset(i, isolnodes)) {
+			node_memmap_pfn[i] =
+				alloc_bootmem_pfn(0, memmap_size, 0);
+			BUG_ON(node_percpu[i] != 0);
+		} else if (node_has_bootmem(start)) {
 			unsigned long goal = 0;
 			node_memmap_pfn[i] =
-				alloc_bootmem_pfn(memmap_size, goal);
+				alloc_bootmem_pfn(i, memmap_size, 0);
 			if (kdata_huge)
 				goal = PFN_PHYS(lowmem_end) - node_percpu[i];
 			if (node_percpu[i])
 				node_percpu_pfn[i] =
-					alloc_bootmem_pfn(node_percpu[i], goal);
-		} else if (cpu_isset(i, isolnodes)) {
-			node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0);
-			BUG_ON(node_percpu[i] != 0);
+					alloc_bootmem_pfn(i, node_percpu[i],
+							  goal);
 		} else {
-			/* In high PAs, just reserve some pages. */
+			/* In non-bootmem zones, just reserve some pages. */
 			node_memmap_pfn[i] = node_free_pfn[i];
 			node_free_pfn[i] += PFN_UP(memmap_size);
 			if (!kdata_huge) {
@@ -662,16 +712,9 @@ static void __init zone_sizes_init(void)
 		zones_size[ZONE_NORMAL] = end - start;
 #endif
 
-		/*
-		 * Everyone shares node 0's bootmem allocator, but
-		 * we use alloc_remap(), above, to put the actual
-		 * struct page array on the individual controllers,
-		 * which is most of the data that we actually care about.
-		 * We can't place bootmem allocators on the other
-		 * controllers since the bootmem allocator can only
-		 * operate on 32-bit physical addresses.
-		 */
-		NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
+		/* Take zone metadata from controller 0 if we're isolnode. */
+		if (node_isset(i, isolnodes))
+			NODE_DATA(i)->bdata = &bootmem_node_data[0];
 
 		free_area_init_node(i, zones_size, start, NULL);
 		printk(KERN_DEBUG "  Normal zone: %ld per-cpu pages\n",
@@ -854,6 +897,22 @@ subsys_initcall(topology_init);
 
 #endif /* CONFIG_NUMA */
 
+/*
+ * Initialize hugepage support on this cpu.  We do this on all cores
+ * early in boot: before argument parsing for the boot cpu, and after
+ * argument parsing but before the init functions run on the secondaries.
+ * So the values we set up here in the hypervisor may be overridden on
+ * the boot cpu as arguments are parsed.
+ */
+static __cpuinit void init_super_pages(void)
+{
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	int i;
+	for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i)
+		hv_set_pte_super_shift(i, huge_shift[i]);
+#endif
+}
+
 /**
  * setup_cpu() - Do all necessary per-cpu, tile-specific initialization.
  * @boot: Is this the boot cpu?
@@ -908,6 +967,8 @@ void __cpuinit setup_cpu(int boot)
 	/* Reset the network state on this cpu. */
 	reset_network_state();
 #endif
+
+	init_super_pages();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c
index a5f241c24cac..3fd54d5bbd4c 100644
--- a/arch/tile/kernel/tlb.c
+++ b/arch/tile/kernel/tlb.c
@@ -15,6 +15,7 @@
 
 #include <linux/cpumask.h>
 #include <linux/module.h>
+#include <linux/hugetlb.h>
 #include <asm/tlbflush.h>
 #include <asm/homecache.h>
 #include <hv/hypervisor.h>
@@ -49,25 +50,25 @@ void flush_tlb_current_task(void)
 	flush_tlb_mm(current->mm);
 }
 
-void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm,
+void flush_tlb_page_mm(struct vm_area_struct *vma, struct mm_struct *mm,
 		       unsigned long va)
 {
-	unsigned long size = hv_page_size(vma);
+	unsigned long size = vma_kernel_pagesize(vma);
 	int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
 	flush_remote(0, cache, mm_cpumask(mm),
 		     va, size, size, mm_cpumask(mm), NULL, 0);
 }
 
-void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va)
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
 	flush_tlb_page_mm(vma, vma->vm_mm, va);
 }
 EXPORT_SYMBOL(flush_tlb_page);
 
-void flush_tlb_range(const struct vm_area_struct *vma,
+void flush_tlb_range(struct vm_area_struct *vma,
 		     unsigned long start, unsigned long end)
 {
-	unsigned long size = hv_page_size(vma);
+	unsigned long size = vma_kernel_pagesize(vma);
 	struct mm_struct *mm = vma->vm_mm;
 	int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
 	flush_remote(0, cache, mm_cpumask(mm), start, end - start, size,
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 22e58f51ed23..54f18fc25ed0 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -187,7 +187,7 @@ static pgd_t *get_current_pgd(void)
 	HV_Context ctx = hv_inquire_context();
 	unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT;
 	struct page *pgd_page = pfn_to_page(pgd_pfn);
-	BUG_ON(PageHighMem(pgd_page));   /* oops, HIGHPTE? */
+	BUG_ON(PageHighMem(pgd_page));
 	return (pgd_t *) __va(ctx.page_table);
 }
 
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 499f73770b05..dbcbdf7b8aa8 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -30,6 +30,7 @@
 #include <linux/cache.h>
 #include <linux/smp.h>
 #include <linux/module.h>
+#include <linux/hugetlb.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 42cfcba4e1ef..812e2d037972 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -27,85 +27,161 @@
 #include <linux/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+
+/*
+ * Provide an additional huge page size (in addition to the regular default
+ * huge page size) if no "hugepagesz" arguments are specified.
+ * Note that it must be smaller than the default huge page size so
+ * that it's possible to allocate them on demand from the buddy allocator.
+ * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
+ * or not define it at all.
+ */
+#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)
+
+/* "Extra" page-size multipliers, one per level of the page table. */
+int huge_shift[HUGE_SHIFT_ENTRIES] = {
+#ifdef ADDITIONAL_HUGE_SIZE
+#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
+	[HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
+#endif
+};
+
+/*
+ * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
+ * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
+ * It locks the user pagetable, and bumps up the mm->nr_ptes field,
+ * but otherwise allocate the page table using the kernel versions.
+ */
+static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
+				unsigned long address)
+{
+	pte_t *new;
+
+	if (pmd_none(*pmd)) {
+		new = pte_alloc_one_kernel(mm, address);
+		if (!new)
+			return NULL;
+
+		smp_wmb(); /* See comment in __pte_alloc */
+
+		spin_lock(&mm->page_table_lock);
+		if (likely(pmd_none(*pmd))) {	/* Has another populated it ? */
+			mm->nr_ptes++;
+			pmd_populate_kernel(mm, pmd, new);
+			new = NULL;
+		} else
+			VM_BUG_ON(pmd_trans_splitting(*pmd));
+		spin_unlock(&mm->page_table_lock);
+		if (new)
+			pte_free_kernel(mm, new);
+	}
+
+	return pte_offset_kernel(pmd, address);
+}
+#endif
 
 pte_t *huge_pte_alloc(struct mm_struct *mm,
 		      unsigned long addr, unsigned long sz)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pte_t *pte = NULL;
 
-	/* We do not yet support multiple huge page sizes. */
-	BUG_ON(sz != PMD_SIZE);
+	addr &= -sz;   /* Mask off any low bits in the address. */
 
 	pgd = pgd_offset(mm, addr);
 	pud = pud_alloc(mm, pgd, addr);
-	if (pud)
-		pte = (pte_t *) pmd_alloc(mm, pud, addr);
-	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 
-	return pte;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	if (sz >= PGDIR_SIZE) {
+		BUG_ON(sz != PGDIR_SIZE &&
+		       sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
+		return (pte_t *)pud;
+	} else {
+		pmd_t *pmd = pmd_alloc(mm, pud, addr);
+		if (sz >= PMD_SIZE) {
+			BUG_ON(sz != PMD_SIZE &&
+			       sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
+			return (pte_t *)pmd;
+		}
+		else {
+			if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
+				panic("Unexpected page size %#lx\n", sz);
+			return pte_alloc_hugetlb(mm, pmd, addr);
+		}
+	}
+#else
+	BUG_ON(sz != PMD_SIZE);
+	return (pte_t *) pmd_alloc(mm, pud, addr);
+#endif
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+static pte_t *get_pte(pte_t *base, int index, int level)
 {
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pud = pud_offset(pgd, addr);
-		if (pud_present(*pud))
-			pmd = pmd_offset(pud, addr);
+	pte_t *ptep = base + index;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	if (!pte_present(*ptep) && huge_shift[level] != 0) {
+		unsigned long mask = -1UL << huge_shift[level];
+		pte_t *super_ptep = base + (index & mask);
+		pte_t pte = *super_ptep;
+		if (pte_present(pte) && pte_super(pte))
+			ptep = super_ptep;
 	}
-	return (pte_t *) pmd;
+#endif
+	return ptep;
 }
 
-#ifdef HUGETLB_TEST
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-	unsigned long start = address;
-	int length = 1;
-	int nr;
-	struct page *page;
-	struct vm_area_struct *vma;
-
-	vma = find_vma(mm, addr);
-	if (!vma || !is_vm_hugetlb_page(vma))
-		return ERR_PTR(-EINVAL);
-
-	pte = huge_pte_offset(mm, address);
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	pte_t *pte;
+#endif
 
-	/* hugetlb should be locked, and hence, prefaulted */
-	WARN_ON(!pte || pte_none(*pte));
+	/* Get the top-level page table entry. */
+	pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
+	if (!pgd_present(*pgd))
+		return NULL;
 
-	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+	/* We don't have four levels. */
+	pud = pud_offset(pgd, addr);
+#ifndef __PAGETABLE_PUD_FOLDED
+# error support fourth page table level
+#endif
 
-	WARN_ON(!PageHead(page));
+	/* Check for an L0 huge PTE, if we have three levels. */
+#ifndef __PAGETABLE_PMD_FOLDED
+	if (pud_huge(*pud))
+		return (pte_t *)pud;
 
-	return page;
-}
-
-int pmd_huge(pmd_t pmd)
-{
-	return 0;
-}
+	pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
+			       pmd_index(addr), 1);
+	if (!pmd_present(*pmd))
+		return NULL;
+#else
+	pmd = pmd_offset(pud, addr);
+#endif
 
-int pud_huge(pud_t pud)
-{
-	return 0;
-}
+	/* Check for an L1 huge PTE. */
+	if (pmd_huge(*pmd))
+		return (pte_t *)pmd;
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+	/* Check for an L2 huge PTE. */
+	pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
+	if (!pte_present(*pte))
+		return NULL;
+	if (pte_super(*pte))
+		return pte;
+#endif
 
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-			     pmd_t *pmd, int write)
-{
 	return NULL;
 }
 
-#else
-
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write)
 {
@@ -149,8 +225,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 	return 0;
 }
 
-#endif
-
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 		unsigned long addr, unsigned long len,
@@ -322,21 +396,102 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	return hugetlb_get_unmapped_area_topdown(file, addr, len,
 			pgoff, flags);
 }
+#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
-static __init int setup_hugepagesz(char *opt)
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static __init int __setup_hugepagesz(unsigned long ps)
 {
-	unsigned long ps = memparse(opt, &opt);
-	if (ps == PMD_SIZE) {
-		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-	} else if (ps == PUD_SIZE) {
-		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	int log_ps = __builtin_ctzl(ps);
+	int level, base_shift;
+
+	if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
+		pr_warn("Not enabling %ld byte huge pages;"
+			" must be a power of four.\n", ps);
+		return -EINVAL;
+	}
+
+	if (ps > 64*1024*1024*1024UL) {
+		pr_warn("Not enabling %ld MB huge pages;"
+			" largest legal value is 64 GB .\n", ps >> 20);
+		return -EINVAL;
+	} else if (ps >= PUD_SIZE) {
+		static long hv_jpage_size;
+		if (hv_jpage_size == 0)
+			hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
+		if (hv_jpage_size != PUD_SIZE) {
+			pr_warn("Not enabling >= %ld MB huge pages:"
+				" hypervisor reports size %ld\n",
+				PUD_SIZE >> 20, hv_jpage_size);
+			return -EINVAL;
+		}
+		level = 0;
+		base_shift = PUD_SHIFT;
+	} else if (ps >= PMD_SIZE) {
+		level = 1;
+		base_shift = PMD_SHIFT;
+	} else if (ps > PAGE_SIZE) {
+		level = 2;
+		base_shift = PAGE_SHIFT;
 	} else {
-		pr_err("hugepagesz: Unsupported page size %lu M\n",
-			ps >> 20);
-		return 0;
+		pr_err("hugepagesz: huge page size %ld too small\n", ps);
+		return -EINVAL;
 	}
-	return 1;
+
+	if (log_ps != base_shift) {
+		int shift_val = log_ps - base_shift;
+		if (huge_shift[level] != 0) {
+			int old_shift = base_shift + huge_shift[level];
+			pr_warn("Not enabling %ld MB huge pages;"
+				" already have size %ld MB.\n",
+				ps >> 20, (1UL << old_shift) >> 20);
+			return -EINVAL;
+		}
+		if (hv_set_pte_super_shift(level, shift_val) != 0) {
+			pr_warn("Not enabling %ld MB huge pages;"
+				" no hypervisor support.\n", ps >> 20);
+			return -EINVAL;
+		}
+		printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
+		huge_shift[level] = shift_val;
+	}
+
+	hugetlb_add_hstate(log_ps - PAGE_SHIFT);
+
+	return 0;
+}
+
+static bool saw_hugepagesz;
+
+static __init int setup_hugepagesz(char *opt)
+{
+	if (!saw_hugepagesz) {
+		saw_hugepagesz = true;
+		memset(huge_shift, 0, sizeof(huge_shift));
+	}
+	return __setup_hugepagesz(memparse(opt, NULL));
 }
 __setup("hugepagesz=", setup_hugepagesz);
 
-#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+#ifdef ADDITIONAL_HUGE_SIZE
+/*
+ * Provide an additional huge page size if no "hugepagesz" args are given.
+ * In that case, all the cores have properly set up their hv super_shift
+ * already, but we need to notify the hugetlb code to enable the
+ * new huge page size from the Linux point of view.
+ */
+static __init int add_default_hugepagesz(void)
+{
+	if (!saw_hugepagesz) {
+		BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
+			     ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
+		BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
+			     ADDITIONAL_HUGE_SIZE);
+		BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
+		hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
+	}
+	return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif
+
+#endif /* CONFIG_HUGETLB_SUPER_PAGES */
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index c04fbfd93fc5..630dd2ce2afe 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -698,6 +698,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 #endif /* CONFIG_HIGHMEM */
 
 
+#ifndef CONFIG_64BIT
 static void __init init_free_pfn_range(unsigned long start, unsigned long end)
 {
 	unsigned long pfn;
@@ -770,6 +771,7 @@ static void __init set_non_bootmem_pages_init(void)
 		init_free_pfn_range(start, end);
 	}
 }
+#endif
 
 /*
  * paging_init() sets up the page tables - note that all of lowmem is
@@ -858,8 +860,10 @@ void __init mem_init(void)
 	/* this will put all bootmem onto the freelists */
 	totalram_pages += free_all_bootmem();
 
+#ifndef CONFIG_64BIT
 	/* count all remaining LOWMEM and give all HIGHMEM to page allocator */
 	set_non_bootmem_pages_init();
+#endif
 
 	codesize =  (unsigned long)&_etext - (unsigned long)&_text;
 	datasize =  (unsigned long)&_end - (unsigned long)&_sdata;
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 3d7074347e6d..345edfed9fcd 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -132,15 +132,6 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 	set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
 }
 
-#if defined(CONFIG_HIGHPTE)
-pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
-{
-	pte_t *pte = kmap_atomic(pmd_page(*dir)) +
-		(pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK;
-	return &pte[pte_index(address)];
-}
-#endif
-
 /**
  * shatter_huge_page() - ensure a given address is mapped by a small page.
  *
@@ -296,10 +287,6 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
 	struct page *p;
 	int i;
 
-#ifdef CONFIG_HIGHPTE
-	flags |= __GFP_HIGHMEM;
-#endif
-
 	p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
 	if (p == NULL)
 		return NULL;