Diffstat (limited to 'arch')

 -rw-r--r--  arch/tile/Kconfig                      8
 -rw-r--r--  arch/tile/include/asm/hugetlb.h       21
 -rw-r--r--  arch/tile/include/asm/page.h           5
 -rw-r--r--  arch/tile/include/asm/pgtable.h       12
 -rw-r--r--  arch/tile/include/asm/tlbflush.h      17
 -rw-r--r--  arch/tile/include/hv/hypervisor.h     70
 -rw-r--r--  arch/tile/kernel/hvglue.lds            3
 -rw-r--r--  arch/tile/kernel/proc.c                1
 -rw-r--r--  arch/tile/kernel/setup.c             161
 -rw-r--r--  arch/tile/kernel/tlb.c                11
 -rw-r--r--  arch/tile/mm/fault.c                   2
 -rw-r--r--  arch/tile/mm/homecache.c               1
 -rw-r--r--  arch/tile/mm/hugetlbpage.c           285
 -rw-r--r--  arch/tile/mm/init.c                    4
 -rw-r--r--  arch/tile/mm/pgtable.c                13

 15 files changed, 456 insertions, 158 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 38c3957e0b40..cc5664286a1c 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -47,6 +47,14 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK
 config SYS_SUPPORTS_HUGETLBFS
 	def_bool y
 
+# Support for additional huge page sizes besides HPAGE_SIZE.
+# The software support is currently only present in the TILE-Gx
+# hypervisor. TILEPro in any case does not support page sizes
+# larger than the default HPAGE_SIZE.
+config HUGETLB_SUPER_PAGES
+	depends on HUGETLB_PAGE && TILEGX
+	def_bool y
+
 config GENERIC_CLOCKEVENTS
 	def_bool y
 
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index d396d1805163..b2042380a5aa 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -106,4 +106,25 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+                                       struct page *page, int writable)
+{
+        size_t pagesize = huge_page_size(hstate_vma(vma));
+        if (pagesize != PUD_SIZE && pagesize != PMD_SIZE)
+                entry = pte_mksuper(entry);
+        return entry;
+}
+#define arch_make_huge_pte arch_make_huge_pte
+
+/* Sizes to scale up page size for PTEs with HV_PTE_SUPER bit. */
+enum {
+        HUGE_SHIFT_PGDIR = 0,
+        HUGE_SHIFT_PMD = 1,
+        HUGE_SHIFT_PAGE = 2,
+        HUGE_SHIFT_ENTRIES
+};
+extern int huge_shift[HUGE_SHIFT_ENTRIES];
+#endif
+
 #endif /* _ASM_TILE_HUGETLB_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index c750943f961e..9d9131e5c552 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -87,8 +87,7 @@ typedef HV_PTE pgprot_t;
 /*
  * User L2 page tables are managed as one L2 page table per page,
  * because we use the page allocator for them. This keeps the allocation
- * simple and makes it potentially useful to implement HIGHPTE at some point.
- * However, it's also inefficient, since L2 page tables are much smaller
+ * simple, but it's also inefficient, since L2 page tables are much smaller
  * than pages (currently 2KB vs 64KB). So we should revisit this.
  */
 typedef struct page *pgtable_t;
@@ -137,7 +136,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
 
 #define HUGETLB_PAGE_ORDER      (HPAGE_SHIFT - PAGE_SHIFT)
 
-#define HUGE_MAX_HSTATE         2
+#define HUGE_MAX_HSTATE         6
 
 #ifdef CONFIG_HUGETLB_PAGE
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 319f4826d972..73b1a4c9ad03 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -71,6 +71,7 @@ extern void set_page_homes(void);
 
 #define _PAGE_PRESENT           HV_PTE_PRESENT
 #define _PAGE_HUGE_PAGE         HV_PTE_PAGE
+#define _PAGE_SUPER_PAGE        HV_PTE_SUPER
 #define _PAGE_READABLE          HV_PTE_READABLE
 #define _PAGE_WRITABLE          HV_PTE_WRITABLE
 #define _PAGE_EXECUTABLE        HV_PTE_EXECUTABLE
@@ -87,6 +88,7 @@ extern void set_page_homes(void);
 #define _PAGE_ALL (\
   _PAGE_PRESENT | \
   _PAGE_HUGE_PAGE | \
+  _PAGE_SUPER_PAGE | \
   _PAGE_READABLE | \
   _PAGE_WRITABLE | \
   _PAGE_EXECUTABLE | \
@@ -197,6 +199,7 @@ static inline void __pte_clear(pte_t *ptep)
 #define pte_write hv_pte_get_writable
 #define pte_exec hv_pte_get_executable
 #define pte_huge hv_pte_get_page
+#define pte_super hv_pte_get_super
 #define pte_rdprotect hv_pte_clear_readable
 #define pte_exprotect hv_pte_clear_executable
 #define pte_mkclean hv_pte_clear_dirty
@@ -209,6 +212,7 @@ static inline void __pte_clear(pte_t *ptep)
 #define pte_mkyoung hv_pte_set_accessed
 #define pte_mkwrite hv_pte_set_writable
 #define pte_mkhuge hv_pte_set_page
+#define pte_mksuper hv_pte_set_super
 
 #define pte_special(pte) 0
 #define pte_mkspecial(pte) (pte)
@@ -338,13 +342,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-#if defined(CONFIG_HIGHPTE)
-extern pte_t *pte_offset_map(pmd_t *, unsigned long address);
-#define pte_unmap(pte) kunmap_atomic(pte)
-#else
 #define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
 #define pte_unmap(pte) do { } while (0)
-#endif
 
 /* Clear a non-executable kernel PTE and flush it from the TLB. */
 #define kpte_clear_flush(ptep, vaddr) \
@@ -537,7 +536,8 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
 /* Support /proc/NN/pgtable API. */
 struct seq_file;
 int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm,
-                           unsigned long vaddr, pte_t *ptep, void **datap);
+                           unsigned long vaddr, unsigned long pagesize,
+                           pte_t *ptep, void **datap);
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
index 96199d214fb8..dcf91b25a1e5 100644
--- a/arch/tile/include/asm/tlbflush.h
+++ b/arch/tile/include/asm/tlbflush.h
@@ -38,16 +38,11 @@ DECLARE_PER_CPU(int, current_asid);
 /* The hypervisor tells us what ASIDs are available to us. */
 extern int min_asid, max_asid;
 
-static inline unsigned long hv_page_size(const struct vm_area_struct *vma)
-{
-        return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE;
-}
-
 /* Pass as vma pointer for non-executable mapping, if no vma available. */
-#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL)
+#define FLUSH_NONEXEC ((struct vm_area_struct *)-1UL)
 
 /* Flush a single user page on this cpu. */
-static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
                                         unsigned long addr,
                                         unsigned long page_size)
 {
@@ -60,7 +55,7 @@ static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
 }
 
 /* Flush range of user pages on this cpu. */
-static inline void local_flush_tlb_pages(const struct vm_area_struct *vma,
+static inline void local_flush_tlb_pages(struct vm_area_struct *vma,
                                          unsigned long addr,
                                          unsigned long page_size,
                                          unsigned long len)
@@ -117,10 +112,10 @@ extern void flush_tlb_all(void);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void flush_tlb_current_task(void);
 extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(const struct vm_area_struct *, unsigned long);
-extern void flush_tlb_page_mm(const struct vm_area_struct *,
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+extern void flush_tlb_page_mm(struct vm_area_struct *,
                               struct mm_struct *, unsigned long);
-extern void flush_tlb_range(const struct vm_area_struct *,
+extern void flush_tlb_range(struct vm_area_struct *,
                             unsigned long start, unsigned long end);
 
 #define flush_tlb() flush_tlb_current_task()
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index f27871775b7a..85e5cab4c2f0 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -66,6 +66,22 @@
 #define HV_DEFAULT_PAGE_SIZE_LARGE \
   (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE)
 
+#if CHIP_VA_WIDTH() > 32
+
+/** The log2 of the initial size of jumbo pages, in bytes.
+ * See HV_DEFAULT_PAGE_SIZE_JUMBO.
+ */
+#define HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO 32
+
+/** The initial size of jumbo pages, in bytes. This value should
+ * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO).
+ * It may also be modified when installing a new context.
+ */
+#define HV_DEFAULT_PAGE_SIZE_JUMBO \
+  (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO)
+
+#endif
+
 /** The log2 of the granularity at which page tables must be aligned;
  * in other words, the CPA for a page table must have this many zero
  * bits at the bottom of the address.
@@ -284,8 +300,11 @@
 #define HV_DISPATCH_GET_IPI_PTE                   56
 #endif
 
+/** hv_set_pte_super_shift */
+#define HV_DISPATCH_SET_PTE_SUPER_SHIFT           57
+
 /** One more than the largest dispatch value */
-#define _HV_DISPATCH_END                          57
+#define _HV_DISPATCH_END                          58
 
 
 #ifndef __ASSEMBLER__
@@ -413,6 +432,11 @@ typedef enum {
    */
   HV_SYSCONF_VALID_PAGE_SIZES = 7,
 
+  /** The size of jumbo pages, in bytes.
+   * If no jumbo pages are available, zero will be returned.
+   */
+  HV_SYSCONF_PAGE_SIZE_JUMBO = 8,
+
 } HV_SysconfQuery;
 
 /** Offset to subtract from returned Kelvin temperature to get degrees
@@ -695,6 +719,29 @@ int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid,
 
 #ifndef __ASSEMBLER__
 
+
+/** Set the number of pages ganged together by HV_PTE_SUPER at a
+ * particular level of the page table.
+ *
+ * The current TILE-Gx hardware only supports powers of four
+ * (i.e. log2_count must be a multiple of two), and the requested
+ * "super" page size must be less than the span of the next level in
+ * the page table. The largest size that can be requested is 64GB.
+ *
+ * The shift value is initially "0" for all page table levels,
+ * indicating that the HV_PTE_SUPER bit is effectively ignored.
+ *
+ * If you change the count from one non-zero value to another, the
+ * hypervisor will flush the entire TLB and TSB to avoid confusion.
+ *
+ * @param level Page table level (0, 1, or 2)
+ * @param log2_count Base-2 log of the number of pages to gang together,
+ * i.e. how much to shift left the base page size for the super page size.
+ * @return Zero on success, or a hypervisor error code on failure.
+ */
+int hv_set_pte_super_shift(int level, int log2_count);
+
+
 /** Value returned from hv_inquire_context(). */
 typedef struct
 {
@@ -1891,8 +1938,9 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
 #define HV_PTE_INDEX_USER            10  /**< Page is user-accessible */
 #define HV_PTE_INDEX_ACCESSED        11  /**< Page has been accessed */
 #define HV_PTE_INDEX_DIRTY           12  /**< Page has been written */
-                                         /* Bits 13-15 are reserved for
+                                         /* Bits 13-14 are reserved for
                                               future use. */
+#define HV_PTE_INDEX_SUPER           15  /**< Pages ganged together for TLB */
 #define HV_PTE_INDEX_MODE            16  /**< Page mode; see HV_PTE_MODE_xxx */
 #define HV_PTE_MODE_BITS              3  /**< Number of bits in mode */
 #define HV_PTE_INDEX_CLIENT2         19  /**< Page client state 2 */
@@ -1987,7 +2035,10 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
 
 /** Does this PTE map a page?
  *
- * If this bit is set in the level-1 page table, the entry should be
+ * If this bit is set in a level-0 page table, the entry should be
+ * interpreted as a level-2 page table entry mapping a jumbo page.
+ *
+ * If this bit is set in a level-1 page table, the entry should be
  * interpreted as a level-2 page table entry mapping a large page.
  *
  * This bit should not be modified by the client while PRESENT is set, as
@@ -1997,6 +2048,18 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control,
  */
 #define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE)
 
+/** Does this PTE implicitly reference multiple pages?
+ *
+ * If this bit is set in the page table (either in the level-2 page table,
+ * or in a higher level page table in conjunction with the PAGE bit)
+ * then the PTE specifies a range of contiguous pages, not a single page.
+ * The hv_set_pte_super_shift() allows you to specify the count for
+ * each level of the page table.
+ *
+ * Note: this bit is not supported on TILEPro systems.
+ */
+#define HV_PTE_SUPER (__HV_PTE_ONE << HV_PTE_INDEX_SUPER)
+
 /** Is this a global (non-ASID) mapping?
  *
  * If this bit is set, the translations established by this PTE will
@@ -2215,6 +2278,7 @@ hv_pte_clear_##name(HV_PTE pte) \
  */
 _HV_BIT(present,        PRESENT)
 _HV_BIT(page,           PAGE)
+_HV_BIT(super,          SUPER)
 _HV_BIT(client0,        CLIENT0)
 _HV_BIT(client1,        CLIENT1)
 _HV_BIT(client2,        CLIENT2)
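
As a rough illustrative sketch of how a client of the interface documented above fits together (this code is not part of the patch; the helper name and the 64 KB base-page assumption are invented for the example): to build 1 MB mappings out of 64 KB base pages, 16 pages are ganged together, so log2_count is 4, which satisfies the power-of-four restriction; PTEs covering such a range would additionally carry HV_PTE_SUPER (pte_mksuper() in the kernel headers above).

/* Hypothetical helper, for illustration only: enable 1 MB "super" pages
 * at the bottom (PTE) level of the page table, assuming 64 KB base pages. */
static int enable_1mb_super_pages(void)
{
        int log2_count = 20 - 16;       /* log2(1 MB) - log2(64 KB) = 4 */

        /* Level 2 is the PTE level; returns 0 on success. */
        return hv_set_pte_super_shift(2, log2_count);
}
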
diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds
index 2b7cd0a659a9..d44c5a67a1ed 100644
--- a/arch/tile/kernel/hvglue.lds
+++ b/arch/tile/kernel/hvglue.lds
@@ -55,4 +55,5 @@ hv_store_mapping = TEXT_OFFSET + 0x106a0;
 hv_inquire_realpa = TEXT_OFFSET + 0x106c0;
 hv_flush_all = TEXT_OFFSET + 0x106e0;
 hv_get_ipi_pte = TEXT_OFFSET + 0x10700;
-hv_glue_internals = TEXT_OFFSET + 0x10720;
+hv_set_pte_super_shift = TEXT_OFFSET + 0x10720;
+hv_glue_internals = TEXT_OFFSET + 0x10740;
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 446a7f52cc11..dafc447b5125 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -22,6 +22,7 @@
 #include <linux/proc_fs.h>
 #include <linux/sysctl.h>
 #include <linux/hardirq.h>
+#include <linux/hugetlb.h>
 #include <linux/mman.h>
 #include <asm/unaligned.h>
 #include <asm/pgtable.h>
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 32948e21113a..445c220eae51 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -28,6 +28,7 @@
 #include <linux/highmem.h>
 #include <linux/smp.h>
 #include <linux/timex.h>
+#include <linux/hugetlb.h>
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
@@ -49,9 +50,6 @@ char chip_model[64] __write_once;
 struct pglist_data node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
 
-/* We only create bootmem data on node 0. */
-static bootmem_data_t __initdata node0_bdata;
-
 /* Information on the NUMA nodes that we compute early */
 unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES];
 unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES];
@@ -518,37 +516,96 @@ static void __init setup_memory(void)
 #endif
 }
 
-static void __init setup_bootmem_allocator(void)
+/*
+ * On 32-bit machines, we only put bootmem on the low controller,
+ * since PAs > 4GB can't be used in bootmem. In principle one could
+ * imagine, e.g., multiple 1 GB controllers all of which could support
+ * bootmem, but in practice using controllers this small isn't a
+ * particularly interesting scenario, so we just keep it simple and
+ * use only the first controller for bootmem on 32-bit machines.
+ */
+static inline int node_has_bootmem(int nid)
 {
-        unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn;
+#ifdef CONFIG_64BIT
+        return 1;
+#else
+        return nid == 0;
+#endif
+}
 
-        /* Provide a node 0 bdata. */
-        NODE_DATA(0)->bdata = &node0_bdata;
+static inline unsigned long alloc_bootmem_pfn(int nid,
+                                              unsigned long size,
+                                              unsigned long goal)
+{
+        void *kva = __alloc_bootmem_node(NODE_DATA(nid), size,
+                                         PAGE_SIZE, goal);
+        unsigned long pfn = kaddr_to_pfn(kva);
+        BUG_ON(goal && PFN_PHYS(pfn) != goal);
+        return pfn;
+}
 
-#ifdef CONFIG_PCI
-        /* Don't let boot memory alias the PCI region. */
-        last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn);
+static void __init setup_bootmem_allocator_node(int i)
+{
+        unsigned long start, end, mapsize, mapstart;
+
+        if (node_has_bootmem(i)) {
+                NODE_DATA(i)->bdata = &bootmem_node_data[i];
+        } else {
+                /* Share controller zero's bdata for now. */
+                NODE_DATA(i)->bdata = &bootmem_node_data[0];
+                return;
+        }
+
+        /* Skip up to after the bss in node 0. */
+        start = (i == 0) ? min_low_pfn : node_start_pfn[i];
+
+        /* Only lowmem, if we're a HIGHMEM build. */
+#ifdef CONFIG_HIGHMEM
+        end = node_lowmem_end_pfn[i];
 #else
-        last_alloc_pfn = max_low_pfn;
+        end = node_end_pfn[i];
 #endif
 
-        /*
-         * Initialize the boot-time allocator (with low memory only):
-         * The first argument says where to put the bitmap, and the
-         * second says where the end of allocatable memory is.
-         */
-        bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn);
+        /* No memory here. */
+        if (end == start)
+                return;
+
+        /* Figure out where the bootmem bitmap is located. */
+        mapsize = bootmem_bootmap_pages(end - start);
+        if (i == 0) {
+                /* Use some space right before the heap on node 0. */
+                mapstart = start;
+                start += mapsize;
+        } else {
+                /* Allocate bitmap on node 0 to avoid page table issues. */
+                mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0);
+        }
 
+        /* Initialize a node. */
+        init_bootmem_node(NODE_DATA(i), mapstart, start, end);
+
+        /* Free all the space back into the allocator. */
+        free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start));
+
+#if defined(CONFIG_PCI)
         /*
-         * Let the bootmem allocator use all the space we've given it
-         * except for its own bitmap.
+         * Throw away any memory aliased by the PCI region. FIXME: this
+         * is a temporary hack to work around bug 10502, and needs to be
+         * fixed properly.
          */
-        first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size);
-        if (first_alloc_pfn >= last_alloc_pfn)
-                early_panic("Not enough memory on controller 0 for bootmem\n");
+        if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start)
+                reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn),
+                                PFN_PHYS(pci_reserve_end_pfn -
+                                         pci_reserve_start_pfn),
+                                BOOTMEM_EXCLUSIVE);
+#endif
+}
 
-        free_bootmem(PFN_PHYS(first_alloc_pfn),
-                     PFN_PHYS(last_alloc_pfn - first_alloc_pfn));
+static void __init setup_bootmem_allocator(void)
+{
+        int i;
+        for (i = 0; i < MAX_NUMNODES; ++i)
+                setup_bootmem_allocator_node(i);
 
 #ifdef CONFIG_KEXEC
         if (crashk_res.start != crashk_res.end)
@@ -579,14 +636,6 @@ static int __init percpu_size(void)
         return size;
 }
 
-static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal)
-{
-        void *kva = __alloc_bootmem(size, PAGE_SIZE, goal);
-        unsigned long pfn = kaddr_to_pfn(kva);
-        BUG_ON(goal && PFN_PHYS(pfn) != goal);
-        return pfn;
-}
-
 static void __init zone_sizes_init(void)
 {
         unsigned long zones_size[MAX_NR_ZONES] = { 0 };
@@ -624,21 +673,22 @@ static void __init zone_sizes_init(void)
                  * though, there'll be no lowmem, so we just alloc_bootmem
                  * the memmap. There will be no percpu memory either.
                  */
-                if (__pfn_to_highbits(start) == 0) {
-                        /* In low PAs, allocate via bootmem. */
+                if (i != 0 && cpu_isset(i, isolnodes)) {
+                        node_memmap_pfn[i] =
+                                alloc_bootmem_pfn(0, memmap_size, 0);
+                        BUG_ON(node_percpu[i] != 0);
+                } else if (node_has_bootmem(start)) {
                         unsigned long goal = 0;
                         node_memmap_pfn[i] =
-                                alloc_bootmem_pfn(memmap_size, goal);
+                                alloc_bootmem_pfn(i, memmap_size, 0);
                         if (kdata_huge)
                                 goal = PFN_PHYS(lowmem_end) - node_percpu[i];
                         if (node_percpu[i])
                                 node_percpu_pfn[i] =
-                                        alloc_bootmem_pfn(node_percpu[i], goal);
-                } else if (cpu_isset(i, isolnodes)) {
-                        node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0);
-                        BUG_ON(node_percpu[i] != 0);
+                                        alloc_bootmem_pfn(i, node_percpu[i],
+                                                          goal);
                 } else {
-                        /* In high PAs, just reserve some pages. */
+                        /* In non-bootmem zones, just reserve some pages. */
                         node_memmap_pfn[i] = node_free_pfn[i];
                         node_free_pfn[i] += PFN_UP(memmap_size);
                         if (!kdata_huge) {
@@ -662,16 +712,9 @@ static void __init zone_sizes_init(void)
                 zones_size[ZONE_NORMAL] = end - start;
 #endif
 
-                /*
-                 * Everyone shares node 0's bootmem allocator, but
-                 * we use alloc_remap(), above, to put the actual
-                 * struct page array on the individual controllers,
-                 * which is most of the data that we actually care about.
-                 * We can't place bootmem allocators on the other
-                 * controllers since the bootmem allocator can only
-                 * operate on 32-bit physical addresses.
-                 */
-                NODE_DATA(i)->bdata = NODE_DATA(0)->bdata;
+                /* Take zone metadata from controller 0 if we're isolnode. */
+                if (node_isset(i, isolnodes))
+                        NODE_DATA(i)->bdata = &bootmem_node_data[0];
 
                 free_area_init_node(i, zones_size, start, NULL);
                 printk(KERN_DEBUG "  Normal zone: %ld per-cpu pages\n",
@@ -854,6 +897,22 @@ subsys_initcall(topology_init);
 
 #endif /* CONFIG_NUMA */
 
+/*
+ * Initialize hugepage support on this cpu. We do this on all cores
+ * early in boot: before argument parsing for the boot cpu, and after
+ * argument parsing but before the init functions run on the secondaries.
+ * So the values we set up here in the hypervisor may be overridden on
+ * the boot cpu as arguments are parsed.
+ */
+static __cpuinit void init_super_pages(void)
+{
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+        int i;
+        for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i)
+                hv_set_pte_super_shift(i, huge_shift[i]);
+#endif
+}
+
 /**
  * setup_cpu() - Do all necessary per-cpu, tile-specific initialization.
  * @boot: Is this the boot cpu?
@@ -908,6 +967,8 @@ void __cpuinit setup_cpu(int boot)
         /* Reset the network state on this cpu. */
         reset_network_state();
 #endif
+
+        init_super_pages();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c
index a5f241c24cac..3fd54d5bbd4c 100644
--- a/arch/tile/kernel/tlb.c
+++ b/arch/tile/kernel/tlb.c
@@ -15,6 +15,7 @@
 
 #include <linux/cpumask.h>
 #include <linux/module.h>
+#include <linux/hugetlb.h>
 #include <asm/tlbflush.h>
 #include <asm/homecache.h>
 #include <hv/hypervisor.h>
@@ -49,25 +50,25 @@ void flush_tlb_current_task(void)
         flush_tlb_mm(current->mm);
 }
 
-void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm,
+void flush_tlb_page_mm(struct vm_area_struct *vma, struct mm_struct *mm,
                        unsigned long va)
 {
-        unsigned long size = hv_page_size(vma);
+        unsigned long size = vma_kernel_pagesize(vma);
         int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
         flush_remote(0, cache, mm_cpumask(mm),
                      va, size, size, mm_cpumask(mm), NULL, 0);
 }
 
-void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va)
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
 {
         flush_tlb_page_mm(vma, vma->vm_mm, va);
 }
 EXPORT_SYMBOL(flush_tlb_page);
 
-void flush_tlb_range(const struct vm_area_struct *vma,
+void flush_tlb_range(struct vm_area_struct *vma,
                      unsigned long start, unsigned long end)
 {
-        unsigned long size = hv_page_size(vma);
+        unsigned long size = vma_kernel_pagesize(vma);
         struct mm_struct *mm = vma->vm_mm;
         int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0;
         flush_remote(0, cache, mm_cpumask(mm), start, end - start, size,
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 22e58f51ed23..54f18fc25ed0 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -187,7 +187,7 @@ static pgd_t *get_current_pgd(void)
         HV_Context ctx = hv_inquire_context();
         unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT;
         struct page *pgd_page = pfn_to_page(pgd_pfn);
-        BUG_ON(PageHighMem(pgd_page));   /* oops, HIGHPTE? */
+        BUG_ON(PageHighMem(pgd_page));
         return (pgd_t *) __va(ctx.page_table);
 }
 
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 499f73770b05..dbcbdf7b8aa8 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -30,6 +30,7 @@
 #include <linux/cache.h>
 #include <linux/smp.h>
 #include <linux/module.h>
+#include <linux/hugetlb.h>
 
 #include <asm/page.h>
 #include <asm/sections.h>
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 42cfcba4e1ef..812e2d037972 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -27,85 +27,161 @@
 #include <linux/mman.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
+#include <asm/setup.h>
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+
+/*
+ * Provide an additional huge page size (in addition to the regular default
+ * huge page size) if no "hugepagesz" arguments are specified.
+ * Note that it must be smaller than the default huge page size so
+ * that it's possible to allocate them on demand from the buddy allocator.
+ * You can change this to 64K (on a 16K build), 256K, 1M, or 4M,
+ * or not define it at all.
+ */
+#define ADDITIONAL_HUGE_SIZE (1024 * 1024UL)
+
+/* "Extra" page-size multipliers, one per level of the page table. */
+int huge_shift[HUGE_SHIFT_ENTRIES] = {
+#ifdef ADDITIONAL_HUGE_SIZE
+#define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE)
+        [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT
+#endif
+};
+
+/*
+ * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel().
+ * It assumes that L2 PTEs are never in HIGHMEM (we don't support that).
+ * It locks the user pagetable, and bumps up the mm->nr_ptes field,
+ * but otherwise allocate the page table using the kernel versions.
+ */
+static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd,
+                                unsigned long address)
+{
+        pte_t *new;
+
+        if (pmd_none(*pmd)) {
+                new = pte_alloc_one_kernel(mm, address);
+                if (!new)
+                        return NULL;
+
+                smp_wmb(); /* See comment in __pte_alloc */
+
+                spin_lock(&mm->page_table_lock);
+                if (likely(pmd_none(*pmd))) {   /* Has another populated it ? */
+                        mm->nr_ptes++;
+                        pmd_populate_kernel(mm, pmd, new);
+                        new = NULL;
+                } else
+                        VM_BUG_ON(pmd_trans_splitting(*pmd));
+                spin_unlock(&mm->page_table_lock);
+                if (new)
+                        pte_free_kernel(mm, new);
+        }
+
+        return pte_offset_kernel(pmd, address);
+}
+#endif
 
 pte_t *huge_pte_alloc(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
 {
         pgd_t *pgd;
         pud_t *pud;
-        pte_t *pte = NULL;
 
-        /* We do not yet support multiple huge page sizes. */
-        BUG_ON(sz != PMD_SIZE);
+        addr &= -sz;   /* Mask off any low bits in the address. */
 
         pgd = pgd_offset(mm, addr);
         pud = pud_alloc(mm, pgd, addr);
-        if (pud)
-                pte = (pte_t *) pmd_alloc(mm, pud, addr);
-        BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
 
-        return pte;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+        if (sz >= PGDIR_SIZE) {
+                BUG_ON(sz != PGDIR_SIZE &&
+                       sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]);
+                return (pte_t *)pud;
+        } else {
+                pmd_t *pmd = pmd_alloc(mm, pud, addr);
+                if (sz >= PMD_SIZE) {
+                        BUG_ON(sz != PMD_SIZE &&
+                               sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD]));
+                        return (pte_t *)pmd;
+                }
+                else {
+                        if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE])
+                                panic("Unexpected page size %#lx\n", sz);
+                        return pte_alloc_hugetlb(mm, pmd, addr);
+                }
+        }
+#else
+        BUG_ON(sz != PMD_SIZE);
+        return (pte_t *) pmd_alloc(mm, pud, addr);
+#endif
 }
 
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+static pte_t *get_pte(pte_t *base, int index, int level)
 {
-        pgd_t *pgd;
-        pud_t *pud;
-        pmd_t *pmd = NULL;
-
-        pgd = pgd_offset(mm, addr);
-        if (pgd_present(*pgd)) {
-                pud = pud_offset(pgd, addr);
-                if (pud_present(*pud))
-                        pmd = pmd_offset(pud, addr);
+        pte_t *ptep = base + index;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+        if (!pte_present(*ptep) && huge_shift[level] != 0) {
+                unsigned long mask = -1UL << huge_shift[level];
+                pte_t *super_ptep = base + (index & mask);
+                pte_t pte = *super_ptep;
+                if (pte_present(pte) && pte_super(pte))
+                        ptep = super_ptep;
         }
-        return (pte_t *) pmd;
+#endif
+        return ptep;
 }
 
-#ifdef HUGETLB_TEST
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-                              int write)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
-        unsigned long start = address;
-        int length = 1;
-        int nr;
-        struct page *page;
-        struct vm_area_struct *vma;
-
-        vma = find_vma(mm, addr);
-        if (!vma || !is_vm_hugetlb_page(vma))
-                return ERR_PTR(-EINVAL);
-
-        pte = huge_pte_offset(mm, address);
+        pgd_t *pgd;
+        pud_t *pud;
+        pmd_t *pmd;
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+        pte_t *pte;
+#endif
 
-        /* hugetlb should be locked, and hence, prefaulted */
-        WARN_ON(!pte || pte_none(*pte));
+        /* Get the top-level page table entry. */
+        pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0);
+        if (!pgd_present(*pgd))
+                return NULL;
 
-        page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
+        /* We don't have four levels. */
+        pud = pud_offset(pgd, addr);
+#ifndef __PAGETABLE_PUD_FOLDED
+# error support fourth page table level
+#endif
 
-        WARN_ON(!PageHead(page));
+        /* Check for an L0 huge PTE, if we have three levels. */
+#ifndef __PAGETABLE_PMD_FOLDED
+        if (pud_huge(*pud))
+                return (pte_t *)pud;
 
-        return page;
-}
-
-int pmd_huge(pmd_t pmd)
-{
-        return 0;
-}
+        pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud),
+                               pmd_index(addr), 1);
+        if (!pmd_present(*pmd))
+                return NULL;
+#else
+        pmd = pmd_offset(pud, addr);
+#endif
 
-int pud_huge(pud_t pud)
-{
-        return 0;
-}
+        /* Check for an L1 huge PTE. */
+        if (pmd_huge(*pmd))
+                return (pte_t *)pmd;
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+        /* Check for an L2 huge PTE. */
+        pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2);
+        if (!pte_present(*pte))
+                return NULL;
+        if (pte_super(*pte))
+                return pte;
+#endif
 
-struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
-                             pmd_t *pmd, int write)
-{
         return NULL;
 }
 
-#else
-
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
                               int write)
 {
@@ -149,8 +225,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
         return 0;
 }
 
-#endif
-
 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
                 unsigned long addr, unsigned long len,
@@ -322,21 +396,102 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                 return hugetlb_get_unmapped_area_topdown(file, addr, len,
                                 pgoff, flags);
 }
+#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
-static __init int setup_hugepagesz(char *opt)
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static __init int __setup_hugepagesz(unsigned long ps)
 {
-        unsigned long ps = memparse(opt, &opt);
-        if (ps == PMD_SIZE) {
-                hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
-        } else if (ps == PUD_SIZE) {
-                hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+        int log_ps = __builtin_ctzl(ps);
+        int level, base_shift;
+
+        if ((1UL << log_ps) != ps || (log_ps & 1) != 0) {
+                pr_warn("Not enabling %ld byte huge pages;"
+                        " must be a power of four.\n", ps);
+                return -EINVAL;
+        }
+
+        if (ps > 64*1024*1024*1024UL) {
+                pr_warn("Not enabling %ld MB huge pages;"
+                        " largest legal value is 64 GB .\n", ps >> 20);
+                return -EINVAL;
+        } else if (ps >= PUD_SIZE) {
+                static long hv_jpage_size;
+                if (hv_jpage_size == 0)
+                        hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO);
+                if (hv_jpage_size != PUD_SIZE) {
+                        pr_warn("Not enabling >= %ld MB huge pages:"
+                                " hypervisor reports size %ld\n",
+                                PUD_SIZE >> 20, hv_jpage_size);
+                        return -EINVAL;
+                }
+                level = 0;
+                base_shift = PUD_SHIFT;
+        } else if (ps >= PMD_SIZE) {
+                level = 1;
+                base_shift = PMD_SHIFT;
+        } else if (ps > PAGE_SIZE) {
+                level = 2;
+                base_shift = PAGE_SHIFT;
         } else {
-                pr_err("hugepagesz: Unsupported page size %lu M\n",
-                        ps >> 20);
-                return 0;
+                pr_err("hugepagesz: huge page size %ld too small\n", ps);
+                return -EINVAL;
         }
-        return 1;
+
+        if (log_ps != base_shift) {
+                int shift_val = log_ps - base_shift;
+                if (huge_shift[level] != 0) {
+                        int old_shift = base_shift + huge_shift[level];
+                        pr_warn("Not enabling %ld MB huge pages;"
+                                " already have size %ld MB.\n",
+                                ps >> 20, (1UL << old_shift) >> 20);
+                        return -EINVAL;
+                }
+                if (hv_set_pte_super_shift(level, shift_val) != 0) {
+                        pr_warn("Not enabling %ld MB huge pages;"
+                                " no hypervisor support.\n", ps >> 20);
+                        return -EINVAL;
+                }
+                printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20);
+                huge_shift[level] = shift_val;
+        }
+
+        hugetlb_add_hstate(log_ps - PAGE_SHIFT);
+
+        return 0;
+}
+
+static bool saw_hugepagesz;
+
+static __init int setup_hugepagesz(char *opt)
+{
+        if (!saw_hugepagesz) {
+                saw_hugepagesz = true;
+                memset(huge_shift, 0, sizeof(huge_shift));
+        }
+        return __setup_hugepagesz(memparse(opt, NULL));
 }
 __setup("hugepagesz=", setup_hugepagesz);
 
-#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+#ifdef ADDITIONAL_HUGE_SIZE
+/*
+ * Provide an additional huge page size if no "hugepagesz" args are given.
+ * In that case, all the cores have properly set up their hv super_shift
+ * already, but we need to notify the hugetlb code to enable the
+ * new huge page size from the Linux point of view.
+ */
+static __init int add_default_hugepagesz(void)
+{
+        if (!saw_hugepagesz) {
+                BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE ||
+                             ADDITIONAL_HUGE_SIZE <= PAGE_SIZE);
+                BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) !=
+                             ADDITIONAL_HUGE_SIZE);
+                BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1);
+                hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT);
+        }
+        return 0;
+}
+arch_initcall(add_default_hugepagesz);
+#endif
+
+#endif /* CONFIG_HUGETLB_SUPER_PAGES */
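
For context, a hedged sketch of how the extra sizes registered above are typically exercised from userspace once this patch is in place: boot with something like "hugepagesz=1M hugepages=16" (or rely on the ADDITIONAL_HUGE_SIZE default), mount hugetlbfs with a matching "pagesize=1M" option, and map a file from that mount. The mount point and file name below are assumptions for illustration, and the program relies only on standard hugetlbfs behavior, not on anything specific to this patch.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define HUGE_SZ (1024 * 1024UL)   /* must match the mount's pagesize= option */

int main(void)
{
        /* Assumed mount: mount -t hugetlbfs -o pagesize=1M none /mnt/huge-1M */
        int fd = open("/mnt/huge-1M/test", O_CREAT | O_RDWR, 0600);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Length must be a multiple of the huge page size. */
        void *p = mmap(NULL, HUGE_SZ, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                close(fd);
                return 1;
        }

        memset(p, 0, HUGE_SZ);  /* touch the mapping to fault in one 1 MB page */

        munmap(p, HUGE_SZ);
        close(fd);
        unlink("/mnt/huge-1M/test");
        return 0;
}
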
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index c04fbfd93fc5..630dd2ce2afe 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -698,6 +698,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 #endif /* CONFIG_HIGHMEM */
 
 
+#ifndef CONFIG_64BIT
 static void __init init_free_pfn_range(unsigned long start, unsigned long end)
 {
         unsigned long pfn;
@@ -770,6 +771,7 @@ static void __init set_non_bootmem_pages_init(void)
                 init_free_pfn_range(start, end);
         }
 }
+#endif
 
 /*
  * paging_init() sets up the page tables - note that all of lowmem is
@@ -858,8 +860,10 @@ void __init mem_init(void)
         /* this will put all bootmem onto the freelists */
         totalram_pages += free_all_bootmem();
 
+#ifndef CONFIG_64BIT
         /* count all remaining LOWMEM and give all HIGHMEM to page allocator */
         set_non_bootmem_pages_init();
+#endif
 
         codesize =  (unsigned long)&_etext - (unsigned long)&_text;
         datasize =  (unsigned long)&_end - (unsigned long)&_sdata;
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 3d7074347e6d..345edfed9fcd 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -132,15 +132,6 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
         set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
 }
 
-#if defined(CONFIG_HIGHPTE)
-pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
-{
-        pte_t *pte = kmap_atomic(pmd_page(*dir)) +
-               (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK;
-        return &pte[pte_index(address)];
-}
-#endif
-
 /**
  * shatter_huge_page() - ensure a given address is mapped by a small page.
  *
@@ -296,10 +287,6 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
         struct page *p;
         int i;
 
-#ifdef CONFIG_HIGHPTE
-        flags |= __GFP_HIGHMEM;
-#endif
-
         p = alloc_pages(flags, L2_USER_PGTABLE_ORDER);
         if (p == NULL)
                 return NULL;