Diffstat (limited to 'arch')
 arch/tile/Kconfig                 |   8
 arch/tile/include/asm/hugetlb.h   |  21
 arch/tile/include/asm/page.h      |   5
 arch/tile/include/asm/pgtable.h   |  12
 arch/tile/include/asm/tlbflush.h  |  17
 arch/tile/include/hv/hypervisor.h |  70
 arch/tile/kernel/hvglue.lds       |   3
 arch/tile/kernel/proc.c           |   1
 arch/tile/kernel/setup.c          | 161
 arch/tile/kernel/tlb.c            |  11
 arch/tile/mm/fault.c              |   2
 arch/tile/mm/homecache.c          |   1
 arch/tile/mm/hugetlbpage.c        | 285
 arch/tile/mm/init.c               |   4
 arch/tile/mm/pgtable.c            |  13
 15 files changed, 456 insertions(+), 158 deletions(-)
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 38c3957e0b40..cc5664286a1c 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -47,6 +47,14 @@ config NEED_PER_CPU_PAGE_FIRST_CHUNK | |||
47 | config SYS_SUPPORTS_HUGETLBFS | 47 | config SYS_SUPPORTS_HUGETLBFS |
48 | def_bool y | 48 | def_bool y |
49 | 49 | ||
50 | # Support for additional huge page sizes besides HPAGE_SIZE. | ||
51 | # The software support is currently only present in the TILE-Gx | ||
52 | # hypervisor. TILEPro in any case does not support page sizes | ||
53 | # larger than the default HPAGE_SIZE. | ||
54 | config HUGETLB_SUPER_PAGES | ||
55 | depends on HUGETLB_PAGE && TILEGX | ||
56 | def_bool y | ||
57 | |||
50 | config GENERIC_CLOCKEVENTS | 58 | config GENERIC_CLOCKEVENTS |
51 | def_bool y | 59 | def_bool y |
52 | 60 | ||
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index d396d1805163..b2042380a5aa 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -106,4 +106,25 @@ static inline void arch_release_hugepage(struct page *page) | |||
106 | { | 106 | { |
107 | } | 107 | } |
108 | 108 | ||
109 | #ifdef CONFIG_HUGETLB_SUPER_PAGES | ||
110 | static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, | ||
111 | struct page *page, int writable) | ||
112 | { | ||
113 | size_t pagesize = huge_page_size(hstate_vma(vma)); | ||
114 | if (pagesize != PUD_SIZE && pagesize != PMD_SIZE) | ||
115 | entry = pte_mksuper(entry); | ||
116 | return entry; | ||
117 | } | ||
118 | #define arch_make_huge_pte arch_make_huge_pte | ||
119 | |||
120 | /* Sizes to scale up page size for PTEs with HV_PTE_SUPER bit. */ | ||
121 | enum { | ||
122 | HUGE_SHIFT_PGDIR = 0, | ||
123 | HUGE_SHIFT_PMD = 1, | ||
124 | HUGE_SHIFT_PAGE = 2, | ||
125 | HUGE_SHIFT_ENTRIES | ||
126 | }; | ||
127 | extern int huge_shift[HUGE_SHIFT_ENTRIES]; | ||
128 | #endif | ||
129 | |||
109 | #endif /* _ASM_TILE_HUGETLB_H */ | 130 | #endif /* _ASM_TILE_HUGETLB_H */ |
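
The three huge_shift[] slots introduced above each scale a different base size: the PGDIR span at level 0, the PMD span at level 1, and the base page size at level 2. Below is a standalone C sketch of that bookkeeping; the concrete sizes are assumptions chosen for illustration, not values taken from this patch.

	/*
	 * Illustrative-only sketch: how huge_shift[] scales the base size at
	 * each page-table level when HV_PTE_SUPER is set.  The base sizes
	 * below are assumptions for the example, not values from the patch.
	 */
	#include <stdio.h>

	enum { HUGE_SHIFT_PGDIR, HUGE_SHIFT_PMD, HUGE_SHIFT_PAGE, HUGE_SHIFT_ENTRIES };

	int main(void)
	{
		/* Assumed base sizes for illustration (levels 0, 1, 2). */
		unsigned long base[HUGE_SHIFT_ENTRIES] = {
			16UL << 30,	/* hypothetical PGDIR span */
			16UL << 20,	/* hypothetical PMD span (default HPAGE_SIZE) */
			64UL << 10,	/* hypothetical PAGE_SIZE */
		};
		int huge_shift[HUGE_SHIFT_ENTRIES] = { 0, 0, 4 };	/* e.g. 1 MB super pages */
		int i;

		for (i = 0; i < HUGE_SHIFT_ENTRIES; i++)
			printf("level %d: %lu KB -> %lu KB\n", i,
			       base[i] >> 10, (base[i] << huge_shift[i]) >> 10);
		return 0;
	}

With a shift of 4 at the page level, 64 KB base pages would be ganged into 1 MB super pages while the other two levels stay at their default sizes.
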
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index c750943f961e..9d9131e5c552 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -87,8 +87,7 @@ typedef HV_PTE pgprot_t; | |||
87 | /* | 87 | /* |
88 | * User L2 page tables are managed as one L2 page table per page, | 88 | * User L2 page tables are managed as one L2 page table per page, |
89 | * because we use the page allocator for them. This keeps the allocation | 89 | * because we use the page allocator for them. This keeps the allocation |
90 | * simple and makes it potentially useful to implement HIGHPTE at some point. | 90 | * simple, but it's also inefficient, since L2 page tables are much smaller |
91 | * However, it's also inefficient, since L2 page tables are much smaller | ||
92 | * than pages (currently 2KB vs 64KB). So we should revisit this. | 91 | * than pages (currently 2KB vs 64KB). So we should revisit this. |
93 | */ | 92 | */ |
94 | typedef struct page *pgtable_t; | 93 | typedef struct page *pgtable_t; |
@@ -137,7 +136,7 @@ static inline __attribute_const__ int get_order(unsigned long size) | |||
137 | 136 | ||
138 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) | 137 | #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) |
139 | 138 | ||
140 | #define HUGE_MAX_HSTATE 2 | 139 | #define HUGE_MAX_HSTATE 6 |
141 | 140 | ||
142 | #ifdef CONFIG_HUGETLB_PAGE | 141 | #ifdef CONFIG_HUGETLB_PAGE |
143 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | 142 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA |
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 319f4826d972..73b1a4c9ad03 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -71,6 +71,7 @@ extern void set_page_homes(void); | |||
71 | 71 | ||
72 | #define _PAGE_PRESENT HV_PTE_PRESENT | 72 | #define _PAGE_PRESENT HV_PTE_PRESENT |
73 | #define _PAGE_HUGE_PAGE HV_PTE_PAGE | 73 | #define _PAGE_HUGE_PAGE HV_PTE_PAGE |
74 | #define _PAGE_SUPER_PAGE HV_PTE_SUPER | ||
74 | #define _PAGE_READABLE HV_PTE_READABLE | 75 | #define _PAGE_READABLE HV_PTE_READABLE |
75 | #define _PAGE_WRITABLE HV_PTE_WRITABLE | 76 | #define _PAGE_WRITABLE HV_PTE_WRITABLE |
76 | #define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE | 77 | #define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE |
@@ -87,6 +88,7 @@ extern void set_page_homes(void); | |||
87 | #define _PAGE_ALL (\ | 88 | #define _PAGE_ALL (\ |
88 | _PAGE_PRESENT | \ | 89 | _PAGE_PRESENT | \ |
89 | _PAGE_HUGE_PAGE | \ | 90 | _PAGE_HUGE_PAGE | \ |
91 | _PAGE_SUPER_PAGE | \ | ||
90 | _PAGE_READABLE | \ | 92 | _PAGE_READABLE | \ |
91 | _PAGE_WRITABLE | \ | 93 | _PAGE_WRITABLE | \ |
92 | _PAGE_EXECUTABLE | \ | 94 | _PAGE_EXECUTABLE | \ |
@@ -197,6 +199,7 @@ static inline void __pte_clear(pte_t *ptep) | |||
197 | #define pte_write hv_pte_get_writable | 199 | #define pte_write hv_pte_get_writable |
198 | #define pte_exec hv_pte_get_executable | 200 | #define pte_exec hv_pte_get_executable |
199 | #define pte_huge hv_pte_get_page | 201 | #define pte_huge hv_pte_get_page |
202 | #define pte_super hv_pte_get_super | ||
200 | #define pte_rdprotect hv_pte_clear_readable | 203 | #define pte_rdprotect hv_pte_clear_readable |
201 | #define pte_exprotect hv_pte_clear_executable | 204 | #define pte_exprotect hv_pte_clear_executable |
202 | #define pte_mkclean hv_pte_clear_dirty | 205 | #define pte_mkclean hv_pte_clear_dirty |
@@ -209,6 +212,7 @@ static inline void __pte_clear(pte_t *ptep) | |||
209 | #define pte_mkyoung hv_pte_set_accessed | 212 | #define pte_mkyoung hv_pte_set_accessed |
210 | #define pte_mkwrite hv_pte_set_writable | 213 | #define pte_mkwrite hv_pte_set_writable |
211 | #define pte_mkhuge hv_pte_set_page | 214 | #define pte_mkhuge hv_pte_set_page |
215 | #define pte_mksuper hv_pte_set_super | ||
212 | 216 | ||
213 | #define pte_special(pte) 0 | 217 | #define pte_special(pte) 0 |
214 | #define pte_mkspecial(pte) (pte) | 218 | #define pte_mkspecial(pte) (pte) |
@@ -338,13 +342,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | |||
338 | */ | 342 | */ |
339 | #define pgd_offset_k(address) pgd_offset(&init_mm, address) | 343 | #define pgd_offset_k(address) pgd_offset(&init_mm, address) |
340 | 344 | ||
341 | #if defined(CONFIG_HIGHPTE) | ||
342 | extern pte_t *pte_offset_map(pmd_t *, unsigned long address); | ||
343 | #define pte_unmap(pte) kunmap_atomic(pte) | ||
344 | #else | ||
345 | #define pte_offset_map(dir, address) pte_offset_kernel(dir, address) | 345 | #define pte_offset_map(dir, address) pte_offset_kernel(dir, address) |
346 | #define pte_unmap(pte) do { } while (0) | 346 | #define pte_unmap(pte) do { } while (0) |
347 | #endif | ||
348 | 347 | ||
349 | /* Clear a non-executable kernel PTE and flush it from the TLB. */ | 348 | /* Clear a non-executable kernel PTE and flush it from the TLB. */ |
350 | #define kpte_clear_flush(ptep, vaddr) \ | 349 | #define kpte_clear_flush(ptep, vaddr) \ |
@@ -537,7 +536,8 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) | |||
537 | /* Support /proc/NN/pgtable API. */ | 536 | /* Support /proc/NN/pgtable API. */ |
538 | struct seq_file; | 537 | struct seq_file; |
539 | int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm, | 538 | int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm, |
540 | unsigned long vaddr, pte_t *ptep, void **datap); | 539 | unsigned long vaddr, unsigned long pagesize, |
540 | pte_t *ptep, void **datap); | ||
541 | 541 | ||
542 | #endif /* !__ASSEMBLY__ */ | 542 | #endif /* !__ASSEMBLY__ */ |
543 | 543 | ||
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
index 96199d214fb8..dcf91b25a1e5 100644
--- a/arch/tile/include/asm/tlbflush.h
+++ b/arch/tile/include/asm/tlbflush.h
@@ -38,16 +38,11 @@ DECLARE_PER_CPU(int, current_asid); | |||
38 | /* The hypervisor tells us what ASIDs are available to us. */ | 38 | /* The hypervisor tells us what ASIDs are available to us. */ |
39 | extern int min_asid, max_asid; | 39 | extern int min_asid, max_asid; |
40 | 40 | ||
41 | static inline unsigned long hv_page_size(const struct vm_area_struct *vma) | ||
42 | { | ||
43 | return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE; | ||
44 | } | ||
45 | |||
46 | /* Pass as vma pointer for non-executable mapping, if no vma available. */ | 41 | /* Pass as vma pointer for non-executable mapping, if no vma available. */ |
47 | #define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL) | 42 | #define FLUSH_NONEXEC ((struct vm_area_struct *)-1UL) |
48 | 43 | ||
49 | /* Flush a single user page on this cpu. */ | 44 | /* Flush a single user page on this cpu. */ |
50 | static inline void local_flush_tlb_page(const struct vm_area_struct *vma, | 45 | static inline void local_flush_tlb_page(struct vm_area_struct *vma, |
51 | unsigned long addr, | 46 | unsigned long addr, |
52 | unsigned long page_size) | 47 | unsigned long page_size) |
53 | { | 48 | { |
@@ -60,7 +55,7 @@ static inline void local_flush_tlb_page(const struct vm_area_struct *vma, | |||
60 | } | 55 | } |
61 | 56 | ||
62 | /* Flush range of user pages on this cpu. */ | 57 | /* Flush range of user pages on this cpu. */ |
63 | static inline void local_flush_tlb_pages(const struct vm_area_struct *vma, | 58 | static inline void local_flush_tlb_pages(struct vm_area_struct *vma, |
64 | unsigned long addr, | 59 | unsigned long addr, |
65 | unsigned long page_size, | 60 | unsigned long page_size, |
66 | unsigned long len) | 61 | unsigned long len) |
@@ -117,10 +112,10 @@ extern void flush_tlb_all(void); | |||
117 | extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); | 112 | extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); |
118 | extern void flush_tlb_current_task(void); | 113 | extern void flush_tlb_current_task(void); |
119 | extern void flush_tlb_mm(struct mm_struct *); | 114 | extern void flush_tlb_mm(struct mm_struct *); |
120 | extern void flush_tlb_page(const struct vm_area_struct *, unsigned long); | 115 | extern void flush_tlb_page(struct vm_area_struct *, unsigned long); |
121 | extern void flush_tlb_page_mm(const struct vm_area_struct *, | 116 | extern void flush_tlb_page_mm(struct vm_area_struct *, |
122 | struct mm_struct *, unsigned long); | 117 | struct mm_struct *, unsigned long); |
123 | extern void flush_tlb_range(const struct vm_area_struct *, | 118 | extern void flush_tlb_range(struct vm_area_struct *, |
124 | unsigned long start, unsigned long end); | 119 | unsigned long start, unsigned long end); |
125 | 120 | ||
126 | #define flush_tlb() flush_tlb_current_task() | 121 | #define flush_tlb() flush_tlb_current_task() |
diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h
index f27871775b7a..85e5cab4c2f0 100644
--- a/arch/tile/include/hv/hypervisor.h
+++ b/arch/tile/include/hv/hypervisor.h
@@ -66,6 +66,22 @@ | |||
66 | #define HV_DEFAULT_PAGE_SIZE_LARGE \ | 66 | #define HV_DEFAULT_PAGE_SIZE_LARGE \ |
67 | (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE) | 67 | (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_LARGE) |
68 | 68 | ||
69 | #if CHIP_VA_WIDTH() > 32 | ||
70 | |||
71 | /** The log2 of the initial size of jumbo pages, in bytes. | ||
72 | * See HV_DEFAULT_PAGE_SIZE_JUMBO. | ||
73 | */ | ||
74 | #define HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO 32 | ||
75 | |||
76 | /** The initial size of jumbo pages, in bytes. This value should | ||
77 | * be verified at runtime by calling hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO). | ||
78 | * It may also be modified when installing a new context. | ||
79 | */ | ||
80 | #define HV_DEFAULT_PAGE_SIZE_JUMBO \ | ||
81 | (__HV_SIZE_ONE << HV_LOG2_DEFAULT_PAGE_SIZE_JUMBO) | ||
82 | |||
83 | #endif | ||
84 | |||
69 | /** The log2 of the granularity at which page tables must be aligned; | 85 | /** The log2 of the granularity at which page tables must be aligned; |
70 | * in other words, the CPA for a page table must have this many zero | 86 | * in other words, the CPA for a page table must have this many zero |
71 | * bits at the bottom of the address. | 87 | * bits at the bottom of the address. |
@@ -284,8 +300,11 @@ | |||
284 | #define HV_DISPATCH_GET_IPI_PTE 56 | 300 | #define HV_DISPATCH_GET_IPI_PTE 56 |
285 | #endif | 301 | #endif |
286 | 302 | ||
303 | /** hv_set_pte_super_shift */ | ||
304 | #define HV_DISPATCH_SET_PTE_SUPER_SHIFT 57 | ||
305 | |||
287 | /** One more than the largest dispatch value */ | 306 | /** One more than the largest dispatch value */ |
288 | #define _HV_DISPATCH_END 57 | 307 | #define _HV_DISPATCH_END 58 |
289 | 308 | ||
290 | 309 | ||
291 | #ifndef __ASSEMBLER__ | 310 | #ifndef __ASSEMBLER__ |
@@ -413,6 +432,11 @@ typedef enum { | |||
413 | */ | 432 | */ |
414 | HV_SYSCONF_VALID_PAGE_SIZES = 7, | 433 | HV_SYSCONF_VALID_PAGE_SIZES = 7, |
415 | 434 | ||
435 | /** The size of jumbo pages, in bytes. | ||
436 | * If no jumbo pages are available, zero will be returned. | ||
437 | */ | ||
438 | HV_SYSCONF_PAGE_SIZE_JUMBO = 8, | ||
439 | |||
416 | } HV_SysconfQuery; | 440 | } HV_SysconfQuery; |
417 | 441 | ||
418 | /** Offset to subtract from returned Kelvin temperature to get degrees | 442 | /** Offset to subtract from returned Kelvin temperature to get degrees |
@@ -695,6 +719,29 @@ int hv_install_context(HV_PhysAddr page_table, HV_PTE access, HV_ASID asid, | |||
695 | 719 | ||
696 | #ifndef __ASSEMBLER__ | 720 | #ifndef __ASSEMBLER__ |
697 | 721 | ||
722 | |||
723 | /** Set the number of pages ganged together by HV_PTE_SUPER at a | ||
724 | * particular level of the page table. | ||
725 | * | ||
726 | * The current TILE-Gx hardware only supports powers of four | ||
727 | * (i.e. log2_count must be a multiple of two), and the requested | ||
728 | * "super" page size must be less than the span of the next level in | ||
729 | * the page table. The largest size that can be requested is 64GB. | ||
730 | * | ||
731 | * The shift value is initially "0" for all page table levels, | ||
732 | * indicating that the HV_PTE_SUPER bit is effectively ignored. | ||
733 | * | ||
734 | * If you change the count from one non-zero value to another, the | ||
735 | * hypervisor will flush the entire TLB and TSB to avoid confusion. | ||
736 | * | ||
737 | * @param level Page table level (0, 1, or 2) | ||
738 | * @param log2_count Base-2 log of the number of pages to gang together, | ||
739 | * i.e. how much to shift left the base page size for the super page size. | ||
740 | * @return Zero on success, or a hypervisor error code on failure. | ||
741 | */ | ||
742 | int hv_set_pte_super_shift(int level, int log2_count); | ||
743 | |||
744 | |||
698 | /** Value returned from hv_inquire_context(). */ | 745 | /** Value returned from hv_inquire_context(). */ |
699 | typedef struct | 746 | typedef struct |
700 | { | 747 | { |
@@ -1891,8 +1938,9 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, | |||
1891 | #define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */ | 1938 | #define HV_PTE_INDEX_USER 10 /**< Page is user-accessible */ |
1892 | #define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */ | 1939 | #define HV_PTE_INDEX_ACCESSED 11 /**< Page has been accessed */ |
1893 | #define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */ | 1940 | #define HV_PTE_INDEX_DIRTY 12 /**< Page has been written */ |
1894 | /* Bits 13-15 are reserved for | 1941 | /* Bits 13-14 are reserved for |
1895 | future use. */ | 1942 | future use. */ |
1943 | #define HV_PTE_INDEX_SUPER 15 /**< Pages ganged together for TLB */ | ||
1896 | #define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */ | 1944 | #define HV_PTE_INDEX_MODE 16 /**< Page mode; see HV_PTE_MODE_xxx */ |
1897 | #define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */ | 1945 | #define HV_PTE_MODE_BITS 3 /**< Number of bits in mode */ |
1898 | #define HV_PTE_INDEX_CLIENT2 19 /**< Page client state 2 */ | 1946 | #define HV_PTE_INDEX_CLIENT2 19 /**< Page client state 2 */ |
@@ -1987,7 +2035,10 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, | |||
1987 | 2035 | ||
1988 | /** Does this PTE map a page? | 2036 | /** Does this PTE map a page? |
1989 | * | 2037 | * |
1990 | * If this bit is set in the level-1 page table, the entry should be | 2038 | * If this bit is set in a level-0 page table, the entry should be |
2039 | * interpreted as a level-2 page table entry mapping a jumbo page. | ||
2040 | * | ||
2041 | * If this bit is set in a level-1 page table, the entry should be | ||
1991 | * interpreted as a level-2 page table entry mapping a large page. | 2042 | * interpreted as a level-2 page table entry mapping a large page. |
1992 | * | 2043 | * |
1993 | * This bit should not be modified by the client while PRESENT is set, as | 2044 | * This bit should not be modified by the client while PRESENT is set, as |
@@ -1997,6 +2048,18 @@ int hv_flush_remote(HV_PhysAddr cache_pa, unsigned long cache_control, | |||
1997 | */ | 2048 | */ |
1998 | #define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE) | 2049 | #define HV_PTE_PAGE (__HV_PTE_ONE << HV_PTE_INDEX_PAGE) |
1999 | 2050 | ||
2051 | /** Does this PTE implicitly reference multiple pages? | ||
2052 | * | ||
2053 | * If this bit is set in the page table (either in the level-2 page table, | ||
2054 | * or in a higher level page table in conjunction with the PAGE bit) | ||
2055 | * then the PTE specifies a range of contiguous pages, not a single page. | ||
2056 | * The hv_set_pte_super_shift() allows you to specify the count for | ||
2057 | * each level of the page table. | ||
2058 | * | ||
2059 | * Note: this bit is not supported on TILEPro systems. | ||
2060 | */ | ||
2061 | #define HV_PTE_SUPER (__HV_PTE_ONE << HV_PTE_INDEX_SUPER) | ||
2062 | |||
2000 | /** Is this a global (non-ASID) mapping? | 2063 | /** Is this a global (non-ASID) mapping? |
2001 | * | 2064 | * |
2002 | * If this bit is set, the translations established by this PTE will | 2065 | * If this bit is set, the translations established by this PTE will |
@@ -2215,6 +2278,7 @@ hv_pte_clear_##name(HV_PTE pte) \ | |||
2215 | */ | 2278 | */ |
2216 | _HV_BIT(present, PRESENT) | 2279 | _HV_BIT(present, PRESENT) |
2217 | _HV_BIT(page, PAGE) | 2280 | _HV_BIT(page, PAGE) |
2281 | _HV_BIT(super, SUPER) | ||
2218 | _HV_BIT(client0, CLIENT0) | 2282 | _HV_BIT(client0, CLIENT0) |
2219 | _HV_BIT(client1, CLIENT1) | 2283 | _HV_BIT(client1, CLIENT1) |
2220 | _HV_BIT(client2, CLIENT2) | 2284 | _HV_BIT(client2, CLIENT2) |
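
The hv_set_pte_super_shift() comment above states three constraints: the gang count must be a power of four (an even log2_count), the resulting super page must stay below the span of the next page-table level, and nothing larger than 64 GB can be requested. A small standalone C sketch of that validation follows; the sizes passed in main() are illustrative assumptions, and a 64-bit build is assumed for the 64 GB arithmetic.

	/* Sketch of the documented hv_set_pte_super_shift() constraints. */
	#include <stdio.h>

	static int log2_count_for(unsigned long super_size, unsigned long base_size,
				  unsigned long next_level_span)
	{
		int shift = __builtin_ctzl(super_size / base_size);

		if ((base_size << shift) != super_size || (shift & 1) != 0)
			return -1;	/* count must be a power of four */
		if (super_size >= next_level_span || super_size > (64UL << 30))
			return -1;	/* must fit below the next level, <= 64 GB */
		return shift;
	}

	int main(void)
	{
		/* 1 MB super pages built from assumed 64 KB base pages, below an
		 * assumed 16 MB PMD span: 16 pages ganged, log2_count = 4. */
		printf("log2_count = %d\n",
		       log2_count_for(1UL << 20, 64UL << 10, 16UL << 20));
		return 0;
	}
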
diff --git a/arch/tile/kernel/hvglue.lds b/arch/tile/kernel/hvglue.lds
index 2b7cd0a659a9..d44c5a67a1ed 100644
--- a/arch/tile/kernel/hvglue.lds
+++ b/arch/tile/kernel/hvglue.lds
@@ -55,4 +55,5 @@ hv_store_mapping = TEXT_OFFSET + 0x106a0; | |||
55 | hv_inquire_realpa = TEXT_OFFSET + 0x106c0; | 55 | hv_inquire_realpa = TEXT_OFFSET + 0x106c0; |
56 | hv_flush_all = TEXT_OFFSET + 0x106e0; | 56 | hv_flush_all = TEXT_OFFSET + 0x106e0; |
57 | hv_get_ipi_pte = TEXT_OFFSET + 0x10700; | 57 | hv_get_ipi_pte = TEXT_OFFSET + 0x10700; |
58 | hv_glue_internals = TEXT_OFFSET + 0x10720; | 58 | hv_set_pte_super_shift = TEXT_OFFSET + 0x10720; |
59 | hv_glue_internals = TEXT_OFFSET + 0x10740; | ||
diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c
index 446a7f52cc11..dafc447b5125 100644
--- a/arch/tile/kernel/proc.c
+++ b/arch/tile/kernel/proc.c
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/proc_fs.h> | 22 | #include <linux/proc_fs.h> |
23 | #include <linux/sysctl.h> | 23 | #include <linux/sysctl.h> |
24 | #include <linux/hardirq.h> | 24 | #include <linux/hardirq.h> |
25 | #include <linux/hugetlb.h> | ||
25 | #include <linux/mman.h> | 26 | #include <linux/mman.h> |
26 | #include <asm/unaligned.h> | 27 | #include <asm/unaligned.h> |
27 | #include <asm/pgtable.h> | 28 | #include <asm/pgtable.h> |
diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c
index 32948e21113a..445c220eae51 100644
--- a/arch/tile/kernel/setup.c
+++ b/arch/tile/kernel/setup.c
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/smp.h> | 29 | #include <linux/smp.h> |
30 | #include <linux/timex.h> | 30 | #include <linux/timex.h> |
31 | #include <linux/hugetlb.h> | ||
31 | #include <asm/setup.h> | 32 | #include <asm/setup.h> |
32 | #include <asm/sections.h> | 33 | #include <asm/sections.h> |
33 | #include <asm/cacheflush.h> | 34 | #include <asm/cacheflush.h> |
@@ -49,9 +50,6 @@ char chip_model[64] __write_once; | |||
49 | struct pglist_data node_data[MAX_NUMNODES] __read_mostly; | 50 | struct pglist_data node_data[MAX_NUMNODES] __read_mostly; |
50 | EXPORT_SYMBOL(node_data); | 51 | EXPORT_SYMBOL(node_data); |
51 | 52 | ||
52 | /* We only create bootmem data on node 0. */ | ||
53 | static bootmem_data_t __initdata node0_bdata; | ||
54 | |||
55 | /* Information on the NUMA nodes that we compute early */ | 53 | /* Information on the NUMA nodes that we compute early */ |
56 | unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; | 54 | unsigned long __cpuinitdata node_start_pfn[MAX_NUMNODES]; |
57 | unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; | 55 | unsigned long __cpuinitdata node_end_pfn[MAX_NUMNODES]; |
@@ -518,37 +516,96 @@ static void __init setup_memory(void) | |||
518 | #endif | 516 | #endif |
519 | } | 517 | } |
520 | 518 | ||
521 | static void __init setup_bootmem_allocator(void) | 519 | /* |
520 | * On 32-bit machines, we only put bootmem on the low controller, | ||
521 | * since PAs > 4GB can't be used in bootmem. In principle one could | ||
522 | * imagine, e.g., multiple 1 GB controllers all of which could support | ||
523 | * bootmem, but in practice using controllers this small isn't a | ||
524 | * particularly interesting scenario, so we just keep it simple and | ||
525 | * use only the first controller for bootmem on 32-bit machines. | ||
526 | */ | ||
527 | static inline int node_has_bootmem(int nid) | ||
522 | { | 528 | { |
523 | unsigned long bootmap_size, first_alloc_pfn, last_alloc_pfn; | 529 | #ifdef CONFIG_64BIT |
530 | return 1; | ||
531 | #else | ||
532 | return nid == 0; | ||
533 | #endif | ||
534 | } | ||
524 | 535 | ||
525 | /* Provide a node 0 bdata. */ | 536 | static inline unsigned long alloc_bootmem_pfn(int nid, |
526 | NODE_DATA(0)->bdata = &node0_bdata; | 537 | unsigned long size, |
538 | unsigned long goal) | ||
539 | { | ||
540 | void *kva = __alloc_bootmem_node(NODE_DATA(nid), size, | ||
541 | PAGE_SIZE, goal); | ||
542 | unsigned long pfn = kaddr_to_pfn(kva); | ||
543 | BUG_ON(goal && PFN_PHYS(pfn) != goal); | ||
544 | return pfn; | ||
545 | } | ||
527 | 546 | ||
528 | #ifdef CONFIG_PCI | 547 | static void __init setup_bootmem_allocator_node(int i) |
529 | /* Don't let boot memory alias the PCI region. */ | 548 | { |
530 | last_alloc_pfn = min(max_low_pfn, pci_reserve_start_pfn); | 549 | unsigned long start, end, mapsize, mapstart; |
550 | |||
551 | if (node_has_bootmem(i)) { | ||
552 | NODE_DATA(i)->bdata = &bootmem_node_data[i]; | ||
553 | } else { | ||
554 | /* Share controller zero's bdata for now. */ | ||
555 | NODE_DATA(i)->bdata = &bootmem_node_data[0]; | ||
556 | return; | ||
557 | } | ||
558 | |||
559 | /* Skip up to after the bss in node 0. */ | ||
560 | start = (i == 0) ? min_low_pfn : node_start_pfn[i]; | ||
561 | |||
562 | /* Only lowmem, if we're a HIGHMEM build. */ | ||
563 | #ifdef CONFIG_HIGHMEM | ||
564 | end = node_lowmem_end_pfn[i]; | ||
531 | #else | 565 | #else |
532 | last_alloc_pfn = max_low_pfn; | 566 | end = node_end_pfn[i]; |
533 | #endif | 567 | #endif |
534 | 568 | ||
535 | /* | 569 | /* No memory here. */ |
536 | * Initialize the boot-time allocator (with low memory only): | 570 | if (end == start) |
537 | * The first argument says where to put the bitmap, and the | 571 | return; |
538 | * second says where the end of allocatable memory is. | 572 | |
539 | */ | 573 | /* Figure out where the bootmem bitmap is located. */ |
540 | bootmap_size = init_bootmem(min_low_pfn, last_alloc_pfn); | 574 | mapsize = bootmem_bootmap_pages(end - start); |
575 | if (i == 0) { | ||
576 | /* Use some space right before the heap on node 0. */ | ||
577 | mapstart = start; | ||
578 | start += mapsize; | ||
579 | } else { | ||
580 | /* Allocate bitmap on node 0 to avoid page table issues. */ | ||
581 | mapstart = alloc_bootmem_pfn(0, PFN_PHYS(mapsize), 0); | ||
582 | } | ||
541 | 583 | ||
584 | /* Initialize a node. */ | ||
585 | init_bootmem_node(NODE_DATA(i), mapstart, start, end); | ||
586 | |||
587 | /* Free all the space back into the allocator. */ | ||
588 | free_bootmem(PFN_PHYS(start), PFN_PHYS(end - start)); | ||
589 | |||
590 | #if defined(CONFIG_PCI) | ||
542 | /* | 591 | /* |
543 | * Let the bootmem allocator use all the space we've given it | 592 | * Throw away any memory aliased by the PCI region. FIXME: this |
544 | * except for its own bitmap. | 593 | * is a temporary hack to work around bug 10502, and needs to be |
594 | * fixed properly. | ||
545 | */ | 595 | */ |
546 | first_alloc_pfn = min_low_pfn + PFN_UP(bootmap_size); | 596 | if (pci_reserve_start_pfn < end && pci_reserve_end_pfn > start) |
547 | if (first_alloc_pfn >= last_alloc_pfn) | 597 | reserve_bootmem(PFN_PHYS(pci_reserve_start_pfn), |
548 | early_panic("Not enough memory on controller 0 for bootmem\n"); | 598 | PFN_PHYS(pci_reserve_end_pfn - |
599 | pci_reserve_start_pfn), | ||
600 | BOOTMEM_EXCLUSIVE); | ||
601 | #endif | ||
602 | } | ||
549 | 603 | ||
550 | free_bootmem(PFN_PHYS(first_alloc_pfn), | 604 | static void __init setup_bootmem_allocator(void) |
551 | PFN_PHYS(last_alloc_pfn - first_alloc_pfn)); | 605 | { |
606 | int i; | ||
607 | for (i = 0; i < MAX_NUMNODES; ++i) | ||
608 | setup_bootmem_allocator_node(i); | ||
552 | 609 | ||
553 | #ifdef CONFIG_KEXEC | 610 | #ifdef CONFIG_KEXEC |
554 | if (crashk_res.start != crashk_res.end) | 611 | if (crashk_res.start != crashk_res.end) |
@@ -579,14 +636,6 @@ static int __init percpu_size(void) | |||
579 | return size; | 636 | return size; |
580 | } | 637 | } |
581 | 638 | ||
582 | static inline unsigned long alloc_bootmem_pfn(int size, unsigned long goal) | ||
583 | { | ||
584 | void *kva = __alloc_bootmem(size, PAGE_SIZE, goal); | ||
585 | unsigned long pfn = kaddr_to_pfn(kva); | ||
586 | BUG_ON(goal && PFN_PHYS(pfn) != goal); | ||
587 | return pfn; | ||
588 | } | ||
589 | |||
590 | static void __init zone_sizes_init(void) | 639 | static void __init zone_sizes_init(void) |
591 | { | 640 | { |
592 | unsigned long zones_size[MAX_NR_ZONES] = { 0 }; | 641 | unsigned long zones_size[MAX_NR_ZONES] = { 0 }; |
@@ -624,21 +673,22 @@ static void __init zone_sizes_init(void) | |||
624 | * though, there'll be no lowmem, so we just alloc_bootmem | 673 | * though, there'll be no lowmem, so we just alloc_bootmem |
625 | * the memmap. There will be no percpu memory either. | 674 | * the memmap. There will be no percpu memory either. |
626 | */ | 675 | */ |
627 | if (__pfn_to_highbits(start) == 0) { | 676 | if (i != 0 && cpu_isset(i, isolnodes)) { |
628 | /* In low PAs, allocate via bootmem. */ | 677 | node_memmap_pfn[i] = |
678 | alloc_bootmem_pfn(0, memmap_size, 0); | ||
679 | BUG_ON(node_percpu[i] != 0); | ||
680 | } else if (node_has_bootmem(start)) { | ||
629 | unsigned long goal = 0; | 681 | unsigned long goal = 0; |
630 | node_memmap_pfn[i] = | 682 | node_memmap_pfn[i] = |
631 | alloc_bootmem_pfn(memmap_size, goal); | 683 | alloc_bootmem_pfn(i, memmap_size, 0); |
632 | if (kdata_huge) | 684 | if (kdata_huge) |
633 | goal = PFN_PHYS(lowmem_end) - node_percpu[i]; | 685 | goal = PFN_PHYS(lowmem_end) - node_percpu[i]; |
634 | if (node_percpu[i]) | 686 | if (node_percpu[i]) |
635 | node_percpu_pfn[i] = | 687 | node_percpu_pfn[i] = |
636 | alloc_bootmem_pfn(node_percpu[i], goal); | 688 | alloc_bootmem_pfn(i, node_percpu[i], |
637 | } else if (cpu_isset(i, isolnodes)) { | 689 | goal); |
638 | node_memmap_pfn[i] = alloc_bootmem_pfn(memmap_size, 0); | ||
639 | BUG_ON(node_percpu[i] != 0); | ||
640 | } else { | 690 | } else { |
641 | /* In high PAs, just reserve some pages. */ | 691 | /* In non-bootmem zones, just reserve some pages. */ |
642 | node_memmap_pfn[i] = node_free_pfn[i]; | 692 | node_memmap_pfn[i] = node_free_pfn[i]; |
643 | node_free_pfn[i] += PFN_UP(memmap_size); | 693 | node_free_pfn[i] += PFN_UP(memmap_size); |
644 | if (!kdata_huge) { | 694 | if (!kdata_huge) { |
@@ -662,16 +712,9 @@ static void __init zone_sizes_init(void) | |||
662 | zones_size[ZONE_NORMAL] = end - start; | 712 | zones_size[ZONE_NORMAL] = end - start; |
663 | #endif | 713 | #endif |
664 | 714 | ||
665 | /* | 715 | /* Take zone metadata from controller 0 if we're isolnode. */ |
666 | * Everyone shares node 0's bootmem allocator, but | 716 | if (node_isset(i, isolnodes)) |
667 | * we use alloc_remap(), above, to put the actual | 717 | NODE_DATA(i)->bdata = &bootmem_node_data[0]; |
668 | * struct page array on the individual controllers, | ||
669 | * which is most of the data that we actually care about. | ||
670 | * We can't place bootmem allocators on the other | ||
671 | * controllers since the bootmem allocator can only | ||
672 | * operate on 32-bit physical addresses. | ||
673 | */ | ||
674 | NODE_DATA(i)->bdata = NODE_DATA(0)->bdata; | ||
675 | 718 | ||
676 | free_area_init_node(i, zones_size, start, NULL); | 719 | free_area_init_node(i, zones_size, start, NULL); |
677 | printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", | 720 | printk(KERN_DEBUG " Normal zone: %ld per-cpu pages\n", |
@@ -854,6 +897,22 @@ subsys_initcall(topology_init); | |||
854 | 897 | ||
855 | #endif /* CONFIG_NUMA */ | 898 | #endif /* CONFIG_NUMA */ |
856 | 899 | ||
900 | /* | ||
901 | * Initialize hugepage support on this cpu. We do this on all cores | ||
902 | * early in boot: before argument parsing for the boot cpu, and after | ||
903 | * argument parsing but before the init functions run on the secondaries. | ||
904 | * So the values we set up here in the hypervisor may be overridden on | ||
905 | * the boot cpu as arguments are parsed. | ||
906 | */ | ||
907 | static __cpuinit void init_super_pages(void) | ||
908 | { | ||
909 | #ifdef CONFIG_HUGETLB_SUPER_PAGES | ||
910 | int i; | ||
911 | for (i = 0; i < HUGE_SHIFT_ENTRIES; ++i) | ||
912 | hv_set_pte_super_shift(i, huge_shift[i]); | ||
913 | #endif | ||
914 | } | ||
915 | |||
857 | /** | 916 | /** |
858 | * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. | 917 | * setup_cpu() - Do all necessary per-cpu, tile-specific initialization. |
859 | * @boot: Is this the boot cpu? | 918 | * @boot: Is this the boot cpu? |
@@ -908,6 +967,8 @@ void __cpuinit setup_cpu(int boot) | |||
908 | /* Reset the network state on this cpu. */ | 967 | /* Reset the network state on this cpu. */ |
909 | reset_network_state(); | 968 | reset_network_state(); |
910 | #endif | 969 | #endif |
970 | |||
971 | init_super_pages(); | ||
911 | } | 972 | } |
912 | 973 | ||
913 | #ifdef CONFIG_BLK_DEV_INITRD | 974 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/tile/kernel/tlb.c b/arch/tile/kernel/tlb.c
index a5f241c24cac..3fd54d5bbd4c 100644
--- a/arch/tile/kernel/tlb.c
+++ b/arch/tile/kernel/tlb.c
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include <linux/cpumask.h> | 16 | #include <linux/cpumask.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <linux/hugetlb.h> | ||
18 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
19 | #include <asm/homecache.h> | 20 | #include <asm/homecache.h> |
20 | #include <hv/hypervisor.h> | 21 | #include <hv/hypervisor.h> |
@@ -49,25 +50,25 @@ void flush_tlb_current_task(void) | |||
49 | flush_tlb_mm(current->mm); | 50 | flush_tlb_mm(current->mm); |
50 | } | 51 | } |
51 | 52 | ||
52 | void flush_tlb_page_mm(const struct vm_area_struct *vma, struct mm_struct *mm, | 53 | void flush_tlb_page_mm(struct vm_area_struct *vma, struct mm_struct *mm, |
53 | unsigned long va) | 54 | unsigned long va) |
54 | { | 55 | { |
55 | unsigned long size = hv_page_size(vma); | 56 | unsigned long size = vma_kernel_pagesize(vma); |
56 | int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; | 57 | int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; |
57 | flush_remote(0, cache, mm_cpumask(mm), | 58 | flush_remote(0, cache, mm_cpumask(mm), |
58 | va, size, size, mm_cpumask(mm), NULL, 0); | 59 | va, size, size, mm_cpumask(mm), NULL, 0); |
59 | } | 60 | } |
60 | 61 | ||
61 | void flush_tlb_page(const struct vm_area_struct *vma, unsigned long va) | 62 | void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) |
62 | { | 63 | { |
63 | flush_tlb_page_mm(vma, vma->vm_mm, va); | 64 | flush_tlb_page_mm(vma, vma->vm_mm, va); |
64 | } | 65 | } |
65 | EXPORT_SYMBOL(flush_tlb_page); | 66 | EXPORT_SYMBOL(flush_tlb_page); |
66 | 67 | ||
67 | void flush_tlb_range(const struct vm_area_struct *vma, | 68 | void flush_tlb_range(struct vm_area_struct *vma, |
68 | unsigned long start, unsigned long end) | 69 | unsigned long start, unsigned long end) |
69 | { | 70 | { |
70 | unsigned long size = hv_page_size(vma); | 71 | unsigned long size = vma_kernel_pagesize(vma); |
71 | struct mm_struct *mm = vma->vm_mm; | 72 | struct mm_struct *mm = vma->vm_mm; |
72 | int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; | 73 | int cache = (vma->vm_flags & VM_EXEC) ? HV_FLUSH_EVICT_L1I : 0; |
73 | flush_remote(0, cache, mm_cpumask(mm), start, end - start, size, | 74 | flush_remote(0, cache, mm_cpumask(mm), start, end - start, size, |
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 22e58f51ed23..54f18fc25ed0 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -187,7 +187,7 @@ static pgd_t *get_current_pgd(void) | |||
187 | HV_Context ctx = hv_inquire_context(); | 187 | HV_Context ctx = hv_inquire_context(); |
188 | unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT; | 188 | unsigned long pgd_pfn = ctx.page_table >> PAGE_SHIFT; |
189 | struct page *pgd_page = pfn_to_page(pgd_pfn); | 189 | struct page *pgd_page = pfn_to_page(pgd_pfn); |
190 | BUG_ON(PageHighMem(pgd_page)); /* oops, HIGHPTE? */ | 190 | BUG_ON(PageHighMem(pgd_page)); |
191 | return (pgd_t *) __va(ctx.page_table); | 191 | return (pgd_t *) __va(ctx.page_table); |
192 | } | 192 | } |
193 | 193 | ||
diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 499f73770b05..dbcbdf7b8aa8 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/cache.h> | 30 | #include <linux/cache.h> |
31 | #include <linux/smp.h> | 31 | #include <linux/smp.h> |
32 | #include <linux/module.h> | 32 | #include <linux/module.h> |
33 | #include <linux/hugetlb.h> | ||
33 | 34 | ||
34 | #include <asm/page.h> | 35 | #include <asm/page.h> |
35 | #include <asm/sections.h> | 36 | #include <asm/sections.h> |
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 42cfcba4e1ef..812e2d037972 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -27,85 +27,161 @@ | |||
27 | #include <linux/mman.h> | 27 | #include <linux/mman.h> |
28 | #include <asm/tlb.h> | 28 | #include <asm/tlb.h> |
29 | #include <asm/tlbflush.h> | 29 | #include <asm/tlbflush.h> |
30 | #include <asm/setup.h> | ||
31 | |||
32 | #ifdef CONFIG_HUGETLB_SUPER_PAGES | ||
33 | |||
34 | /* | ||
35 | * Provide an additional huge page size (in addition to the regular default | ||
36 | * huge page size) if no "hugepagesz" arguments are specified. | ||
37 | * Note that it must be smaller than the default huge page size so | ||
38 | * that it's possible to allocate them on demand from the buddy allocator. | ||
39 | * You can change this to 64K (on a 16K build), 256K, 1M, or 4M, | ||
40 | * or not define it at all. | ||
41 | */ | ||
42 | #define ADDITIONAL_HUGE_SIZE (1024 * 1024UL) | ||
43 | |||
44 | /* "Extra" page-size multipliers, one per level of the page table. */ | ||
45 | int huge_shift[HUGE_SHIFT_ENTRIES] = { | ||
46 | #ifdef ADDITIONAL_HUGE_SIZE | ||
47 | #define ADDITIONAL_HUGE_SHIFT __builtin_ctzl(ADDITIONAL_HUGE_SIZE / PAGE_SIZE) | ||
48 | [HUGE_SHIFT_PAGE] = ADDITIONAL_HUGE_SHIFT | ||
49 | #endif | ||
50 | }; | ||
51 | |||
52 | /* | ||
53 | * This routine is a hybrid of pte_alloc_map() and pte_alloc_kernel(). | ||
54 | * It assumes that L2 PTEs are never in HIGHMEM (we don't support that). | ||
55 | * It locks the user pagetable, and bumps up the mm->nr_ptes field, | ||
56 | * but otherwise allocate the page table using the kernel versions. | ||
57 | */ | ||
58 | static pte_t *pte_alloc_hugetlb(struct mm_struct *mm, pmd_t *pmd, | ||
59 | unsigned long address) | ||
60 | { | ||
61 | pte_t *new; | ||
62 | |||
63 | if (pmd_none(*pmd)) { | ||
64 | new = pte_alloc_one_kernel(mm, address); | ||
65 | if (!new) | ||
66 | return NULL; | ||
67 | |||
68 | smp_wmb(); /* See comment in __pte_alloc */ | ||
69 | |||
70 | spin_lock(&mm->page_table_lock); | ||
71 | if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ | ||
72 | mm->nr_ptes++; | ||
73 | pmd_populate_kernel(mm, pmd, new); | ||
74 | new = NULL; | ||
75 | } else | ||
76 | VM_BUG_ON(pmd_trans_splitting(*pmd)); | ||
77 | spin_unlock(&mm->page_table_lock); | ||
78 | if (new) | ||
79 | pte_free_kernel(mm, new); | ||
80 | } | ||
81 | |||
82 | return pte_offset_kernel(pmd, address); | ||
83 | } | ||
84 | #endif | ||
30 | 85 | ||
31 | pte_t *huge_pte_alloc(struct mm_struct *mm, | 86 | pte_t *huge_pte_alloc(struct mm_struct *mm, |
32 | unsigned long addr, unsigned long sz) | 87 | unsigned long addr, unsigned long sz) |
33 | { | 88 | { |
34 | pgd_t *pgd; | 89 | pgd_t *pgd; |
35 | pud_t *pud; | 90 | pud_t *pud; |
36 | pte_t *pte = NULL; | ||
37 | 91 | ||
38 | /* We do not yet support multiple huge page sizes. */ | 92 | addr &= -sz; /* Mask off any low bits in the address. */ |
39 | BUG_ON(sz != PMD_SIZE); | ||
40 | 93 | ||
41 | pgd = pgd_offset(mm, addr); | 94 | pgd = pgd_offset(mm, addr); |
42 | pud = pud_alloc(mm, pgd, addr); | 95 | pud = pud_alloc(mm, pgd, addr); |
43 | if (pud) | ||
44 | pte = (pte_t *) pmd_alloc(mm, pud, addr); | ||
45 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); | ||
46 | 96 | ||
47 | return pte; | 97 | #ifdef CONFIG_HUGETLB_SUPER_PAGES |
98 | if (sz >= PGDIR_SIZE) { | ||
99 | BUG_ON(sz != PGDIR_SIZE && | ||
100 | sz != PGDIR_SIZE << huge_shift[HUGE_SHIFT_PGDIR]); | ||
101 | return (pte_t *)pud; | ||
102 | } else { | ||
103 | pmd_t *pmd = pmd_alloc(mm, pud, addr); | ||
104 | if (sz >= PMD_SIZE) { | ||
105 | BUG_ON(sz != PMD_SIZE && | ||
106 | sz != (PMD_SIZE << huge_shift[HUGE_SHIFT_PMD])); | ||
107 | return (pte_t *)pmd; | ||
108 | } | ||
109 | else { | ||
110 | if (sz != PAGE_SIZE << huge_shift[HUGE_SHIFT_PAGE]) | ||
111 | panic("Unexpected page size %#lx\n", sz); | ||
112 | return pte_alloc_hugetlb(mm, pmd, addr); | ||
113 | } | ||
114 | } | ||
115 | #else | ||
116 | BUG_ON(sz != PMD_SIZE); | ||
117 | return (pte_t *) pmd_alloc(mm, pud, addr); | ||
118 | #endif | ||
48 | } | 119 | } |
49 | 120 | ||
50 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 121 | static pte_t *get_pte(pte_t *base, int index, int level) |
51 | { | 122 | { |
52 | pgd_t *pgd; | 123 | pte_t *ptep = base + index; |
53 | pud_t *pud; | 124 | #ifdef CONFIG_HUGETLB_SUPER_PAGES |
54 | pmd_t *pmd = NULL; | 125 | if (!pte_present(*ptep) && huge_shift[level] != 0) { |
55 | 126 | unsigned long mask = -1UL << huge_shift[level]; | |
56 | pgd = pgd_offset(mm, addr); | 127 | pte_t *super_ptep = base + (index & mask); |
57 | if (pgd_present(*pgd)) { | 128 | pte_t pte = *super_ptep; |
58 | pud = pud_offset(pgd, addr); | 129 | if (pte_present(pte) && pte_super(pte)) |
59 | if (pud_present(*pud)) | 130 | ptep = super_ptep; |
60 | pmd = pmd_offset(pud, addr); | ||
61 | } | 131 | } |
62 | return (pte_t *) pmd; | 132 | #endif |
133 | return ptep; | ||
63 | } | 134 | } |
64 | 135 | ||
65 | #ifdef HUGETLB_TEST | 136 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
66 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | ||
67 | int write) | ||
68 | { | 137 | { |
69 | unsigned long start = address; | 138 | pgd_t *pgd; |
70 | int length = 1; | 139 | pud_t *pud; |
71 | int nr; | 140 | pmd_t *pmd; |
72 | struct page *page; | 141 | #ifdef CONFIG_HUGETLB_SUPER_PAGES |
73 | struct vm_area_struct *vma; | 142 | pte_t *pte; |
74 | 143 | #endif | |
75 | vma = find_vma(mm, addr); | ||
76 | if (!vma || !is_vm_hugetlb_page(vma)) | ||
77 | return ERR_PTR(-EINVAL); | ||
78 | |||
79 | pte = huge_pte_offset(mm, address); | ||
80 | 144 | ||
81 | /* hugetlb should be locked, and hence, prefaulted */ | 145 | /* Get the top-level page table entry. */ |
82 | WARN_ON(!pte || pte_none(*pte)); | 146 | pgd = (pgd_t *)get_pte((pte_t *)mm->pgd, pgd_index(addr), 0); |
147 | if (!pgd_present(*pgd)) | ||
148 | return NULL; | ||
83 | 149 | ||
84 | page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; | 150 | /* We don't have four levels. */ |
151 | pud = pud_offset(pgd, addr); | ||
152 | #ifndef __PAGETABLE_PUD_FOLDED | ||
153 | # error support fourth page table level | ||
154 | #endif | ||
85 | 155 | ||
86 | WARN_ON(!PageHead(page)); | 156 | /* Check for an L0 huge PTE, if we have three levels. */ |
157 | #ifndef __PAGETABLE_PMD_FOLDED | ||
158 | if (pud_huge(*pud)) | ||
159 | return (pte_t *)pud; | ||
87 | 160 | ||
88 | return page; | 161 | pmd = (pmd_t *)get_pte((pte_t *)pud_page_vaddr(*pud), |
89 | } | 162 | pmd_index(addr), 1); |
90 | 163 | if (!pmd_present(*pmd)) | |
91 | int pmd_huge(pmd_t pmd) | 164 | return NULL; |
92 | { | 165 | #else |
93 | return 0; | 166 | pmd = pmd_offset(pud, addr); |
94 | } | 167 | #endif |
95 | 168 | ||
96 | int pud_huge(pud_t pud) | 169 | /* Check for an L1 huge PTE. */ |
97 | { | 170 | if (pmd_huge(*pmd)) |
98 | return 0; | 171 | return (pte_t *)pmd; |
99 | } | 172 | |
173 | #ifdef CONFIG_HUGETLB_SUPER_PAGES | ||
174 | /* Check for an L2 huge PTE. */ | ||
175 | pte = get_pte((pte_t *)pmd_page_vaddr(*pmd), pte_index(addr), 2); | ||
176 | if (!pte_present(*pte)) | ||
177 | return NULL; | ||
178 | if (pte_super(*pte)) | ||
179 | return pte; | ||
180 | #endif | ||
100 | 181 | ||
101 | struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, | ||
102 | pmd_t *pmd, int write) | ||
103 | { | ||
104 | return NULL; | 182 | return NULL; |
105 | } | 183 | } |
106 | 184 | ||
107 | #else | ||
108 | |||
109 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, | 185 | struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, |
110 | int write) | 186 | int write) |
111 | { | 187 | { |
@@ -149,8 +225,6 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | |||
149 | return 0; | 225 | return 0; |
150 | } | 226 | } |
151 | 227 | ||
152 | #endif | ||
153 | |||
154 | #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA | 228 | #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA |
155 | static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | 229 | static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, |
156 | unsigned long addr, unsigned long len, | 230 | unsigned long addr, unsigned long len, |
@@ -322,21 +396,102 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
322 | return hugetlb_get_unmapped_area_topdown(file, addr, len, | 396 | return hugetlb_get_unmapped_area_topdown(file, addr, len, |
323 | pgoff, flags); | 397 | pgoff, flags); |
324 | } | 398 | } |
399 | #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ | ||
325 | 400 | ||
326 | static __init int setup_hugepagesz(char *opt) | 401 | #ifdef CONFIG_HUGETLB_SUPER_PAGES |
402 | static __init int __setup_hugepagesz(unsigned long ps) | ||
327 | { | 403 | { |
328 | unsigned long ps = memparse(opt, &opt); | 404 | int log_ps = __builtin_ctzl(ps); |
329 | if (ps == PMD_SIZE) { | 405 | int level, base_shift; |
330 | hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); | 406 | |
331 | } else if (ps == PUD_SIZE) { | 407 | if ((1UL << log_ps) != ps || (log_ps & 1) != 0) { |
332 | hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); | 408 | pr_warn("Not enabling %ld byte huge pages;" |
409 | " must be a power of four.\n", ps); | ||
410 | return -EINVAL; | ||
411 | } | ||
412 | |||
413 | if (ps > 64*1024*1024*1024UL) { | ||
414 | pr_warn("Not enabling %ld MB huge pages;" | ||
415 | " largest legal value is 64 GB .\n", ps >> 20); | ||
416 | return -EINVAL; | ||
417 | } else if (ps >= PUD_SIZE) { | ||
418 | static long hv_jpage_size; | ||
419 | if (hv_jpage_size == 0) | ||
420 | hv_jpage_size = hv_sysconf(HV_SYSCONF_PAGE_SIZE_JUMBO); | ||
421 | if (hv_jpage_size != PUD_SIZE) { | ||
422 | pr_warn("Not enabling >= %ld MB huge pages:" | ||
423 | " hypervisor reports size %ld\n", | ||
424 | PUD_SIZE >> 20, hv_jpage_size); | ||
425 | return -EINVAL; | ||
426 | } | ||
427 | level = 0; | ||
428 | base_shift = PUD_SHIFT; | ||
429 | } else if (ps >= PMD_SIZE) { | ||
430 | level = 1; | ||
431 | base_shift = PMD_SHIFT; | ||
432 | } else if (ps > PAGE_SIZE) { | ||
433 | level = 2; | ||
434 | base_shift = PAGE_SHIFT; | ||
333 | } else { | 435 | } else { |
334 | pr_err("hugepagesz: Unsupported page size %lu M\n", | 436 | pr_err("hugepagesz: huge page size %ld too small\n", ps); |
335 | ps >> 20); | 437 | return -EINVAL; |
336 | return 0; | ||
337 | } | 438 | } |
338 | return 1; | 439 | |
440 | if (log_ps != base_shift) { | ||
441 | int shift_val = log_ps - base_shift; | ||
442 | if (huge_shift[level] != 0) { | ||
443 | int old_shift = base_shift + huge_shift[level]; | ||
444 | pr_warn("Not enabling %ld MB huge pages;" | ||
445 | " already have size %ld MB.\n", | ||
446 | ps >> 20, (1UL << old_shift) >> 20); | ||
447 | return -EINVAL; | ||
448 | } | ||
449 | if (hv_set_pte_super_shift(level, shift_val) != 0) { | ||
450 | pr_warn("Not enabling %ld MB huge pages;" | ||
451 | " no hypervisor support.\n", ps >> 20); | ||
452 | return -EINVAL; | ||
453 | } | ||
454 | printk(KERN_DEBUG "Enabled %ld MB huge pages\n", ps >> 20); | ||
455 | huge_shift[level] = shift_val; | ||
456 | } | ||
457 | |||
458 | hugetlb_add_hstate(log_ps - PAGE_SHIFT); | ||
459 | |||
460 | return 0; | ||
461 | } | ||
462 | |||
463 | static bool saw_hugepagesz; | ||
464 | |||
465 | static __init int setup_hugepagesz(char *opt) | ||
466 | { | ||
467 | if (!saw_hugepagesz) { | ||
468 | saw_hugepagesz = true; | ||
469 | memset(huge_shift, 0, sizeof(huge_shift)); | ||
470 | } | ||
471 | return __setup_hugepagesz(memparse(opt, NULL)); | ||
339 | } | 472 | } |
340 | __setup("hugepagesz=", setup_hugepagesz); | 473 | __setup("hugepagesz=", setup_hugepagesz); |
341 | 474 | ||
342 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ | 475 | #ifdef ADDITIONAL_HUGE_SIZE |
476 | /* | ||
477 | * Provide an additional huge page size if no "hugepagesz" args are given. | ||
478 | * In that case, all the cores have properly set up their hv super_shift | ||
479 | * already, but we need to notify the hugetlb code to enable the | ||
480 | * new huge page size from the Linux point of view. | ||
481 | */ | ||
482 | static __init int add_default_hugepagesz(void) | ||
483 | { | ||
484 | if (!saw_hugepagesz) { | ||
485 | BUILD_BUG_ON(ADDITIONAL_HUGE_SIZE >= PMD_SIZE || | ||
486 | ADDITIONAL_HUGE_SIZE <= PAGE_SIZE); | ||
487 | BUILD_BUG_ON((PAGE_SIZE << ADDITIONAL_HUGE_SHIFT) != | ||
488 | ADDITIONAL_HUGE_SIZE); | ||
489 | BUILD_BUG_ON(ADDITIONAL_HUGE_SHIFT & 1); | ||
490 | hugetlb_add_hstate(ADDITIONAL_HUGE_SHIFT); | ||
491 | } | ||
492 | return 0; | ||
493 | } | ||
494 | arch_initcall(add_default_hugepagesz); | ||
495 | #endif | ||
496 | |||
497 | #endif /* CONFIG_HUGETLB_SUPER_PAGES */ | ||
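
The masking trick that get_pte() relies on in this file can be seen in isolation: when a level's huge_shift is non-zero, PTE slots are ganged in groups of 1 << huge_shift, and a lookup that finds no present entry falls back to the first slot of its group, which is where a SUPER mapping would sit. A throwaway userspace C sketch with an assumed shift of 4 (the presence and SUPER-bit checks of the real code are omitted here):

	#include <stdio.h>

	int main(void)
	{
		int huge_shift = 4;			/* assumed: 16 PTEs ganged together */
		unsigned long mask = -1UL << huge_shift;
		int idx[] = { 3, 15, 16, 31, 32 };
		int i;

		for (i = 0; i < 5; i++)
			printf("pte index %2d -> super slot %2lu\n",
			       idx[i], idx[i] & mask);
		return 0;
	}

Indices 3 and 15 fall back to slot 0, 16 and 31 to slot 16, and so on, one super PTE per gang.
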
diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c
index c04fbfd93fc5..630dd2ce2afe 100644
--- a/arch/tile/mm/init.c
+++ b/arch/tile/mm/init.c
@@ -698,6 +698,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) | |||
698 | #endif /* CONFIG_HIGHMEM */ | 698 | #endif /* CONFIG_HIGHMEM */ |
699 | 699 | ||
700 | 700 | ||
701 | #ifndef CONFIG_64BIT | ||
701 | static void __init init_free_pfn_range(unsigned long start, unsigned long end) | 702 | static void __init init_free_pfn_range(unsigned long start, unsigned long end) |
702 | { | 703 | { |
703 | unsigned long pfn; | 704 | unsigned long pfn; |
@@ -770,6 +771,7 @@ static void __init set_non_bootmem_pages_init(void) | |||
770 | init_free_pfn_range(start, end); | 771 | init_free_pfn_range(start, end); |
771 | } | 772 | } |
772 | } | 773 | } |
774 | #endif | ||
773 | 775 | ||
774 | /* | 776 | /* |
775 | * paging_init() sets up the page tables - note that all of lowmem is | 777 | * paging_init() sets up the page tables - note that all of lowmem is |
@@ -858,8 +860,10 @@ void __init mem_init(void) | |||
858 | /* this will put all bootmem onto the freelists */ | 860 | /* this will put all bootmem onto the freelists */ |
859 | totalram_pages += free_all_bootmem(); | 861 | totalram_pages += free_all_bootmem(); |
860 | 862 | ||
863 | #ifndef CONFIG_64BIT | ||
861 | /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ | 864 | /* count all remaining LOWMEM and give all HIGHMEM to page allocator */ |
862 | set_non_bootmem_pages_init(); | 865 | set_non_bootmem_pages_init(); |
866 | #endif | ||
863 | 867 | ||
864 | codesize = (unsigned long)&_etext - (unsigned long)&_text; | 868 | codesize = (unsigned long)&_etext - (unsigned long)&_text; |
865 | datasize = (unsigned long)&_end - (unsigned long)&_sdata; | 869 | datasize = (unsigned long)&_end - (unsigned long)&_sdata; |
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 3d7074347e6d..345edfed9fcd 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -132,15 +132,6 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) | |||
132 | set_pte_pfn(address, phys >> PAGE_SHIFT, flags); | 132 | set_pte_pfn(address, phys >> PAGE_SHIFT, flags); |
133 | } | 133 | } |
134 | 134 | ||
135 | #if defined(CONFIG_HIGHPTE) | ||
136 | pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) | ||
137 | { | ||
138 | pte_t *pte = kmap_atomic(pmd_page(*dir)) + | ||
139 | (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK; | ||
140 | return &pte[pte_index(address)]; | ||
141 | } | ||
142 | #endif | ||
143 | |||
144 | /** | 135 | /** |
145 | * shatter_huge_page() - ensure a given address is mapped by a small page. | 136 | * shatter_huge_page() - ensure a given address is mapped by a small page. |
146 | * | 137 | * |
@@ -296,10 +287,6 @@ struct page *pgtable_alloc_one(struct mm_struct *mm, unsigned long address, | |||
296 | struct page *p; | 287 | struct page *p; |
297 | int i; | 288 | int i; |
298 | 289 | ||
299 | #ifdef CONFIG_HIGHPTE | ||
300 | flags |= __GFP_HIGHMEM; | ||
301 | #endif | ||
302 | |||
303 | p = alloc_pages(flags, L2_USER_PGTABLE_ORDER); | 290 | p = alloc_pages(flags, L2_USER_PGTABLE_ORDER); |
304 | if (p == NULL) | 291 | if (p == NULL) |
305 | return NULL; | 292 | return NULL; |
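
Once one of the extra sizes is enabled (via the hugepagesz= boot argument or the built-in additional size), userspace consumes it through the ordinary hugetlbfs path. A hedged usage sketch follows; the /mnt/huge-1M mount point, its pagesize=1M mount option, and the 1 MB size are assumptions made for the example, not something this patch sets up.

	/*
	 * Userspace sketch only: map one page from a hugetlbfs mount.  A mount
	 * such as "mount -t hugetlbfs -o pagesize=1M none /mnt/huge-1M" is
	 * assumed to exist already.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	int main(void)
	{
		size_t len = 1024 * 1024;		/* assumed huge page size */
		int fd = open("/mnt/huge-1M/example", O_CREAT | O_RDWR, 0600);
		void *p;

		if (fd < 0 || ftruncate(fd, len) < 0) {
			perror("hugetlbfs file");
			return 1;
		}
		p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		memset(p, 0, len);			/* touch the huge page */
		munmap(p, len);
		close(fd);
		return 0;
	}
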