Diffstat (limited to 'arch/powerpc/mm')
27 files changed, 818 insertions, 833 deletions
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index f5e7b9ce63dd..65abfcfaaa9e 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -84,14 +84,14 @@ void __init MMU_init_hw(void) | |||
84 | * vectors and the kernel live in real-mode. | 84 | * vectors and the kernel live in real-mode. |
85 | */ | 85 | */ |
86 | 86 | ||
87 | mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */ | 87 | mtspr(SPRN_DCCR, 0xFFFF0000); /* 2GByte of data space at 0x0. */ |
88 | mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */ | 88 | mtspr(SPRN_ICCR, 0xFFFF0000); /* 2GByte of instr. space at 0x0. */ |
89 | } | 89 | } |
90 | 90 | ||
91 | #define LARGE_PAGE_SIZE_16M (1<<24) | 91 | #define LARGE_PAGE_SIZE_16M (1<<24) |
92 | #define LARGE_PAGE_SIZE_4M (1<<22) | 92 | #define LARGE_PAGE_SIZE_4M (1<<22) |
93 | 93 | ||
94 | unsigned long __init mmu_mapin_ram(void) | 94 | unsigned long __init mmu_mapin_ram(unsigned long top) |
95 | { | 95 | { |
96 | unsigned long v, s, mapped; | 96 | unsigned long v, s, mapped; |
97 | phys_addr_t p; | 97 | phys_addr_t p; |
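The DCCR/ICCR hunk above widens real-mode cacheability from 512MB to 2GB. As the hunk's own comments imply, each bit of these registers covers one 128MB region, with the most-significant bit mapping the lowest addresses, which is why 0xF0000000 means "512 MB of data space at 0x0" and 0xFFFF0000 means "2GByte at 0x0". A standalone sketch of that arithmetic (illustrative only; cache_region_mask is a hypothetical helper, not kernel code):

    #include <stdint.h>

    /* Build a DCCR/ICCR-style mask covering the low "size" bytes,
     * assuming each of the 32 bits maps a 128MB region and the MSB
     * corresponds to the lowest region, as on the 40x.
     */
    static uint32_t cache_region_mask(uint64_t size)
    {
            unsigned int regions = size >> 27;  /* 128MB == 2^27 per bit */

            if (regions >= 32)
                    return 0xFFFFFFFFu;
            return regions ? ~0u << (32 - regions) : 0;
    }

    /* cache_region_mask(512ULL << 20) == 0xF0000000
     * cache_region_mask(2ULL << 30)   == 0xFFFF0000
     */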
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 98052ac96580..3986264b0993 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -88,7 +88,7 @@ void __init MMU_init_hw(void) | |||
88 | flush_instruction_cache(); | 88 | flush_instruction_cache(); |
89 | } | 89 | } |
90 | 90 | ||
91 | unsigned long __init mmu_mapin_ram(void) | 91 | unsigned long __init mmu_mapin_ram(unsigned long top) |
92 | { | 92 | { |
93 | unsigned long addr; | 93 | unsigned long addr; |
94 | 94 | ||
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 6fb8fc8d2fea..ce68708bbad5 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -28,7 +28,10 @@ obj-$(CONFIG_44x) += 44x_mmu.o | |||
28 | obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o | 28 | obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o |
29 | obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o | 29 | obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o |
30 | obj-$(CONFIG_PPC_MM_SLICES) += slice.o | 30 | obj-$(CONFIG_PPC_MM_SLICES) += slice.o |
31 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | 31 | ifeq ($(CONFIG_HUGETLB_PAGE),y) |
32 | obj-y += hugetlbpage.o | ||
33 | obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o | ||
34 | endif | ||
32 | obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o | 35 | obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o |
33 | obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o | 36 | obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o |
34 | obj-$(CONFIG_HIGHMEM) += highmem.o | 37 | obj-$(CONFIG_HIGHMEM) += highmem.o |
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 36692f5c9a76..757c0bed9a91 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -23,6 +23,7 @@ | |||
23 | */ | 23 | */ |
24 | 24 | ||
25 | #include <linux/sched.h> | 25 | #include <linux/sched.h> |
26 | #include <linux/slab.h> | ||
26 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
27 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
28 | #include <linux/string.h> | 29 | #include <linux/string.h> |
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index e7dae82c1285..26fb6b990b0a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -40,7 +40,7 @@ | |||
40 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
41 | #include <asm/tlbflush.h> | 41 | #include <asm/tlbflush.h> |
42 | #include <asm/siginfo.h> | 42 | #include <asm/siginfo.h> |
43 | 43 | #include <mm/mmu_decl.h> | |
44 | 44 | ||
45 | #ifdef CONFIG_KPROBES | 45 | #ifdef CONFIG_KPROBES |
46 | static inline int notify_page_fault(struct pt_regs *regs) | 46 | static inline int notify_page_fault(struct pt_regs *regs) |
@@ -246,6 +246,12 @@ good_area: | |||
246 | goto bad_area; | 246 | goto bad_area; |
247 | #endif /* CONFIG_6xx */ | 247 | #endif /* CONFIG_6xx */ |
248 | #if defined(CONFIG_8xx) | 248 | #if defined(CONFIG_8xx) |
249 | /* The 8xx sometimes needs to load invalid/non-present TLB entries. | ||
250 | * These must be invalidated separately, as the Linux mm doesn't. | ||
251 | */ | ||
252 | if (error_code & 0x40000000) /* no translation? */ | ||
253 | _tlbil_va(address, 0, 0, 0); | ||
254 | |||
249 | /* The MPC8xx seems to always set 0x80000000, which is | 255 | /* The MPC8xx seems to always set 0x80000000, which is |
250 | * "undefined". Of those that can be set, this is the only | 256 | * "undefined". Of those that can be set, this is the only |
251 | * one which seems bad. | 257 | * one which seems bad. |
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index dc93e95b256e..1ed6b52f3031 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -54,26 +54,35 @@ | |||
54 | 54 | ||
55 | #include "mmu_decl.h" | 55 | #include "mmu_decl.h" |
56 | 56 | ||
57 | extern void loadcam_entry(unsigned int index); | ||
58 | unsigned int tlbcam_index; | 57 | unsigned int tlbcam_index; |
59 | static unsigned long cam[CONFIG_LOWMEM_CAM_NUM]; | ||
60 | 58 | ||
61 | #define NUM_TLBCAMS (16) | 59 | #define NUM_TLBCAMS (64) |
62 | 60 | ||
63 | #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) | 61 | #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) |
64 | #error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" | 62 | #error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" |
65 | #endif | 63 | #endif |
66 | 64 | ||
67 | struct tlbcam TLBCAM[NUM_TLBCAMS]; | 65 | struct tlbcam { |
66 | u32 MAS0; | ||
67 | u32 MAS1; | ||
68 | unsigned long MAS2; | ||
69 | u32 MAS3; | ||
70 | u32 MAS7; | ||
71 | } TLBCAM[NUM_TLBCAMS]; | ||
68 | 72 | ||
69 | struct tlbcamrange { | 73 | struct tlbcamrange { |
70 | unsigned long start; | 74 | unsigned long start; |
71 | unsigned long limit; | 75 | unsigned long limit; |
72 | phys_addr_t phys; | 76 | phys_addr_t phys; |
73 | } tlbcam_addrs[NUM_TLBCAMS]; | 77 | } tlbcam_addrs[NUM_TLBCAMS]; |
74 | 78 | ||
75 | extern unsigned int tlbcam_index; | 79 | extern unsigned int tlbcam_index; |
76 | 80 | ||
81 | unsigned long tlbcam_sz(int idx) | ||
82 | { | ||
83 | return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; | ||
84 | } | ||
85 | |||
77 | /* | 86 | /* |
78 | * Return PA for this VA if it is mapped by a CAM, or 0 | 87 | * Return PA for this VA if it is mapped by a CAM, or 0 |
79 | */ | 88 | */ |
@@ -94,23 +103,36 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa) | |||
94 | int b; | 103 | int b; |
95 | for (b = 0; b < tlbcam_index; ++b) | 104 | for (b = 0; b < tlbcam_index; ++b) |
96 | if (pa >= tlbcam_addrs[b].phys | 105 | if (pa >= tlbcam_addrs[b].phys |
97 | && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) | 106 | && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) |
98 | +tlbcam_addrs[b].phys) | 107 | +tlbcam_addrs[b].phys) |
99 | return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); | 108 | return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); |
100 | return 0; | 109 | return 0; |
101 | } | 110 | } |
102 | 111 | ||
112 | void loadcam_entry(int idx) | ||
113 | { | ||
114 | mtspr(SPRN_MAS0, TLBCAM[idx].MAS0); | ||
115 | mtspr(SPRN_MAS1, TLBCAM[idx].MAS1); | ||
116 | mtspr(SPRN_MAS2, TLBCAM[idx].MAS2); | ||
117 | mtspr(SPRN_MAS3, TLBCAM[idx].MAS3); | ||
118 | |||
119 | if (mmu_has_feature(MMU_FTR_BIG_PHYS)) | ||
120 | mtspr(SPRN_MAS7, TLBCAM[idx].MAS7); | ||
121 | |||
122 | asm volatile("isync;tlbwe;isync" : : : "memory"); | ||
123 | } | ||
124 | |||
103 | /* | 125 | /* |
104 | * Set up a variable-size TLB entry (tlbcam), analogous to an I/D BAT pair. | 126 | * Set up a variable-size TLB entry (tlbcam), analogous to an I/D BAT pair. |
105 | * The parameters are not checked; in particular size must be a power | 127 | * The parameters are not checked; in particular size must be a power |
106 | * of 4 between 4k and 256M. | 128 | * of 4 between 4k and 256M. |
107 | */ | 129 | */ |
108 | void settlbcam(int index, unsigned long virt, phys_addr_t phys, | 130 | static void settlbcam(int index, unsigned long virt, phys_addr_t phys, |
109 | unsigned int size, int flags, unsigned int pid) | 131 | unsigned long size, unsigned long flags, unsigned int pid) |
110 | { | 132 | { |
111 | unsigned int tsize, lz; | 133 | unsigned int tsize, lz; |
112 | 134 | ||
113 | asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); | 135 | asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (size)); |
114 | tsize = 21 - lz; | 136 | tsize = 21 - lz; |
115 | 137 | ||
116 | #ifdef CONFIG_SMP | 138 | #ifdef CONFIG_SMP |
@@ -128,18 +150,15 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys, | |||
128 | TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; | 150 | TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; |
129 | TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; | 151 | TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; |
130 | 152 | ||
131 | TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR; | 153 | TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR; |
132 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); | 154 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); |
155 | if (mmu_has_feature(MMU_FTR_BIG_PHYS)) | ||
156 | TLBCAM[index].MAS7 = (u64)phys >> 32; | ||
133 | 157 | ||
134 | #ifndef CONFIG_KGDB /* want user access for breakpoints */ | ||
135 | if (flags & _PAGE_USER) { | 158 | if (flags & _PAGE_USER) { |
136 | TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; | 159 | TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; |
137 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); | 160 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); |
138 | } | 161 | } |
139 | #else | ||
140 | TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; | ||
141 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); | ||
142 | #endif | ||
143 | 162 | ||
144 | tlbcam_addrs[index].start = virt; | 163 | tlbcam_addrs[index].start = virt; |
145 | tlbcam_addrs[index].limit = virt + size - 1; | 164 | tlbcam_addrs[index].limit = virt + size - 1; |
@@ -148,27 +167,44 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys, | |||
148 | loadcam_entry(index); | 167 | loadcam_entry(index); |
149 | } | 168 | } |
150 | 169 | ||
151 | void invalidate_tlbcam_entry(int index) | 170 | unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx) |
152 | { | ||
153 | TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index); | ||
154 | TLBCAM[index].MAS1 = ~MAS1_VALID; | ||
155 | |||
156 | loadcam_entry(index); | ||
157 | } | ||
158 | |||
159 | unsigned long __init mmu_mapin_ram(void) | ||
160 | { | 171 | { |
172 | int i; | ||
161 | unsigned long virt = PAGE_OFFSET; | 173 | unsigned long virt = PAGE_OFFSET; |
162 | phys_addr_t phys = memstart_addr; | 174 | phys_addr_t phys = memstart_addr; |
175 | unsigned long amount_mapped = 0; | ||
176 | unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf; | ||
177 | |||
178 | /* Convert (4^max) kB to (2^max) bytes */ | ||
179 | max_cam = max_cam * 2 + 10; | ||
180 | |||
181 | /* Calculate CAM values */ | ||
182 | for (i = 0; ram && i < max_cam_idx; i++) { | ||
183 | unsigned int camsize = __ilog2(ram) & ~1U; | ||
184 | unsigned int align = __ffs(virt | phys) & ~1U; | ||
185 | unsigned long cam_sz; | ||
186 | |||
187 | if (camsize > align) | ||
188 | camsize = align; | ||
189 | if (camsize > max_cam) | ||
190 | camsize = max_cam; | ||
191 | |||
192 | cam_sz = 1UL << camsize; | ||
193 | settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0); | ||
163 | 194 | ||
164 | while (tlbcam_index < ARRAY_SIZE(cam) && cam[tlbcam_index]) { | 195 | ram -= cam_sz; |
165 | settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], PAGE_KERNEL_X, 0); | 196 | amount_mapped += cam_sz; |
166 | virt += cam[tlbcam_index]; | 197 | virt += cam_sz; |
167 | phys += cam[tlbcam_index]; | 198 | phys += cam_sz; |
168 | tlbcam_index++; | ||
169 | } | 199 | } |
200 | tlbcam_index = i; | ||
201 | |||
202 | return amount_mapped; | ||
203 | } | ||
170 | 204 | ||
171 | return virt - PAGE_OFFSET; | 205 | unsigned long __init mmu_mapin_ram(unsigned long top) |
206 | { | ||
207 | return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; | ||
172 | } | 208 | } |
173 | 209 | ||
174 | /* | 210 | /* |
@@ -179,46 +215,21 @@ void __init MMU_init_hw(void) | |||
179 | flush_instruction_cache(); | 215 | flush_instruction_cache(); |
180 | } | 216 | } |
181 | 217 | ||
182 | void __init | 218 | void __init adjust_total_lowmem(void) |
183 | adjust_total_lowmem(void) | ||
184 | { | 219 | { |
185 | phys_addr_t ram; | 220 | unsigned long ram; |
186 | unsigned int max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xff; | ||
187 | char buf[ARRAY_SIZE(cam) * 5 + 1], *p = buf; | ||
188 | int i; | 221 | int i; |
189 | unsigned long virt = PAGE_OFFSET & 0xffffffffUL; | ||
190 | unsigned long phys = memstart_addr & 0xffffffffUL; | ||
191 | |||
192 | /* Convert (4^max) kB to (2^max) bytes */ | ||
193 | max_cam = max_cam * 2 + 10; | ||
194 | 222 | ||
195 | /* adjust lowmem size to __max_low_memory */ | 223 | /* adjust lowmem size to __max_low_memory */ |
196 | ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); | 224 | ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); |
197 | 225 | ||
198 | /* Calculate CAM values */ | 226 | __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM); |
199 | __max_low_memory = 0; | ||
200 | for (i = 0; ram && i < ARRAY_SIZE(cam); i++) { | ||
201 | unsigned int camsize = __ilog2(ram) & ~1U; | ||
202 | unsigned int align = __ffs(virt | phys) & ~1U; | ||
203 | 227 | ||
204 | if (camsize > align) | 228 | pr_info("Memory CAM mapping: "); |
205 | camsize = align; | 229 | for (i = 0; i < tlbcam_index - 1; i++) |
206 | if (camsize > max_cam) | 230 | pr_cont("%lu/", tlbcam_sz(i) >> 20); |
207 | camsize = max_cam; | 231 | pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, |
208 | |||
209 | cam[i] = 1UL << camsize; | ||
210 | ram -= cam[i]; | ||
211 | __max_low_memory += cam[i]; | ||
212 | virt += cam[i]; | ||
213 | phys += cam[i]; | ||
214 | |||
215 | p += sprintf(p, "%lu/", cam[i] >> 20); | ||
216 | } | ||
217 | for (; i < ARRAY_SIZE(cam); i++) | ||
218 | p += sprintf(p, "0/"); | ||
219 | p[-1] = '\0'; | ||
220 | |||
221 | pr_info("Memory CAM mapping: %s Mb, residual: %dMb\n", buf, | ||
222 | (unsigned int)((total_lowmem - __max_low_memory) >> 20)); | 232 | (unsigned int)((total_lowmem - __max_low_memory) >> 20)); |
233 | |||
223 | __initial_memory_limit_addr = memstart_addr + __max_low_memory; | 234 | __initial_memory_limit_addr = memstart_addr + __max_low_memory; |
224 | } | 235 | } |
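The new map_mem_in_cams() above sizes each CAM entry greedily: every entry is the largest even power of two (CAM entries come in powers of four) that fits the remaining RAM, honours the combined virt|phys alignment, and stays under the hardware maximum read from TLB1CFG. A userspace sketch of the same loop (assumed values throughout; __builtin_clzl/__builtin_ctzl stand in for the kernel's __ilog2/__ffs, and a 64-bit unsigned long is assumed):

    #include <stdio.h>

    int main(void)
    {
            unsigned long ram = 768UL << 20;   /* lowmem to cover */
            unsigned long virt = 0xc0000000UL; /* assumed PAGE_OFFSET */
            unsigned long phys = 0;            /* assumed memstart_addr */
            unsigned int max_cam = 28;         /* 256MB max entry size */
            int i;

            for (i = 0; ram && i < 16; i++) {
                    unsigned int camsize = (63 - __builtin_clzl(ram)) & ~1U;
                    unsigned int align = __builtin_ctzl(virt | phys) & ~1U;
                    unsigned long cam_sz;

                    if (camsize > align)
                            camsize = align;
                    if (camsize > max_cam)
                            camsize = max_cam;
                    cam_sz = 1UL << camsize;

                    printf("CAM %d: %lu MB\n", i, cam_sz >> 20);
                    ram -= cam_sz;
                    virt += cam_sz;
                    phys += cam_sz;
            }
            return 0;
    }

With these inputs the loop emits three 256MB entries, matching the "256/256/256 Mb" style line that the reworked adjust_total_lowmem() now prints.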
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index bc122a120bf0..d7efdbf640c7 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -55,57 +55,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
55 | return 1; | 55 | return 1; |
56 | } | 56 | } |
57 | 57 | ||
58 | #ifdef CONFIG_HUGETLB_PAGE | ||
59 | static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate, | ||
60 | unsigned long *addr, unsigned long end, | ||
61 | int write, struct page **pages, int *nr) | ||
62 | { | ||
63 | unsigned long mask; | ||
64 | unsigned long pte_end; | ||
65 | struct page *head, *page; | ||
66 | pte_t pte; | ||
67 | int refs; | ||
68 | |||
69 | pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate); | ||
70 | if (pte_end < end) | ||
71 | end = pte_end; | ||
72 | |||
73 | pte = *ptep; | ||
74 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
75 | if (write) | ||
76 | mask |= _PAGE_RW; | ||
77 | if ((pte_val(pte) & mask) != mask) | ||
78 | return 0; | ||
79 | /* hugepages are never "special" */ | ||
80 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
81 | |||
82 | refs = 0; | ||
83 | head = pte_page(pte); | ||
84 | page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT); | ||
85 | do { | ||
86 | VM_BUG_ON(compound_head(page) != head); | ||
87 | pages[*nr] = page; | ||
88 | (*nr)++; | ||
89 | page++; | ||
90 | refs++; | ||
91 | } while (*addr += PAGE_SIZE, *addr != end); | ||
92 | |||
93 | if (!page_cache_add_speculative(head, refs)) { | ||
94 | *nr -= refs; | ||
95 | return 0; | ||
96 | } | ||
97 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
98 | /* Could be optimized better */ | ||
99 | while (*nr) { | ||
100 | put_page(page); | ||
101 | (*nr)--; | ||
102 | } | ||
103 | } | ||
104 | |||
105 | return 1; | ||
106 | } | ||
107 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
108 | |||
109 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | 58 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, |
110 | int write, struct page **pages, int *nr) | 59 | int write, struct page **pages, int *nr) |
111 | { | 60 | { |
@@ -119,7 +68,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | |||
119 | next = pmd_addr_end(addr, end); | 68 | next = pmd_addr_end(addr, end); |
120 | if (pmd_none(pmd)) | 69 | if (pmd_none(pmd)) |
121 | return 0; | 70 | return 0; |
122 | if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | 71 | if (is_hugepd(pmdp)) { |
72 | if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, | ||
73 | addr, next, write, pages, nr)) | ||
74 | return 0; | ||
75 | } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | ||
123 | return 0; | 76 | return 0; |
124 | } while (pmdp++, addr = next, addr != end); | 77 | } while (pmdp++, addr = next, addr != end); |
125 | 78 | ||
@@ -139,7 +92,11 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | |||
139 | next = pud_addr_end(addr, end); | 92 | next = pud_addr_end(addr, end); |
140 | if (pud_none(pud)) | 93 | if (pud_none(pud)) |
141 | return 0; | 94 | return 0; |
142 | if (!gup_pmd_range(pud, addr, next, write, pages, nr)) | 95 | if (is_hugepd(pudp)) { |
96 | if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, | ||
97 | addr, next, write, pages, nr)) | ||
98 | return 0; | ||
99 | } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) | ||
143 | return 0; | 100 | return 0; |
144 | } while (pudp++, addr = next, addr != end); | 101 | } while (pudp++, addr = next, addr != end); |
145 | 102 | ||
@@ -154,10 +111,6 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
154 | unsigned long next; | 111 | unsigned long next; |
155 | pgd_t *pgdp; | 112 | pgd_t *pgdp; |
156 | int nr = 0; | 113 | int nr = 0; |
157 | #ifdef CONFIG_PPC64 | ||
158 | unsigned int shift; | ||
159 | int psize; | ||
160 | #endif | ||
161 | 114 | ||
162 | pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); | 115 | pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); |
163 | 116 | ||
@@ -172,25 +125,6 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
172 | 125 | ||
173 | pr_devel(" aligned: %lx .. %lx\n", start, end); | 126 | pr_devel(" aligned: %lx .. %lx\n", start, end); |
174 | 127 | ||
175 | #ifdef CONFIG_HUGETLB_PAGE | ||
176 | /* We bail out on slice boundary crossing when hugetlb is | ||
177 | * enabled in order to not have to deal with two different | ||
178 | * page table formats | ||
179 | */ | ||
180 | if (addr < SLICE_LOW_TOP) { | ||
181 | if (end > SLICE_LOW_TOP) | ||
182 | goto slow_irqon; | ||
183 | |||
184 | if (unlikely(GET_LOW_SLICE_INDEX(addr) != | ||
185 | GET_LOW_SLICE_INDEX(end - 1))) | ||
186 | goto slow_irqon; | ||
187 | } else { | ||
188 | if (unlikely(GET_HIGH_SLICE_INDEX(addr) != | ||
189 | GET_HIGH_SLICE_INDEX(end - 1))) | ||
190 | goto slow_irqon; | ||
191 | } | ||
192 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
193 | |||
194 | /* | 128 | /* |
195 | * XXX: batch / limit 'nr', to avoid large irq off latency | 129 | * XXX: batch / limit 'nr', to avoid large irq off latency |
196 | * needs some instrumenting to determine the common sizes used by | 130 | * needs some instrumenting to determine the common sizes used by |
@@ -210,54 +144,23 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
210 | */ | 144 | */ |
211 | local_irq_disable(); | 145 | local_irq_disable(); |
212 | 146 | ||
213 | #ifdef CONFIG_PPC64 | 147 | pgdp = pgd_offset(mm, addr); |
214 | /* Those bits are related to hugetlbfs implementation and only exist | 148 | do { |
215 | * on 64-bit for now | 149 | pgd_t pgd = *pgdp; |
216 | */ | 150 | |
217 | psize = get_slice_psize(mm, addr); | 151 | pr_devel(" %016lx: normal pgd %p\n", addr, |
218 | shift = mmu_psize_defs[psize].shift; | 152 | (void *)pgd_val(pgd)); |
219 | #endif /* CONFIG_PPC64 */ | 153 | next = pgd_addr_end(addr, end); |
220 | 154 | if (pgd_none(pgd)) | |
221 | #ifdef CONFIG_HUGETLB_PAGE | 155 | goto slow; |
222 | if (unlikely(mmu_huge_psizes[psize])) { | 156 | if (is_hugepd(pgdp)) { |
223 | pte_t *ptep; | 157 | if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, |
224 | unsigned long a = addr; | 158 | addr, next, write, pages, &nr)) |
225 | unsigned long sz = ((1UL) << shift); | ||
226 | struct hstate *hstate = size_to_hstate(sz); | ||
227 | |||
228 | BUG_ON(!hstate); | ||
229 | /* | ||
230 | * XXX: could be optimized to avoid hstate | ||
231 | * lookup entirely (just use shift) | ||
232 | */ | ||
233 | |||
234 | do { | ||
235 | VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift); | ||
236 | ptep = huge_pte_offset(mm, a); | ||
237 | pr_devel(" %016lx: huge ptep %p\n", a, ptep); | ||
238 | if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages, | ||
239 | &nr)) | ||
240 | goto slow; | ||
241 | } while (a != end); | ||
242 | } else | ||
243 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
244 | { | ||
245 | pgdp = pgd_offset(mm, addr); | ||
246 | do { | ||
247 | pgd_t pgd = *pgdp; | ||
248 | |||
249 | #ifdef CONFIG_PPC64 | ||
250 | VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift); | ||
251 | #endif | ||
252 | pr_devel(" %016lx: normal pgd %p\n", addr, | ||
253 | (void *)pgd_val(pgd)); | ||
254 | next = pgd_addr_end(addr, end); | ||
255 | if (pgd_none(pgd)) | ||
256 | goto slow; | ||
257 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
258 | goto slow; | 159 | goto slow; |
259 | } while (pgdp++, addr = next, addr != end); | 160 | } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) |
260 | } | 161 | goto slow; |
162 | } while (pgdp++, addr = next, addr != end); | ||
163 | |||
261 | local_irq_enable(); | 164 | local_irq_enable(); |
262 | 165 | ||
263 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); | 166 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); |
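The gup.c rework above replaces the old slice-boundary bailout with a single test repeated at every page-table level: a directory slot is either a normal entry to descend through or a hugepd pointing straight at a hugepte table, so one walker serves both layouts. A toy model of that dispatch (not kernel code; the low tag bit merely stands in for whatever is_hugepd() actually tests):

    #include <stdbool.h>
    #include <stdio.h>

    /* Toy directory slot: low bit set marks a "hugepd" leaf. */
    struct slot { unsigned long val; };

    static bool is_hugepd(const struct slot *s)
    {
            return s->val & 1;
    }

    static void visit(const struct slot *s, int level)
    {
            if (is_hugepd(s))
                    printf("level %d: hugepd, treat as one large leaf\n", level);
            else
                    printf("level %d: normal entry, descend further\n", level);
    }

    int main(void)
    {
            struct slot normal = { 0x1000 }, huge = { 0x2001 };

            visit(&normal, 0);  /* pgd-level entry, keep walking  */
            visit(&huge, 1);    /* pud-level hugepd, stop here    */
            return 0;
    }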
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 056d23a1b105..784a400e0781 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -37,7 +37,7 @@ | |||
37 | 37 | ||
38 | #define HPTE_LOCK_BIT 3 | 38 | #define HPTE_LOCK_BIT 3 |
39 | 39 | ||
40 | static DEFINE_SPINLOCK(native_tlbie_lock); | 40 | static DEFINE_RAW_SPINLOCK(native_tlbie_lock); |
41 | 41 | ||
42 | static inline void __tlbie(unsigned long va, int psize, int ssize) | 42 | static inline void __tlbie(unsigned long va, int psize, int ssize) |
43 | { | 43 | { |
@@ -104,7 +104,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) | |||
104 | if (use_local) | 104 | if (use_local) |
105 | use_local = mmu_psize_defs[psize].tlbiel; | 105 | use_local = mmu_psize_defs[psize].tlbiel; |
106 | if (lock_tlbie && !use_local) | 106 | if (lock_tlbie && !use_local) |
107 | spin_lock(&native_tlbie_lock); | 107 | raw_spin_lock(&native_tlbie_lock); |
108 | asm volatile("ptesync": : :"memory"); | 108 | asm volatile("ptesync": : :"memory"); |
109 | if (use_local) { | 109 | if (use_local) { |
110 | __tlbiel(va, psize, ssize); | 110 | __tlbiel(va, psize, ssize); |
@@ -114,7 +114,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local) | |||
114 | asm volatile("eieio; tlbsync; ptesync": : :"memory"); | 114 | asm volatile("eieio; tlbsync; ptesync": : :"memory"); |
115 | } | 115 | } |
116 | if (lock_tlbie && !use_local) | 116 | if (lock_tlbie && !use_local) |
117 | spin_unlock(&native_tlbie_lock); | 117 | raw_spin_unlock(&native_tlbie_lock); |
118 | } | 118 | } |
119 | 119 | ||
120 | static inline void native_lock_hpte(struct hash_pte *hptep) | 120 | static inline void native_lock_hpte(struct hash_pte *hptep) |
@@ -122,7 +122,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep) | |||
122 | unsigned long *word = &hptep->v; | 122 | unsigned long *word = &hptep->v; |
123 | 123 | ||
124 | while (1) { | 124 | while (1) { |
125 | if (!test_and_set_bit(HPTE_LOCK_BIT, word)) | 125 | if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word)) |
126 | break; | 126 | break; |
127 | while(test_bit(HPTE_LOCK_BIT, word)) | 127 | while(test_bit(HPTE_LOCK_BIT, word)) |
128 | cpu_relax(); | 128 | cpu_relax(); |
@@ -133,8 +133,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep) | |||
133 | { | 133 | { |
134 | unsigned long *word = &hptep->v; | 134 | unsigned long *word = &hptep->v; |
135 | 135 | ||
136 | asm volatile("lwsync":::"memory"); | 136 | clear_bit_unlock(HPTE_LOCK_BIT, word); |
137 | clear_bit(HPTE_LOCK_BIT, word); | ||
138 | } | 137 | } |
139 | 138 | ||
140 | static long native_hpte_insert(unsigned long hpte_group, unsigned long va, | 139 | static long native_hpte_insert(unsigned long hpte_group, unsigned long va, |
@@ -434,7 +433,7 @@ static void native_hpte_clear(void) | |||
434 | /* we take the tlbie lock and hold it. Some hardware will | 433 | /* we take the tlbie lock and hold it. Some hardware will |
435 | * deadlock if we try to tlbie from two processors at once. | 434 | * deadlock if we try to tlbie from two processors at once. |
436 | */ | 435 | */ |
437 | spin_lock(&native_tlbie_lock); | 436 | raw_spin_lock(&native_tlbie_lock); |
438 | 437 | ||
439 | slots = pteg_count * HPTES_PER_GROUP; | 438 | slots = pteg_count * HPTES_PER_GROUP; |
440 | 439 | ||
@@ -458,7 +457,7 @@ static void native_hpte_clear(void) | |||
458 | } | 457 | } |
459 | 458 | ||
460 | asm volatile("eieio; tlbsync; ptesync":::"memory"); | 459 | asm volatile("eieio; tlbsync; ptesync":::"memory"); |
461 | spin_unlock(&native_tlbie_lock); | 460 | raw_spin_unlock(&native_tlbie_lock); |
462 | local_irq_restore(flags); | 461 | local_irq_restore(flags); |
463 | } | 462 | } |
464 | 463 | ||
@@ -521,7 +520,7 @@ static void native_flush_hash_range(unsigned long number, int local) | |||
521 | int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); | 520 | int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE); |
522 | 521 | ||
523 | if (lock_tlbie) | 522 | if (lock_tlbie) |
524 | spin_lock(&native_tlbie_lock); | 523 | raw_spin_lock(&native_tlbie_lock); |
525 | 524 | ||
526 | asm volatile("ptesync":::"memory"); | 525 | asm volatile("ptesync":::"memory"); |
527 | for (i = 0; i < number; i++) { | 526 | for (i = 0; i < number; i++) { |
@@ -536,7 +535,7 @@ static void native_flush_hash_range(unsigned long number, int local) | |||
536 | asm volatile("eieio; tlbsync; ptesync":::"memory"); | 535 | asm volatile("eieio; tlbsync; ptesync":::"memory"); |
537 | 536 | ||
538 | if (lock_tlbie) | 537 | if (lock_tlbie) |
539 | spin_unlock(&native_tlbie_lock); | 538 | raw_spin_unlock(&native_tlbie_lock); |
540 | } | 539 | } |
541 | 540 | ||
542 | local_irq_restore(flags); | 541 | local_irq_restore(flags); |
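The locking hunks above trade the explicit lwsync-plus-clear_bit() sequence for test_and_set_bit_lock()/clear_bit_unlock(), which carry acquire and release semantics and let each architecture emit the cheapest sufficient barriers. A minimal C11 analogue of the bit-lock idiom (a sketch, not the kernel implementation):

    #include <stdatomic.h>

    #define HPTE_LOCK (1UL << 3)

    static atomic_ulong word;

    static void lock_hpte(void)
    {
            /* acquire: later accesses cannot move before taking the bit */
            while (atomic_fetch_or_explicit(&word, HPTE_LOCK,
                                            memory_order_acquire) & HPTE_LOCK)
                    ;  /* spin until the bit was previously clear */
    }

    static void unlock_hpte(void)
    {
            /* release: earlier accesses cannot move past dropping the bit */
            atomic_fetch_and_explicit(&word, ~HPTE_LOCK,
                                      memory_order_release);
    }

The kernel version additionally spins on a plain read between attempts (the inner test_bit() loop) so contended waiters don't hammer the cache line with atomic operations.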
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 1ade7eb6ae00..3ecdcec0a39e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -92,6 +92,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; | |||
92 | struct hash_pte *htab_address; | 92 | struct hash_pte *htab_address; |
93 | unsigned long htab_size_bytes; | 93 | unsigned long htab_size_bytes; |
94 | unsigned long htab_hash_mask; | 94 | unsigned long htab_hash_mask; |
95 | EXPORT_SYMBOL_GPL(htab_hash_mask); | ||
95 | int mmu_linear_psize = MMU_PAGE_4K; | 96 | int mmu_linear_psize = MMU_PAGE_4K; |
96 | int mmu_virtual_psize = MMU_PAGE_4K; | 97 | int mmu_virtual_psize = MMU_PAGE_4K; |
97 | int mmu_vmalloc_psize = MMU_PAGE_4K; | 98 | int mmu_vmalloc_psize = MMU_PAGE_4K; |
@@ -102,6 +103,7 @@ int mmu_io_psize = MMU_PAGE_4K; | |||
102 | int mmu_kernel_ssize = MMU_SEGSIZE_256M; | 103 | int mmu_kernel_ssize = MMU_SEGSIZE_256M; |
103 | int mmu_highuser_ssize = MMU_SEGSIZE_256M; | 104 | int mmu_highuser_ssize = MMU_SEGSIZE_256M; |
104 | u16 mmu_slb_size = 64; | 105 | u16 mmu_slb_size = 64; |
106 | EXPORT_SYMBOL_GPL(mmu_slb_size); | ||
105 | #ifdef CONFIG_HUGETLB_PAGE | 107 | #ifdef CONFIG_HUGETLB_PAGE |
106 | unsigned int HPAGE_SHIFT; | 108 | unsigned int HPAGE_SHIFT; |
107 | #endif | 109 | #endif |
@@ -338,7 +340,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, | |||
338 | else | 340 | else |
339 | def->tlbiel = 0; | 341 | def->tlbiel = 0; |
340 | 342 | ||
341 | DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, " | 343 | DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, " |
342 | "tlbiel=%d, penc=%d\n", | 344 | "tlbiel=%d, penc=%d\n", |
343 | idx, shift, def->sllp, def->avpnm, def->tlbiel, | 345 | idx, shift, def->sllp, def->avpnm, def->tlbiel, |
344 | def->penc); | 346 | def->penc); |
@@ -481,16 +483,6 @@ static void __init htab_init_page_sizes(void) | |||
481 | #ifdef CONFIG_HUGETLB_PAGE | 483 | #ifdef CONFIG_HUGETLB_PAGE |
482 | /* Reserve 16G huge page memory sections for huge pages */ | 484 | /* Reserve 16G huge page memory sections for huge pages */ |
483 | of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); | 485 | of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); |
484 | |||
485 | /* Set default large page size. Currently, we pick 16M or 1M depending | ||
486 | * on what is available | ||
487 | */ | ||
488 | if (mmu_psize_defs[MMU_PAGE_16M].shift) | ||
489 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; | ||
490 | /* With 4k/4level pagetables, we can't (for now) cope with a | ||
491 | * huge page size < PMD_SIZE */ | ||
492 | else if (mmu_psize_defs[MMU_PAGE_1M].shift) | ||
493 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; | ||
494 | #endif /* CONFIG_HUGETLB_PAGE */ | 486 | #endif /* CONFIG_HUGETLB_PAGE */ |
495 | } | 487 | } |
496 | 488 | ||
@@ -671,7 +663,7 @@ static void __init htab_initialize(void) | |||
671 | base = (unsigned long)__va(lmb.memory.region[i].base); | 663 | base = (unsigned long)__va(lmb.memory.region[i].base); |
672 | size = lmb.memory.region[i].size; | 664 | size = lmb.memory.region[i].size; |
673 | 665 | ||
674 | DBG("creating mapping for region: %lx..%lx (prot: %x)\n", | 666 | DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", |
675 | base, size, prot); | 667 | base, size, prot); |
676 | 668 | ||
677 | #ifdef CONFIG_U3_DART | 669 | #ifdef CONFIG_U3_DART |
@@ -785,7 +777,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) | |||
785 | /* page is dirty */ | 777 | /* page is dirty */ |
786 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { | 778 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { |
787 | if (trap == 0x400) { | 779 | if (trap == 0x400) { |
788 | __flush_dcache_icache(page_address(page)); | 780 | flush_dcache_icache_page(page); |
789 | set_bit(PG_arch_1, &page->flags); | 781 | set_bit(PG_arch_1, &page->flags); |
790 | } else | 782 | } else |
791 | pp |= HPTE_R_N; | 783 | pp |= HPTE_R_N; |
@@ -843,9 +835,9 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) | |||
843 | * Result is 0: full permissions, _PAGE_RW: read-only, | 835 | * Result is 0: full permissions, _PAGE_RW: read-only, |
844 | * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access. | 836 | * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access. |
845 | */ | 837 | */ |
846 | static int subpage_protection(pgd_t *pgdir, unsigned long ea) | 838 | static int subpage_protection(struct mm_struct *mm, unsigned long ea) |
847 | { | 839 | { |
848 | struct subpage_prot_table *spt = pgd_subpage_prot(pgdir); | 840 | struct subpage_prot_table *spt = &mm->context.spt; |
849 | u32 spp = 0; | 841 | u32 spp = 0; |
850 | u32 **sbpm, *sbpp; | 842 | u32 **sbpm, *sbpp; |
851 | 843 | ||
@@ -873,7 +865,7 @@ static int subpage_protection(pgd_t *pgdir, unsigned long ea) | |||
873 | } | 865 | } |
874 | 866 | ||
875 | #else /* CONFIG_PPC_SUBPAGE_PROT */ | 867 | #else /* CONFIG_PPC_SUBPAGE_PROT */ |
876 | static inline int subpage_protection(pgd_t *pgdir, unsigned long ea) | 868 | static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) |
877 | { | 869 | { |
878 | return 0; | 870 | return 0; |
879 | } | 871 | } |
@@ -887,10 +879,11 @@ static inline int subpage_protection(pgd_t *pgdir, unsigned long ea) | |||
887 | */ | 879 | */ |
888 | int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | 880 | int hash_page(unsigned long ea, unsigned long access, unsigned long trap) |
889 | { | 881 | { |
890 | void *pgdir; | 882 | pgd_t *pgdir; |
891 | unsigned long vsid; | 883 | unsigned long vsid; |
892 | struct mm_struct *mm; | 884 | struct mm_struct *mm; |
893 | pte_t *ptep; | 885 | pte_t *ptep; |
886 | unsigned hugeshift; | ||
894 | const struct cpumask *tmp; | 887 | const struct cpumask *tmp; |
895 | int rc, user_region = 0, local = 0; | 888 | int rc, user_region = 0, local = 0; |
896 | int psize, ssize; | 889 | int psize, ssize; |
@@ -943,30 +936,31 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
943 | if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) | 936 | if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) |
944 | local = 1; | 937 | local = 1; |
945 | 938 | ||
946 | #ifdef CONFIG_HUGETLB_PAGE | ||
947 | /* Handle hugepage regions */ | ||
948 | if (HPAGE_SHIFT && mmu_huge_psizes[psize]) { | ||
949 | DBG_LOW(" -> huge page !\n"); | ||
950 | return hash_huge_page(mm, access, ea, vsid, local, trap); | ||
951 | } | ||
952 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
953 | |||
954 | #ifndef CONFIG_PPC_64K_PAGES | 939 | #ifndef CONFIG_PPC_64K_PAGES |
955 | /* If we use 4K pages and our psize is not 4K, then we are hitting | 940 | /* If we use 4K pages and our psize is not 4K, then we might |
956 | * a special driver mapping, we need to align the address before | 941 | * be hitting a special driver mapping, and need to align the |
957 | * we fetch the PTE | 942 | * address before we fetch the PTE. |
943 | * | ||
944 | * It could also be a hugepage mapping, in which case this is | ||
945 | * not necessary, but it's not harmful, either. | ||
958 | */ | 946 | */ |
959 | if (psize != MMU_PAGE_4K) | 947 | if (psize != MMU_PAGE_4K) |
960 | ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); | 948 | ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); |
961 | #endif /* CONFIG_PPC_64K_PAGES */ | 949 | #endif /* CONFIG_PPC_64K_PAGES */ |
962 | 950 | ||
963 | /* Get PTE and page size from page tables */ | 951 | /* Get PTE and page size from page tables */ |
964 | ptep = find_linux_pte(pgdir, ea); | 952 | ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift); |
965 | if (ptep == NULL || !pte_present(*ptep)) { | 953 | if (ptep == NULL || !pte_present(*ptep)) { |
966 | DBG_LOW(" no PTE !\n"); | 954 | DBG_LOW(" no PTE !\n"); |
967 | return 1; | 955 | return 1; |
968 | } | 956 | } |
969 | 957 | ||
958 | #ifdef CONFIG_HUGETLB_PAGE | ||
959 | if (hugeshift) | ||
960 | return __hash_page_huge(ea, access, vsid, ptep, trap, local, | ||
961 | ssize, hugeshift, psize); | ||
962 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
963 | |||
970 | #ifndef CONFIG_PPC_64K_PAGES | 964 | #ifndef CONFIG_PPC_64K_PAGES |
971 | DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); | 965 | DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); |
972 | #else | 966 | #else |
@@ -1031,7 +1025,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
1031 | else | 1025 | else |
1032 | #endif /* CONFIG_PPC_HAS_HASH_64K */ | 1026 | #endif /* CONFIG_PPC_HAS_HASH_64K */ |
1033 | { | 1027 | { |
1034 | int spp = subpage_protection(pgdir, ea); | 1028 | int spp = subpage_protection(mm, ea); |
1035 | if (access & spp) | 1029 | if (access & spp) |
1036 | rc = -2; | 1030 | rc = -2; |
1037 | else | 1031 | else |
@@ -1121,7 +1115,7 @@ void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize, | |||
1121 | { | 1115 | { |
1122 | unsigned long hash, index, shift, hidx, slot; | 1116 | unsigned long hash, index, shift, hidx, slot; |
1123 | 1117 | ||
1124 | DBG_LOW("flush_hash_page(va=%016x)\n", va); | 1118 | DBG_LOW("flush_hash_page(va=%016lx)\n", va); |
1125 | pte_iterate_hashed_subpages(pte, psize, va, index, shift) { | 1119 | pte_iterate_hashed_subpages(pte, psize, va, index, shift) { |
1126 | hash = hpt_hash(va, shift, ssize); | 1120 | hash = hpt_hash(va, shift, ssize); |
1127 | hidx = __rpte_to_hidx(pte, index); | 1121 | hidx = __rpte_to_hidx(pte, index); |
@@ -1129,7 +1123,7 @@ void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize, | |||
1129 | hash = ~hash; | 1123 | hash = ~hash; |
1130 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | 1124 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; |
1131 | slot += hidx & _PTEIDX_GROUP_IX; | 1125 | slot += hidx & _PTEIDX_GROUP_IX; |
1132 | DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx); | 1126 | DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); |
1133 | ppc_md.hpte_invalidate(slot, va, psize, ssize, local); | 1127 | ppc_md.hpte_invalidate(slot, va, psize, ssize, local); |
1134 | } pte_iterate_hashed_end(); | 1128 | } pte_iterate_hashed_end(); |
1135 | } | 1129 | } |
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
new file mode 100644
index 000000000000..199539882f92
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -0,0 +1,139 @@ | |||
1 | /* | ||
2 | * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later) | ||
3 | * | ||
4 | * Copyright (C) 2003 David Gibson, IBM Corporation. | ||
5 | * | ||
6 | * Based on the IA-32 version: | ||
7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> | ||
8 | */ | ||
9 | |||
10 | #include <linux/mm.h> | ||
11 | #include <linux/hugetlb.h> | ||
12 | #include <asm/pgtable.h> | ||
13 | #include <asm/pgalloc.h> | ||
14 | #include <asm/cacheflush.h> | ||
15 | #include <asm/machdep.h> | ||
16 | |||
17 | int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, | ||
18 | pte_t *ptep, unsigned long trap, int local, int ssize, | ||
19 | unsigned int shift, unsigned int mmu_psize) | ||
20 | { | ||
21 | unsigned long old_pte, new_pte; | ||
22 | unsigned long va, rflags, pa, sz; | ||
23 | long slot; | ||
24 | int err = 1; | ||
25 | |||
26 | BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); | ||
27 | |||
28 | /* Search the Linux page table for a match with va */ | ||
29 | va = hpt_va(ea, vsid, ssize); | ||
30 | |||
31 | /* | ||
32 | * Check the user's access rights to the page. If access should be | ||
33 | * prevented then send the problem up to do_page_fault. | ||
34 | */ | ||
35 | if (unlikely(access & ~pte_val(*ptep))) | ||
36 | goto out; | ||
37 | /* | ||
38 | * At this point, we have a pte (old_pte) which can be used to build | ||
39 | * or update an HPTE. There are 2 cases: | ||
40 | * | ||
41 | * 1. There is a valid (present) pte with no associated HPTE (this is | ||
42 | * the most common case) | ||
43 | * 2. There is a valid (present) pte with an associated HPTE. The | ||
44 | * current values of the pp bits in the HPTE prevent access | ||
45 | * because we are doing software DIRTY bit management and the | ||
46 | * page is currently not DIRTY. | ||
47 | */ | ||
48 | |||
49 | |||
50 | do { | ||
51 | old_pte = pte_val(*ptep); | ||
52 | if (old_pte & _PAGE_BUSY) | ||
53 | goto out; | ||
54 | new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; | ||
55 | } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, | ||
56 | old_pte, new_pte)); | ||
57 | |||
58 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); | ||
59 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ | ||
60 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); | ||
61 | sz = ((1UL) << shift); | ||
62 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
63 | /* No CPU that supports hugepages lacks no-execute, so we | ||
64 | * don't need to worry about that case */ | ||
65 | rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); | ||
66 | |||
67 | /* Check if pte already has an hpte (case 2) */ | ||
68 | if (unlikely(old_pte & _PAGE_HASHPTE)) { | ||
69 | /* There MIGHT be an HPTE for this pte */ | ||
70 | unsigned long hash, slot; | ||
71 | |||
72 | hash = hpt_hash(va, shift, ssize); | ||
73 | if (old_pte & _PAGE_F_SECOND) | ||
74 | hash = ~hash; | ||
75 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
76 | slot += (old_pte & _PAGE_F_GIX) >> 12; | ||
77 | |||
78 | if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, | ||
79 | ssize, local) == -1) | ||
80 | old_pte &= ~_PAGE_HPTEFLAGS; | ||
81 | } | ||
82 | |||
83 | if (likely(!(old_pte & _PAGE_HASHPTE))) { | ||
84 | unsigned long hash = hpt_hash(va, shift, ssize); | ||
85 | unsigned long hpte_group; | ||
86 | |||
87 | pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; | ||
88 | |||
89 | repeat: | ||
90 | hpte_group = ((hash & htab_hash_mask) * | ||
91 | HPTES_PER_GROUP) & ~0x7UL; | ||
92 | |||
93 | /* clear HPTE slot information in the new PTE */ | ||
94 | #ifdef CONFIG_PPC_64K_PAGES | ||
95 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; | ||
96 | #else | ||
97 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
98 | #endif | ||
99 | /* Add in WIMG bits */ | ||
100 | rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | | ||
101 | _PAGE_COHERENT | _PAGE_GUARDED)); | ||
102 | |||
103 | /* Insert into the hash table, primary slot */ | ||
104 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, | ||
105 | mmu_psize, ssize); | ||
106 | |||
107 | /* Primary is full, try the secondary */ | ||
108 | if (unlikely(slot == -1)) { | ||
109 | hpte_group = ((~hash & htab_hash_mask) * | ||
110 | HPTES_PER_GROUP) & ~0x7UL; | ||
111 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, | ||
112 | HPTE_V_SECONDARY, | ||
113 | mmu_psize, ssize); | ||
114 | if (slot == -1) { | ||
115 | if (mftb() & 0x1) | ||
116 | hpte_group = ((hash & htab_hash_mask) * | ||
117 | HPTES_PER_GROUP)&~0x7UL; | ||
118 | |||
119 | ppc_md.hpte_remove(hpte_group); | ||
120 | goto repeat; | ||
121 | } | ||
122 | } | ||
123 | |||
124 | if (unlikely(slot == -2)) | ||
125 | panic("hash_huge_page: pte_insert failed\n"); | ||
126 | |||
127 | new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * No need to use ldarx/stdcx here | ||
132 | */ | ||
133 | *ptep = __pte(new_pte & ~_PAGE_BUSY); | ||
134 | |||
135 | err = 0; | ||
136 | |||
137 | out: | ||
138 | return err; | ||
139 | } | ||
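The core of __hash_page_huge() above is the _PAGE_BUSY claim loop: snapshot the PTE, give up if another CPU is already updating it, and otherwise try to install the snapshot with BUSY and ACCESSED set using compare-and-swap. A C11 sketch of that pattern (bit positions are invented for illustration; the kernel uses __cmpxchg_u64 on the real PTE):

    #include <stdatomic.h>

    #define PAGE_BUSY     (1UL << 0)  /* hypothetical bit layout */
    #define PAGE_ACCESSED (1UL << 1)

    /* Returns 0 with *newv holding the claimed value, -1 if busy. */
    static int claim_pte(_Atomic unsigned long *pte, unsigned long *newv)
    {
            unsigned long old;

            do {
                    old = atomic_load(pte);
                    if (old & PAGE_BUSY)
                            return -1;  /* another CPU owns the update */
                    *newv = old | PAGE_BUSY | PAGE_ACCESSED;
            } while (!atomic_compare_exchange_weak(pte, &old, *newv));

            return 0;
    }

Because BUSY is held while the HPTE is built and cleared by the final plain store (*ptep = __pte(new_pte & ~_PAGE_BUSY)), no ldarx/stdcx is needed on the way out, as the closing comment in the file notes.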
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 90df6ffe3a43..9bb249c3046e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -7,29 +7,18 @@ | |||
7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> | 7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/fs.h> | ||
12 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
13 | #include <linux/hugetlb.h> | 11 | #include <linux/io.h> |
14 | #include <linux/pagemap.h> | ||
15 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
16 | #include <linux/err.h> | 13 | #include <linux/hugetlb.h> |
17 | #include <linux/sysctl.h> | 14 | #include <asm/pgtable.h> |
18 | #include <asm/mman.h> | ||
19 | #include <asm/pgalloc.h> | 15 | #include <asm/pgalloc.h> |
20 | #include <asm/tlb.h> | 16 | #include <asm/tlb.h> |
21 | #include <asm/tlbflush.h> | ||
22 | #include <asm/mmu_context.h> | ||
23 | #include <asm/machdep.h> | ||
24 | #include <asm/cputable.h> | ||
25 | #include <asm/spu.h> | ||
26 | 17 | ||
27 | #define PAGE_SHIFT_64K 16 | 18 | #define PAGE_SHIFT_64K 16 |
28 | #define PAGE_SHIFT_16M 24 | 19 | #define PAGE_SHIFT_16M 24 |
29 | #define PAGE_SHIFT_16G 34 | 20 | #define PAGE_SHIFT_16G 34 |
30 | 21 | ||
31 | #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) | ||
32 | #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) | ||
33 | #define MAX_NUMBER_GPAGES 1024 | 22 | #define MAX_NUMBER_GPAGES 1024 |
34 | 23 | ||
35 | /* Tracks the 16G pages after the device tree is scanned and before the | 24 | /* Tracks the 16G pages after the device tree is scanned and before the |
@@ -37,53 +26,17 @@ | |||
37 | static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; | 26 | static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; |
38 | static unsigned nr_gpages; | 27 | static unsigned nr_gpages; |
39 | 28 | ||
40 | /* Array of valid huge page sizes - non-zero value(hugepte_shift) is | ||
41 | * stored for the huge page sizes that are valid. | ||
42 | */ | ||
43 | unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ | ||
44 | |||
45 | #define hugepte_shift mmu_huge_psizes | ||
46 | #define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) | ||
47 | #define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) | ||
48 | |||
49 | #define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ | ||
50 | + hugepte_shift[psize]) | ||
51 | #define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) | ||
52 | #define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) | ||
53 | |||
54 | /* Subtract one from array size because we don't need a cache for 4K since | ||
55 | * is not a huge page size */ | ||
56 | #define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1) | ||
57 | #define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) | ||
58 | |||
59 | static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { | ||
60 | [MMU_PAGE_64K] = "hugepte_cache_64K", | ||
61 | [MMU_PAGE_1M] = "hugepte_cache_1M", | ||
62 | [MMU_PAGE_16M] = "hugepte_cache_16M", | ||
63 | [MMU_PAGE_16G] = "hugepte_cache_16G", | ||
64 | }; | ||
65 | |||
66 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() | 29 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() |
67 | * will choke on pointers to hugepte tables, which is handy for | 30 | * will choke on pointers to hugepte tables, which is handy for |
68 | * catching screwups early. */ | 31 | * catching screwups early. */ |
69 | #define HUGEPD_OK 0x1 | ||
70 | |||
71 | typedef struct { unsigned long pd; } hugepd_t; | ||
72 | |||
73 | #define hugepd_none(hpd) ((hpd).pd == 0) | ||
74 | 32 | ||
75 | static inline int shift_to_mmu_psize(unsigned int shift) | 33 | static inline int shift_to_mmu_psize(unsigned int shift) |
76 | { | 34 | { |
77 | switch (shift) { | 35 | int psize; |
78 | #ifndef CONFIG_PPC_64K_PAGES | 36 | |
79 | case PAGE_SHIFT_64K: | 37 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) |
80 | return MMU_PAGE_64K; | 38 | if (mmu_psize_defs[psize].shift == shift) |
81 | #endif | 39 | return psize; |
82 | case PAGE_SHIFT_16M: | ||
83 | return MMU_PAGE_16M; | ||
84 | case PAGE_SHIFT_16G: | ||
85 | return MMU_PAGE_16G; | ||
86 | } | ||
87 | return -1; | 40 | return -1; |
88 | } | 41 | } |
89 | 42 | ||
@@ -94,71 +47,126 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) | |||
94 | BUG(); | 47 | BUG(); |
95 | } | 48 | } |
96 | 49 | ||
50 | #define hugepd_none(hpd) ((hpd).pd == 0) | ||
51 | |||
97 | static inline pte_t *hugepd_page(hugepd_t hpd) | 52 | static inline pte_t *hugepd_page(hugepd_t hpd) |
98 | { | 53 | { |
99 | BUG_ON(!(hpd.pd & HUGEPD_OK)); | 54 | BUG_ON(!hugepd_ok(hpd)); |
100 | return (pte_t *)(hpd.pd & ~HUGEPD_OK); | 55 | return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000); |
56 | } | ||
57 | |||
58 | static inline unsigned int hugepd_shift(hugepd_t hpd) | ||
59 | { | ||
60 | return hpd.pd & HUGEPD_SHIFT_MASK; | ||
101 | } | 61 | } |
102 | 62 | ||
103 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, | 63 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift) |
104 | struct hstate *hstate) | ||
105 | { | 64 | { |
106 | unsigned int shift = huge_page_shift(hstate); | 65 | unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); |
107 | int psize = shift_to_mmu_psize(shift); | ||
108 | unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); | ||
109 | pte_t *dir = hugepd_page(*hpdp); | 66 | pte_t *dir = hugepd_page(*hpdp); |
110 | 67 | ||
111 | return dir + idx; | 68 | return dir + idx; |
112 | } | 69 | } |
113 | 70 | ||
71 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) | ||
72 | { | ||
73 | pgd_t *pg; | ||
74 | pud_t *pu; | ||
75 | pmd_t *pm; | ||
76 | hugepd_t *hpdp = NULL; | ||
77 | unsigned pdshift = PGDIR_SHIFT; | ||
78 | |||
79 | if (shift) | ||
80 | *shift = 0; | ||
81 | |||
82 | pg = pgdir + pgd_index(ea); | ||
83 | if (is_hugepd(pg)) { | ||
84 | hpdp = (hugepd_t *)pg; | ||
85 | } else if (!pgd_none(*pg)) { | ||
86 | pdshift = PUD_SHIFT; | ||
87 | pu = pud_offset(pg, ea); | ||
88 | if (is_hugepd(pu)) | ||
89 | hpdp = (hugepd_t *)pu; | ||
90 | else if (!pud_none(*pu)) { | ||
91 | pdshift = PMD_SHIFT; | ||
92 | pm = pmd_offset(pu, ea); | ||
93 | if (is_hugepd(pm)) | ||
94 | hpdp = (hugepd_t *)pm; | ||
95 | else if (!pmd_none(*pm)) { | ||
96 | return pte_offset_map(pm, ea); | ||
97 | } | ||
98 | } | ||
99 | } | ||
100 | |||
101 | if (!hpdp) | ||
102 | return NULL; | ||
103 | |||
104 | if (shift) | ||
105 | *shift = hugepd_shift(*hpdp); | ||
106 | return hugepte_offset(hpdp, ea, pdshift); | ||
107 | } | ||
108 | |||
109 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
110 | { | ||
111 | return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); | ||
112 | } | ||
113 | |||
114 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, | 114 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, |
115 | unsigned long address, unsigned int psize) | 115 | unsigned long address, unsigned pdshift, unsigned pshift) |
116 | { | 116 | { |
117 | pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], | 117 | pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), |
118 | GFP_KERNEL|__GFP_REPEAT); | 118 | GFP_KERNEL|__GFP_REPEAT); |
119 | |||
120 | BUG_ON(pshift > HUGEPD_SHIFT_MASK); | ||
121 | BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); | ||
119 | 122 | ||
120 | if (! new) | 123 | if (! new) |
121 | return -ENOMEM; | 124 | return -ENOMEM; |
122 | 125 | ||
123 | spin_lock(&mm->page_table_lock); | 126 | spin_lock(&mm->page_table_lock); |
124 | if (!hugepd_none(*hpdp)) | 127 | if (!hugepd_none(*hpdp)) |
125 | kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new); | 128 | kmem_cache_free(PGT_CACHE(pdshift - pshift), new); |
126 | else | 129 | else |
127 | hpdp->pd = (unsigned long)new | HUGEPD_OK; | 130 | hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; |
128 | spin_unlock(&mm->page_table_lock); | 131 | spin_unlock(&mm->page_table_lock); |
129 | return 0; | 132 | return 0; |
130 | } | 133 | } |
131 | 134 | ||
132 | 135 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) | |
133 | static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate) | ||
134 | { | ||
135 | if (huge_page_shift(hstate) < PUD_SHIFT) | ||
136 | return pud_offset(pgd, addr); | ||
137 | else | ||
138 | return (pud_t *) pgd; | ||
139 | } | ||
140 | static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, | ||
141 | struct hstate *hstate) | ||
142 | { | ||
143 | if (huge_page_shift(hstate) < PUD_SHIFT) | ||
144 | return pud_alloc(mm, pgd, addr); | ||
145 | else | ||
146 | return (pud_t *) pgd; | ||
147 | } | ||
148 | static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) | ||
149 | { | 136 | { |
150 | if (huge_page_shift(hstate) < PMD_SHIFT) | 137 | pgd_t *pg; |
151 | return pmd_offset(pud, addr); | 138 | pud_t *pu; |
152 | else | 139 | pmd_t *pm; |
153 | return (pmd_t *) pud; | 140 | hugepd_t *hpdp = NULL; |
154 | } | 141 | unsigned pshift = __ffs(sz); |
155 | static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, | 142 | unsigned pdshift = PGDIR_SHIFT; |
156 | struct hstate *hstate) | 143 | |
157 | { | 144 | addr &= ~(sz-1); |
158 | if (huge_page_shift(hstate) < PMD_SHIFT) | 145 | |
159 | return pmd_alloc(mm, pud, addr); | 146 | pg = pgd_offset(mm, addr); |
160 | else | 147 | if (pshift >= PUD_SHIFT) { |
161 | return (pmd_t *) pud; | 148 | hpdp = (hugepd_t *)pg; |
149 | } else { | ||
150 | pdshift = PUD_SHIFT; | ||
151 | pu = pud_alloc(mm, pg, addr); | ||
152 | if (pshift >= PMD_SHIFT) { | ||
153 | hpdp = (hugepd_t *)pu; | ||
154 | } else { | ||
155 | pdshift = PMD_SHIFT; | ||
156 | pm = pmd_alloc(mm, pu, addr); | ||
157 | hpdp = (hugepd_t *)pm; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | if (!hpdp) | ||
162 | return NULL; | ||
163 | |||
164 | BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); | ||
165 | |||
166 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) | ||
167 | return NULL; | ||
168 | |||
169 | return hugepte_offset(hpdp, addr, pdshift); | ||
162 | } | 170 | } |
163 | 171 | ||
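huge_pte_alloc() and hugepd_page() above pack two things into the single pd word: the kernel-virtual address of the hugepte table (with the redundant top bit dropped, since kernel pointers start at 0xc...) and the huge page shift in the low bits. A sketch of that encoding (the value of HUGEPD_SHIFT_MASK is assumed here for illustration):

    #include <assert.h>
    #include <stdint.h>

    #define HUGEPD_SHIFT_MASK 0x3fUL  /* assumed width of the shift field */

    static uint64_t hugepd_encode(uint64_t table, unsigned int pshift)
    {
            /* the table allocation is aligned, so the low bits are free */
            assert((table & HUGEPD_SHIFT_MASK) == 0);
            return (table & ~0x8000000000000000UL) | pshift;
    }

    static uint64_t hugepd_table(uint64_t pd)
    {
            return (pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000UL;
    }

    static unsigned int hugepd_pshift(uint64_t pd)
    {
            return pd & HUGEPD_SHIFT_MASK;
    }

hugepd_shift() is what lets hugepte_offset() index the table without consulting an hstate: the stored shift says how many address bits each hugepte covers.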
164 | /* Build list of addresses of gigantic pages. This function is used in early | 172 | /* Build list of addresses of gigantic pages. This function is used in early |
@@ -192,94 +200,38 @@ int alloc_bootmem_huge_page(struct hstate *hstate) | |||
192 | return 1; | 200 | return 1; |
193 | } | 201 | } |
194 | 202 | ||
195 | |||
196 | /* Modelled after find_linux_pte() */ | ||
197 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
198 | { | ||
199 | pgd_t *pg; | ||
200 | pud_t *pu; | ||
201 | pmd_t *pm; | ||
202 | |||
203 | unsigned int psize; | ||
204 | unsigned int shift; | ||
205 | unsigned long sz; | ||
206 | struct hstate *hstate; | ||
207 | psize = get_slice_psize(mm, addr); | ||
208 | shift = mmu_psize_to_shift(psize); | ||
209 | sz = ((1UL) << shift); | ||
210 | hstate = size_to_hstate(sz); | ||
211 | |||
212 | addr &= hstate->mask; | ||
213 | |||
214 | pg = pgd_offset(mm, addr); | ||
215 | if (!pgd_none(*pg)) { | ||
216 | pu = hpud_offset(pg, addr, hstate); | ||
217 | if (!pud_none(*pu)) { | ||
218 | pm = hpmd_offset(pu, addr, hstate); | ||
219 | if (!pmd_none(*pm)) | ||
220 | return hugepte_offset((hugepd_t *)pm, addr, | ||
221 | hstate); | ||
222 | } | ||
223 | } | ||
224 | |||
225 | return NULL; | ||
226 | } | ||
227 | |||
228 | pte_t *huge_pte_alloc(struct mm_struct *mm, | ||
229 | unsigned long addr, unsigned long sz) | ||
230 | { | ||
231 | pgd_t *pg; | ||
232 | pud_t *pu; | ||
233 | pmd_t *pm; | ||
234 | hugepd_t *hpdp = NULL; | ||
235 | struct hstate *hstate; | ||
236 | unsigned int psize; | ||
237 | hstate = size_to_hstate(sz); | ||
238 | |||
239 | psize = get_slice_psize(mm, addr); | ||
240 | BUG_ON(!mmu_huge_psizes[psize]); | ||
241 | |||
242 | addr &= hstate->mask; | ||
243 | |||
244 | pg = pgd_offset(mm, addr); | ||
245 | pu = hpud_alloc(mm, pg, addr, hstate); | ||
246 | |||
247 | if (pu) { | ||
248 | pm = hpmd_alloc(mm, pu, addr, hstate); | ||
249 | if (pm) | ||
250 | hpdp = (hugepd_t *)pm; | ||
251 | } | ||
252 | |||
253 | if (! hpdp) | ||
254 | return NULL; | ||
255 | |||
256 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) | ||
257 | return NULL; | ||
258 | |||
259 | return hugepte_offset(hpdp, addr, hstate); | ||
260 | } | ||
261 | |||
262 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | 203 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) |
263 | { | 204 | { |
264 | return 0; | 205 | return 0; |
265 | } | 206 | } |
266 | 207 | ||
267 | static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, | 208 | static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, |
268 | unsigned int psize) | 209 | unsigned long start, unsigned long end, |
210 | unsigned long floor, unsigned long ceiling) | ||
269 | { | 211 | { |
270 | pte_t *hugepte = hugepd_page(*hpdp); | 212 | pte_t *hugepte = hugepd_page(*hpdp); |
213 | unsigned shift = hugepd_shift(*hpdp); | ||
214 | unsigned long pdmask = ~((1UL << pdshift) - 1); | ||
215 | |||
216 | start &= pdmask; | ||
217 | if (start < floor) | ||
218 | return; | ||
219 | if (ceiling) { | ||
220 | ceiling &= pdmask; | ||
221 | if (! ceiling) | ||
222 | return; | ||
223 | } | ||
224 | if (end - 1 > ceiling - 1) | ||
225 | return; | ||
271 | 226 | ||
272 | hpdp->pd = 0; | 227 | hpdp->pd = 0; |
273 | tlb->need_flush = 1; | 228 | tlb->need_flush = 1; |
274 | pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, | 229 | pgtable_free_tlb(tlb, hugepte, pdshift - shift); |
275 | HUGEPTE_CACHE_NUM+psize-1, | ||
276 | PGF_CACHENUM_MASK)); | ||
277 | } | 230 | } |
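The guards added at the top of free_hugepd_range() are what let the bottom-up walk stay safe without the top-level short-cut (see the comment in hugetlb_free_pgd_range() below). Restated as a standalone predicate, a sketch for illustration only, using the same names as the function above:

	/* True when the hugepd directory at this level maps no addresses
	 * outside [floor, ceiling), so clearing and freeing it is safe. */
	static bool hugepd_is_freeable(unsigned long start, unsigned long end,
				       unsigned long floor, unsigned long ceiling,
				       int pdshift)
	{
		unsigned long pdmask = ~((1UL << pdshift) - 1);

		start &= pdmask;
		if (start < floor)
			return false;	/* directory also maps below floor */
		if (ceiling) {
			ceiling &= pdmask;
			if (!ceiling)
				return false;	/* rounded past the top */
		}
		/* end == 0 or ceiling == 0 mean "top of address space";
		 * the "- 1" forms keep the comparison right in that case */
		return end - 1 <= ceiling - 1;
	}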
278 | 231 | ||
279 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | 232 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, |
280 | unsigned long addr, unsigned long end, | 233 | unsigned long addr, unsigned long end, |
281 | unsigned long floor, unsigned long ceiling, | 234 | unsigned long floor, unsigned long ceiling) |
282 | unsigned int psize) | ||
283 | { | 235 | { |
284 | pmd_t *pmd; | 236 | pmd_t *pmd; |
285 | unsigned long next; | 237 | unsigned long next; |
@@ -291,7 +243,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | |||
291 | next = pmd_addr_end(addr, end); | 243 | next = pmd_addr_end(addr, end); |
292 | if (pmd_none(*pmd)) | 244 | if (pmd_none(*pmd)) |
293 | continue; | 245 | continue; |
294 | free_hugepte_range(tlb, (hugepd_t *)pmd, psize); | 246 | free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT, |
247 | addr, next, floor, ceiling); | ||
295 | } while (pmd++, addr = next, addr != end); | 248 | } while (pmd++, addr = next, addr != end); |
296 | 249 | ||
297 | start &= PUD_MASK; | 250 | start &= PUD_MASK; |
@@ -317,23 +270,19 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |||
317 | pud_t *pud; | 270 | pud_t *pud; |
318 | unsigned long next; | 271 | unsigned long next; |
319 | unsigned long start; | 272 | unsigned long start; |
320 | unsigned int shift; | ||
321 | unsigned int psize = get_slice_psize(tlb->mm, addr); | ||
322 | shift = mmu_psize_to_shift(psize); | ||
323 | 273 | ||
324 | start = addr; | 274 | start = addr; |
325 | pud = pud_offset(pgd, addr); | 275 | pud = pud_offset(pgd, addr); |
326 | do { | 276 | do { |
327 | next = pud_addr_end(addr, end); | 277 | next = pud_addr_end(addr, end); |
328 | if (shift < PMD_SHIFT) { | 278 | if (!is_hugepd(pud)) { |
329 | if (pud_none_or_clear_bad(pud)) | 279 | if (pud_none_or_clear_bad(pud)) |
330 | continue; | 280 | continue; |
331 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, | 281 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, |
332 | ceiling, psize); | 282 | ceiling); |
333 | } else { | 283 | } else { |
334 | if (pud_none(*pud)) | 284 | free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT, |
335 | continue; | 285 | addr, next, floor, ceiling); |
336 | free_hugepte_range(tlb, (hugepd_t *)pud, psize); | ||
337 | } | 286 | } |
338 | } while (pud++, addr = next, addr != end); | 287 | } while (pud++, addr = next, addr != end); |
339 | 288 | ||
@@ -364,121 +313,56 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, | |||
364 | { | 313 | { |
365 | pgd_t *pgd; | 314 | pgd_t *pgd; |
366 | unsigned long next; | 315 | unsigned long next; |
367 | unsigned long start; | ||
368 | 316 | ||
369 | /* | 317 | /* |
370 | * Comments below taken from the normal free_pgd_range(). They | 318 | * Because there are a number of different possible pagetable
371 | * apply here too. The tests against HUGEPD_MASK below are | 319 | * layouts for hugepage ranges, we limit knowledge of how |
372 | * essential, because we *don't* test for this at the bottom | 320 | * things should be laid out to the allocation path |
373 | * level. Without them we'll attempt to free a hugepte table | 321 | * (huge_pte_alloc(), above). Everything else works out the |
374 | * when we unmap just part of it, even if there are other | 322 | * structure as it goes from information in the hugepd |
375 | * active mappings using it. | 323 | * pointers. That means that we can't here use the |
376 | * | 324 | * optimization used in the normal page free_pgd_range(), of |
377 | * The next few lines have given us lots of grief... | 325 | * checking whether we're actually covering a large enough |
378 | * | 326 | * range to have to do anything at the top level of the walk |
379 | * Why are we testing HUGEPD* at this top level? Because | 327 | * instead of at the bottom. |
380 | * often there will be no work to do at all, and we'd prefer | ||
381 | * not to go all the way down to the bottom just to discover | ||
382 | * that. | ||
383 | * | 328 | * |
384 | * Why all these "- 1"s? Because 0 represents both the bottom | 329 | * To make sense of this, you should probably go read the big |
385 | * of the address space and the top of it (using -1 for the | 330 | * block comment at the top of the normal free_pgd_range(), |
386 | * top wouldn't help much: the masks would do the wrong thing). | 331 | * too. |
387 | * The rule is that addr 0 and floor 0 refer to the bottom of | ||
388 | * the address space, but end 0 and ceiling 0 refer to the top | ||
389 | * Comparisons need to use "end - 1" and "ceiling - 1" (though | ||
390 | * that end 0 case should be mythical). | ||
391 | * | ||
392 | * Wherever addr is brought up or ceiling brought down, we | ||
393 | * must be careful to reject "the opposite 0" before it | ||
394 | * confuses the subsequent tests. But what about where end is | ||
395 | * brought down by HUGEPD_SIZE below? no, end can't go down to | ||
396 | * 0 there. | ||
397 | * | ||
398 | * Whereas we round start (addr) and ceiling down, by different | ||
399 | * masks at different levels, in order to test whether a table | ||
400 | * now has no other vmas using it, so can be freed, we don't | ||
401 | * bother to round floor or end up - the tests don't need that. | ||
402 | */ | 332 | */ |
403 | unsigned int psize = get_slice_psize(tlb->mm, addr); | ||
404 | |||
405 | addr &= HUGEPD_MASK(psize); | ||
406 | if (addr < floor) { | ||
407 | addr += HUGEPD_SIZE(psize); | ||
408 | if (!addr) | ||
409 | return; | ||
410 | } | ||
411 | if (ceiling) { | ||
412 | ceiling &= HUGEPD_MASK(psize); | ||
413 | if (!ceiling) | ||
414 | return; | ||
415 | } | ||
416 | if (end - 1 > ceiling - 1) | ||
417 | end -= HUGEPD_SIZE(psize); | ||
418 | if (addr > end - 1) | ||
419 | return; | ||
420 | 333 | ||
421 | start = addr; | ||
422 | pgd = pgd_offset(tlb->mm, addr); | 334 | pgd = pgd_offset(tlb->mm, addr); |
423 | do { | 335 | do { |
424 | psize = get_slice_psize(tlb->mm, addr); | ||
425 | BUG_ON(!mmu_huge_psizes[psize]); | ||
426 | next = pgd_addr_end(addr, end); | 336 | next = pgd_addr_end(addr, end); |
427 | if (mmu_psize_to_shift(psize) < PUD_SHIFT) { | 337 | if (!is_hugepd(pgd)) { |
428 | if (pgd_none_or_clear_bad(pgd)) | 338 | if (pgd_none_or_clear_bad(pgd)) |
429 | continue; | 339 | continue; |
430 | hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); | 340 | hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); |
431 | } else { | 341 | } else { |
432 | if (pgd_none(*pgd)) | 342 | free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, |
433 | continue; | 343 | addr, next, floor, ceiling); |
434 | free_hugepte_range(tlb, (hugepd_t *)pgd, psize); | ||
435 | } | 344 | } |
436 | } while (pgd++, addr = next, addr != end); | 345 | } while (pgd++, addr = next, addr != end); |
437 | } | 346 | } |
438 | 347 | ||
439 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | ||
440 | pte_t *ptep, pte_t pte) | ||
441 | { | ||
442 | if (pte_present(*ptep)) { | ||
443 | /* We open-code pte_clear because we need to pass the right | ||
444 | * argument to hpte_need_flush (huge / !huge). Might not be | ||
445 | * necessary anymore if we make hpte_need_flush() get the | ||
446 | * page size from the slices | ||
447 | */ | ||
448 | unsigned int psize = get_slice_psize(mm, addr); | ||
449 | unsigned int shift = mmu_psize_to_shift(psize); | ||
450 | unsigned long sz = ((1UL) << shift); | ||
451 | struct hstate *hstate = size_to_hstate(sz); | ||
452 | pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1); | ||
453 | } | ||
454 | *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); | ||
455 | } | ||
456 | |||
457 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||
458 | pte_t *ptep) | ||
459 | { | ||
460 | unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); | ||
461 | return __pte(old); | ||
462 | } | ||
463 | |||
464 | struct page * | 348 | struct page * |
465 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 349 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
466 | { | 350 | { |
467 | pte_t *ptep; | 351 | pte_t *ptep; |
468 | struct page *page; | 352 | struct page *page; |
469 | unsigned int mmu_psize = get_slice_psize(mm, address); | 353 | unsigned shift; |
354 | unsigned long mask; | ||
355 | |||
356 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); | ||
470 | 357 | ||
471 | /* Verify it is a huge page else bail. */ | 358 | /* Verify it is a huge page else bail. */ |
472 | if (!mmu_huge_psizes[mmu_psize]) | 359 | if (!ptep || !shift) |
473 | return ERR_PTR(-EINVAL); | 360 | return ERR_PTR(-EINVAL); |
474 | 361 | ||
475 | ptep = huge_pte_offset(mm, address); | 362 | mask = (1UL << shift) - 1; |
476 | page = pte_page(*ptep); | 363 | page = pte_page(*ptep); |
477 | if (page) { | 364 | if (page) |
478 | unsigned int shift = mmu_psize_to_shift(mmu_psize); | 365 | page += (address & mask) / PAGE_SIZE; |
479 | unsigned long sz = ((1UL) << shift); | ||
480 | page += (address % sz) / PAGE_SIZE; | ||
481 | } | ||
482 | 366 | ||
483 | return page; | 367 | return page; |
484 | } | 368 | } |
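Since find_linux_pte_or_hugepte() now reports the hugepage shift alongside the pte, the subpage arithmetic needs no hstate lookup. A worked example with assumed values (16M hugepage, 4K base pages; the address is arbitrary):

	unsigned shift = 24;				/* 16M hugepage, assumed */
	unsigned long mask = (1UL << shift) - 1;	/* 0x00ffffff */
	unsigned long address = 0x10a34000UL;		/* example only */
	/* the offset into the hugepage is 0xa34000, i.e. subpage 2612,
	 * so page is advanced 2612 struct pages past pte_page(*ptep) */
	unsigned long subpage = (address & mask) / PAGE_SIZE;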
@@ -501,6 +385,82 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
501 | return NULL; | 385 | return NULL; |
502 | } | 386 | } |
503 | 387 | ||
388 | static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | ||
389 | unsigned long end, int write, struct page **pages, int *nr) | ||
390 | { | ||
391 | unsigned long mask; | ||
392 | unsigned long pte_end; | ||
393 | struct page *head, *page; | ||
394 | pte_t pte; | ||
395 | int refs; | ||
396 | |||
397 | pte_end = (addr + sz) & ~(sz-1); | ||
398 | if (pte_end < end) | ||
399 | end = pte_end; | ||
400 | |||
401 | pte = *ptep; | ||
402 | mask = _PAGE_PRESENT | _PAGE_USER; | ||
403 | if (write) | ||
404 | mask |= _PAGE_RW; | ||
405 | |||
406 | if ((pte_val(pte) & mask) != mask) | ||
407 | return 0; | ||
408 | |||
409 | /* hugepages are never "special" */ | ||
410 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
411 | |||
412 | refs = 0; | ||
413 | head = pte_page(pte); | ||
414 | |||
415 | page = head + ((addr & (sz-1)) >> PAGE_SHIFT); | ||
416 | do { | ||
417 | VM_BUG_ON(compound_head(page) != head); | ||
418 | pages[*nr] = page; | ||
419 | (*nr)++; | ||
420 | page++; | ||
421 | refs++; | ||
422 | } while (addr += PAGE_SIZE, addr != end); | ||
423 | |||
424 | if (!page_cache_add_speculative(head, refs)) { | ||
425 | *nr -= refs; | ||
426 | return 0; | ||
427 | } | ||
428 | |||
429 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
430 | /* Could be optimized better */ | ||
431 | while (*nr) { | ||
432 | put_page(page); | ||
433 | (*nr)--; | ||
434 | } | ||
435 | } | ||
436 | |||
437 | return 1; | ||
438 | } | ||
439 | |||
440 | static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, | ||
441 | unsigned long sz) | ||
442 | { | ||
443 | unsigned long __boundary = (addr + sz) & ~(sz-1); | ||
444 | return (__boundary - 1 < end - 1) ? __boundary : end; | ||
445 | } | ||
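hugepte_addr_end() is the hugepage analogue of pmd_addr_end(); the "- 1" comparison keeps it correct even if addr + sz wraps to 0 at the very top of the address space. With assumed values:

	unsigned long sz = 1UL << 24;		/* 16M step, assumed */
	unsigned long addr = 0x10000000UL, end = 0x12800000UL;
	unsigned long boundary = (addr + sz) & ~(sz - 1);	/* 0x11000000 */
	/* boundary - 1 < end - 1, so the walk advances to the boundary;
	 * had addr + sz wrapped to 0, boundary - 1 would be ~0UL and
	 * end would (correctly) be chosen instead */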
446 | |||
447 | int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, | ||
448 | unsigned long addr, unsigned long end, | ||
449 | int write, struct page **pages, int *nr) | ||
450 | { | ||
451 | pte_t *ptep; | ||
452 | unsigned long sz = 1UL << hugepd_shift(*hugepd); | ||
453 | unsigned long next; | ||
454 | |||
455 | ptep = hugepte_offset(hugepd, addr, pdshift); | ||
456 | do { | ||
457 | next = hugepte_addr_end(addr, end, sz); | ||
458 | if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) | ||
459 | return 0; | ||
460 | } while (ptep++, addr = next, addr != end); | ||
461 | |||
462 | return 1; | ||
463 | } | ||
504 | 464 | ||
505 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | 465 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
506 | unsigned long len, unsigned long pgoff, | 466 | unsigned long len, unsigned long pgoff, |
@@ -509,8 +469,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
509 | struct hstate *hstate = hstate_file(file); | 469 | struct hstate *hstate = hstate_file(file); |
510 | int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); | 470 | int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); |
511 | 471 | ||
512 | if (!mmu_huge_psizes[mmu_psize]) | ||
513 | return -EINVAL; | ||
514 | return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); | 472 | return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); |
515 | } | 473 | } |
516 | 474 | ||
@@ -521,229 +479,46 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) | |||
521 | return 1UL << mmu_psize_to_shift(psize); | 479 | return 1UL << mmu_psize_to_shift(psize); |
522 | } | 480 | } |
523 | 481 | ||
524 | /* | 482 | static int __init add_huge_page_size(unsigned long long size) |
525 | * Called by asm hashtable.S for doing lazy icache flush | ||
526 | */ | ||
527 | static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, | ||
528 | pte_t pte, int trap, unsigned long sz) | ||
529 | { | 483 | { |
530 | struct page *page; | 484 | int shift = __ffs(size); |
531 | int i; | 485 | int mmu_psize; |
532 | |||
533 | if (!pfn_valid(pte_pfn(pte))) | ||
534 | return rflags; | ||
535 | |||
536 | page = pte_page(pte); | ||
537 | |||
538 | /* page is dirty */ | ||
539 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { | ||
540 | if (trap == 0x400) { | ||
541 | for (i = 0; i < (sz / PAGE_SIZE); i++) | ||
542 | __flush_dcache_icache(page_address(page+i)); | ||
543 | set_bit(PG_arch_1, &page->flags); | ||
544 | } else { | ||
545 | rflags |= HPTE_R_N; | ||
546 | } | ||
547 | } | ||
548 | return rflags; | ||
549 | } | ||
550 | 486 | ||
551 | int hash_huge_page(struct mm_struct *mm, unsigned long access, | 487 | /* Check that it is a page size supported by the hardware and |
552 | unsigned long ea, unsigned long vsid, int local, | 488 | * that it fits within pagetable and slice limits. */ |
553 | unsigned long trap) | 489 | if (!is_power_of_2(size) |
554 | { | 490 | || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) |
555 | pte_t *ptep; | 491 | return -EINVAL; |
556 | unsigned long old_pte, new_pte; | ||
557 | unsigned long va, rflags, pa, sz; | ||
558 | long slot; | ||
559 | int err = 1; | ||
560 | int ssize = user_segment_size(ea); | ||
561 | unsigned int mmu_psize; | ||
562 | int shift; | ||
563 | mmu_psize = get_slice_psize(mm, ea); | ||
564 | |||
565 | if (!mmu_huge_psizes[mmu_psize]) | ||
566 | goto out; | ||
567 | ptep = huge_pte_offset(mm, ea); | ||
568 | |||
569 | /* Search the Linux page table for a match with va */ | ||
570 | va = hpt_va(ea, vsid, ssize); | ||
571 | 492 | ||
572 | /* | 493 | if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) |
573 | * If no pte found or not present, send the problem up to | 494 | return -EINVAL; |
574 | * do_page_fault | ||
575 | */ | ||
576 | if (unlikely(!ptep || pte_none(*ptep))) | ||
577 | goto out; | ||
578 | 495 | ||
579 | /* | 496 | #ifdef CONFIG_SPU_FS_64K_LS |
580 | * Check the user's access rights to the page. If access should be | 497 | /* Disable support for 64K huge pages when 64K SPU local store |
581 | * prevented then send the problem up to do_page_fault. | 498 | * support is enabled as the current implementation conflicts. |
582 | */ | ||
583 | if (unlikely(access & ~pte_val(*ptep))) | ||
584 | goto out; | ||
585 | /* | ||
586 | * At this point, we have a pte (old_pte) which can be used to build | ||
587 | * or update an HPTE. There are 2 cases: | ||
588 | * | ||
589 | * 1. There is a valid (present) pte with no associated HPTE (this is | ||
590 | * the most common case) | ||
591 | * 2. There is a valid (present) pte with an associated HPTE. The | ||
592 | * current values of the pp bits in the HPTE prevent access | ||
593 | * because we are doing software DIRTY bit management and the | ||
594 | * page is currently not DIRTY. | ||
595 | */ | 499 | */ |
500 | if (shift == PAGE_SHIFT_64K) | ||
501 | return -EINVAL; | ||
502 | #endif /* CONFIG_SPU_FS_64K_LS */ | ||
596 | 503 | ||
504 | BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); | ||
597 | 505 | ||
598 | do { | 506 | /* Return if huge page size has already been setup */ |
599 | old_pte = pte_val(*ptep); | 507 | if (size_to_hstate(size)) |
600 | if (old_pte & _PAGE_BUSY) | 508 | return 0; |
601 | goto out; | ||
602 | new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; | ||
603 | } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, | ||
604 | old_pte, new_pte)); | ||
605 | |||
606 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); | ||
607 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ | ||
608 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); | ||
609 | shift = mmu_psize_to_shift(mmu_psize); | ||
610 | sz = ((1UL) << shift); | ||
611 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
612 | /* No CPU has hugepages but lacks no execute, so we | ||
613 | * don't need to worry about that case */ | ||
614 | rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), | ||
615 | trap, sz); | ||
616 | |||
617 | /* Check if pte already has an hpte (case 2) */ | ||
618 | if (unlikely(old_pte & _PAGE_HASHPTE)) { | ||
619 | /* There MIGHT be an HPTE for this pte */ | ||
620 | unsigned long hash, slot; | ||
621 | |||
622 | hash = hpt_hash(va, shift, ssize); | ||
623 | if (old_pte & _PAGE_F_SECOND) | ||
624 | hash = ~hash; | ||
625 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
626 | slot += (old_pte & _PAGE_F_GIX) >> 12; | ||
627 | |||
628 | if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, | ||
629 | ssize, local) == -1) | ||
630 | old_pte &= ~_PAGE_HPTEFLAGS; | ||
631 | } | ||
632 | |||
633 | if (likely(!(old_pte & _PAGE_HASHPTE))) { | ||
634 | unsigned long hash = hpt_hash(va, shift, ssize); | ||
635 | unsigned long hpte_group; | ||
636 | |||
637 | pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; | ||
638 | |||
639 | repeat: | ||
640 | hpte_group = ((hash & htab_hash_mask) * | ||
641 | HPTES_PER_GROUP) & ~0x7UL; | ||
642 | |||
643 | /* clear HPTE slot information in new PTE */ | ||
644 | #ifdef CONFIG_PPC_64K_PAGES | ||
645 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; | ||
646 | #else | ||
647 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
648 | #endif | ||
649 | /* Add in WIMG bits */ | ||
650 | rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | | ||
651 | _PAGE_COHERENT | _PAGE_GUARDED)); | ||
652 | |||
653 | /* Insert into the hash table, primary slot */ | ||
654 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, | ||
655 | mmu_psize, ssize); | ||
656 | |||
657 | /* Primary is full, try the secondary */ | ||
658 | if (unlikely(slot == -1)) { | ||
659 | hpte_group = ((~hash & htab_hash_mask) * | ||
660 | HPTES_PER_GROUP) & ~0x7UL; | ||
661 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, | ||
662 | HPTE_V_SECONDARY, | ||
663 | mmu_psize, ssize); | ||
664 | if (slot == -1) { | ||
665 | if (mftb() & 0x1) | ||
666 | hpte_group = ((hash & htab_hash_mask) * | ||
667 | HPTES_PER_GROUP)&~0x7UL; | ||
668 | |||
669 | ppc_md.hpte_remove(hpte_group); | ||
670 | goto repeat; | ||
671 | } | ||
672 | } | ||
673 | |||
674 | if (unlikely(slot == -2)) | ||
675 | panic("hash_huge_page: pte_insert failed\n"); | ||
676 | |||
677 | new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); | ||
678 | } | ||
679 | |||
680 | /* | ||
681 | * No need to use ldarx/stdcx here | ||
682 | */ | ||
683 | *ptep = __pte(new_pte & ~_PAGE_BUSY); | ||
684 | |||
685 | err = 0; | ||
686 | 509 | ||
687 | out: | 510 | hugetlb_add_hstate(shift - PAGE_SHIFT); |
688 | return err; | ||
689 | } | ||
690 | 511 | ||
691 | static void __init set_huge_psize(int psize) | 512 | return 0; |
692 | { | ||
693 | /* Check that it is a page size supported by the hardware and | ||
694 | * that it fits within pagetable limits. */ | ||
695 | if (mmu_psize_defs[psize].shift && | ||
696 | mmu_psize_defs[psize].shift < SID_SHIFT_1T && | ||
697 | (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || | ||
698 | mmu_psize_defs[psize].shift == PAGE_SHIFT_64K || | ||
699 | mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) { | ||
700 | /* Return if huge page size has already been setup or is the | ||
701 | * same as the base page size. */ | ||
702 | if (mmu_huge_psizes[psize] || | ||
703 | mmu_psize_defs[psize].shift == PAGE_SHIFT) | ||
704 | return; | ||
705 | if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL)) | ||
706 | return; | ||
707 | hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); | ||
708 | |||
709 | switch (mmu_psize_defs[psize].shift) { | ||
710 | case PAGE_SHIFT_64K: | ||
711 | /* We only allow 64k hpages with 4k base page, | ||
712 | * which was checked above, and always put them | ||
713 | * at the PMD */ | ||
714 | hugepte_shift[psize] = PMD_SHIFT; | ||
715 | break; | ||
716 | case PAGE_SHIFT_16M: | ||
717 | /* 16M pages can be at two different levels | ||
718 | * of pagetables based on base page size */ | ||
719 | if (PAGE_SHIFT == PAGE_SHIFT_64K) | ||
720 | hugepte_shift[psize] = PMD_SHIFT; | ||
721 | else /* 4k base page */ | ||
722 | hugepte_shift[psize] = PUD_SHIFT; | ||
723 | break; | ||
724 | case PAGE_SHIFT_16G: | ||
725 | /* 16G pages are always at PGD level */ | ||
726 | hugepte_shift[psize] = PGDIR_SHIFT; | ||
727 | break; | ||
728 | } | ||
729 | hugepte_shift[psize] -= mmu_psize_defs[psize].shift; | ||
730 | } else | ||
731 | hugepte_shift[psize] = 0; | ||
732 | } | 513 | } |
733 | 514 | ||
734 | static int __init hugepage_setup_sz(char *str) | 515 | static int __init hugepage_setup_sz(char *str) |
735 | { | 516 | { |
736 | unsigned long long size; | 517 | unsigned long long size; |
737 | int mmu_psize; | ||
738 | int shift; | ||
739 | 518 | ||
740 | size = memparse(str, &str); | 519 | size = memparse(str, &str); |
741 | 520 | ||
742 | shift = __ffs(size); | 521 | if (add_huge_page_size(size) != 0) |
743 | mmu_psize = shift_to_mmu_psize(shift); | ||
744 | if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) | ||
745 | set_huge_psize(mmu_psize); | ||
746 | else | ||
747 | printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); | 522 | printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); |
748 | 523 | ||
749 | return 1; | 524 | return 1; |
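memparse() accepts the usual K/M/G suffixes, so the whole option boils down to one call plus the validation in add_huge_page_size(). A sketch with a hypothetical parameter value:

	/* e.g. booting with "hugepagesz=16M" (hypothetical input) */
	char buf[] = "16M";
	char *s = buf;
	unsigned long long size = memparse(s, &s);	/* 16777216 */
	/* add_huge_page_size() then requires is_power_of_2(size) and
	 * PAGE_SHIFT < shift <= SLICE_HIGH_SHIFT before registering the
	 * hstate via hugetlb_add_hstate(shift - PAGE_SHIFT) */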
@@ -752,41 +527,55 @@ __setup("hugepagesz=", hugepage_setup_sz); | |||
752 | 527 | ||
753 | static int __init hugetlbpage_init(void) | 528 | static int __init hugetlbpage_init(void) |
754 | { | 529 | { |
755 | unsigned int psize; | 530 | int psize; |
756 | 531 | ||
757 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | 532 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) |
758 | return -ENODEV; | 533 | return -ENODEV; |
759 | 534 | ||
760 | /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE | 535 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { |
761 | * and adjust PTE_NONCACHE_NUM if the number of supported huge page | 536 | unsigned shift; |
762 | * sizes changes. | 537 | unsigned pdshift; |
763 | */ | ||
764 | set_huge_psize(MMU_PAGE_16M); | ||
765 | set_huge_psize(MMU_PAGE_16G); | ||
766 | 538 | ||
767 | /* Temporarily disable support for 64K huge pages when 64K SPU local | 539 | if (!mmu_psize_defs[psize].shift) |
768 | * store support is enabled as the current implementation conflicts. | 540 | continue; |
769 | */ | ||
770 | #ifndef CONFIG_SPU_FS_64K_LS | ||
771 | set_huge_psize(MMU_PAGE_64K); | ||
772 | #endif | ||
773 | 541 | ||
774 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { | 542 | shift = mmu_psize_to_shift(psize); |
775 | if (mmu_huge_psizes[psize]) { | 543 | |
776 | pgtable_cache[HUGE_PGTABLE_INDEX(psize)] = | 544 | if (add_huge_page_size(1ULL << shift) < 0) |
777 | kmem_cache_create( | 545 | continue; |
778 | HUGEPTE_CACHE_NAME(psize), | 546 | |
779 | HUGEPTE_TABLE_SIZE(psize), | 547 | if (shift < PMD_SHIFT) |
780 | HUGEPTE_TABLE_SIZE(psize), | 548 | pdshift = PMD_SHIFT; |
781 | 0, | 549 | else if (shift < PUD_SHIFT) |
782 | NULL); | 550 | pdshift = PUD_SHIFT; |
783 | if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)]) | 551 | else |
784 | panic("hugetlbpage_init(): could not create %s"\ | 552 | pdshift = PGDIR_SHIFT; |
785 | "\n", HUGEPTE_CACHE_NAME(psize)); | 553 | |
786 | } | 554 | pgtable_cache_add(pdshift - shift, NULL); |
555 | if (!PGT_CACHE(pdshift - shift)) | ||
556 | panic("hugetlbpage_init(): could not create " | ||
557 | "pgtable cache for %d bit pagesize\n", shift); | ||
787 | } | 558 | } |
788 | 559 | ||
560 | /* Set default large page size. Currently, we pick 16M or 1M | ||
561 | * depending on what is available | ||
562 | */ | ||
563 | if (mmu_psize_defs[MMU_PAGE_16M].shift) | ||
564 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; | ||
565 | else if (mmu_psize_defs[MMU_PAGE_1M].shift) | ||
566 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; | ||
567 | |||
789 | return 0; | 568 | return 0; |
790 | } | 569 | } |
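The pdshift selection above picks the first directory level whose span exceeds the hugepage size; pdshift - shift is then the index size of the hugepte table, and hence which PGT_CACHE() backs it. The same logic as a standalone helper (a sketch; the concrete PMD/PUD/PGDIR shift values depend on the base page size configuration):

	static unsigned int hugepd_pdshift(unsigned int shift)
	{
		if (shift < PMD_SHIFT)
			return PMD_SHIFT;	/* e.g. 64K pages under a PMD */
		else if (shift < PUD_SHIFT)
			return PUD_SHIFT;	/* e.g. 16M pages under a PUD */
		else
			return PGDIR_SHIFT;	/* e.g. 16G pages at the PGD */
	}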
791 | 570 | ||
792 | module_init(hugetlbpage_init); | 571 | module_init(hugetlbpage_init); |
572 | |||
573 | void flush_dcache_icache_hugepage(struct page *page) | ||
574 | { | ||
575 | int i; | ||
576 | |||
577 | BUG_ON(!PageCompound(page)); | ||
578 | |||
579 | for (i = 0; i < (1UL << compound_order(page)); i++) | ||
580 | __flush_dcache_icache(page_address(page+i)); | ||
581 | } | ||
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 9ddcfb4dc139..767333005eb4 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/initrd.h> | 31 | #include <linux/initrd.h> |
32 | #include <linux/pagemap.h> | 32 | #include <linux/pagemap.h> |
33 | #include <linux/lmb.h> | 33 | #include <linux/lmb.h> |
34 | #include <linux/gfp.h> | ||
34 | 35 | ||
35 | #include <asm/pgalloc.h> | 36 | #include <asm/pgalloc.h> |
36 | #include <asm/prom.h> | 37 | #include <asm/prom.h> |
@@ -47,7 +48,7 @@ | |||
47 | #include "mmu_decl.h" | 48 | #include "mmu_decl.h" |
48 | 49 | ||
49 | #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) | 50 | #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) |
50 | /* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ | 51 | /* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */ |
51 | #if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET)) | 52 | #if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET)) |
52 | #error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" | 53 | #error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" |
53 | #endif | 54 | #endif |
@@ -82,6 +83,11 @@ extern struct task_struct *current_set[NR_CPUS]; | |||
82 | int __map_without_bats; | 83 | int __map_without_bats; |
83 | int __map_without_ltlbs; | 84 | int __map_without_ltlbs; |
84 | 85 | ||
86 | /* | ||
87 | * This tells the system to allow ioremapping memory marked as reserved. | ||
88 | */ | ||
89 | int __allow_ioremap_reserved; | ||
90 | |||
85 | /* max amount of low RAM to map in */ | 91 | /* max amount of low RAM to map in */ |
86 | unsigned long __max_low_memory = MAX_LOW_MEM; | 92 | unsigned long __max_low_memory = MAX_LOW_MEM; |
87 | 93 | ||
@@ -131,9 +137,13 @@ void __init MMU_init(void) | |||
131 | MMU_setup(); | 137 | MMU_setup(); |
132 | 138 | ||
133 | if (lmb.memory.cnt > 1) { | 139 | if (lmb.memory.cnt > 1) { |
140 | #ifndef CONFIG_WII | ||
134 | lmb.memory.cnt = 1; | 141 | lmb.memory.cnt = 1; |
135 | lmb_analyze(); | 142 | lmb_analyze(); |
136 | printk(KERN_WARNING "Only using first contiguous memory region"); | 143 | printk(KERN_WARNING "Only using first contiguous memory region"); |
144 | #else | ||
145 | wii_memory_fixups(); | ||
146 | #endif | ||
137 | } | 147 | } |
138 | 148 | ||
139 | total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr; | 149 | total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr; |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 335c578b9cc3..d7fa50b09b4a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c | |||
@@ -41,6 +41,8 @@ | |||
41 | #include <linux/module.h> | 41 | #include <linux/module.h> |
42 | #include <linux/poison.h> | 42 | #include <linux/poison.h> |
43 | #include <linux/lmb.h> | 43 | #include <linux/lmb.h> |
44 | #include <linux/hugetlb.h> | ||
45 | #include <linux/slab.h> | ||
44 | 46 | ||
45 | #include <asm/pgalloc.h> | 47 | #include <asm/pgalloc.h> |
46 | #include <asm/page.h> | 48 | #include <asm/page.h> |
@@ -119,30 +121,63 @@ static void pmd_ctor(void *addr) | |||
119 | memset(addr, 0, PMD_TABLE_SIZE); | 121 | memset(addr, 0, PMD_TABLE_SIZE); |
120 | } | 122 | } |
121 | 123 | ||
122 | static const unsigned int pgtable_cache_size[2] = { | 124 | struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; |
123 | PGD_TABLE_SIZE, PMD_TABLE_SIZE | 125 | |
124 | }; | 126 | /* |
125 | static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { | 127 | * Create a kmem_cache() for pagetables. This is not used for PTE |
126 | #ifdef CONFIG_PPC_64K_PAGES | 128 | * pages - they're linked to struct page, come from the normal free |
127 | "pgd_cache", "pmd_cache", | 129 | * pages pool and have a different entry size (see real_pte_t) to |
128 | #else | 130 | * everything else. Caches created by this function are used for all |
129 | "pgd_cache", "pud_pmd_cache", | 131 | * the higher level pagetables, and for hugepage pagetables. |
130 | #endif /* CONFIG_PPC_64K_PAGES */ | 132 | */ |
131 | }; | 133 | void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) |
132 | 134 | { | |
133 | #ifdef CONFIG_HUGETLB_PAGE | 135 | char *name; |
134 | /* Hugepages need an extra cache per hugepagesize, initialized in | 136 | unsigned long table_size = sizeof(void *) << shift; |
135 | * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT | 137 | unsigned long align = table_size; |
136 | * is not compile time constant. */ | 138 | |
137 | struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT]; | 139 | /* When batching pgtable pointers for RCU freeing, we store |
138 | #else | 140 | * the index size in the low bits. Table alignment must be |
139 | struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; | 141 | * big enough to fit it. |
140 | #endif | 142 | * |
143 | * Likewise, hugepage pagetable pointers contain a (different) | ||
144 | * shift value in the low bits. All tables must be aligned so | ||
145 | * as to leave enough 0 bits in the address to contain it. */ | ||
146 | unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, | ||
147 | HUGEPD_SHIFT_MASK + 1); | ||
148 | struct kmem_cache *new; | ||
149 | |||
150 | /* It would be nice if this was a BUILD_BUG_ON(), but at the | ||
151 | * moment, gcc doesn't seem to recognize is_power_of_2 as a | ||
152 | * constant expression, so much for that. */ | ||
153 | BUG_ON(!is_power_of_2(minalign)); | ||
154 | BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); | ||
155 | |||
156 | if (PGT_CACHE(shift)) | ||
157 | return; /* Already have a cache of this size */ | ||
158 | |||
159 | align = max_t(unsigned long, align, minalign); | ||
160 | name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); | ||
161 | new = kmem_cache_create(name, table_size, align, 0, ctor); | ||
162 | PGT_CACHE(shift) = new; | ||
163 | |||
164 | pr_debug("Allocated pgtable cache for order %d\n", shift); | ||
165 | } | ||
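The minalign computation is what makes the low-bit packing schemes safe. With the assumed constants MAX_PGTABLE_INDEX_SIZE = 0xf and HUGEPD_SHIFT_MASK = 0x3f (both configuration-dependent, shown only to make the arithmetic concrete):

	unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,	/* 0x10 */
				     HUGEPD_SHIFT_MASK + 1);		/* 0x40 */
	/* minalign = 64: every table address is a multiple of 64, so its
	 * low 6 bits are zero and can carry either an index size (for the
	 * RCU freeing path) or a hugepage shift (for hugepd pointers) */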
166 | |||
141 | 167 | ||
142 | void pgtable_cache_init(void) | 168 | void pgtable_cache_init(void) |
143 | { | 169 | { |
144 | pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor); | 170 | pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); |
145 | pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor); | 171 | pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor); |
172 | if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE)) | ||
173 | panic("Couldn't allocate pgtable caches"); | ||
174 | |||
175 | /* In all current configs, when the PUD index exists it's the | ||
176 | * same size as either the pgd or pmd index. Verify that the | ||
177 | * initialization above has also created a PUD cache. This | ||
178 | * will need re-examination if we add new possibilities for | ||
179 | * the pagetable layout. */ | ||
180 | BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)); | ||
146 | } | 181 | } |
147 | 182 | ||
148 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 183 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 59736317bf0e..0f594d774bf7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/gfp.h> | ||
25 | #include <linux/types.h> | 26 | #include <linux/types.h> |
26 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
27 | #include <linux/stddef.h> | 28 | #include <linux/stddef.h> |
@@ -32,6 +33,7 @@ | |||
32 | #include <linux/pagemap.h> | 33 | #include <linux/pagemap.h> |
33 | #include <linux/suspend.h> | 34 | #include <linux/suspend.h> |
34 | #include <linux/lmb.h> | 35 | #include <linux/lmb.h> |
36 | #include <linux/hugetlb.h> | ||
35 | 37 | ||
36 | #include <asm/pgalloc.h> | 38 | #include <asm/pgalloc.h> |
37 | #include <asm/prom.h> | 39 | #include <asm/prom.h> |
@@ -47,6 +49,7 @@ | |||
47 | #include <asm/sparsemem.h> | 49 | #include <asm/sparsemem.h> |
48 | #include <asm/vdso.h> | 50 | #include <asm/vdso.h> |
49 | #include <asm/fixmap.h> | 51 | #include <asm/fixmap.h> |
52 | #include <asm/swiotlb.h> | ||
50 | 53 | ||
51 | #include "mmu_decl.h" | 54 | #include "mmu_decl.h" |
52 | 55 | ||
@@ -319,6 +322,11 @@ void __init mem_init(void) | |||
319 | struct page *page; | 322 | struct page *page; |
320 | unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; | 323 | unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; |
321 | 324 | ||
325 | #ifdef CONFIG_SWIOTLB | ||
326 | if (ppc_swiotlb_enable) | ||
327 | swiotlb_init(1); | ||
328 | #endif | ||
329 | |||
322 | num_physpages = lmb.memory.size >> PAGE_SHIFT; | 330 | num_physpages = lmb.memory.size >> PAGE_SHIFT; |
323 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | 331 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); |
324 | 332 | ||
@@ -417,18 +425,26 @@ EXPORT_SYMBOL(flush_dcache_page); | |||
417 | 425 | ||
418 | void flush_dcache_icache_page(struct page *page) | 426 | void flush_dcache_icache_page(struct page *page) |
419 | { | 427 | { |
428 | #ifdef CONFIG_HUGETLB_PAGE | ||
429 | if (PageCompound(page)) { | ||
430 | flush_dcache_icache_hugepage(page); | ||
431 | return; | ||
432 | } | ||
433 | #endif | ||
420 | #ifdef CONFIG_BOOKE | 434 | #ifdef CONFIG_BOOKE |
421 | void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE); | 435 | { |
422 | __flush_dcache_icache(start); | 436 | void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE); |
423 | kunmap_atomic(start, KM_PPC_SYNC_ICACHE); | 437 | __flush_dcache_icache(start); |
438 | kunmap_atomic(start, KM_PPC_SYNC_ICACHE); | ||
439 | } | ||
424 | #elif defined(CONFIG_8xx) || defined(CONFIG_PPC64) | 440 | #elif defined(CONFIG_8xx) || defined(CONFIG_PPC64) |
425 | /* On 8xx there is no need to kmap since highmem is not supported */ | 441 | /* On 8xx there is no need to kmap since highmem is not supported */ |
426 | __flush_dcache_icache(page_address(page)); | 442 | __flush_dcache_icache(page_address(page)); |
427 | #else | 443 | #else |
428 | __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); | 444 | __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); |
429 | #endif | 445 | #endif |
430 | |||
431 | } | 446 | } |
447 | |||
432 | void clear_user_page(void *page, unsigned long vaddr, struct page *pg) | 448 | void clear_user_page(void *page, unsigned long vaddr, struct page *pg) |
433 | { | 449 | { |
434 | clear_page(page); | 450 | clear_page(page); |
@@ -485,13 +501,13 @@ EXPORT_SYMBOL(flush_icache_user_range); | |||
485 | * This must always be called with the pte lock held. | 501 | * This must always be called with the pte lock held. |
486 | */ | 502 | */ |
487 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, | 503 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, |
488 | pte_t pte) | 504 | pte_t *ptep) |
489 | { | 505 | { |
490 | #ifdef CONFIG_PPC_STD_MMU | 506 | #ifdef CONFIG_PPC_STD_MMU |
491 | unsigned long access = 0, trap; | 507 | unsigned long access = 0, trap; |
492 | 508 | ||
493 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ | 509 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ |
494 | if (!pte_young(pte) || address >= TASK_SIZE) | 510 | if (!pte_young(*ptep) || address >= TASK_SIZE) |
495 | return; | 511 | return; |
496 | 512 | ||
497 | /* We try to figure out if we are coming from an instruction | 513 | /* We try to figure out if we are coming from an instruction |
diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c index 0d957a4c70fe..5a783d8e8e8e 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap_64.c | |||
@@ -47,7 +47,7 @@ static inline int mmap_is_legacy(void) | |||
47 | if (current->personality & ADDR_COMPAT_LAYOUT) | 47 | if (current->personality & ADDR_COMPAT_LAYOUT) |
48 | return 1; | 48 | return 1; |
49 | 49 | ||
50 | if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) | 50 | if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) |
51 | return 1; | 51 | return 1; |
52 | 52 | ||
53 | return sysctl_legacy_va_layout; | 53 | return sysctl_legacy_va_layout; |
@@ -77,7 +77,7 @@ static unsigned long mmap_rnd(void) | |||
77 | 77 | ||
78 | static inline unsigned long mmap_base(void) | 78 | static inline unsigned long mmap_base(void) |
79 | { | 79 | { |
80 | unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; | 80 | unsigned long gap = rlimit(RLIMIT_STACK); |
81 | 81 | ||
82 | if (gap < MIN_GAP) | 82 | if (gap < MIN_GAP) |
83 | gap = MIN_GAP; | 83 | gap = MIN_GAP; |
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index dbeb86ac90cd..2535828aa84b 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c | |||
@@ -18,11 +18,13 @@ | |||
18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
19 | #include <linux/spinlock.h> | 19 | #include <linux/spinlock.h> |
20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
21 | #include <linux/module.h> | ||
22 | #include <linux/gfp.h> | ||
21 | 23 | ||
22 | #include <asm/mmu_context.h> | 24 | #include <asm/mmu_context.h> |
23 | 25 | ||
24 | static DEFINE_SPINLOCK(mmu_context_lock); | 26 | static DEFINE_SPINLOCK(mmu_context_lock); |
25 | static DEFINE_IDR(mmu_context_idr); | 27 | static DEFINE_IDA(mmu_context_ida); |
26 | 28 | ||
27 | /* | 29 | /* |
28 | * The proto-VSID space has 2^35 - 1 segments available for user mappings. | 30 | * The proto-VSID space has 2^35 - 1 segments available for user mappings. |
@@ -32,17 +34,17 @@ static DEFINE_IDR(mmu_context_idr); | |||
32 | #define NO_CONTEXT 0 | 34 | #define NO_CONTEXT 0 |
33 | #define MAX_CONTEXT ((1UL << 19) - 1) | 35 | #define MAX_CONTEXT ((1UL << 19) - 1) |
34 | 36 | ||
35 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | 37 | int __init_new_context(void) |
36 | { | 38 | { |
37 | int index; | 39 | int index; |
38 | int err; | 40 | int err; |
39 | 41 | ||
40 | again: | 42 | again: |
41 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) | 43 | if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL)) |
42 | return -ENOMEM; | 44 | return -ENOMEM; |
43 | 45 | ||
44 | spin_lock(&mmu_context_lock); | 46 | spin_lock(&mmu_context_lock); |
45 | err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); | 47 | err = ida_get_new_above(&mmu_context_ida, 1, &index); |
46 | spin_unlock(&mmu_context_lock); | 48 | spin_unlock(&mmu_context_lock); |
47 | 49 | ||
48 | if (err == -EAGAIN) | 50 | if (err == -EAGAIN) |
@@ -52,27 +54,46 @@ again: | |||
52 | 54 | ||
53 | if (index > MAX_CONTEXT) { | 55 | if (index > MAX_CONTEXT) { |
54 | spin_lock(&mmu_context_lock); | 56 | spin_lock(&mmu_context_lock); |
55 | idr_remove(&mmu_context_idr, index); | 57 | ida_remove(&mmu_context_ida, index); |
56 | spin_unlock(&mmu_context_lock); | 58 | spin_unlock(&mmu_context_lock); |
57 | return -ENOMEM; | 59 | return -ENOMEM; |
58 | } | 60 | } |
59 | 61 | ||
62 | return index; | ||
63 | } | ||
64 | EXPORT_SYMBOL_GPL(__init_new_context); | ||
65 | |||
66 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
67 | { | ||
68 | int index; | ||
69 | |||
70 | index = __init_new_context(); | ||
71 | if (index < 0) | ||
72 | return index; | ||
73 | |||
60 | /* The old code would re-promote on fork, we don't do that | 74 | /* The old code would re-promote on fork, we don't do that |
61 | * when using slices as it could cause problem promoting slices | 75 | * when using slices as it could cause problem promoting slices |
62 | * that have been forced down to 4K | 76 | * that have been forced down to 4K |
63 | */ | 77 | */ |
64 | if (slice_mm_new_context(mm)) | 78 | if (slice_mm_new_context(mm)) |
65 | slice_set_user_psize(mm, mmu_virtual_psize); | 79 | slice_set_user_psize(mm, mmu_virtual_psize); |
80 | subpage_prot_init_new_context(mm); | ||
66 | mm->context.id = index; | 81 | mm->context.id = index; |
67 | 82 | ||
68 | return 0; | 83 | return 0; |
69 | } | 84 | } |
70 | 85 | ||
71 | void destroy_context(struct mm_struct *mm) | 86 | void __destroy_context(int context_id) |
72 | { | 87 | { |
73 | spin_lock(&mmu_context_lock); | 88 | spin_lock(&mmu_context_lock); |
74 | idr_remove(&mmu_context_idr, mm->context.id); | 89 | ida_remove(&mmu_context_ida, context_id); |
75 | spin_unlock(&mmu_context_lock); | 90 | spin_unlock(&mmu_context_lock); |
91 | } | ||
92 | EXPORT_SYMBOL_GPL(__destroy_context); | ||
76 | 93 | ||
94 | void destroy_context(struct mm_struct *mm) | ||
95 | { | ||
96 | __destroy_context(mm->context.id); | ||
97 | subpage_prot_free(mm); | ||
77 | mm->context.id = NO_CONTEXT; | 98 | mm->context.id = NO_CONTEXT; |
78 | } | 99 | } |
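Factoring the allocator into __init_new_context()/__destroy_context() and exporting them GPL-only lets other in-kernel code reserve MMU context ids without owning an mm_struct. A hedged usage sketch (the caller shown is hypothetical):

	int id = __init_new_context();
	if (id < 0)
		return id;	/* -ENOMEM once the IDA is exhausted */
	/* ... use the context id, e.g. to build VSIDs for a private
	 * address space ... */
	__destroy_context(id);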
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index be4f34c30a0b..1f2d9ff09895 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/bootmem.h> | 47 | #include <linux/bootmem.h> |
48 | #include <linux/notifier.h> | 48 | #include <linux/notifier.h> |
49 | #include <linux/cpu.h> | 49 | #include <linux/cpu.h> |
50 | #include <linux/slab.h> | ||
50 | 51 | ||
51 | #include <asm/mmu_context.h> | 52 | #include <asm/mmu_context.h> |
52 | #include <asm/tlbflush.h> | 53 | #include <asm/tlbflush.h> |
@@ -56,7 +57,7 @@ static unsigned int next_context, nr_free_contexts; | |||
56 | static unsigned long *context_map; | 57 | static unsigned long *context_map; |
57 | static unsigned long *stale_map[NR_CPUS]; | 58 | static unsigned long *stale_map[NR_CPUS]; |
58 | static struct mm_struct **context_mm; | 59 | static struct mm_struct **context_mm; |
59 | static DEFINE_SPINLOCK(context_lock); | 60 | static DEFINE_RAW_SPINLOCK(context_lock); |
60 | 61 | ||
61 | #define CTX_MAP_SIZE \ | 62 | #define CTX_MAP_SIZE \ |
62 | (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) | 63 | (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) |
@@ -121,9 +122,9 @@ static unsigned int steal_context_smp(unsigned int id) | |||
121 | /* This will happen if you have more CPUs than available contexts, | 122 | /* This will happen if you have more CPUs than available contexts, |
122 | * all we can do here is wait a bit and try again | 123 | * all we can do here is wait a bit and try again |
123 | */ | 124 | */ |
124 | spin_unlock(&context_lock); | 125 | raw_spin_unlock(&context_lock); |
125 | cpu_relax(); | 126 | cpu_relax(); |
126 | spin_lock(&context_lock); | 127 | raw_spin_lock(&context_lock); |
127 | 128 | ||
128 | /* This will cause the caller to try again */ | 129 | /* This will cause the caller to try again */ |
129 | return MMU_NO_CONTEXT; | 130 | return MMU_NO_CONTEXT; |
@@ -194,7 +195,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
194 | unsigned long *map; | 195 | unsigned long *map; |
195 | 196 | ||
196 | /* No lockless fast path .. yet */ | 197 | /* No lockless fast path .. yet */ |
197 | spin_lock(&context_lock); | 198 | raw_spin_lock(&context_lock); |
198 | 199 | ||
199 | pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", | 200 | pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", |
200 | cpu, next, next->context.active, next->context.id); | 201 | cpu, next, next->context.active, next->context.id); |
@@ -278,7 +279,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
278 | /* Flick the MMU and release lock */ | 279 | /* Flick the MMU and release lock */ |
279 | pr_hardcont(" -> %d\n", id); | 280 | pr_hardcont(" -> %d\n", id); |
280 | set_context(id, next->pgd); | 281 | set_context(id, next->pgd); |
281 | spin_unlock(&context_lock); | 282 | raw_spin_unlock(&context_lock); |
282 | } | 283 | } |
283 | 284 | ||
284 | /* | 285 | /* |
@@ -307,7 +308,7 @@ void destroy_context(struct mm_struct *mm) | |||
307 | 308 | ||
308 | WARN_ON(mm->context.active != 0); | 309 | WARN_ON(mm->context.active != 0); |
309 | 310 | ||
310 | spin_lock_irqsave(&context_lock, flags); | 311 | raw_spin_lock_irqsave(&context_lock, flags); |
311 | id = mm->context.id; | 312 | id = mm->context.id; |
312 | if (id != MMU_NO_CONTEXT) { | 313 | if (id != MMU_NO_CONTEXT) { |
313 | __clear_bit(id, context_map); | 314 | __clear_bit(id, context_map); |
@@ -318,7 +319,7 @@ void destroy_context(struct mm_struct *mm) | |||
318 | context_mm[id] = NULL; | 319 | context_mm[id] = NULL; |
319 | nr_free_contexts++; | 320 | nr_free_contexts++; |
320 | } | 321 | } |
321 | spin_unlock_irqrestore(&context_lock, flags); | 322 | raw_spin_unlock_irqrestore(&context_lock, flags); |
322 | } | 323 | } |
323 | 324 | ||
324 | #ifdef CONFIG_SMP | 325 | #ifdef CONFIG_SMP |
@@ -353,7 +354,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, | |||
353 | read_lock(&tasklist_lock); | 354 | read_lock(&tasklist_lock); |
354 | for_each_process(p) { | 355 | for_each_process(p) { |
355 | if (p->mm) | 356 | if (p->mm) |
356 | cpu_mask_clear_cpu(cpu, mm_cpumask(p->mm)); | 357 | cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); |
357 | } | 358 | } |
358 | read_unlock(&tasklist_lock); | 359 | read_unlock(&tasklist_lock); |
359 | break; | 360 | break; |
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index d2e5321d5ea6..d49a77503e19 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h | |||
@@ -98,23 +98,13 @@ extern void _tlbia(void); | |||
98 | 98 | ||
99 | #ifdef CONFIG_PPC32 | 99 | #ifdef CONFIG_PPC32 |
100 | 100 | ||
101 | struct tlbcam { | ||
102 | u32 MAS0; | ||
103 | u32 MAS1; | ||
104 | u32 MAS2; | ||
105 | u32 MAS3; | ||
106 | u32 MAS7; | ||
107 | }; | ||
108 | |||
109 | extern void mapin_ram(void); | 101 | extern void mapin_ram(void); |
110 | extern int map_page(unsigned long va, phys_addr_t pa, int flags); | 102 | extern int map_page(unsigned long va, phys_addr_t pa, int flags); |
111 | extern void setbat(int index, unsigned long virt, phys_addr_t phys, | 103 | extern void setbat(int index, unsigned long virt, phys_addr_t phys, |
112 | unsigned int size, int flags); | 104 | unsigned int size, int flags); |
113 | extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, | ||
114 | unsigned int size, int flags, unsigned int pid); | ||
115 | extern void invalidate_tlbcam_entry(int index); | ||
116 | 105 | ||
117 | extern int __map_without_bats; | 106 | extern int __map_without_bats; |
107 | extern int __allow_ioremap_reserved; | ||
118 | extern unsigned long ioremap_base; | 108 | extern unsigned long ioremap_base; |
119 | extern unsigned int rtas_data, rtas_size; | 109 | extern unsigned int rtas_data, rtas_size; |
120 | 110 | ||
@@ -136,24 +126,32 @@ extern phys_addr_t total_lowmem; | |||
136 | extern phys_addr_t memstart_addr; | 126 | extern phys_addr_t memstart_addr; |
137 | extern phys_addr_t lowmem_end_addr; | 127 | extern phys_addr_t lowmem_end_addr; |
138 | 128 | ||
129 | #ifdef CONFIG_WII | ||
130 | extern unsigned long wii_hole_start; | ||
131 | extern unsigned long wii_hole_size; | ||
132 | |||
133 | extern unsigned long wii_mmu_mapin_mem2(unsigned long top); | ||
134 | extern void wii_memory_fixups(void); | ||
135 | #endif | ||
136 | |||
139 | /* ...and now those things that may be slightly different between processor | 137 | /* ...and now those things that may be slightly different between processor |
140 | * architectures. -- Dan | 138 | * architectures. -- Dan |
141 | */ | 139 | */ |
142 | #if defined(CONFIG_8xx) | 140 | #if defined(CONFIG_8xx) |
143 | #define MMU_init_hw() do { } while(0) | 141 | #define MMU_init_hw() do { } while(0) |
144 | #define mmu_mapin_ram() (0UL) | 142 | #define mmu_mapin_ram(top) (0UL) |
145 | 143 | ||
146 | #elif defined(CONFIG_4xx) | 144 | #elif defined(CONFIG_4xx) |
147 | extern void MMU_init_hw(void); | 145 | extern void MMU_init_hw(void); |
148 | extern unsigned long mmu_mapin_ram(void); | 146 | extern unsigned long mmu_mapin_ram(unsigned long top); |
149 | 147 | ||
150 | #elif defined(CONFIG_FSL_BOOKE) | 148 | #elif defined(CONFIG_FSL_BOOKE) |
151 | extern void MMU_init_hw(void); | 149 | extern void MMU_init_hw(void); |
152 | extern unsigned long mmu_mapin_ram(void); | 150 | extern unsigned long mmu_mapin_ram(unsigned long top); |
153 | extern void adjust_total_lowmem(void); | 151 | extern void adjust_total_lowmem(void); |
154 | 152 | ||
155 | #elif defined(CONFIG_PPC32) | 153 | #elif defined(CONFIG_PPC32) |
156 | /* anything 32-bit except 4xx or 8xx */ | 154 | /* anything 32-bit except 4xx or 8xx */ |
157 | extern void MMU_init_hw(void); | 155 | extern void MMU_init_hw(void); |
158 | extern unsigned long mmu_mapin_ram(void); | 156 | extern unsigned long mmu_mapin_ram(unsigned long top); |
159 | #endif | 157 | #endif |
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b037d95eeadc..eaa7633515b7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
@@ -242,10 +242,11 @@ EXPORT_SYMBOL_GPL(of_node_to_nid); | |||
242 | */ | 242 | */ |
243 | static int __init find_min_common_depth(void) | 243 | static int __init find_min_common_depth(void) |
244 | { | 244 | { |
245 | int depth; | 245 | int depth, index; |
246 | const unsigned int *ref_points; | 246 | const unsigned int *ref_points; |
247 | struct device_node *rtas_root; | 247 | struct device_node *rtas_root; |
248 | unsigned int len; | 248 | unsigned int len; |
249 | struct device_node *options; | ||
249 | 250 | ||
250 | rtas_root = of_find_node_by_path("/rtas"); | 251 | rtas_root = of_find_node_by_path("/rtas"); |
251 | 252 | ||
@@ -258,11 +259,23 @@ static int __init find_min_common_depth(void) | |||
258 | * configuration (should be all 0's) and the second is for a normal | 259 | * configuration (should be all 0's) and the second is for a normal |
259 | * NUMA configuration. | 260 | * NUMA configuration. |
260 | */ | 261 | */ |
262 | index = 1; | ||
261 | ref_points = of_get_property(rtas_root, | 263 | ref_points = of_get_property(rtas_root, |
262 | "ibm,associativity-reference-points", &len); | 264 | "ibm,associativity-reference-points", &len); |
263 | 265 | ||
266 | /* | ||
267 | * For type 1 affinity information we want the first field | ||
268 | */ | ||
269 | options = of_find_node_by_path("/options"); | ||
270 | if (options) { | ||
271 | const char *str; | ||
272 | str = of_get_property(options, "ibm,associativity-form", NULL); | ||
273 | if (str && !strcmp(str, "1")) | ||
274 | index = 0; | ||
275 | } | ||
276 | |||
264 | if ((len >= 2 * sizeof(unsigned int)) && ref_points) { | 277 | if ((len >= 2 * sizeof(unsigned int)) && ref_points) { |
265 | depth = ref_points[1]; | 278 | depth = ref_points[index]; |
266 | } else { | 279 | } else { |
267 | dbg("NUMA: ibm,associativity-reference-points not found.\n"); | 280 | dbg("NUMA: ibm,associativity-reference-points not found.\n"); |
268 | depth = -1; | 281 | depth = -1; |
@@ -451,7 +464,7 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) | |||
451 | nid = of_node_to_nid_single(cpu); | 464 | nid = of_node_to_nid_single(cpu); |
452 | 465 | ||
453 | if (nid < 0 || !node_online(nid)) | 466 | if (nid < 0 || !node_online(nid)) |
454 | nid = any_online_node(NODE_MASK_ALL); | 467 | nid = first_online_node; |
455 | out: | 468 | out: |
456 | map_cpu_to_node(lcpu, nid); | 469 | map_cpu_to_node(lcpu, nid); |
457 | 470 | ||
@@ -1114,7 +1127,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) | |||
1114 | int nid, found = 0; | 1127 | int nid, found = 0; |
1115 | 1128 | ||
1116 | if (!numa_enabled || (min_common_depth < 0)) | 1129 | if (!numa_enabled || (min_common_depth < 0)) |
1117 | return any_online_node(NODE_MASK_ALL); | 1130 | return first_online_node; |
1118 | 1131 | ||
1119 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); | 1132 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); |
1120 | if (memory) { | 1133 | if (memory) { |
@@ -1125,7 +1138,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) | |||
1125 | } | 1138 | } |
1126 | 1139 | ||
1127 | if (nid < 0 || !node_online(nid)) | 1140 | if (nid < 0 || !node_online(nid)) |
1128 | nid = any_online_node(NODE_MASK_ALL); | 1141 | nid = first_online_node; |
1129 | 1142 | ||
1130 | if (NODE_DATA(nid)->node_spanned_pages) | 1143 | if (NODE_DATA(nid)->node_spanned_pages) |
1131 | return nid; | 1144 | return nid; |
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 53040931de32..ebc2f38eb381 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c | |||
@@ -22,6 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/gfp.h> | ||
25 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
26 | #include <linux/init.h> | 27 | #include <linux/init.h> |
27 | #include <linux/percpu.h> | 28 | #include <linux/percpu.h> |
@@ -49,12 +50,12 @@ struct pte_freelist_batch | |||
49 | { | 50 | { |
50 | struct rcu_head rcu; | 51 | struct rcu_head rcu; |
51 | unsigned int index; | 52 | unsigned int index; |
52 | pgtable_free_t tables[0]; | 53 | unsigned long tables[0]; |
53 | }; | 54 | }; |
54 | 55 | ||
55 | #define PTE_FREELIST_SIZE \ | 56 | #define PTE_FREELIST_SIZE \ |
56 | ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ | 57 | ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ |
57 | / sizeof(pgtable_free_t)) | 58 | / sizeof(unsigned long)) |
58 | 59 | ||
59 | static void pte_free_smp_sync(void *arg) | 60 | static void pte_free_smp_sync(void *arg) |
60 | { | 61 | { |
@@ -64,13 +65,13 @@ static void pte_free_smp_sync(void *arg) | |||
64 | /* This is only called when we are critically out of memory | 65 | /* This is only called when we are critically out of memory |
65 | * (and fail to get a page in pte_free_tlb). | 66 | * (and fail to get a page in pte_free_tlb). |
66 | */ | 67 | */ |
67 | static void pgtable_free_now(pgtable_free_t pgf) | 68 | static void pgtable_free_now(void *table, unsigned shift) |
68 | { | 69 | { |
69 | pte_freelist_forced_free++; | 70 | pte_freelist_forced_free++; |
70 | 71 | ||
71 | smp_call_function(pte_free_smp_sync, NULL, 1); | 72 | smp_call_function(pte_free_smp_sync, NULL, 1); |
72 | 73 | ||
73 | pgtable_free(pgf); | 74 | pgtable_free(table, shift); |
74 | } | 75 | } |
75 | 76 | ||
76 | static void pte_free_rcu_callback(struct rcu_head *head) | 77 | static void pte_free_rcu_callback(struct rcu_head *head) |
@@ -79,8 +80,12 @@ static void pte_free_rcu_callback(struct rcu_head *head) | |||
79 | container_of(head, struct pte_freelist_batch, rcu); | 80 | container_of(head, struct pte_freelist_batch, rcu); |
80 | unsigned int i; | 81 | unsigned int i; |
81 | 82 | ||
82 | for (i = 0; i < batch->index; i++) | 83 | for (i = 0; i < batch->index; i++) { |
83 | pgtable_free(batch->tables[i]); | 84 | void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE); |
85 | unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE; | ||
86 | |||
87 | pgtable_free(table, shift); | ||
88 | } | ||
84 | 89 | ||
85 | free_page((unsigned long)batch); | 90 | free_page((unsigned long)batch); |
86 | } | 91 | } |
@@ -91,25 +96,28 @@ static void pte_free_submit(struct pte_freelist_batch *batch) | |||
91 | call_rcu(&batch->rcu, pte_free_rcu_callback); | 96 | call_rcu(&batch->rcu, pte_free_rcu_callback); |
92 | } | 97 | } |
93 | 98 | ||
94 | void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) | 99 | void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift) |
95 | { | 100 | { |
96 | /* This is safe since tlb_gather_mmu has disabled preemption */ | 101 | /* This is safe since tlb_gather_mmu has disabled preemption */ |
97 | struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); | 102 | struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); |
103 | unsigned long pgf; | ||
98 | 104 | ||
99 | if (atomic_read(&tlb->mm->mm_users) < 2 || | 105 | if (atomic_read(&tlb->mm->mm_users) < 2 || |
100 | cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){ | 106 | cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){ |
101 | pgtable_free(pgf); | 107 | pgtable_free(table, shift); |
102 | return; | 108 | return; |
103 | } | 109 | } |
104 | 110 | ||
105 | if (*batchp == NULL) { | 111 | if (*batchp == NULL) { |
106 | *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); | 112 | *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); |
107 | if (*batchp == NULL) { | 113 | if (*batchp == NULL) { |
108 | pgtable_free_now(pgf); | 114 | pgtable_free_now(table, shift); |
109 | return; | 115 | return; |
110 | } | 116 | } |
111 | (*batchp)->index = 0; | 117 | (*batchp)->index = 0; |
112 | } | 118 | } |
119 | BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); | ||
120 | pgf = (unsigned long)table | shift; | ||
113 | (*batchp)->tables[(*batchp)->index++] = pgf; | 121 | (*batchp)->tables[(*batchp)->index++] = pgf; |
114 | if ((*batchp)->index == PTE_FREELIST_SIZE) { | 122 | if ((*batchp)->index == PTE_FREELIST_SIZE) { |
115 | pte_free_submit(*batchp); | 123 | pte_free_submit(*batchp); |
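
The pgtable.c changes above retire the pgtable_free_t cookie: page tables are at least page-aligned, so the low bits of the pointer are free to carry the index-size shift, which pgtable_free_tlb() packs in and pte_free_rcu_callback() unpacks. A minimal userspace sketch of that pack/unpack trick (INDEX_MASK and the helper names are illustrative, standing in for MAX_PGTABLE_INDEX_SIZE):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Illustrative mask: low bits are available because the
     * allocation is aligned well past this. */
    #define INDEX_MASK 0xfUL

    static uintptr_t pack_table(void *table, unsigned shift)
    {
            assert(((uintptr_t)table & INDEX_MASK) == 0); /* alignment gives us the bits */
            assert(shift <= INDEX_MASK);
            return (uintptr_t)table | shift;
    }

    static void unpack_table(uintptr_t pgf, void **table, unsigned *shift)
    {
            *table = (void *)(pgf & ~INDEX_MASK);
            *shift = (unsigned)(pgf & INDEX_MASK);
    }

    int main(void)
    {
            void *page = aligned_alloc(4096, 4096);
            void *t;
            unsigned s;

            uintptr_t pgf = pack_table(page, 9);
            unpack_table(pgf, &t, &s);
            printf("table=%p shift=%u\n", t, s);
            free(page);
            return 0;
    }
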
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cb96cb2e17cc..b9243e7557ae 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c | |||
@@ -26,6 +26,8 @@ | |||
26 | #include <linux/vmalloc.h> | 26 | #include <linux/vmalloc.h> |
27 | #include <linux/init.h> | 27 | #include <linux/init.h> |
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/lmb.h> | ||
30 | #include <linux/slab.h> | ||
29 | 31 | ||
30 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
31 | #include <asm/pgalloc.h> | 33 | #include <asm/pgalloc.h> |
@@ -191,7 +193,8 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, | |||
191 | * Don't allow anybody to remap normal RAM that we're using. | 193 | * Don't allow anybody to remap normal RAM that we're using. |
192 | * mem_init() sets high_memory so only do the check after that. | 194 | * mem_init() sets high_memory so only do the check after that. |
193 | */ | 195 | */ |
194 | if (mem_init_done && (p < virt_to_phys(high_memory))) { | 196 | if (mem_init_done && (p < virt_to_phys(high_memory)) && |
197 | !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) { | ||
195 | printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n", | 198 | printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n", |
196 | (unsigned long long)p, __builtin_return_address(0)); | 199 | (unsigned long long)p, __builtin_return_address(0)); |
197 | return NULL; | 200 | return NULL; |
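
The hunk above loosens __ioremap_caller()'s RAM check so an lmb-reserved region may still be remapped when __allow_ioremap_reserved is set. A hedged sketch of the combined predicate, with the kernel helpers stubbed out and the mem_init_done guard omitted:

    #include <stdbool.h>
    #include <stdio.h>

    static unsigned long lowmem_top = 0x30000000UL; /* virt_to_phys(high_memory) stand-in */

    static bool region_reserved(unsigned long p, unsigned long size)
    {
            (void)size;
            return p >= 0x01000000UL && p < 0x02000000UL; /* pretend 16M..32M is reserved */
    }

    /* Refuse only if the target is RAM and is not an explicitly
     * allowed reserved region. */
    static bool reject_ioremap(unsigned long p, unsigned long size,
                               bool allow_reserved)
    {
            return p < lowmem_top && !(allow_reserved && region_reserved(p, size));
    }

    int main(void)
    {
            printf("plain RAM: %d\n", reject_ioremap(0x00100000UL, 4096, true)); /* 1: rejected */
            printf("reserved : %d\n", reject_ioremap(0x01100000UL, 4096, true)); /* 0: allowed  */
            return 0;
    }
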
@@ -283,18 +286,18 @@ int map_page(unsigned long va, phys_addr_t pa, int flags) | |||
283 | } | 286 | } |
284 | 287 | ||
285 | /* | 288 | /* |
286 | * Map in a big chunk of physical memory starting at PAGE_OFFSET. | 289 | * Map in a chunk of physical memory starting at start. |
287 | */ | 290 | */ |
288 | void __init mapin_ram(void) | 291 | void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) |
289 | { | 292 | { |
290 | unsigned long v, s, f; | 293 | unsigned long v, s, f; |
291 | phys_addr_t p; | 294 | phys_addr_t p; |
292 | int ktext; | 295 | int ktext; |
293 | 296 | ||
294 | s = mmu_mapin_ram(); | 297 | s = offset; |
295 | v = PAGE_OFFSET + s; | 298 | v = PAGE_OFFSET + s; |
296 | p = memstart_addr + s; | 299 | p = memstart_addr + s; |
297 | for (; s < total_lowmem; s += PAGE_SIZE) { | 300 | for (; s < top; s += PAGE_SIZE) { |
298 | ktext = ((char *) v >= _stext && (char *) v < etext); | 301 | ktext = ((char *) v >= _stext && (char *) v < etext); |
299 | f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL; | 302 | f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL; |
300 | map_page(v, p, f); | 303 | map_page(v, p, f); |
@@ -307,6 +310,30 @@ void __init mapin_ram(void) | |||
307 | } | 310 | } |
308 | } | 311 | } |
309 | 312 | ||
313 | void __init mapin_ram(void) | ||
314 | { | ||
315 | unsigned long s, top; | ||
316 | |||
317 | #ifndef CONFIG_WII | ||
318 | top = total_lowmem; | ||
319 | s = mmu_mapin_ram(top); | ||
320 | __mapin_ram_chunk(s, top); | ||
321 | #else | ||
322 | if (!wii_hole_size) { | ||
323 | s = mmu_mapin_ram(total_lowmem); | ||
324 | __mapin_ram_chunk(s, total_lowmem); | ||
325 | } else { | ||
326 | top = wii_hole_start; | ||
327 | s = mmu_mapin_ram(top); | ||
328 | __mapin_ram_chunk(s, top); | ||
329 | |||
330 | top = lmb_end_of_DRAM(); | ||
331 | s = wii_mmu_mapin_mem2(top); | ||
332 | __mapin_ram_chunk(s, top); | ||
333 | } | ||
334 | #endif | ||
335 | } | ||
336 | |||
310 | /* Scan the real Linux page tables and return a PTE pointer for | 337 | /* Scan the real Linux page tables and return a PTE pointer for |
311 | * a virtual address in a context. | 338 | * a virtual address in a context. |
312 | * Returns true (1) if PTE was found, zero otherwise. The pointer to | 339 | * Returns true (1) if PTE was found, zero otherwise. The pointer to |
@@ -356,7 +383,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot) | |||
356 | return 0; | 383 | return 0; |
357 | if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) | 384 | if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) |
358 | return -EINVAL; | 385 | return -EINVAL; |
359 | set_pte_at(&init_mm, address, kpte, mk_pte(page, prot)); | 386 | __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); |
360 | wmb(); | 387 | wmb(); |
361 | #ifdef CONFIG_PPC_STD_MMU | 388 | #ifdef CONFIG_PPC_STD_MMU |
362 | flush_hash_pages(0, address, pmd_val(*kpmd), 1); | 389 | flush_hash_pages(0, address, pmd_val(*kpmd), 1); |
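
mapin_ram() above now maps lowmem as a single chunk on ordinary boards and, on the Wii, as two chunks on either side of the memory hole. A simplified stand-alone model of that control flow (helper names and sizes are made up; the real second chunk starts where wii_mmu_mapin_mem2() leaves off):

    #include <stdio.h>

    /* mmu_map() stands in for mmu_mapin_ram(): pretend the MMU covers
     * the first half of [0, top) with large mappings and returns how
     * far it got; the remainder gets page mappings. */
    static unsigned long mmu_map(unsigned long top) { return top / 2; }

    static void map_chunk(unsigned long s, unsigned long top) /* __mapin_ram_chunk() stand-in */
    {
            printf("page-mapping 0x%08lx..0x%08lx\n", s, top);
    }

    static void mapin_ram_demo(unsigned long lowmem,
                               unsigned long hole_start, unsigned long hole_end,
                               unsigned long dram_top)
    {
            if (!hole_start) {
                    map_chunk(mmu_map(lowmem), lowmem);         /* single chunk */
            } else {
                    map_chunk(mmu_map(hole_start), hole_start); /* below the hole */
                    map_chunk(hole_end, dram_top);              /* above the hole */
            }
    }

    int main(void)
    {
            /* Wii-like layout: 24M of MEM1, a hole, then MEM2 above it */
            mapin_ram_demo(0x01800000UL, 0x01800000UL, 0x10000000UL, 0x14000000UL);
            return 0;
    }
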
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 853d5565eed5..d95679a5fb29 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
36 | #include <linux/bootmem.h> | 36 | #include <linux/bootmem.h> |
37 | #include <linux/lmb.h> | 37 | #include <linux/lmb.h> |
38 | #include <linux/slab.h> | ||
38 | 39 | ||
39 | #include <asm/pgalloc.h> | 40 | #include <asm/pgalloc.h> |
40 | #include <asm/page.h> | 41 | #include <asm/page.h> |
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 2d2a87e10154..f11c2cdcb0fe 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c | |||
@@ -72,7 +72,7 @@ unsigned long p_mapped_by_bats(phys_addr_t pa) | |||
72 | return 0; | 72 | return 0; |
73 | } | 73 | } |
74 | 74 | ||
75 | unsigned long __init mmu_mapin_ram(void) | 75 | unsigned long __init mmu_mapin_ram(unsigned long top) |
76 | { | 76 | { |
77 | unsigned long tot, bl, done; | 77 | unsigned long tot, bl, done; |
78 | unsigned long max_size = (256<<20); | 78 | unsigned long max_size = (256<<20); |
@@ -86,7 +86,7 @@ unsigned long __init mmu_mapin_ram(void) | |||
86 | 86 | ||
87 | /* Make sure we don't map a block larger than the | 87 | /* Make sure we don't map a block larger than the |
88 | smallest alignment of the physical address. */ | 88 | smallest alignment of the physical address. */ |
89 | tot = total_lowmem; | 89 | tot = top; |
90 | for (bl = 128<<10; bl < max_size; bl <<= 1) { | 90 | for (bl = 128<<10; bl < max_size; bl <<= 1) { |
91 | if (bl * 2 > tot) | 91 | if (bl * 2 > tot) |
92 | break; | 92 | break; |
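
The ppc_mmu_32.c hunk parameterizes mmu_mapin_ram() on top but keeps the BAT sizing loop intact. Pulled out on its own, the loop selects the largest power-of-two block (128K up to the 256M cap) that does not exceed the memory to be mapped:

    #include <stdio.h>

    /* Mirror of the size-selection loop in mmu_mapin_ram() above. */
    static unsigned long bat_block_size(unsigned long tot)
    {
            unsigned long bl, max_size = 256 << 20;

            for (bl = 128 << 10; bl < max_size; bl <<= 1) {
                    if (bl * 2 > tot)
                            break;
            }
            return bl;
    }

    int main(void)
    {
            /* 48M to map -> a 32M BAT block */
            printf("48M of lowmem -> %luM BAT\n", bat_block_size(48 << 20) >> 20);
            return 0;
    }
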
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 4cafc0c33d0a..e4f8f1fc81a5 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <linux/errno.h> | 10 | #include <linux/errno.h> |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/gfp.h> | 12 | #include <linux/gfp.h> |
13 | #include <linux/slab.h> | ||
14 | #include <linux/types.h> | 13 | #include <linux/types.h> |
15 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
16 | #include <linux/hugetlb.h> | 15 | #include <linux/hugetlb.h> |
@@ -24,9 +23,9 @@ | |||
24 | * Also makes sure that the subpage_prot_table structure is | 23 | * Also makes sure that the subpage_prot_table structure is |
25 | * reinitialized for the next user. | 24 | * reinitialized for the next user. |
26 | */ | 25 | */ |
27 | void subpage_prot_free(pgd_t *pgd) | 26 | void subpage_prot_free(struct mm_struct *mm) |
28 | { | 27 | { |
29 | struct subpage_prot_table *spt = pgd_subpage_prot(pgd); | 28 | struct subpage_prot_table *spt = &mm->context.spt; |
30 | unsigned long i, j, addr; | 29 | unsigned long i, j, addr; |
31 | u32 **p; | 30 | u32 **p; |
32 | 31 | ||
@@ -51,6 +50,13 @@ void subpage_prot_free(pgd_t *pgd) | |||
51 | spt->maxaddr = 0; | 50 | spt->maxaddr = 0; |
52 | } | 51 | } |
53 | 52 | ||
53 | void subpage_prot_init_new_context(struct mm_struct *mm) | ||
54 | { | ||
55 | struct subpage_prot_table *spt = &mm->context.spt; | ||
56 | |||
57 | memset(spt, 0, sizeof(*spt)); | ||
58 | } | ||
59 | |||
54 | static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, | 60 | static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, |
55 | int npages) | 61 | int npages) |
56 | { | 62 | { |
@@ -87,7 +93,7 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, | |||
87 | static void subpage_prot_clear(unsigned long addr, unsigned long len) | 93 | static void subpage_prot_clear(unsigned long addr, unsigned long len) |
88 | { | 94 | { |
89 | struct mm_struct *mm = current->mm; | 95 | struct mm_struct *mm = current->mm; |
90 | struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd); | 96 | struct subpage_prot_table *spt = &mm->context.spt; |
91 | u32 **spm, *spp; | 97 | u32 **spm, *spp; |
92 | int i, nw; | 98 | int i, nw; |
93 | unsigned long next, limit; | 99 | unsigned long next, limit; |
@@ -136,7 +142,7 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) | |||
136 | long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) | 142 | long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) |
137 | { | 143 | { |
138 | struct mm_struct *mm = current->mm; | 144 | struct mm_struct *mm = current->mm; |
139 | struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd); | 145 | struct subpage_prot_table *spt = &mm->context.spt; |
140 | u32 **spm, *spp; | 146 | u32 **spm, *spp; |
141 | int i, nw; | 147 | int i, nw; |
142 | unsigned long next, limit; | 148 | unsigned long next, limit; |
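
subpage-prot.c now finds the protection table embedded in mm->context rather than hanging off the pgd, so creating a context just zeroes it in place instead of allocating it separately. A toy model of the embed-and-memset pattern (struct layouts here are hypothetical, loosely modelled on subpage_prot_table and mm_context_t):

    #include <stdio.h>
    #include <string.h>

    struct demo_spt {
            unsigned int **protptrs;  /* illustrative fields only */
            unsigned long maxaddr;
    };

    struct demo_context {
            struct demo_spt spt;      /* embedded: lives and dies with the mm */
    };

    /* mirrors subpage_prot_init_new_context(): no allocation,
     * just clear the embedded table */
    static void demo_init_new_context(struct demo_context *ctx)
    {
            memset(&ctx->spt, 0, sizeof(ctx->spt));
    }

    int main(void)
    {
            struct demo_context ctx;

            demo_init_new_context(&ctx);
            printf("maxaddr after init: %lu\n", ctx.spt.maxaddr);
            return 0;
    }
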
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 2b2f35f6985e..1ec06576f619 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c | |||
@@ -53,11 +53,6 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, | |||
53 | 53 | ||
54 | i = batch->index; | 54 | i = batch->index; |
55 | 55 | ||
56 | /* We mask the address for the base page size. Huge pages will | ||
57 | * have applied their own masking already | ||
58 | */ | ||
59 | addr &= PAGE_MASK; | ||
60 | |||
61 | /* Get page size (maybe move back to caller). | 56 | /* Get page size (maybe move back to caller). |
62 | * | 57 | * |
63 | * NOTE: when using special 64K mappings in 4K environment like | 58 | * NOTE: when using special 64K mappings in 4K environment like |
@@ -68,12 +63,21 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, | |||
68 | if (huge) { | 63 | if (huge) { |
69 | #ifdef CONFIG_HUGETLB_PAGE | 64 | #ifdef CONFIG_HUGETLB_PAGE |
70 | psize = get_slice_psize(mm, addr); | 65 | psize = get_slice_psize(mm, addr); |
66 | /* Mask the address for the correct page size */ | ||
67 | addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); | ||
71 | #else | 68 | #else |
72 | BUG(); | 69 | BUG(); |
73 | psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ | 70 | psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ |
74 | #endif | 71 | #endif |
75 | } else | 72 | } else { |
76 | psize = pte_pagesize_index(mm, addr, pte); | 73 | psize = pte_pagesize_index(mm, addr, pte); |
74 | /* Mask the address for the standard page size. If we | ||
75 | * have a 64k page kernel, but the hardware does not | ||
76 | * support 64k pages, this might be different from the | ||
77 | * hardware page size encoded in the slice table. */ | ||
78 | addr &= PAGE_MASK; | ||
79 | } | ||
80 | |||
77 | 81 | ||
78 | /* Build full vaddr */ | 82 | /* Build full vaddr */ |
79 | if (!is_kernel_addr(addr)) { | 83 | if (!is_kernel_addr(addr)) { |
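
The tlb_hash64.c change moves the address masking after the page-size lookup, so huge pages are masked by their own shift while normal pages keep PAGE_MASK. A small demo of masking by an arbitrary page-size shift, as the huge-page branch now does:

    #include <stdio.h>

    /* Round a virtual address down to its page boundary for a given
     * page-size shift. */
    static unsigned long mask_for_shift(unsigned long addr, unsigned shift)
    {
            return addr & ~((1UL << shift) - 1);
    }

    int main(void)
    {
            unsigned long addr = 0x10345678UL;

            printf("4K : 0x%lx\n", mask_for_shift(addr, 12)); /* PAGE_MASK case */
            printf("16M: 0x%lx\n", mask_for_shift(addr, 24)); /* huge-page case */
            return 0;
    }
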
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index f288279e679d..8b04c54e596f 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Low leve TLB miss handlers for Book3E | 2 | * Low level TLB miss handlers for Book3E |
3 | * | 3 | * |
4 | * Copyright (C) 2008-2009 | 4 | * Copyright (C) 2008-2009 |
5 | * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. | 5 | * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. |
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 2fbc680c2c71..e81d5d67f834 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c | |||
@@ -150,7 +150,7 @@ EXPORT_SYMBOL(local_flush_tlb_page); | |||
150 | */ | 150 | */ |
151 | #ifdef CONFIG_SMP | 151 | #ifdef CONFIG_SMP |
152 | 152 | ||
153 | static DEFINE_SPINLOCK(tlbivax_lock); | 153 | static DEFINE_RAW_SPINLOCK(tlbivax_lock); |
154 | 154 | ||
155 | static int mm_is_core_local(struct mm_struct *mm) | 155 | static int mm_is_core_local(struct mm_struct *mm) |
156 | { | 156 | { |
@@ -232,10 +232,10 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, | |||
232 | if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { | 232 | if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { |
233 | int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); | 233 | int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); |
234 | if (lock) | 234 | if (lock) |
235 | spin_lock(&tlbivax_lock); | 235 | raw_spin_lock(&tlbivax_lock); |
236 | _tlbivax_bcast(vmaddr, pid, tsize, ind); | 236 | _tlbivax_bcast(vmaddr, pid, tsize, ind); |
237 | if (lock) | 237 | if (lock) |
238 | spin_unlock(&tlbivax_lock); | 238 | raw_spin_unlock(&tlbivax_lock); |
239 | goto bail; | 239 | goto bail; |
240 | } else { | 240 | } else { |
241 | struct tlb_flush_param p = { | 241 | struct tlb_flush_param p = { |
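
Finally, tlb_nohash.c converts tlbivax_lock to a raw spinlock: raw_spinlock_t always busy-waits, whereas plain spinlock_t may sleep on PREEMPT_RT, which is not acceptable around a broadcast TLB invalidation. A sketch of the resulting pattern, assuming kernel context (demo_-prefixed names are placeholders):

    #include <linux/spinlock.h>

    static DEFINE_RAW_SPINLOCK(demo_tlbivax_lock);

    /* Take the raw (always-spinning) lock only on cores that need
     * broadcast invalidations serialized, as __flush_tlb_page() does. */
    static void demo_bcast_inval(int need_lock)
    {
            if (need_lock)
                    raw_spin_lock(&demo_tlbivax_lock);
            /* ... _tlbivax_bcast(vmaddr, pid, tsize, ind) would go here ... */
            if (need_lock)
                    raw_spin_unlock(&demo_tlbivax_lock);
    }
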