Diffstat (limited to 'arch/powerpc/mm')
27 files changed, 818 insertions(+), 833 deletions(-)
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index f5e7b9ce63dd..65abfcfaaa9e 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -84,14 +84,14 @@ void __init MMU_init_hw(void)
         * vectors and the kernel live in real-mode.
         */
 
-        mtspr(SPRN_DCCR, 0xF0000000);   /* 512 MB of data space at 0x0. */
-        mtspr(SPRN_ICCR, 0xF0000000);   /* 512 MB of instr. space at 0x0. */
+        mtspr(SPRN_DCCR, 0xFFFF0000);   /* 2GByte of data space at 0x0. */
+        mtspr(SPRN_ICCR, 0xFFFF0000);   /* 2GByte of instr. space at 0x0. */
 }
 
 #define LARGE_PAGE_SIZE_16M (1<<24)
 #define LARGE_PAGE_SIZE_4M (1<<22)
 
-unsigned long __init mmu_mapin_ram(void)
+unsigned long __init mmu_mapin_ram(unsigned long top)
 {
         unsigned long v, s, mapped;
         phys_addr_t p;
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 98052ac96580..3986264b0993 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -88,7 +88,7 @@ void __init MMU_init_hw(void)
         flush_instruction_cache();
 }
 
-unsigned long __init mmu_mapin_ram(void)
+unsigned long __init mmu_mapin_ram(unsigned long top)
 {
         unsigned long addr;
 
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 6fb8fc8d2fea..ce68708bbad5 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -28,7 +28,10 @@ obj-$(CONFIG_44x) += 44x_mmu.o
 obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o
 obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
 obj-$(CONFIG_PPC_MM_SLICES) += slice.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+ifeq ($(CONFIG_HUGETLB_PAGE),y)
+obj-y += hugetlbpage.o
+obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
+endif
 obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_HIGHMEM) += highmem.o
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index 36692f5c9a76..757c0bed9a91 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/string.h>
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index e7dae82c1285..26fb6b990b0a 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -40,7 +40,7 @@
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/siginfo.h>
-
+#include <mm/mmu_decl.h>
 
 #ifdef CONFIG_KPROBES
 static inline int notify_page_fault(struct pt_regs *regs)
@@ -246,6 +246,12 @@ good_area:
                 goto bad_area;
 #endif /* CONFIG_6xx */
 #if defined(CONFIG_8xx)
+        /* 8xx sometimes need to load a invalid/non-present TLBs.
+         * These must be invalidated separately as linux mm don't.
+         */
+        if (error_code & 0x40000000) /* no translation? */
+                _tlbil_va(address, 0, 0, 0);
+
         /* The MPC8xx seems to always set 0x80000000, which is
          * "undefined". Of those that can be set, this is the only
          * one which seems bad.
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index dc93e95b256e..1ed6b52f3031 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -54,26 +54,35 @@
| 54 | 54 | ||
| 55 | #include "mmu_decl.h" | 55 | #include "mmu_decl.h" |
| 56 | 56 | ||
| 57 | extern void loadcam_entry(unsigned int index); | ||
| 58 | unsigned int tlbcam_index; | 57 | unsigned int tlbcam_index; |
| 59 | static unsigned long cam[CONFIG_LOWMEM_CAM_NUM]; | ||
| 60 | 58 | ||
| 61 | #define NUM_TLBCAMS (16) | 59 | #define NUM_TLBCAMS (64) |
| 62 | 60 | ||
| 63 | #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) | 61 | #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) |
| 64 | #error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" | 62 | #error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" |
| 65 | #endif | 63 | #endif |
| 66 | 64 | ||
| 67 | struct tlbcam TLBCAM[NUM_TLBCAMS]; | 65 | struct tlbcam { |
| 66 | u32 MAS0; | ||
| 67 | u32 MAS1; | ||
| 68 | unsigned long MAS2; | ||
| 69 | u32 MAS3; | ||
| 70 | u32 MAS7; | ||
| 71 | } TLBCAM[NUM_TLBCAMS]; | ||
| 68 | 72 | ||
| 69 | struct tlbcamrange { | 73 | struct tlbcamrange { |
| 70 | unsigned long start; | 74 | unsigned long start; |
| 71 | unsigned long limit; | 75 | unsigned long limit; |
| 72 | phys_addr_t phys; | 76 | phys_addr_t phys; |
| 73 | } tlbcam_addrs[NUM_TLBCAMS]; | 77 | } tlbcam_addrs[NUM_TLBCAMS]; |
| 74 | 78 | ||
| 75 | extern unsigned int tlbcam_index; | 79 | extern unsigned int tlbcam_index; |
| 76 | 80 | ||
| 81 | unsigned long tlbcam_sz(int idx) | ||
| 82 | { | ||
| 83 | return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1; | ||
| 84 | } | ||
| 85 | |||
| 77 | /* | 86 | /* |
| 78 | * Return PA for this VA if it is mapped by a CAM, or 0 | 87 | * Return PA for this VA if it is mapped by a CAM, or 0 |
| 79 | */ | 88 | */ |
@@ -94,23 +103,36 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa)
| 94 | int b; | 103 | int b; |
| 95 | for (b = 0; b < tlbcam_index; ++b) | 104 | for (b = 0; b < tlbcam_index; ++b) |
| 96 | if (pa >= tlbcam_addrs[b].phys | 105 | if (pa >= tlbcam_addrs[b].phys |
| 97 | && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) | 106 | && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) |
| 98 | +tlbcam_addrs[b].phys) | 107 | +tlbcam_addrs[b].phys) |
| 99 | return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); | 108 | return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); |
| 100 | return 0; | 109 | return 0; |
| 101 | } | 110 | } |
| 102 | 111 | ||
| 112 | void loadcam_entry(int idx) | ||
| 113 | { | ||
| 114 | mtspr(SPRN_MAS0, TLBCAM[idx].MAS0); | ||
| 115 | mtspr(SPRN_MAS1, TLBCAM[idx].MAS1); | ||
| 116 | mtspr(SPRN_MAS2, TLBCAM[idx].MAS2); | ||
| 117 | mtspr(SPRN_MAS3, TLBCAM[idx].MAS3); | ||
| 118 | |||
| 119 | if (mmu_has_feature(MMU_FTR_BIG_PHYS)) | ||
| 120 | mtspr(SPRN_MAS7, TLBCAM[idx].MAS7); | ||
| 121 | |||
| 122 | asm volatile("isync;tlbwe;isync" : : : "memory"); | ||
| 123 | } | ||
| 124 | |||
| 103 | /* | 125 | /* |
| 104 | * Set up one of the I/D BAT (block address translation) register pairs. | 126 | * Set up one of the I/D BAT (block address translation) register pairs. |
| 105 | * The parameters are not checked; in particular size must be a power | 127 | * The parameters are not checked; in particular size must be a power |
| 106 | * of 4 between 4k and 256M. | 128 | * of 4 between 4k and 256M. |
| 107 | */ | 129 | */ |
| 108 | void settlbcam(int index, unsigned long virt, phys_addr_t phys, | 130 | static void settlbcam(int index, unsigned long virt, phys_addr_t phys, |
| 109 | unsigned int size, int flags, unsigned int pid) | 131 | unsigned long size, unsigned long flags, unsigned int pid) |
| 110 | { | 132 | { |
| 111 | unsigned int tsize, lz; | 133 | unsigned int tsize, lz; |
| 112 | 134 | ||
| 113 | asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); | 135 | asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (size)); |
| 114 | tsize = 21 - lz; | 136 | tsize = 21 - lz; |
| 115 | 137 | ||
| 116 | #ifdef CONFIG_SMP | 138 | #ifdef CONFIG_SMP |
@@ -128,18 +150,15 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys,
| 128 | TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; | 150 | TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; |
| 129 | TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; | 151 | TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; |
| 130 | 152 | ||
| 131 | TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR; | 153 | TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR; |
| 132 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); | 154 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); |
| 155 | if (mmu_has_feature(MMU_FTR_BIG_PHYS)) | ||
| 156 | TLBCAM[index].MAS7 = (u64)phys >> 32; | ||
| 133 | 157 | ||
| 134 | #ifndef CONFIG_KGDB /* want user access for breakpoints */ | ||
| 135 | if (flags & _PAGE_USER) { | 158 | if (flags & _PAGE_USER) { |
| 136 | TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; | 159 | TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; |
| 137 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); | 160 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); |
| 138 | } | 161 | } |
| 139 | #else | ||
| 140 | TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; | ||
| 141 | TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); | ||
| 142 | #endif | ||
| 143 | 162 | ||
| 144 | tlbcam_addrs[index].start = virt; | 163 | tlbcam_addrs[index].start = virt; |
| 145 | tlbcam_addrs[index].limit = virt + size - 1; | 164 | tlbcam_addrs[index].limit = virt + size - 1; |
@@ -148,27 +167,44 @@ void settlbcam(int index, unsigned long virt, phys_addr_t phys,
| 148 | loadcam_entry(index); | 167 | loadcam_entry(index); |
| 149 | } | 168 | } |
| 150 | 169 | ||
| 151 | void invalidate_tlbcam_entry(int index) | 170 | unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx) |
| 152 | { | ||
| 153 | TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index); | ||
| 154 | TLBCAM[index].MAS1 = ~MAS1_VALID; | ||
| 155 | |||
| 156 | loadcam_entry(index); | ||
| 157 | } | ||
| 158 | |||
| 159 | unsigned long __init mmu_mapin_ram(void) | ||
| 160 | { | 171 | { |
| 172 | int i; | ||
| 161 | unsigned long virt = PAGE_OFFSET; | 173 | unsigned long virt = PAGE_OFFSET; |
| 162 | phys_addr_t phys = memstart_addr; | 174 | phys_addr_t phys = memstart_addr; |
| 175 | unsigned long amount_mapped = 0; | ||
| 176 | unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf; | ||
| 177 | |||
| 178 | /* Convert (4^max) kB to (2^max) bytes */ | ||
| 179 | max_cam = max_cam * 2 + 10; | ||
| 180 | |||
| 181 | /* Calculate CAM values */ | ||
| 182 | for (i = 0; ram && i < max_cam_idx; i++) { | ||
| 183 | unsigned int camsize = __ilog2(ram) & ~1U; | ||
| 184 | unsigned int align = __ffs(virt | phys) & ~1U; | ||
| 185 | unsigned long cam_sz; | ||
| 186 | |||
| 187 | if (camsize > align) | ||
| 188 | camsize = align; | ||
| 189 | if (camsize > max_cam) | ||
| 190 | camsize = max_cam; | ||
| 191 | |||
| 192 | cam_sz = 1UL << camsize; | ||
| 193 | settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0); | ||
| 163 | 194 | ||
| 164 | while (tlbcam_index < ARRAY_SIZE(cam) && cam[tlbcam_index]) { | 195 | ram -= cam_sz; |
| 165 | settlbcam(tlbcam_index, virt, phys, cam[tlbcam_index], PAGE_KERNEL_X, 0); | 196 | amount_mapped += cam_sz; |
| 166 | virt += cam[tlbcam_index]; | 197 | virt += cam_sz; |
| 167 | phys += cam[tlbcam_index]; | 198 | phys += cam_sz; |
| 168 | tlbcam_index++; | ||
| 169 | } | 199 | } |
| 200 | tlbcam_index = i; | ||
| 201 | |||
| 202 | return amount_mapped; | ||
| 203 | } | ||
| 170 | 204 | ||
| 171 | return virt - PAGE_OFFSET; | 205 | unsigned long __init mmu_mapin_ram(unsigned long top) |
| 206 | { | ||
| 207 | return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; | ||
| 172 | } | 208 | } |
| 173 | 209 | ||
| 174 | /* | 210 | /* |
@@ -179,46 +215,21 @@ void __init MMU_init_hw(void)
| 179 | flush_instruction_cache(); | 215 | flush_instruction_cache(); |
| 180 | } | 216 | } |
| 181 | 217 | ||
| 182 | void __init | 218 | void __init adjust_total_lowmem(void) |
| 183 | adjust_total_lowmem(void) | ||
| 184 | { | 219 | { |
| 185 | phys_addr_t ram; | 220 | unsigned long ram; |
| 186 | unsigned int max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xff; | ||
| 187 | char buf[ARRAY_SIZE(cam) * 5 + 1], *p = buf; | ||
| 188 | int i; | 221 | int i; |
| 189 | unsigned long virt = PAGE_OFFSET & 0xffffffffUL; | ||
| 190 | unsigned long phys = memstart_addr & 0xffffffffUL; | ||
| 191 | |||
| 192 | /* Convert (4^max) kB to (2^max) bytes */ | ||
| 193 | max_cam = max_cam * 2 + 10; | ||
| 194 | 222 | ||
| 195 | /* adjust lowmem size to __max_low_memory */ | 223 | /* adjust lowmem size to __max_low_memory */ |
| 196 | ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); | 224 | ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem); |
| 197 | 225 | ||
| 198 | /* Calculate CAM values */ | 226 | __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM); |
| 199 | __max_low_memory = 0; | ||
| 200 | for (i = 0; ram && i < ARRAY_SIZE(cam); i++) { | ||
| 201 | unsigned int camsize = __ilog2(ram) & ~1U; | ||
| 202 | unsigned int align = __ffs(virt | phys) & ~1U; | ||
| 203 | 227 | ||
| 204 | if (camsize > align) | 228 | pr_info("Memory CAM mapping: "); |
| 205 | camsize = align; | 229 | for (i = 0; i < tlbcam_index - 1; i++) |
| 206 | if (camsize > max_cam) | 230 | pr_cont("%lu/", tlbcam_sz(i) >> 20); |
| 207 | camsize = max_cam; | 231 | pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, |
| 208 | |||
| 209 | cam[i] = 1UL << camsize; | ||
| 210 | ram -= cam[i]; | ||
| 211 | __max_low_memory += cam[i]; | ||
| 212 | virt += cam[i]; | ||
| 213 | phys += cam[i]; | ||
| 214 | |||
| 215 | p += sprintf(p, "%lu/", cam[i] >> 20); | ||
| 216 | } | ||
| 217 | for (; i < ARRAY_SIZE(cam); i++) | ||
| 218 | p += sprintf(p, "0/"); | ||
| 219 | p[-1] = '\0'; | ||
| 220 | |||
| 221 | pr_info("Memory CAM mapping: %s Mb, residual: %dMb\n", buf, | ||
| 222 | (unsigned int)((total_lowmem - __max_low_memory) >> 20)); | 232 | (unsigned int)((total_lowmem - __max_low_memory) >> 20)); |
| 233 | |||
| 223 | __initial_memory_limit_addr = memstart_addr + __max_low_memory; | 234 | __initial_memory_limit_addr = memstart_addr + __max_low_memory; |
| 224 | } | 235 | } |
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
index bc122a120bf0..d7efdbf640c7 100644
--- a/arch/powerpc/mm/gup.c
+++ b/arch/powerpc/mm/gup.c
@@ -55,57 +55,6 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
| 55 | return 1; | 55 | return 1; |
| 56 | } | 56 | } |
| 57 | 57 | ||
| 58 | #ifdef CONFIG_HUGETLB_PAGE | ||
| 59 | static noinline int gup_huge_pte(pte_t *ptep, struct hstate *hstate, | ||
| 60 | unsigned long *addr, unsigned long end, | ||
| 61 | int write, struct page **pages, int *nr) | ||
| 62 | { | ||
| 63 | unsigned long mask; | ||
| 64 | unsigned long pte_end; | ||
| 65 | struct page *head, *page; | ||
| 66 | pte_t pte; | ||
| 67 | int refs; | ||
| 68 | |||
| 69 | pte_end = (*addr + huge_page_size(hstate)) & huge_page_mask(hstate); | ||
| 70 | if (pte_end < end) | ||
| 71 | end = pte_end; | ||
| 72 | |||
| 73 | pte = *ptep; | ||
| 74 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
| 75 | if (write) | ||
| 76 | mask |= _PAGE_RW; | ||
| 77 | if ((pte_val(pte) & mask) != mask) | ||
| 78 | return 0; | ||
| 79 | /* hugepages are never "special" */ | ||
| 80 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
| 81 | |||
| 82 | refs = 0; | ||
| 83 | head = pte_page(pte); | ||
| 84 | page = head + ((*addr & ~huge_page_mask(hstate)) >> PAGE_SHIFT); | ||
| 85 | do { | ||
| 86 | VM_BUG_ON(compound_head(page) != head); | ||
| 87 | pages[*nr] = page; | ||
| 88 | (*nr)++; | ||
| 89 | page++; | ||
| 90 | refs++; | ||
| 91 | } while (*addr += PAGE_SIZE, *addr != end); | ||
| 92 | |||
| 93 | if (!page_cache_add_speculative(head, refs)) { | ||
| 94 | *nr -= refs; | ||
| 95 | return 0; | ||
| 96 | } | ||
| 97 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
| 98 | /* Could be optimized better */ | ||
| 99 | while (*nr) { | ||
| 100 | put_page(page); | ||
| 101 | (*nr)--; | ||
| 102 | } | ||
| 103 | } | ||
| 104 | |||
| 105 | return 1; | ||
| 106 | } | ||
| 107 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
| 108 | |||
| 109 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | 58 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, |
| 110 | int write, struct page **pages, int *nr) | 59 | int write, struct page **pages, int *nr) |
| 111 | { | 60 | { |
@@ -119,7 +68,11 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
| 119 | next = pmd_addr_end(addr, end); | 68 | next = pmd_addr_end(addr, end); |
| 120 | if (pmd_none(pmd)) | 69 | if (pmd_none(pmd)) |
| 121 | return 0; | 70 | return 0; |
| 122 | if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | 71 | if (is_hugepd(pmdp)) { |
| 72 | if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT, | ||
| 73 | addr, next, write, pages, nr)) | ||
| 74 | return 0; | ||
| 75 | } else if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | ||
| 123 | return 0; | 76 | return 0; |
| 124 | } while (pmdp++, addr = next, addr != end); | 77 | } while (pmdp++, addr = next, addr != end); |
| 125 | 78 | ||
@@ -139,7 +92,11 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
| 139 | next = pud_addr_end(addr, end); | 92 | next = pud_addr_end(addr, end); |
| 140 | if (pud_none(pud)) | 93 | if (pud_none(pud)) |
| 141 | return 0; | 94 | return 0; |
| 142 | if (!gup_pmd_range(pud, addr, next, write, pages, nr)) | 95 | if (is_hugepd(pudp)) { |
| 96 | if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT, | ||
| 97 | addr, next, write, pages, nr)) | ||
| 98 | return 0; | ||
| 99 | } else if (!gup_pmd_range(pud, addr, next, write, pages, nr)) | ||
| 143 | return 0; | 100 | return 0; |
| 144 | } while (pudp++, addr = next, addr != end); | 101 | } while (pudp++, addr = next, addr != end); |
| 145 | 102 | ||
@@ -154,10 +111,6 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
| 154 | unsigned long next; | 111 | unsigned long next; |
| 155 | pgd_t *pgdp; | 112 | pgd_t *pgdp; |
| 156 | int nr = 0; | 113 | int nr = 0; |
| 157 | #ifdef CONFIG_PPC64 | ||
| 158 | unsigned int shift; | ||
| 159 | int psize; | ||
| 160 | #endif | ||
| 161 | 114 | ||
| 162 | pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); | 115 | pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read"); |
| 163 | 116 | ||
@@ -172,25 +125,6 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
| 172 | 125 | ||
| 173 | pr_devel(" aligned: %lx .. %lx\n", start, end); | 126 | pr_devel(" aligned: %lx .. %lx\n", start, end); |
| 174 | 127 | ||
| 175 | #ifdef CONFIG_HUGETLB_PAGE | ||
| 176 | /* We bail out on slice boundary crossing when hugetlb is | ||
| 177 | * enabled in order to not have to deal with two different | ||
| 178 | * page table formats | ||
| 179 | */ | ||
| 180 | if (addr < SLICE_LOW_TOP) { | ||
| 181 | if (end > SLICE_LOW_TOP) | ||
| 182 | goto slow_irqon; | ||
| 183 | |||
| 184 | if (unlikely(GET_LOW_SLICE_INDEX(addr) != | ||
| 185 | GET_LOW_SLICE_INDEX(end - 1))) | ||
| 186 | goto slow_irqon; | ||
| 187 | } else { | ||
| 188 | if (unlikely(GET_HIGH_SLICE_INDEX(addr) != | ||
| 189 | GET_HIGH_SLICE_INDEX(end - 1))) | ||
| 190 | goto slow_irqon; | ||
| 191 | } | ||
| 192 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
| 193 | |||
| 194 | /* | 128 | /* |
| 195 | * XXX: batch / limit 'nr', to avoid large irq off latency | 129 | * XXX: batch / limit 'nr', to avoid large irq off latency |
| 196 | * needs some instrumenting to determine the common sizes used by | 130 | * needs some instrumenting to determine the common sizes used by |
@@ -210,54 +144,23 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
| 210 | */ | 144 | */ |
| 211 | local_irq_disable(); | 145 | local_irq_disable(); |
| 212 | 146 | ||
| 213 | #ifdef CONFIG_PPC64 | 147 | pgdp = pgd_offset(mm, addr); |
| 214 | /* Those bits are related to hugetlbfs implementation and only exist | 148 | do { |
| 215 | * on 64-bit for now | 149 | pgd_t pgd = *pgdp; |
| 216 | */ | 150 | |
| 217 | psize = get_slice_psize(mm, addr); | 151 | pr_devel(" %016lx: normal pgd %p\n", addr, |
| 218 | shift = mmu_psize_defs[psize].shift; | 152 | (void *)pgd_val(pgd)); |
| 219 | #endif /* CONFIG_PPC64 */ | 153 | next = pgd_addr_end(addr, end); |
| 220 | 154 | if (pgd_none(pgd)) | |
| 221 | #ifdef CONFIG_HUGETLB_PAGE | 155 | goto slow; |
| 222 | if (unlikely(mmu_huge_psizes[psize])) { | 156 | if (is_hugepd(pgdp)) { |
| 223 | pte_t *ptep; | 157 | if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT, |
| 224 | unsigned long a = addr; | 158 | addr, next, write, pages, &nr)) |
| 225 | unsigned long sz = ((1UL) << shift); | ||
| 226 | struct hstate *hstate = size_to_hstate(sz); | ||
| 227 | |||
| 228 | BUG_ON(!hstate); | ||
| 229 | /* | ||
| 230 | * XXX: could be optimized to avoid hstate | ||
| 231 | * lookup entirely (just use shift) | ||
| 232 | */ | ||
| 233 | |||
| 234 | do { | ||
| 235 | VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, a)].shift); | ||
| 236 | ptep = huge_pte_offset(mm, a); | ||
| 237 | pr_devel(" %016lx: huge ptep %p\n", a, ptep); | ||
| 238 | if (!ptep || !gup_huge_pte(ptep, hstate, &a, end, write, pages, | ||
| 239 | &nr)) | ||
| 240 | goto slow; | ||
| 241 | } while (a != end); | ||
| 242 | } else | ||
| 243 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
| 244 | { | ||
| 245 | pgdp = pgd_offset(mm, addr); | ||
| 246 | do { | ||
| 247 | pgd_t pgd = *pgdp; | ||
| 248 | |||
| 249 | #ifdef CONFIG_PPC64 | ||
| 250 | VM_BUG_ON(shift != mmu_psize_defs[get_slice_psize(mm, addr)].shift); | ||
| 251 | #endif | ||
| 252 | pr_devel(" %016lx: normal pgd %p\n", addr, | ||
| 253 | (void *)pgd_val(pgd)); | ||
| 254 | next = pgd_addr_end(addr, end); | ||
| 255 | if (pgd_none(pgd)) | ||
| 256 | goto slow; | ||
| 257 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
| 258 | goto slow; | 159 | goto slow; |
| 259 | } while (pgdp++, addr = next, addr != end); | 160 | } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) |
| 260 | } | 161 | goto slow; |
| 162 | } while (pgdp++, addr = next, addr != end); | ||
| 163 | |||
| 261 | local_irq_enable(); | 164 | local_irq_enable(); |
| 262 | 165 | ||
| 263 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); | 166 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); |
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 056d23a1b105..784a400e0781 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -37,7 +37,7 @@
 
 #define HPTE_LOCK_BIT 3
 
-static DEFINE_SPINLOCK(native_tlbie_lock);
+static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
 
 static inline void __tlbie(unsigned long va, int psize, int ssize)
 {
@@ -104,7 +104,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local)
         if (use_local)
                 use_local = mmu_psize_defs[psize].tlbiel;
         if (lock_tlbie && !use_local)
-                spin_lock(&native_tlbie_lock);
+                raw_spin_lock(&native_tlbie_lock);
         asm volatile("ptesync": : :"memory");
         if (use_local) {
                 __tlbiel(va, psize, ssize);
@@ -114,7 +114,7 @@ static inline void tlbie(unsigned long va, int psize, int ssize, int local)
                 asm volatile("eieio; tlbsync; ptesync": : :"memory");
         }
         if (lock_tlbie && !use_local)
-                spin_unlock(&native_tlbie_lock);
+                raw_spin_unlock(&native_tlbie_lock);
 }
 
 static inline void native_lock_hpte(struct hash_pte *hptep)
@@ -122,7 +122,7 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
         unsigned long *word = &hptep->v;
 
         while (1) {
-                if (!test_and_set_bit(HPTE_LOCK_BIT, word))
+                if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
                         break;
                 while(test_bit(HPTE_LOCK_BIT, word))
                         cpu_relax();
@@ -133,8 +133,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
 {
         unsigned long *word = &hptep->v;
 
-        asm volatile("lwsync":::"memory");
-        clear_bit(HPTE_LOCK_BIT, word);
+        clear_bit_unlock(HPTE_LOCK_BIT, word);
 }
 
 static long native_hpte_insert(unsigned long hpte_group, unsigned long va,
@@ -434,7 +433,7 @@ static void native_hpte_clear(void)
         /* we take the tlbie lock and hold it. Some hardware will
          * deadlock if we try to tlbie from two processors at once.
          */
-        spin_lock(&native_tlbie_lock);
+        raw_spin_lock(&native_tlbie_lock);
 
         slots = pteg_count * HPTES_PER_GROUP;
 
@@ -458,7 +457,7 @@ static void native_hpte_clear(void)
         }
 
         asm volatile("eieio; tlbsync; ptesync":::"memory");
-        spin_unlock(&native_tlbie_lock);
+        raw_spin_unlock(&native_tlbie_lock);
         local_irq_restore(flags);
 }
 
@@ -521,7 +520,7 @@ static void native_flush_hash_range(unsigned long number, int local)
                 int lock_tlbie = !cpu_has_feature(CPU_FTR_LOCKLESS_TLBIE);
 
                 if (lock_tlbie)
-                        spin_lock(&native_tlbie_lock);
+                        raw_spin_lock(&native_tlbie_lock);
 
                 asm volatile("ptesync":::"memory");
                 for (i = 0; i < number; i++) {
@@ -536,7 +535,7 @@ static void native_flush_hash_range(unsigned long number, int local)
                 asm volatile("eieio; tlbsync; ptesync":::"memory");
 
                 if (lock_tlbie)
-                        raw_spin_unlock(&native_tlbie_lock);
         }
 
         local_irq_restore(flags);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 1ade7eb6ae00..3ecdcec0a39e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -92,6 +92,7 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
| 92 | struct hash_pte *htab_address; | 92 | struct hash_pte *htab_address; |
| 93 | unsigned long htab_size_bytes; | 93 | unsigned long htab_size_bytes; |
| 94 | unsigned long htab_hash_mask; | 94 | unsigned long htab_hash_mask; |
| 95 | EXPORT_SYMBOL_GPL(htab_hash_mask); | ||
| 95 | int mmu_linear_psize = MMU_PAGE_4K; | 96 | int mmu_linear_psize = MMU_PAGE_4K; |
| 96 | int mmu_virtual_psize = MMU_PAGE_4K; | 97 | int mmu_virtual_psize = MMU_PAGE_4K; |
| 97 | int mmu_vmalloc_psize = MMU_PAGE_4K; | 98 | int mmu_vmalloc_psize = MMU_PAGE_4K; |
@@ -102,6 +103,7 @@ int mmu_io_psize = MMU_PAGE_4K;
| 102 | int mmu_kernel_ssize = MMU_SEGSIZE_256M; | 103 | int mmu_kernel_ssize = MMU_SEGSIZE_256M; |
| 103 | int mmu_highuser_ssize = MMU_SEGSIZE_256M; | 104 | int mmu_highuser_ssize = MMU_SEGSIZE_256M; |
| 104 | u16 mmu_slb_size = 64; | 105 | u16 mmu_slb_size = 64; |
| 106 | EXPORT_SYMBOL_GPL(mmu_slb_size); | ||
| 105 | #ifdef CONFIG_HUGETLB_PAGE | 107 | #ifdef CONFIG_HUGETLB_PAGE |
| 106 | unsigned int HPAGE_SHIFT; | 108 | unsigned int HPAGE_SHIFT; |
| 107 | #endif | 109 | #endif |
@@ -338,7 +340,7 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
| 338 | else | 340 | else |
| 339 | def->tlbiel = 0; | 341 | def->tlbiel = 0; |
| 340 | 342 | ||
| 341 | DBG(" %d: shift=%02x, sllp=%04x, avpnm=%08x, " | 343 | DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, " |
| 342 | "tlbiel=%d, penc=%d\n", | 344 | "tlbiel=%d, penc=%d\n", |
| 343 | idx, shift, def->sllp, def->avpnm, def->tlbiel, | 345 | idx, shift, def->sllp, def->avpnm, def->tlbiel, |
| 344 | def->penc); | 346 | def->penc); |
@@ -481,16 +483,6 @@ static void __init htab_init_page_sizes(void)
| 481 | #ifdef CONFIG_HUGETLB_PAGE | 483 | #ifdef CONFIG_HUGETLB_PAGE |
| 482 | /* Reserve 16G huge page memory sections for huge pages */ | 484 | /* Reserve 16G huge page memory sections for huge pages */ |
| 483 | of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); | 485 | of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); |
| 484 | |||
| 485 | /* Set default large page size. Currently, we pick 16M or 1M depending | ||
| 486 | * on what is available | ||
| 487 | */ | ||
| 488 | if (mmu_psize_defs[MMU_PAGE_16M].shift) | ||
| 489 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; | ||
| 490 | /* With 4k/4level pagetables, we can't (for now) cope with a | ||
| 491 | * huge page size < PMD_SIZE */ | ||
| 492 | else if (mmu_psize_defs[MMU_PAGE_1M].shift) | ||
| 493 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; | ||
| 494 | #endif /* CONFIG_HUGETLB_PAGE */ | 486 | #endif /* CONFIG_HUGETLB_PAGE */ |
| 495 | } | 487 | } |
| 496 | 488 | ||
@@ -671,7 +663,7 @@ static void __init htab_initialize(void)
| 671 | base = (unsigned long)__va(lmb.memory.region[i].base); | 663 | base = (unsigned long)__va(lmb.memory.region[i].base); |
| 672 | size = lmb.memory.region[i].size; | 664 | size = lmb.memory.region[i].size; |
| 673 | 665 | ||
| 674 | DBG("creating mapping for region: %lx..%lx (prot: %x)\n", | 666 | DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", |
| 675 | base, size, prot); | 667 | base, size, prot); |
| 676 | 668 | ||
| 677 | #ifdef CONFIG_U3_DART | 669 | #ifdef CONFIG_U3_DART |
@@ -785,7 +777,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
| 785 | /* page is dirty */ | 777 | /* page is dirty */ |
| 786 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { | 778 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { |
| 787 | if (trap == 0x400) { | 779 | if (trap == 0x400) { |
| 788 | __flush_dcache_icache(page_address(page)); | 780 | flush_dcache_icache_page(page); |
| 789 | set_bit(PG_arch_1, &page->flags); | 781 | set_bit(PG_arch_1, &page->flags); |
| 790 | } else | 782 | } else |
| 791 | pp |= HPTE_R_N; | 783 | pp |= HPTE_R_N; |
@@ -843,9 +835,9 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
| 843 | * Result is 0: full permissions, _PAGE_RW: read-only, | 835 | * Result is 0: full permissions, _PAGE_RW: read-only, |
| 844 | * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access. | 836 | * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access. |
| 845 | */ | 837 | */ |
| 846 | static int subpage_protection(pgd_t *pgdir, unsigned long ea) | 838 | static int subpage_protection(struct mm_struct *mm, unsigned long ea) |
| 847 | { | 839 | { |
| 848 | struct subpage_prot_table *spt = pgd_subpage_prot(pgdir); | 840 | struct subpage_prot_table *spt = &mm->context.spt; |
| 849 | u32 spp = 0; | 841 | u32 spp = 0; |
| 850 | u32 **sbpm, *sbpp; | 842 | u32 **sbpm, *sbpp; |
| 851 | 843 | ||
@@ -873,7 +865,7 @@ static int subpage_protection(pgd_t *pgdir, unsigned long ea)
| 873 | } | 865 | } |
| 874 | 866 | ||
| 875 | #else /* CONFIG_PPC_SUBPAGE_PROT */ | 867 | #else /* CONFIG_PPC_SUBPAGE_PROT */ |
| 876 | static inline int subpage_protection(pgd_t *pgdir, unsigned long ea) | 868 | static inline int subpage_protection(struct mm_struct *mm, unsigned long ea) |
| 877 | { | 869 | { |
| 878 | return 0; | 870 | return 0; |
| 879 | } | 871 | } |
@@ -887,10 +879,11 @@ static inline int subpage_protection(pgd_t *pgdir, unsigned long ea)
| 887 | */ | 879 | */ |
| 888 | int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | 880 | int hash_page(unsigned long ea, unsigned long access, unsigned long trap) |
| 889 | { | 881 | { |
| 890 | void *pgdir; | 882 | pgd_t *pgdir; |
| 891 | unsigned long vsid; | 883 | unsigned long vsid; |
| 892 | struct mm_struct *mm; | 884 | struct mm_struct *mm; |
| 893 | pte_t *ptep; | 885 | pte_t *ptep; |
| 886 | unsigned hugeshift; | ||
| 894 | const struct cpumask *tmp; | 887 | const struct cpumask *tmp; |
| 895 | int rc, user_region = 0, local = 0; | 888 | int rc, user_region = 0, local = 0; |
| 896 | int psize, ssize; | 889 | int psize, ssize; |
@@ -943,30 +936,31 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
| 943 | if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) | 936 | if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) |
| 944 | local = 1; | 937 | local = 1; |
| 945 | 938 | ||
| 946 | #ifdef CONFIG_HUGETLB_PAGE | ||
| 947 | /* Handle hugepage regions */ | ||
| 948 | if (HPAGE_SHIFT && mmu_huge_psizes[psize]) { | ||
| 949 | DBG_LOW(" -> huge page !\n"); | ||
| 950 | return hash_huge_page(mm, access, ea, vsid, local, trap); | ||
| 951 | } | ||
| 952 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
| 953 | |||
| 954 | #ifndef CONFIG_PPC_64K_PAGES | 939 | #ifndef CONFIG_PPC_64K_PAGES |
| 955 | /* If we use 4K pages and our psize is not 4K, then we are hitting | 940 | /* If we use 4K pages and our psize is not 4K, then we might |
| 956 | * a special driver mapping, we need to align the address before | 941 | * be hitting a special driver mapping, and need to align the |
| 957 | * we fetch the PTE | 942 | * address before we fetch the PTE. |
| 943 | * | ||
| 944 | * It could also be a hugepage mapping, in which case this is | ||
| 945 | * not necessary, but it's not harmful, either. | ||
| 958 | */ | 946 | */ |
| 959 | if (psize != MMU_PAGE_4K) | 947 | if (psize != MMU_PAGE_4K) |
| 960 | ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); | 948 | ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1); |
| 961 | #endif /* CONFIG_PPC_64K_PAGES */ | 949 | #endif /* CONFIG_PPC_64K_PAGES */ |
| 962 | 950 | ||
| 963 | /* Get PTE and page size from page tables */ | 951 | /* Get PTE and page size from page tables */ |
| 964 | ptep = find_linux_pte(pgdir, ea); | 952 | ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift); |
| 965 | if (ptep == NULL || !pte_present(*ptep)) { | 953 | if (ptep == NULL || !pte_present(*ptep)) { |
| 966 | DBG_LOW(" no PTE !\n"); | 954 | DBG_LOW(" no PTE !\n"); |
| 967 | return 1; | 955 | return 1; |
| 968 | } | 956 | } |
| 969 | 957 | ||
| 958 | #ifdef CONFIG_HUGETLB_PAGE | ||
| 959 | if (hugeshift) | ||
| 960 | return __hash_page_huge(ea, access, vsid, ptep, trap, local, | ||
| 961 | ssize, hugeshift, psize); | ||
| 962 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
| 963 | |||
| 970 | #ifndef CONFIG_PPC_64K_PAGES | 964 | #ifndef CONFIG_PPC_64K_PAGES |
| 971 | DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); | 965 | DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); |
| 972 | #else | 966 | #else |
@@ -1031,7 +1025,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
| 1031 | else | 1025 | else |
| 1032 | #endif /* CONFIG_PPC_HAS_HASH_64K */ | 1026 | #endif /* CONFIG_PPC_HAS_HASH_64K */ |
| 1033 | { | 1027 | { |
| 1034 | int spp = subpage_protection(pgdir, ea); | 1028 | int spp = subpage_protection(mm, ea); |
| 1035 | if (access & spp) | 1029 | if (access & spp) |
| 1036 | rc = -2; | 1030 | rc = -2; |
| 1037 | else | 1031 | else |
@@ -1121,7 +1115,7 @@ void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
| 1121 | { | 1115 | { |
| 1122 | unsigned long hash, index, shift, hidx, slot; | 1116 | unsigned long hash, index, shift, hidx, slot; |
| 1123 | 1117 | ||
| 1124 | DBG_LOW("flush_hash_page(va=%016x)\n", va); | 1118 | DBG_LOW("flush_hash_page(va=%016lx)\n", va); |
| 1125 | pte_iterate_hashed_subpages(pte, psize, va, index, shift) { | 1119 | pte_iterate_hashed_subpages(pte, psize, va, index, shift) { |
| 1126 | hash = hpt_hash(va, shift, ssize); | 1120 | hash = hpt_hash(va, shift, ssize); |
| 1127 | hidx = __rpte_to_hidx(pte, index); | 1121 | hidx = __rpte_to_hidx(pte, index); |
@@ -1129,7 +1123,7 @@ void flush_hash_page(unsigned long va, real_pte_t pte, int psize, int ssize,
| 1129 | hash = ~hash; | 1123 | hash = ~hash; |
| 1130 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | 1124 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; |
| 1131 | slot += hidx & _PTEIDX_GROUP_IX; | 1125 | slot += hidx & _PTEIDX_GROUP_IX; |
| 1132 | DBG_LOW(" sub %d: hash=%x, hidx=%x\n", index, slot, hidx); | 1126 | DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); |
| 1133 | ppc_md.hpte_invalidate(slot, va, psize, ssize, local); | 1127 | ppc_md.hpte_invalidate(slot, va, psize, ssize, local); |
| 1134 | } pte_iterate_hashed_end(); | 1128 | } pte_iterate_hashed_end(); |
| 1135 | } | 1129 | } |
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
new file mode 100644
index 000000000000..199539882f92
--- /dev/null
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -0,0 +1,139 @@
| 1 | /* | ||
| 2 | * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later) | ||
| 3 | * | ||
| 4 | * Copyright (C) 2003 David Gibson, IBM Corporation. | ||
| 5 | * | ||
| 6 | * Based on the IA-32 version: | ||
| 7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/mm.h> | ||
| 11 | #include <linux/hugetlb.h> | ||
| 12 | #include <asm/pgtable.h> | ||
| 13 | #include <asm/pgalloc.h> | ||
| 14 | #include <asm/cacheflush.h> | ||
| 15 | #include <asm/machdep.h> | ||
| 16 | |||
| 17 | int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, | ||
| 18 | pte_t *ptep, unsigned long trap, int local, int ssize, | ||
| 19 | unsigned int shift, unsigned int mmu_psize) | ||
| 20 | { | ||
| 21 | unsigned long old_pte, new_pte; | ||
| 22 | unsigned long va, rflags, pa, sz; | ||
| 23 | long slot; | ||
| 24 | int err = 1; | ||
| 25 | |||
| 26 | BUG_ON(shift != mmu_psize_defs[mmu_psize].shift); | ||
| 27 | |||
| 28 | /* Search the Linux page table for a match with va */ | ||
| 29 | va = hpt_va(ea, vsid, ssize); | ||
| 30 | |||
| 31 | /* | ||
| 32 | * Check the user's access rights to the page. If access should be | ||
| 33 | * prevented then send the problem up to do_page_fault. | ||
| 34 | */ | ||
| 35 | if (unlikely(access & ~pte_val(*ptep))) | ||
| 36 | goto out; | ||
| 37 | /* | ||
| 38 | * At this point, we have a pte (old_pte) which can be used to build | ||
| 39 | * or update an HPTE. There are 2 cases: | ||
| 40 | * | ||
| 41 | * 1. There is a valid (present) pte with no associated HPTE (this is | ||
| 42 | * the most common case) | ||
| 43 | * 2. There is a valid (present) pte with an associated HPTE. The | ||
| 44 | * current values of the pp bits in the HPTE prevent access | ||
| 45 | * because we are doing software DIRTY bit management and the | ||
| 46 | * page is currently not DIRTY. | ||
| 47 | */ | ||
| 48 | |||
| 49 | |||
| 50 | do { | ||
| 51 | old_pte = pte_val(*ptep); | ||
| 52 | if (old_pte & _PAGE_BUSY) | ||
| 53 | goto out; | ||
| 54 | new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; | ||
| 55 | } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, | ||
| 56 | old_pte, new_pte)); | ||
| 57 | |||
| 58 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); | ||
| 59 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ | ||
| 60 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); | ||
| 61 | sz = ((1UL) << shift); | ||
| 62 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
| 63 | /* No CPU has hugepages but lacks no execute, so we | ||
| 64 | * don't need to worry about that case */ | ||
| 65 | rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); | ||
| 66 | |||
| 67 | /* Check if pte already has an hpte (case 2) */ | ||
| 68 | if (unlikely(old_pte & _PAGE_HASHPTE)) { | ||
| 69 | /* There MIGHT be an HPTE for this pte */ | ||
| 70 | unsigned long hash, slot; | ||
| 71 | |||
| 72 | hash = hpt_hash(va, shift, ssize); | ||
| 73 | if (old_pte & _PAGE_F_SECOND) | ||
| 74 | hash = ~hash; | ||
| 75 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
| 76 | slot += (old_pte & _PAGE_F_GIX) >> 12; | ||
| 77 | |||
| 78 | if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, | ||
| 79 | ssize, local) == -1) | ||
| 80 | old_pte &= ~_PAGE_HPTEFLAGS; | ||
| 81 | } | ||
| 82 | |||
| 83 | if (likely(!(old_pte & _PAGE_HASHPTE))) { | ||
| 84 | unsigned long hash = hpt_hash(va, shift, ssize); | ||
| 85 | unsigned long hpte_group; | ||
| 86 | |||
| 87 | pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; | ||
| 88 | |||
| 89 | repeat: | ||
| 90 | hpte_group = ((hash & htab_hash_mask) * | ||
| 91 | HPTES_PER_GROUP) & ~0x7UL; | ||
| 92 | |||
| 93 | /* clear HPTE slot informations in new PTE */ | ||
| 94 | #ifdef CONFIG_PPC_64K_PAGES | ||
| 95 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; | ||
| 96 | #else | ||
| 97 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
| 98 | #endif | ||
| 99 | /* Add in WIMG bits */ | ||
| 100 | rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | | ||
| 101 | _PAGE_COHERENT | _PAGE_GUARDED)); | ||
| 102 | |||
| 103 | /* Insert into the hash table, primary slot */ | ||
| 104 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, | ||
| 105 | mmu_psize, ssize); | ||
| 106 | |||
| 107 | /* Primary is full, try the secondary */ | ||
| 108 | if (unlikely(slot == -1)) { | ||
| 109 | hpte_group = ((~hash & htab_hash_mask) * | ||
| 110 | HPTES_PER_GROUP) & ~0x7UL; | ||
| 111 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, | ||
| 112 | HPTE_V_SECONDARY, | ||
| 113 | mmu_psize, ssize); | ||
| 114 | if (slot == -1) { | ||
| 115 | if (mftb() & 0x1) | ||
| 116 | hpte_group = ((hash & htab_hash_mask) * | ||
| 117 | HPTES_PER_GROUP)&~0x7UL; | ||
| 118 | |||
| 119 | ppc_md.hpte_remove(hpte_group); | ||
| 120 | goto repeat; | ||
| 121 | } | ||
| 122 | } | ||
| 123 | |||
| 124 | if (unlikely(slot == -2)) | ||
| 125 | panic("hash_huge_page: pte_insert failed\n"); | ||
| 126 | |||
| 127 | new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); | ||
| 128 | } | ||
| 129 | |||
| 130 | /* | ||
| 131 | * No need to use ldarx/stdcx here | ||
| 132 | */ | ||
| 133 | *ptep = __pte(new_pte & ~_PAGE_BUSY); | ||
| 134 | |||
| 135 | err = 0; | ||
| 136 | |||
| 137 | out: | ||
| 138 | return err; | ||
| 139 | } | ||
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 90df6ffe3a43..9bb249c3046e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -7,29 +7,18 @@
| 7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> | 7 | * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #include <linux/init.h> | ||
| 11 | #include <linux/fs.h> | ||
| 12 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
| 13 | #include <linux/hugetlb.h> | 11 | #include <linux/io.h> |
| 14 | #include <linux/pagemap.h> | ||
| 15 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 16 | #include <linux/err.h> | 13 | #include <linux/hugetlb.h> |
| 17 | #include <linux/sysctl.h> | 14 | #include <asm/pgtable.h> |
| 18 | #include <asm/mman.h> | ||
| 19 | #include <asm/pgalloc.h> | 15 | #include <asm/pgalloc.h> |
| 20 | #include <asm/tlb.h> | 16 | #include <asm/tlb.h> |
| 21 | #include <asm/tlbflush.h> | ||
| 22 | #include <asm/mmu_context.h> | ||
| 23 | #include <asm/machdep.h> | ||
| 24 | #include <asm/cputable.h> | ||
| 25 | #include <asm/spu.h> | ||
| 26 | 17 | ||
| 27 | #define PAGE_SHIFT_64K 16 | 18 | #define PAGE_SHIFT_64K 16 |
| 28 | #define PAGE_SHIFT_16M 24 | 19 | #define PAGE_SHIFT_16M 24 |
| 29 | #define PAGE_SHIFT_16G 34 | 20 | #define PAGE_SHIFT_16G 34 |
| 30 | 21 | ||
| 31 | #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) | ||
| 32 | #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) | ||
| 33 | #define MAX_NUMBER_GPAGES 1024 | 22 | #define MAX_NUMBER_GPAGES 1024 |
| 34 | 23 | ||
| 35 | /* Tracks the 16G pages after the device tree is scanned and before the | 24 | /* Tracks the 16G pages after the device tree is scanned and before the |
@@ -37,53 +26,17 @@
| 37 | static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; | 26 | static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; |
| 38 | static unsigned nr_gpages; | 27 | static unsigned nr_gpages; |
| 39 | 28 | ||
| 40 | /* Array of valid huge page sizes - non-zero value(hugepte_shift) is | ||
| 41 | * stored for the huge page sizes that are valid. | ||
| 42 | */ | ||
| 43 | unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ | ||
| 44 | |||
| 45 | #define hugepte_shift mmu_huge_psizes | ||
| 46 | #define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) | ||
| 47 | #define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) | ||
| 48 | |||
| 49 | #define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ | ||
| 50 | + hugepte_shift[psize]) | ||
| 51 | #define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) | ||
| 52 | #define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) | ||
| 53 | |||
| 54 | /* Subtract one from array size because we don't need a cache for 4K since | ||
| 55 | * is not a huge page size */ | ||
| 56 | #define HUGE_PGTABLE_INDEX(psize) (HUGEPTE_CACHE_NUM + psize - 1) | ||
| 57 | #define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) | ||
| 58 | |||
| 59 | static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { | ||
| 60 | [MMU_PAGE_64K] = "hugepte_cache_64K", | ||
| 61 | [MMU_PAGE_1M] = "hugepte_cache_1M", | ||
| 62 | [MMU_PAGE_16M] = "hugepte_cache_16M", | ||
| 63 | [MMU_PAGE_16G] = "hugepte_cache_16G", | ||
| 64 | }; | ||
| 65 | |||
| 66 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() | 29 | /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() |
| 67 | * will choke on pointers to hugepte tables, which is handy for | 30 | * will choke on pointers to hugepte tables, which is handy for |
| 68 | * catching screwups early. */ | 31 | * catching screwups early. */ |
| 69 | #define HUGEPD_OK 0x1 | ||
| 70 | |||
| 71 | typedef struct { unsigned long pd; } hugepd_t; | ||
| 72 | |||
| 73 | #define hugepd_none(hpd) ((hpd).pd == 0) | ||
| 74 | 32 | ||
| 75 | static inline int shift_to_mmu_psize(unsigned int shift) | 33 | static inline int shift_to_mmu_psize(unsigned int shift) |
| 76 | { | 34 | { |
| 77 | switch (shift) { | 35 | int psize; |
| 78 | #ifndef CONFIG_PPC_64K_PAGES | 36 | |
| 79 | case PAGE_SHIFT_64K: | 37 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) |
| 80 | return MMU_PAGE_64K; | 38 | if (mmu_psize_defs[psize].shift == shift) |
| 81 | #endif | 39 | return psize; |
| 82 | case PAGE_SHIFT_16M: | ||
| 83 | return MMU_PAGE_16M; | ||
| 84 | case PAGE_SHIFT_16G: | ||
| 85 | return MMU_PAGE_16G; | ||
| 86 | } | ||
| 87 | return -1; | 40 | return -1; |
| 88 | } | 41 | } |
| 89 | 42 | ||
@@ -94,71 +47,126 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
| 94 | BUG(); | 47 | BUG(); |
| 95 | } | 48 | } |
| 96 | 49 | ||
| 50 | #define hugepd_none(hpd) ((hpd).pd == 0) | ||
| 51 | |||
| 97 | static inline pte_t *hugepd_page(hugepd_t hpd) | 52 | static inline pte_t *hugepd_page(hugepd_t hpd) |
| 98 | { | 53 | { |
| 99 | BUG_ON(!(hpd.pd & HUGEPD_OK)); | 54 | BUG_ON(!hugepd_ok(hpd)); |
| 100 | return (pte_t *)(hpd.pd & ~HUGEPD_OK); | 55 | return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000); |
| 56 | } | ||
| 57 | |||
| 58 | static inline unsigned int hugepd_shift(hugepd_t hpd) | ||
| 59 | { | ||
| 60 | return hpd.pd & HUGEPD_SHIFT_MASK; | ||
| 101 | } | 61 | } |
| 102 | 62 | ||
| 103 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, | 63 | static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift) |
| 104 | struct hstate *hstate) | ||
| 105 | { | 64 | { |
| 106 | unsigned int shift = huge_page_shift(hstate); | 65 | unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); |
| 107 | int psize = shift_to_mmu_psize(shift); | ||
| 108 | unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); | ||
| 109 | pte_t *dir = hugepd_page(*hpdp); | 66 | pte_t *dir = hugepd_page(*hpdp); |
| 110 | 67 | ||
| 111 | return dir + idx; | 68 | return dir + idx; |
| 112 | } | 69 | } |
| 113 | 70 | ||
| 71 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) | ||
| 72 | { | ||
| 73 | pgd_t *pg; | ||
| 74 | pud_t *pu; | ||
| 75 | pmd_t *pm; | ||
| 76 | hugepd_t *hpdp = NULL; | ||
| 77 | unsigned pdshift = PGDIR_SHIFT; | ||
| 78 | |||
| 79 | if (shift) | ||
| 80 | *shift = 0; | ||
| 81 | |||
| 82 | pg = pgdir + pgd_index(ea); | ||
| 83 | if (is_hugepd(pg)) { | ||
| 84 | hpdp = (hugepd_t *)pg; | ||
| 85 | } else if (!pgd_none(*pg)) { | ||
| 86 | pdshift = PUD_SHIFT; | ||
| 87 | pu = pud_offset(pg, ea); | ||
| 88 | if (is_hugepd(pu)) | ||
| 89 | hpdp = (hugepd_t *)pu; | ||
| 90 | else if (!pud_none(*pu)) { | ||
| 91 | pdshift = PMD_SHIFT; | ||
| 92 | pm = pmd_offset(pu, ea); | ||
| 93 | if (is_hugepd(pm)) | ||
| 94 | hpdp = (hugepd_t *)pm; | ||
| 95 | else if (!pmd_none(*pm)) { | ||
| 96 | return pte_offset_map(pm, ea); | ||
| 97 | } | ||
| 98 | } | ||
| 99 | } | ||
| 100 | |||
| 101 | if (!hpdp) | ||
| 102 | return NULL; | ||
| 103 | |||
| 104 | if (shift) | ||
| 105 | *shift = hugepd_shift(*hpdp); | ||
| 106 | return hugepte_offset(hpdp, ea, pdshift); | ||
| 107 | } | ||
| 108 | |||
| 109 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
| 110 | { | ||
| 111 | return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); | ||
| 112 | } | ||
| 113 | |||
| 114 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, | 114 | static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, |
| 115 | unsigned long address, unsigned int psize) | 115 | unsigned long address, unsigned pdshift, unsigned pshift) |
| 116 | { | 116 | { |
| 117 | pte_t *new = kmem_cache_zalloc(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], | 117 | pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), |
| 118 | GFP_KERNEL|__GFP_REPEAT); | 118 | GFP_KERNEL|__GFP_REPEAT); |
| 119 | |||
| 120 | BUG_ON(pshift > HUGEPD_SHIFT_MASK); | ||
| 121 | BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); | ||
| 119 | 122 | ||
| 120 | if (! new) | 123 | if (! new) |
| 121 | return -ENOMEM; | 124 | return -ENOMEM; |
| 122 | 125 | ||
| 123 | spin_lock(&mm->page_table_lock); | 126 | spin_lock(&mm->page_table_lock); |
| 124 | if (!hugepd_none(*hpdp)) | 127 | if (!hugepd_none(*hpdp)) |
| 125 | kmem_cache_free(pgtable_cache[HUGE_PGTABLE_INDEX(psize)], new); | 128 | kmem_cache_free(PGT_CACHE(pdshift - pshift), new); |
| 126 | else | 129 | else |
| 127 | hpdp->pd = (unsigned long)new | HUGEPD_OK; | 130 | hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; |
| 128 | spin_unlock(&mm->page_table_lock); | 131 | spin_unlock(&mm->page_table_lock); |
| 129 | return 0; | 132 | return 0; |
| 130 | } | 133 | } |
| 131 | 134 | ||
| 132 | 135 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) | |
| 133 | static pud_t *hpud_offset(pgd_t *pgd, unsigned long addr, struct hstate *hstate) | ||
| 134 | { | ||
| 135 | if (huge_page_shift(hstate) < PUD_SHIFT) | ||
| 136 | return pud_offset(pgd, addr); | ||
| 137 | else | ||
| 138 | return (pud_t *) pgd; | ||
| 139 | } | ||
| 140 | static pud_t *hpud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, | ||
| 141 | struct hstate *hstate) | ||
| 142 | { | ||
| 143 | if (huge_page_shift(hstate) < PUD_SHIFT) | ||
| 144 | return pud_alloc(mm, pgd, addr); | ||
| 145 | else | ||
| 146 | return (pud_t *) pgd; | ||
| 147 | } | ||
| 148 | static pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) | ||
| 149 | { | 136 | { |
| 150 | if (huge_page_shift(hstate) < PMD_SHIFT) | 137 | pgd_t *pg; |
| 151 | return pmd_offset(pud, addr); | 138 | pud_t *pu; |
| 152 | else | 139 | pmd_t *pm; |
| 153 | return (pmd_t *) pud; | 140 | hugepd_t *hpdp = NULL; |
| 154 | } | 141 | unsigned pshift = __ffs(sz); |
| 155 | static pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, | 142 | unsigned pdshift = PGDIR_SHIFT; |
| 156 | struct hstate *hstate) | 143 | |
| 157 | { | 144 | addr &= ~(sz-1); |
| 158 | if (huge_page_shift(hstate) < PMD_SHIFT) | 145 | |
| 159 | return pmd_alloc(mm, pud, addr); | 146 | pg = pgd_offset(mm, addr); |
| 160 | else | 147 | if (pshift >= PUD_SHIFT) { |
| 161 | return (pmd_t *) pud; | 148 | hpdp = (hugepd_t *)pg; |
| 149 | } else { | ||
| 150 | pdshift = PUD_SHIFT; | ||
| 151 | pu = pud_alloc(mm, pg, addr); | ||
| 152 | if (pshift >= PMD_SHIFT) { | ||
| 153 | hpdp = (hugepd_t *)pu; | ||
| 154 | } else { | ||
| 155 | pdshift = PMD_SHIFT; | ||
| 156 | pm = pmd_alloc(mm, pu, addr); | ||
| 157 | hpdp = (hugepd_t *)pm; | ||
| 158 | } | ||
| 159 | } | ||
| 160 | |||
| 161 | if (!hpdp) | ||
| 162 | return NULL; | ||
| 163 | |||
| 164 | BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp)); | ||
| 165 | |||
| 166 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) | ||
| 167 | return NULL; | ||
| 168 | |||
| 169 | return hugepte_offset(hpdp, addr, pdshift); | ||
| 162 | } | 170 | } |
| 163 | 171 | ||
| 164 | /* Build list of addresses of gigantic pages. This function is used in early | 172 | /* Build list of addresses of gigantic pages. This function is used in early |
@@ -192,94 +200,38 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
| 192 | return 1; | 200 | return 1; |
| 193 | } | 201 | } |
| 194 | 202 | ||
| 195 | |||
| 196 | /* Modelled after find_linux_pte() */ | ||
| 197 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | ||
| 198 | { | ||
| 199 | pgd_t *pg; | ||
| 200 | pud_t *pu; | ||
| 201 | pmd_t *pm; | ||
| 202 | |||
| 203 | unsigned int psize; | ||
| 204 | unsigned int shift; | ||
| 205 | unsigned long sz; | ||
| 206 | struct hstate *hstate; | ||
| 207 | psize = get_slice_psize(mm, addr); | ||
| 208 | shift = mmu_psize_to_shift(psize); | ||
| 209 | sz = ((1UL) << shift); | ||
| 210 | hstate = size_to_hstate(sz); | ||
| 211 | |||
| 212 | addr &= hstate->mask; | ||
| 213 | |||
| 214 | pg = pgd_offset(mm, addr); | ||
| 215 | if (!pgd_none(*pg)) { | ||
| 216 | pu = hpud_offset(pg, addr, hstate); | ||
| 217 | if (!pud_none(*pu)) { | ||
| 218 | pm = hpmd_offset(pu, addr, hstate); | ||
| 219 | if (!pmd_none(*pm)) | ||
| 220 | return hugepte_offset((hugepd_t *)pm, addr, | ||
| 221 | hstate); | ||
| 222 | } | ||
| 223 | } | ||
| 224 | |||
| 225 | return NULL; | ||
| 226 | } | ||
| 227 | |||
| 228 | pte_t *huge_pte_alloc(struct mm_struct *mm, | ||
| 229 | unsigned long addr, unsigned long sz) | ||
| 230 | { | ||
| 231 | pgd_t *pg; | ||
| 232 | pud_t *pu; | ||
| 233 | pmd_t *pm; | ||
| 234 | hugepd_t *hpdp = NULL; | ||
| 235 | struct hstate *hstate; | ||
| 236 | unsigned int psize; | ||
| 237 | hstate = size_to_hstate(sz); | ||
| 238 | |||
| 239 | psize = get_slice_psize(mm, addr); | ||
| 240 | BUG_ON(!mmu_huge_psizes[psize]); | ||
| 241 | |||
| 242 | addr &= hstate->mask; | ||
| 243 | |||
| 244 | pg = pgd_offset(mm, addr); | ||
| 245 | pu = hpud_alloc(mm, pg, addr, hstate); | ||
| 246 | |||
| 247 | if (pu) { | ||
| 248 | pm = hpmd_alloc(mm, pu, addr, hstate); | ||
| 249 | if (pm) | ||
| 250 | hpdp = (hugepd_t *)pm; | ||
| 251 | } | ||
| 252 | |||
| 253 | if (! hpdp) | ||
| 254 | return NULL; | ||
| 255 | |||
| 256 | if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) | ||
| 257 | return NULL; | ||
| 258 | |||
| 259 | return hugepte_offset(hpdp, addr, hstate); | ||
| 260 | } | ||
| 261 | |||
| 262 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | 203 | int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) |
| 263 | { | 204 | { |
| 264 | return 0; | 205 | return 0; |
| 265 | } | 206 | } |
| 266 | 207 | ||
| 267 | static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, | 208 | static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, |
| 268 | unsigned int psize) | 209 | unsigned long start, unsigned long end, |
| 210 | unsigned long floor, unsigned long ceiling) | ||
| 269 | { | 211 | { |
| 270 | pte_t *hugepte = hugepd_page(*hpdp); | 212 | pte_t *hugepte = hugepd_page(*hpdp); |
| 213 | unsigned shift = hugepd_shift(*hpdp); | ||
| 214 | unsigned long pdmask = ~((1UL << pdshift) - 1); | ||
| 215 | |||
| 216 | start &= pdmask; | ||
| 217 | if (start < floor) | ||
| 218 | return; | ||
| 219 | if (ceiling) { | ||
| 220 | ceiling &= pdmask; | ||
| 221 | if (! ceiling) | ||
| 222 | return; | ||
| 223 | } | ||
| 224 | if (end - 1 > ceiling - 1) | ||
| 225 | return; | ||
| 271 | 226 | ||
| 272 | hpdp->pd = 0; | 227 | hpdp->pd = 0; |
| 273 | tlb->need_flush = 1; | 228 | tlb->need_flush = 1; |
| 274 | pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, | 229 | pgtable_free_tlb(tlb, hugepte, pdshift - shift); |
| 275 | HUGEPTE_CACHE_NUM+psize-1, | ||
| 276 | PGF_CACHENUM_MASK)); | ||
| 277 | } | 230 | } |
| 278 | 231 | ||
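free_hugepd_range() only frees the hugepte table once the full range covered by its directory slot lies inside the region being torn down; the floor/ceiling tests are the same trick the generic free_pgd_range() uses. An illustrative restatement of that guard, assuming pdshift gives the span of one directory entry and that an end or ceiling of 0 means the top of the address space:

    /* Illustration only -- not the kernel function itself. */
    static int demo_can_free_hugepd(unsigned long start, unsigned long end,
                                    unsigned long floor, unsigned long ceiling,
                                    unsigned int pdshift)
    {
            unsigned long pdmask = ~((1UL << pdshift) - 1);

            start &= pdmask;                /* round down to the slot boundary */
            if (start < floor)
                    return 0;               /* slot reaches below the freed region */
            if (ceiling) {
                    ceiling &= pdmask;
                    if (!ceiling)
                            return 0;
            }
            if (end - 1 > ceiling - 1)      /* "- 1" so that 0 really means "top" */
                    return 0;               /* slot reaches above the freed region */
            return 1;
    }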
| 279 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | 232 | static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, |
| 280 | unsigned long addr, unsigned long end, | 233 | unsigned long addr, unsigned long end, |
| 281 | unsigned long floor, unsigned long ceiling, | 234 | unsigned long floor, unsigned long ceiling) |
| 282 | unsigned int psize) | ||
| 283 | { | 235 | { |
| 284 | pmd_t *pmd; | 236 | pmd_t *pmd; |
| 285 | unsigned long next; | 237 | unsigned long next; |
| @@ -291,7 +243,8 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, | |||
| 291 | next = pmd_addr_end(addr, end); | 243 | next = pmd_addr_end(addr, end); |
| 292 | if (pmd_none(*pmd)) | 244 | if (pmd_none(*pmd)) |
| 293 | continue; | 245 | continue; |
| 294 | free_hugepte_range(tlb, (hugepd_t *)pmd, psize); | 246 | free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT, |
| 247 | addr, next, floor, ceiling); | ||
| 295 | } while (pmd++, addr = next, addr != end); | 248 | } while (pmd++, addr = next, addr != end); |
| 296 | 249 | ||
| 297 | start &= PUD_MASK; | 250 | start &= PUD_MASK; |
| @@ -317,23 +270,19 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, | |||
| 317 | pud_t *pud; | 270 | pud_t *pud; |
| 318 | unsigned long next; | 271 | unsigned long next; |
| 319 | unsigned long start; | 272 | unsigned long start; |
| 320 | unsigned int shift; | ||
| 321 | unsigned int psize = get_slice_psize(tlb->mm, addr); | ||
| 322 | shift = mmu_psize_to_shift(psize); | ||
| 323 | 273 | ||
| 324 | start = addr; | 274 | start = addr; |
| 325 | pud = pud_offset(pgd, addr); | 275 | pud = pud_offset(pgd, addr); |
| 326 | do { | 276 | do { |
| 327 | next = pud_addr_end(addr, end); | 277 | next = pud_addr_end(addr, end); |
| 328 | if (shift < PMD_SHIFT) { | 278 | if (!is_hugepd(pud)) { |
| 329 | if (pud_none_or_clear_bad(pud)) | 279 | if (pud_none_or_clear_bad(pud)) |
| 330 | continue; | 280 | continue; |
| 331 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, | 281 | hugetlb_free_pmd_range(tlb, pud, addr, next, floor, |
| 332 | ceiling, psize); | 282 | ceiling); |
| 333 | } else { | 283 | } else { |
| 334 | if (pud_none(*pud)) | 284 | free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT, |
| 335 | continue; | 285 | addr, next, floor, ceiling); |
| 336 | free_hugepte_range(tlb, (hugepd_t *)pud, psize); | ||
| 337 | } | 286 | } |
| 338 | } while (pud++, addr = next, addr != end); | 287 | } while (pud++, addr = next, addr != end); |
| 339 | 288 | ||
| @@ -364,121 +313,56 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, | |||
| 364 | { | 313 | { |
| 365 | pgd_t *pgd; | 314 | pgd_t *pgd; |
| 366 | unsigned long next; | 315 | unsigned long next; |
| 367 | unsigned long start; | ||
| 368 | 316 | ||
| 369 | /* | 317 | /* |
| 370 | * Comments below take from the normal free_pgd_range(). They | 318 | * Because there are a number of different possible pagetable |
| 371 | * apply here too. The tests against HUGEPD_MASK below are | 319 | * layouts for hugepage ranges, we limit knowledge of how |
| 372 | * essential, because we *don't* test for this at the bottom | 320 | * things should be laid out to the allocation path |
| 373 | * level. Without them we'll attempt to free a hugepte table | 321 | * (huge_pte_alloc(), above). Everything else works out the |
| 374 | * when we unmap just part of it, even if there are other | 322 | * structure as it goes from information in the hugepd |
| 375 | * active mappings using it. | 323 | * pointers. That means that we can't here use the |
| 376 | * | 324 | * optimization used in the normal page free_pgd_range(), of |
| 377 | * The next few lines have given us lots of grief... | 325 | * checking whether we're actually covering a large enough |
| 378 | * | 326 | * range to have to do anything at the top level of the walk |
| 379 | * Why are we testing HUGEPD* at this top level? Because | 327 | * instead of at the bottom. |
| 380 | * often there will be no work to do at all, and we'd prefer | ||
| 381 | * not to go all the way down to the bottom just to discover | ||
| 382 | * that. | ||
| 383 | * | 328 | * |
| 384 | * Why all these "- 1"s? Because 0 represents both the bottom | 329 | * To make sense of this, you should probably go read the big |
| 385 | * of the address space and the top of it (using -1 for the | 330 | * block comment at the top of the normal free_pgd_range(), |
| 386 | * top wouldn't help much: the masks would do the wrong thing). | 331 | * too. |
| 387 | * The rule is that addr 0 and floor 0 refer to the bottom of | ||
| 388 | * the address space, but end 0 and ceiling 0 refer to the top | ||
| 389 | * Comparisons need to use "end - 1" and "ceiling - 1" (though | ||
| 390 | * that end 0 case should be mythical). | ||
| 391 | * | ||
| 392 | * Wherever addr is brought up or ceiling brought down, we | ||
| 393 | * must be careful to reject "the opposite 0" before it | ||
| 394 | * confuses the subsequent tests. But what about where end is | ||
| 395 | * brought down by HUGEPD_SIZE below? no, end can't go down to | ||
| 396 | * 0 there. | ||
| 397 | * | ||
| 398 | * Whereas we round start (addr) and ceiling down, by different | ||
| 399 | * masks at different levels, in order to test whether a table | ||
| 400 | * now has no other vmas using it, so can be freed, we don't | ||
| 401 | * bother to round floor or end up - the tests don't need that. | ||
| 402 | */ | 332 | */ |
| 403 | unsigned int psize = get_slice_psize(tlb->mm, addr); | ||
| 404 | |||
| 405 | addr &= HUGEPD_MASK(psize); | ||
| 406 | if (addr < floor) { | ||
| 407 | addr += HUGEPD_SIZE(psize); | ||
| 408 | if (!addr) | ||
| 409 | return; | ||
| 410 | } | ||
| 411 | if (ceiling) { | ||
| 412 | ceiling &= HUGEPD_MASK(psize); | ||
| 413 | if (!ceiling) | ||
| 414 | return; | ||
| 415 | } | ||
| 416 | if (end - 1 > ceiling - 1) | ||
| 417 | end -= HUGEPD_SIZE(psize); | ||
| 418 | if (addr > end - 1) | ||
| 419 | return; | ||
| 420 | 333 | ||
| 421 | start = addr; | ||
| 422 | pgd = pgd_offset(tlb->mm, addr); | 334 | pgd = pgd_offset(tlb->mm, addr); |
| 423 | do { | 335 | do { |
| 424 | psize = get_slice_psize(tlb->mm, addr); | ||
| 425 | BUG_ON(!mmu_huge_psizes[psize]); | ||
| 426 | next = pgd_addr_end(addr, end); | 336 | next = pgd_addr_end(addr, end); |
| 427 | if (mmu_psize_to_shift(psize) < PUD_SHIFT) { | 337 | if (!is_hugepd(pgd)) { |
| 428 | if (pgd_none_or_clear_bad(pgd)) | 338 | if (pgd_none_or_clear_bad(pgd)) |
| 429 | continue; | 339 | continue; |
| 430 | hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); | 340 | hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); |
| 431 | } else { | 341 | } else { |
| 432 | if (pgd_none(*pgd)) | 342 | free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, |
| 433 | continue; | 343 | addr, next, floor, ceiling); |
| 434 | free_hugepte_range(tlb, (hugepd_t *)pgd, psize); | ||
| 435 | } | 344 | } |
| 436 | } while (pgd++, addr = next, addr != end); | 345 | } while (pgd++, addr = next, addr != end); |
| 437 | } | 346 | } |
| 438 | 347 | ||
| 439 | void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, | ||
| 440 | pte_t *ptep, pte_t pte) | ||
| 441 | { | ||
| 442 | if (pte_present(*ptep)) { | ||
| 443 | /* We open-code pte_clear because we need to pass the right | ||
| 444 | * argument to hpte_need_flush (huge / !huge). Might not be | ||
| 445 | * necessary anymore if we make hpte_need_flush() get the | ||
| 446 | * page size from the slices | ||
| 447 | */ | ||
| 448 | unsigned int psize = get_slice_psize(mm, addr); | ||
| 449 | unsigned int shift = mmu_psize_to_shift(psize); | ||
| 450 | unsigned long sz = ((1UL) << shift); | ||
| 451 | struct hstate *hstate = size_to_hstate(sz); | ||
| 452 | pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1); | ||
| 453 | } | ||
| 454 | *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); | ||
| 455 | } | ||
| 456 | |||
| 457 | pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, | ||
| 458 | pte_t *ptep) | ||
| 459 | { | ||
| 460 | unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); | ||
| 461 | return __pte(old); | ||
| 462 | } | ||
| 463 | |||
| 464 | struct page * | 348 | struct page * |
| 465 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | 349 | follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) |
| 466 | { | 350 | { |
| 467 | pte_t *ptep; | 351 | pte_t *ptep; |
| 468 | struct page *page; | 352 | struct page *page; |
| 469 | unsigned int mmu_psize = get_slice_psize(mm, address); | 353 | unsigned shift; |
| 354 | unsigned long mask; | ||
| 355 | |||
| 356 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); | ||
| 470 | 357 | ||
| 471 | /* Verify it is a huge page else bail. */ | 358 | /* Verify it is a huge page else bail. */ |
| 472 | if (!mmu_huge_psizes[mmu_psize]) | 359 | if (!ptep || !shift) |
| 473 | return ERR_PTR(-EINVAL); | 360 | return ERR_PTR(-EINVAL); |
| 474 | 361 | ||
| 475 | ptep = huge_pte_offset(mm, address); | 362 | mask = (1UL << shift) - 1; |
| 476 | page = pte_page(*ptep); | 363 | page = pte_page(*ptep); |
| 477 | if (page) { | 364 | if (page) |
| 478 | unsigned int shift = mmu_psize_to_shift(mmu_psize); | 365 | page += (address & mask) / PAGE_SIZE; |
| 479 | unsigned long sz = ((1UL) << shift); | ||
| 480 | page += (address % sz) / PAGE_SIZE; | ||
| 481 | } | ||
| 482 | 366 | ||
| 483 | return page; | 367 | return page; |
| 484 | } | 368 | } |
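Once find_linux_pte_or_hugepte() reports the huge page shift, getting at the struct page that backs an arbitrary address inside the huge page is plain offset arithmetic, as the hunk above shows. A small sketch of that step, assuming head is the page for the start of the huge page:

    /* Sketch: index from the head page to the base page backing addr. */
    static struct page *demo_hugepage_subpage(struct page *head,
                                              unsigned long addr, unsigned shift)
    {
            unsigned long mask = (1UL << shift) - 1;    /* offset inside the huge page */

            return head + ((addr & mask) >> PAGE_SHIFT);
    }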
| @@ -501,6 +385,82 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
| 501 | return NULL; | 385 | return NULL; |
| 502 | } | 386 | } |
| 503 | 387 | ||
| 388 | static noinline int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | ||
| 389 | unsigned long end, int write, struct page **pages, int *nr) | ||
| 390 | { | ||
| 391 | unsigned long mask; | ||
| 392 | unsigned long pte_end; | ||
| 393 | struct page *head, *page; | ||
| 394 | pte_t pte; | ||
| 395 | int refs; | ||
| 396 | |||
| 397 | pte_end = (addr + sz) & ~(sz-1); | ||
| 398 | if (pte_end < end) | ||
| 399 | end = pte_end; | ||
| 400 | |||
| 401 | pte = *ptep; | ||
| 402 | mask = _PAGE_PRESENT | _PAGE_USER; | ||
| 403 | if (write) | ||
| 404 | mask |= _PAGE_RW; | ||
| 405 | |||
| 406 | if ((pte_val(pte) & mask) != mask) | ||
| 407 | return 0; | ||
| 408 | |||
| 409 | /* hugepages are never "special" */ | ||
| 410 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
| 411 | |||
| 412 | refs = 0; | ||
| 413 | head = pte_page(pte); | ||
| 414 | |||
| 415 | page = head + ((addr & (sz-1)) >> PAGE_SHIFT); | ||
| 416 | do { | ||
| 417 | VM_BUG_ON(compound_head(page) != head); | ||
| 418 | pages[*nr] = page; | ||
| 419 | (*nr)++; | ||
| 420 | page++; | ||
| 421 | refs++; | ||
| 422 | } while (addr += PAGE_SIZE, addr != end); | ||
| 423 | |||
| 424 | if (!page_cache_add_speculative(head, refs)) { | ||
| 425 | *nr -= refs; | ||
| 426 | return 0; | ||
| 427 | } | ||
| 428 | |||
| 429 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
| 430 | /* Could be optimized better */ | ||
| 431 | while (*nr) { | ||
| 432 | put_page(page); | ||
| 433 | (*nr)--; | ||
| 434 | } | ||
| 435 | } | ||
| 436 | |||
| 437 | return 1; | ||
| 438 | } | ||
| 439 | |||
| 440 | static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, | ||
| 441 | unsigned long sz) | ||
| 442 | { | ||
| 443 | unsigned long __boundary = (addr + sz) & ~(sz-1); | ||
| 444 | return (__boundary - 1 < end - 1) ? __boundary : end; | ||
| 445 | } | ||
| 446 | |||
| 447 | int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, | ||
| 448 | unsigned long addr, unsigned long end, | ||
| 449 | int write, struct page **pages, int *nr) | ||
| 450 | { | ||
| 451 | pte_t *ptep; | ||
| 452 | unsigned long sz = 1UL << hugepd_shift(*hugepd); | ||
| 453 | unsigned long next; | ||
| 454 | |||
| 455 | ptep = hugepte_offset(hugepd, addr, pdshift); | ||
| 456 | do { | ||
| 457 | next = hugepte_addr_end(addr, end, sz); | ||
| 458 | if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr)) | ||
| 459 | return 0; | ||
| 460 | } while (ptep++, addr = next, addr != end); | ||
| 461 | |||
| 462 | return 1; | ||
| 463 | } | ||
| 504 | 464 | ||
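hugepte_addr_end() clamps each step of the gup_hugepd() walk to the next huge page boundary or to the requested end, whichever comes first, so gup_hugepte() sees exactly one huge page at a time. The same computation, with a worked example using made-up addresses and 16M pages:

    /* Same arithmetic as hugepte_addr_end(); worked example in the comments.
     * sz = 16M, walking [0x1100000, 0x3000000):
     *   step 1: boundary = (0x1100000 + 16M) & ~(16M - 1) = 0x2000000
     *   step 2: boundary = (0x2000000 + 16M) & ~(16M - 1) = 0x3000000 == end
     * so gup_hugepte() runs once for each of the two huge pages touched. */
    static unsigned long demo_hugepte_addr_end(unsigned long addr,
                                               unsigned long end, unsigned long sz)
    {
            unsigned long boundary = (addr + sz) & ~(sz - 1);

            return (boundary - 1 < end - 1) ? boundary : end;
    }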
| 505 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | 465 | unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
| 506 | unsigned long len, unsigned long pgoff, | 466 | unsigned long len, unsigned long pgoff, |
| @@ -509,8 +469,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
| 509 | struct hstate *hstate = hstate_file(file); | 469 | struct hstate *hstate = hstate_file(file); |
| 510 | int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); | 470 | int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); |
| 511 | 471 | ||
| 512 | if (!mmu_huge_psizes[mmu_psize]) | ||
| 513 | return -EINVAL; | ||
| 514 | return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); | 472 | return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); |
| 515 | } | 473 | } |
| 516 | 474 | ||
| @@ -521,229 +479,46 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) | |||
| 521 | return 1UL << mmu_psize_to_shift(psize); | 479 | return 1UL << mmu_psize_to_shift(psize); |
| 522 | } | 480 | } |
| 523 | 481 | ||
| 524 | /* | 482 | static int __init add_huge_page_size(unsigned long long size) |
| 525 | * Called by asm hashtable.S for doing lazy icache flush | ||
| 526 | */ | ||
| 527 | static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, | ||
| 528 | pte_t pte, int trap, unsigned long sz) | ||
| 529 | { | 483 | { |
| 530 | struct page *page; | 484 | int shift = __ffs(size); |
| 531 | int i; | 485 | int mmu_psize; |
| 532 | |||
| 533 | if (!pfn_valid(pte_pfn(pte))) | ||
| 534 | return rflags; | ||
| 535 | |||
| 536 | page = pte_page(pte); | ||
| 537 | |||
| 538 | /* page is dirty */ | ||
| 539 | if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { | ||
| 540 | if (trap == 0x400) { | ||
| 541 | for (i = 0; i < (sz / PAGE_SIZE); i++) | ||
| 542 | __flush_dcache_icache(page_address(page+i)); | ||
| 543 | set_bit(PG_arch_1, &page->flags); | ||
| 544 | } else { | ||
| 545 | rflags |= HPTE_R_N; | ||
| 546 | } | ||
| 547 | } | ||
| 548 | return rflags; | ||
| 549 | } | ||
| 550 | 486 | ||
| 551 | int hash_huge_page(struct mm_struct *mm, unsigned long access, | 487 | /* Check that it is a page size supported by the hardware and |
| 552 | unsigned long ea, unsigned long vsid, int local, | 488 | * that it fits within pagetable and slice limits. */ |
| 553 | unsigned long trap) | 489 | if (!is_power_of_2(size) |
| 554 | { | 490 | || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) |
| 555 | pte_t *ptep; | 491 | return -EINVAL; |
| 556 | unsigned long old_pte, new_pte; | ||
| 557 | unsigned long va, rflags, pa, sz; | ||
| 558 | long slot; | ||
| 559 | int err = 1; | ||
| 560 | int ssize = user_segment_size(ea); | ||
| 561 | unsigned int mmu_psize; | ||
| 562 | int shift; | ||
| 563 | mmu_psize = get_slice_psize(mm, ea); | ||
| 564 | |||
| 565 | if (!mmu_huge_psizes[mmu_psize]) | ||
| 566 | goto out; | ||
| 567 | ptep = huge_pte_offset(mm, ea); | ||
| 568 | |||
| 569 | /* Search the Linux page table for a match with va */ | ||
| 570 | va = hpt_va(ea, vsid, ssize); | ||
| 571 | 492 | ||
| 572 | /* | 493 | if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) |
| 573 | * If no pte found or not present, send the problem up to | 494 | return -EINVAL; |
| 574 | * do_page_fault | ||
| 575 | */ | ||
| 576 | if (unlikely(!ptep || pte_none(*ptep))) | ||
| 577 | goto out; | ||
| 578 | 495 | ||
| 579 | /* | 496 | #ifdef CONFIG_SPU_FS_64K_LS |
| 580 | * Check the user's access rights to the page. If access should be | 497 | /* Disable support for 64K huge pages when 64K SPU local store |
| 581 | * prevented then send the problem up to do_page_fault. | 498 | * support is enabled as the current implementation conflicts. |
| 582 | */ | ||
| 583 | if (unlikely(access & ~pte_val(*ptep))) | ||
| 584 | goto out; | ||
| 585 | /* | ||
| 586 | * At this point, we have a pte (old_pte) which can be used to build | ||
| 587 | * or update an HPTE. There are 2 cases: | ||
| 588 | * | ||
| 589 | * 1. There is a valid (present) pte with no associated HPTE (this is | ||
| 590 | * the most common case) | ||
| 591 | * 2. There is a valid (present) pte with an associated HPTE. The | ||
| 592 | * current values of the pp bits in the HPTE prevent access | ||
| 593 | * because we are doing software DIRTY bit management and the | ||
| 594 | * page is currently not DIRTY. | ||
| 595 | */ | 499 | */ |
| 500 | if (shift == PAGE_SHIFT_64K) | ||
| 501 | return -EINVAL; | ||
| 502 | #endif /* CONFIG_SPU_FS_64K_LS */ | ||
| 596 | 503 | ||
| 504 | BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); | ||
| 597 | 505 | ||
| 598 | do { | 506 | /* Return if huge page size has already been setup */ |
| 599 | old_pte = pte_val(*ptep); | 507 | if (size_to_hstate(size)) |
| 600 | if (old_pte & _PAGE_BUSY) | 508 | return 0; |
| 601 | goto out; | ||
| 602 | new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; | ||
| 603 | } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, | ||
| 604 | old_pte, new_pte)); | ||
| 605 | |||
| 606 | rflags = 0x2 | (!(new_pte & _PAGE_RW)); | ||
| 607 | /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ | ||
| 608 | rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); | ||
| 609 | shift = mmu_psize_to_shift(mmu_psize); | ||
| 610 | sz = ((1UL) << shift); | ||
| 611 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) | ||
| 612 | /* No CPU has hugepages but lacks no execute, so we | ||
| 613 | * don't need to worry about that case */ | ||
| 614 | rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), | ||
| 615 | trap, sz); | ||
| 616 | |||
| 617 | /* Check if pte already has an hpte (case 2) */ | ||
| 618 | if (unlikely(old_pte & _PAGE_HASHPTE)) { | ||
| 619 | /* There MIGHT be an HPTE for this pte */ | ||
| 620 | unsigned long hash, slot; | ||
| 621 | |||
| 622 | hash = hpt_hash(va, shift, ssize); | ||
| 623 | if (old_pte & _PAGE_F_SECOND) | ||
| 624 | hash = ~hash; | ||
| 625 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
| 626 | slot += (old_pte & _PAGE_F_GIX) >> 12; | ||
| 627 | |||
| 628 | if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, | ||
| 629 | ssize, local) == -1) | ||
| 630 | old_pte &= ~_PAGE_HPTEFLAGS; | ||
| 631 | } | ||
| 632 | |||
| 633 | if (likely(!(old_pte & _PAGE_HASHPTE))) { | ||
| 634 | unsigned long hash = hpt_hash(va, shift, ssize); | ||
| 635 | unsigned long hpte_group; | ||
| 636 | |||
| 637 | pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; | ||
| 638 | |||
| 639 | repeat: | ||
| 640 | hpte_group = ((hash & htab_hash_mask) * | ||
| 641 | HPTES_PER_GROUP) & ~0x7UL; | ||
| 642 | |||
| 643 | /* clear HPTE slot informations in new PTE */ | ||
| 644 | #ifdef CONFIG_PPC_64K_PAGES | ||
| 645 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; | ||
| 646 | #else | ||
| 647 | new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
| 648 | #endif | ||
| 649 | /* Add in WIMG bits */ | ||
| 650 | rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | | ||
| 651 | _PAGE_COHERENT | _PAGE_GUARDED)); | ||
| 652 | |||
| 653 | /* Insert into the hash table, primary slot */ | ||
| 654 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, | ||
| 655 | mmu_psize, ssize); | ||
| 656 | |||
| 657 | /* Primary is full, try the secondary */ | ||
| 658 | if (unlikely(slot == -1)) { | ||
| 659 | hpte_group = ((~hash & htab_hash_mask) * | ||
| 660 | HPTES_PER_GROUP) & ~0x7UL; | ||
| 661 | slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, | ||
| 662 | HPTE_V_SECONDARY, | ||
| 663 | mmu_psize, ssize); | ||
| 664 | if (slot == -1) { | ||
| 665 | if (mftb() & 0x1) | ||
| 666 | hpte_group = ((hash & htab_hash_mask) * | ||
| 667 | HPTES_PER_GROUP)&~0x7UL; | ||
| 668 | |||
| 669 | ppc_md.hpte_remove(hpte_group); | ||
| 670 | goto repeat; | ||
| 671 | } | ||
| 672 | } | ||
| 673 | |||
| 674 | if (unlikely(slot == -2)) | ||
| 675 | panic("hash_huge_page: pte_insert failed\n"); | ||
| 676 | |||
| 677 | new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX); | ||
| 678 | } | ||
| 679 | |||
| 680 | /* | ||
| 681 | * No need to use ldarx/stdcx here | ||
| 682 | */ | ||
| 683 | *ptep = __pte(new_pte & ~_PAGE_BUSY); | ||
| 684 | |||
| 685 | err = 0; | ||
| 686 | 509 | ||
| 687 | out: | 510 | hugetlb_add_hstate(shift - PAGE_SHIFT); |
| 688 | return err; | ||
| 689 | } | ||
| 690 | 511 | ||
| 691 | static void __init set_huge_psize(int psize) | 512 | return 0; |
| 692 | { | ||
| 693 | /* Check that it is a page size supported by the hardware and | ||
| 694 | * that it fits within pagetable limits. */ | ||
| 695 | if (mmu_psize_defs[psize].shift && | ||
| 696 | mmu_psize_defs[psize].shift < SID_SHIFT_1T && | ||
| 697 | (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || | ||
| 698 | mmu_psize_defs[psize].shift == PAGE_SHIFT_64K || | ||
| 699 | mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) { | ||
| 700 | /* Return if huge page size has already been setup or is the | ||
| 701 | * same as the base page size. */ | ||
| 702 | if (mmu_huge_psizes[psize] || | ||
| 703 | mmu_psize_defs[psize].shift == PAGE_SHIFT) | ||
| 704 | return; | ||
| 705 | if (WARN_ON(HUGEPTE_CACHE_NAME(psize) == NULL)) | ||
| 706 | return; | ||
| 707 | hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); | ||
| 708 | |||
| 709 | switch (mmu_psize_defs[psize].shift) { | ||
| 710 | case PAGE_SHIFT_64K: | ||
| 711 | /* We only allow 64k hpages with 4k base page, | ||
| 712 | * which was checked above, and always put them | ||
| 713 | * at the PMD */ | ||
| 714 | hugepte_shift[psize] = PMD_SHIFT; | ||
| 715 | break; | ||
| 716 | case PAGE_SHIFT_16M: | ||
| 717 | /* 16M pages can be at two different levels | ||
| 718 | * of pagestables based on base page size */ | ||
| 719 | if (PAGE_SHIFT == PAGE_SHIFT_64K) | ||
| 720 | hugepte_shift[psize] = PMD_SHIFT; | ||
| 721 | else /* 4k base page */ | ||
| 722 | hugepte_shift[psize] = PUD_SHIFT; | ||
| 723 | break; | ||
| 724 | case PAGE_SHIFT_16G: | ||
| 725 | /* 16G pages are always at PGD level */ | ||
| 726 | hugepte_shift[psize] = PGDIR_SHIFT; | ||
| 727 | break; | ||
| 728 | } | ||
| 729 | hugepte_shift[psize] -= mmu_psize_defs[psize].shift; | ||
| 730 | } else | ||
| 731 | hugepte_shift[psize] = 0; | ||
| 732 | } | 513 | } |
| 733 | 514 | ||
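add_huge_page_size() screens the requested size before registering an hstate: it must be a power of two, bigger than the base page, small enough for the slice layout, and a size the hash MMU actually knows about. A rough standalone restatement of that screening, with the architectural limits written in as assumptions rather than the real macros:

    /* Sketch only: 12 stands in for PAGE_SHIFT (4K base page assumed) and
     * 40 for SLICE_HIGH_SHIFT -- both values are assumptions. */
    static int demo_huge_size_plausible(unsigned long long size)
    {
            unsigned int shift;

            if (size == 0 || (size & (size - 1)))   /* must be a power of two */
                    return 0;
            shift = __builtin_ctzll(size);          /* equivalent of __ffs(size) */
            if (shift <= 12 || shift > 40)          /* base page < size <= slice */
                    return 0;
            return 1;
    }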
| 734 | static int __init hugepage_setup_sz(char *str) | 515 | static int __init hugepage_setup_sz(char *str) |
| 735 | { | 516 | { |
| 736 | unsigned long long size; | 517 | unsigned long long size; |
| 737 | int mmu_psize; | ||
| 738 | int shift; | ||
| 739 | 518 | ||
| 740 | size = memparse(str, &str); | 519 | size = memparse(str, &str); |
| 741 | 520 | ||
| 742 | shift = __ffs(size); | 521 | if (add_huge_page_size(size) != 0) |
| 743 | mmu_psize = shift_to_mmu_psize(shift); | ||
| 744 | if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) | ||
| 745 | set_huge_psize(mmu_psize); | ||
| 746 | else | ||
| 747 | printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); | 522 | printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); |
| 748 | 523 | ||
| 749 | return 1; | 524 | return 1; |
| @@ -752,41 +527,55 @@ __setup("hugepagesz=", hugepage_setup_sz); | |||
| 752 | 527 | ||
| 753 | static int __init hugetlbpage_init(void) | 528 | static int __init hugetlbpage_init(void) |
| 754 | { | 529 | { |
| 755 | unsigned int psize; | 530 | int psize; |
| 756 | 531 | ||
| 757 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) | 532 | if (!cpu_has_feature(CPU_FTR_16M_PAGE)) |
| 758 | return -ENODEV; | 533 | return -ENODEV; |
| 759 | 534 | ||
| 760 | /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE | 535 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { |
| 761 | * and adjust PTE_NONCACHE_NUM if the number of supported huge page | 536 | unsigned shift; |
| 762 | * sizes changes. | 537 | unsigned pdshift; |
| 763 | */ | ||
| 764 | set_huge_psize(MMU_PAGE_16M); | ||
| 765 | set_huge_psize(MMU_PAGE_16G); | ||
| 766 | 538 | ||
| 767 | /* Temporarily disable support for 64K huge pages when 64K SPU local | 539 | if (!mmu_psize_defs[psize].shift) |
| 768 | * store support is enabled as the current implementation conflicts. | 540 | continue; |
| 769 | */ | ||
| 770 | #ifndef CONFIG_SPU_FS_64K_LS | ||
| 771 | set_huge_psize(MMU_PAGE_64K); | ||
| 772 | #endif | ||
| 773 | 541 | ||
| 774 | for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { | 542 | shift = mmu_psize_to_shift(psize); |
| 775 | if (mmu_huge_psizes[psize]) { | 543 | |
| 776 | pgtable_cache[HUGE_PGTABLE_INDEX(psize)] = | 544 | if (add_huge_page_size(1ULL << shift) < 0) |
| 777 | kmem_cache_create( | 545 | continue; |
| 778 | HUGEPTE_CACHE_NAME(psize), | 546 | |
| 779 | HUGEPTE_TABLE_SIZE(psize), | 547 | if (shift < PMD_SHIFT) |
| 780 | HUGEPTE_TABLE_SIZE(psize), | 548 | pdshift = PMD_SHIFT; |
| 781 | 0, | 549 | else if (shift < PUD_SHIFT) |
| 782 | NULL); | 550 | pdshift = PUD_SHIFT; |
| 783 | if (!pgtable_cache[HUGE_PGTABLE_INDEX(psize)]) | 551 | else |
| 784 | panic("hugetlbpage_init(): could not create %s"\ | 552 | pdshift = PGDIR_SHIFT; |
| 785 | "\n", HUGEPTE_CACHE_NAME(psize)); | 553 | |
| 786 | } | 554 | pgtable_cache_add(pdshift - shift, NULL); |
| 555 | if (!PGT_CACHE(pdshift - shift)) | ||
| 556 | panic("hugetlbpage_init(): could not create " | ||
| 557 | "pgtable cache for %d bit pagesize\n", shift); | ||
| 787 | } | 558 | } |
| 788 | 559 | ||
| 560 | /* Set default large page size. Currently, we pick 16M or 1M | ||
| 561 | * depending on what is available | ||
| 562 | */ | ||
| 563 | if (mmu_psize_defs[MMU_PAGE_16M].shift) | ||
| 564 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; | ||
| 565 | else if (mmu_psize_defs[MMU_PAGE_1M].shift) | ||
| 566 | HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; | ||
| 567 | |||
| 789 | return 0; | 568 | return 0; |
| 790 | } | 569 | } |
| 791 | 570 | ||
| 792 | module_init(hugetlbpage_init); | 571 | module_init(hugetlbpage_init); |
| 572 | |||
| 573 | void flush_dcache_icache_hugepage(struct page *page) | ||
| 574 | { | ||
| 575 | int i; | ||
| 576 | |||
| 577 | BUG_ON(!PageCompound(page)); | ||
| 578 | |||
| 579 | for (i = 0; i < (1UL << compound_order(page)); i++) | ||
| 580 | __flush_dcache_icache(page_address(page+i)); | ||
| 581 | } | ||
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 9ddcfb4dc139..767333005eb4 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/initrd.h> | 31 | #include <linux/initrd.h> |
| 32 | #include <linux/pagemap.h> | 32 | #include <linux/pagemap.h> |
| 33 | #include <linux/lmb.h> | 33 | #include <linux/lmb.h> |
| 34 | #include <linux/gfp.h> | ||
| 34 | 35 | ||
| 35 | #include <asm/pgalloc.h> | 36 | #include <asm/pgalloc.h> |
| 36 | #include <asm/prom.h> | 37 | #include <asm/prom.h> |
| @@ -47,7 +48,7 @@ | |||
| 47 | #include "mmu_decl.h" | 48 | #include "mmu_decl.h" |
| 48 | 49 | ||
| 49 | #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) | 50 | #if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) |
| 50 | /* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ | 51 | /* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */ |
| 51 | #if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET)) | 52 | #if (CONFIG_LOWMEM_SIZE > (0xF0000000 - PAGE_OFFSET)) |
| 52 | #error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" | 53 | #error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" |
| 53 | #endif | 54 | #endif |
| @@ -82,6 +83,11 @@ extern struct task_struct *current_set[NR_CPUS]; | |||
| 82 | int __map_without_bats; | 83 | int __map_without_bats; |
| 83 | int __map_without_ltlbs; | 84 | int __map_without_ltlbs; |
| 84 | 85 | ||
| 86 | /* | ||
| 87 | * This tells the system to allow ioremapping memory marked as reserved. | ||
| 88 | */ | ||
| 89 | int __allow_ioremap_reserved; | ||
| 90 | |||
| 85 | /* max amount of low RAM to map in */ | 91 | /* max amount of low RAM to map in */ |
| 86 | unsigned long __max_low_memory = MAX_LOW_MEM; | 92 | unsigned long __max_low_memory = MAX_LOW_MEM; |
| 87 | 93 | ||
| @@ -131,9 +137,13 @@ void __init MMU_init(void) | |||
| 131 | MMU_setup(); | 137 | MMU_setup(); |
| 132 | 138 | ||
| 133 | if (lmb.memory.cnt > 1) { | 139 | if (lmb.memory.cnt > 1) { |
| 140 | #ifndef CONFIG_WII | ||
| 134 | lmb.memory.cnt = 1; | 141 | lmb.memory.cnt = 1; |
| 135 | lmb_analyze(); | 142 | lmb_analyze(); |
| 136 | printk(KERN_WARNING "Only using first contiguous memory region"); | 143 | printk(KERN_WARNING "Only using first contiguous memory region"); |
| 144 | #else | ||
| 145 | wii_memory_fixups(); | ||
| 146 | #endif | ||
| 137 | } | 147 | } |
| 138 | 148 | ||
| 139 | total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr; | 149 | total_lowmem = total_memory = lmb_end_of_DRAM() - memstart_addr; |
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 335c578b9cc3..d7fa50b09b4a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c | |||
| @@ -41,6 +41,8 @@ | |||
| 41 | #include <linux/module.h> | 41 | #include <linux/module.h> |
| 42 | #include <linux/poison.h> | 42 | #include <linux/poison.h> |
| 43 | #include <linux/lmb.h> | 43 | #include <linux/lmb.h> |
| 44 | #include <linux/hugetlb.h> | ||
| 45 | #include <linux/slab.h> | ||
| 44 | 46 | ||
| 45 | #include <asm/pgalloc.h> | 47 | #include <asm/pgalloc.h> |
| 46 | #include <asm/page.h> | 48 | #include <asm/page.h> |
| @@ -119,30 +121,63 @@ static void pmd_ctor(void *addr) | |||
| 119 | memset(addr, 0, PMD_TABLE_SIZE); | 121 | memset(addr, 0, PMD_TABLE_SIZE); |
| 120 | } | 122 | } |
| 121 | 123 | ||
| 122 | static const unsigned int pgtable_cache_size[2] = { | 124 | struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; |
| 123 | PGD_TABLE_SIZE, PMD_TABLE_SIZE | 125 | |
| 124 | }; | 126 | /* |
| 125 | static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { | 127 | * Create a kmem_cache() for pagetables. This is not used for PTE |
| 126 | #ifdef CONFIG_PPC_64K_PAGES | 128 | * pages - they're linked to struct page, come from the normal free |
| 127 | "pgd_cache", "pmd_cache", | 129 | * pages pool and have a different entry size (see real_pte_t) to |
| 128 | #else | 130 | * everything else. Caches created by this function are used for all |
| 129 | "pgd_cache", "pud_pmd_cache", | 131 | * the higher level pagetables, and for hugepage pagetables. |
| 130 | #endif /* CONFIG_PPC_64K_PAGES */ | 132 | */ |
| 131 | }; | 133 | void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) |
| 132 | 134 | { | |
| 133 | #ifdef CONFIG_HUGETLB_PAGE | 135 | char *name; |
| 134 | /* Hugepages need an extra cache per hugepagesize, initialized in | 136 | unsigned long table_size = sizeof(void *) << shift; |
| 135 | * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT | 137 | unsigned long align = table_size; |
| 136 | * is not compile time constant. */ | 138 | |
| 137 | struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT]; | 139 | /* When batching pgtable pointers for RCU freeing, we store |
| 138 | #else | 140 | * the index size in the low bits. Table alignment must be |
| 139 | struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; | 141 | * big enough to fit it. |
| 140 | #endif | 142 | * |
| 143 | * Likewise, hugepage pagetable pointers contain a (different) | ||
| 144 | * shift value in the low bits. All tables must be aligned so | ||
| 145 | * as to leave enough 0 bits in the address to contain it. */ | ||
| 146 | unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, | ||
| 147 | HUGEPD_SHIFT_MASK + 1); | ||
| 148 | struct kmem_cache *new; | ||
| 149 | |||
| 150 | /* It would be nice if this was a BUILD_BUG_ON(), but at the | ||
| 151 | * moment, gcc doesn't seem to recognize is_power_of_2 as a | ||
| 152 | * constant expression, so, so much for that. */ | ||
| 153 | BUG_ON(!is_power_of_2(minalign)); | ||
| 154 | BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); | ||
| 155 | |||
| 156 | if (PGT_CACHE(shift)) | ||
| 157 | return; /* Already have a cache of this size */ | ||
| 158 | |||
| 159 | align = max_t(unsigned long, align, minalign); | ||
| 160 | name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); | ||
| 161 | new = kmem_cache_create(name, table_size, align, 0, ctor); | ||
| 162 | PGT_CACHE(shift) = new; | ||
| 163 | |||
| 164 | pr_debug("Allocated pgtable cache for order %d\n", shift); | ||
| 165 | } | ||
| 166 | |||
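The alignment juggling above exists because pagetable pointers queued for RCU freeing, and hugepd pointers, carry a small shift value in their low bits; the table must be aligned so those bits are otherwise zero, which is what the minalign computation guarantees. A toy illustration of that packing, with the 6-bit mask as an assumption rather than the real HUGEPD_SHIFT_MASK:

    #define DEMO_SHIFT_MASK 0x3fUL          /* assumed width of the packed shift */

    static unsigned long demo_pack_table(void *table, unsigned int shift)
    {
            return (unsigned long)table | (shift & DEMO_SHIFT_MASK);
    }

    static void *demo_unpack_table(unsigned long packed, unsigned int *shift)
    {
            *shift = packed & DEMO_SHIFT_MASK;
            return (void *)(packed & ~DEMO_SHIFT_MASK);
    }

This only round-trips if the table's allocation is aligned to at least DEMO_SHIFT_MASK + 1 bytes, which is what pgtable_cache_add() enforces through the kmem_cache alignment.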
| 141 | 167 | ||
| 142 | void pgtable_cache_init(void) | 168 | void pgtable_cache_init(void) |
| 143 | { | 169 | { |
| 144 | pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor); | 170 | pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); |
| 145 | pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor); | 171 | pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor); |
| 172 | if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE)) | ||
| 173 | panic("Couldn't allocate pgtable caches"); | ||
| 174 | |||
| 175 | /* In all current configs, when the PUD index exists it's the | ||
| 176 | * same size as either the pgd or pmd index. Verify that the | ||
| 177 | * initialization above has also created a PUD cache. This | ||
| 178 | * will need re-examination if we add new possibilities for | ||
| 179 | * the pagetable layout. */ | ||
| 180 | BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)); | ||
| 146 | } | 181 | } |
| 147 | 182 | ||
| 148 | #ifdef CONFIG_SPARSEMEM_VMEMMAP | 183 | #ifdef CONFIG_SPARSEMEM_VMEMMAP |
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 59736317bf0e..0f594d774bf7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
| 23 | #include <linux/errno.h> | 23 | #include <linux/errno.h> |
| 24 | #include <linux/string.h> | 24 | #include <linux/string.h> |
| 25 | #include <linux/gfp.h> | ||
| 25 | #include <linux/types.h> | 26 | #include <linux/types.h> |
| 26 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
| 27 | #include <linux/stddef.h> | 28 | #include <linux/stddef.h> |
| @@ -32,6 +33,7 @@ | |||
| 32 | #include <linux/pagemap.h> | 33 | #include <linux/pagemap.h> |
| 33 | #include <linux/suspend.h> | 34 | #include <linux/suspend.h> |
| 34 | #include <linux/lmb.h> | 35 | #include <linux/lmb.h> |
| 36 | #include <linux/hugetlb.h> | ||
| 35 | 37 | ||
| 36 | #include <asm/pgalloc.h> | 38 | #include <asm/pgalloc.h> |
| 37 | #include <asm/prom.h> | 39 | #include <asm/prom.h> |
| @@ -47,6 +49,7 @@ | |||
| 47 | #include <asm/sparsemem.h> | 49 | #include <asm/sparsemem.h> |
| 48 | #include <asm/vdso.h> | 50 | #include <asm/vdso.h> |
| 49 | #include <asm/fixmap.h> | 51 | #include <asm/fixmap.h> |
| 52 | #include <asm/swiotlb.h> | ||
| 50 | 53 | ||
| 51 | #include "mmu_decl.h" | 54 | #include "mmu_decl.h" |
| 52 | 55 | ||
| @@ -319,6 +322,11 @@ void __init mem_init(void) | |||
| 319 | struct page *page; | 322 | struct page *page; |
| 320 | unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; | 323 | unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; |
| 321 | 324 | ||
| 325 | #ifdef CONFIG_SWIOTLB | ||
| 326 | if (ppc_swiotlb_enable) | ||
| 327 | swiotlb_init(1); | ||
| 328 | #endif | ||
| 329 | |||
| 322 | num_physpages = lmb.memory.size >> PAGE_SHIFT; | 330 | num_physpages = lmb.memory.size >> PAGE_SHIFT; |
| 323 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | 331 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); |
| 324 | 332 | ||
| @@ -417,18 +425,26 @@ EXPORT_SYMBOL(flush_dcache_page); | |||
| 417 | 425 | ||
| 418 | void flush_dcache_icache_page(struct page *page) | 426 | void flush_dcache_icache_page(struct page *page) |
| 419 | { | 427 | { |
| 428 | #ifdef CONFIG_HUGETLB_PAGE | ||
| 429 | if (PageCompound(page)) { | ||
| 430 | flush_dcache_icache_hugepage(page); | ||
| 431 | return; | ||
| 432 | } | ||
| 433 | #endif | ||
| 420 | #ifdef CONFIG_BOOKE | 434 | #ifdef CONFIG_BOOKE |
| 421 | void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE); | 435 | { |
| 422 | __flush_dcache_icache(start); | 436 | void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE); |
| 423 | kunmap_atomic(start, KM_PPC_SYNC_ICACHE); | 437 | __flush_dcache_icache(start); |
| 438 | kunmap_atomic(start, KM_PPC_SYNC_ICACHE); | ||
| 439 | } | ||
| 424 | #elif defined(CONFIG_8xx) || defined(CONFIG_PPC64) | 440 | #elif defined(CONFIG_8xx) || defined(CONFIG_PPC64) |
| 425 | /* On 8xx there is no need to kmap since highmem is not supported */ | 441 | /* On 8xx there is no need to kmap since highmem is not supported */ |
| 426 | __flush_dcache_icache(page_address(page)); | 442 | __flush_dcache_icache(page_address(page)); |
| 427 | #else | 443 | #else |
| 428 | __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); | 444 | __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); |
| 429 | #endif | 445 | #endif |
| 430 | |||
| 431 | } | 446 | } |
| 447 | |||
| 432 | void clear_user_page(void *page, unsigned long vaddr, struct page *pg) | 448 | void clear_user_page(void *page, unsigned long vaddr, struct page *pg) |
| 433 | { | 449 | { |
| 434 | clear_page(page); | 450 | clear_page(page); |
| @@ -485,13 +501,13 @@ EXPORT_SYMBOL(flush_icache_user_range); | |||
| 485 | * This must always be called with the pte lock held. | 501 | * This must always be called with the pte lock held. |
| 486 | */ | 502 | */ |
| 487 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, | 503 | void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, |
| 488 | pte_t pte) | 504 | pte_t *ptep) |
| 489 | { | 505 | { |
| 490 | #ifdef CONFIG_PPC_STD_MMU | 506 | #ifdef CONFIG_PPC_STD_MMU |
| 491 | unsigned long access = 0, trap; | 507 | unsigned long access = 0, trap; |
| 492 | 508 | ||
| 493 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ | 509 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ |
| 494 | if (!pte_young(pte) || address >= TASK_SIZE) | 510 | if (!pte_young(*ptep) || address >= TASK_SIZE) |
| 495 | return; | 511 | return; |
| 496 | 512 | ||
| 497 | /* We try to figure out if we are coming from an instruction | 513 | /* We try to figure out if we are coming from an instruction |
diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c index 0d957a4c70fe..5a783d8e8e8e 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap_64.c | |||
| @@ -47,7 +47,7 @@ static inline int mmap_is_legacy(void) | |||
| 47 | if (current->personality & ADDR_COMPAT_LAYOUT) | 47 | if (current->personality & ADDR_COMPAT_LAYOUT) |
| 48 | return 1; | 48 | return 1; |
| 49 | 49 | ||
| 50 | if (current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) | 50 | if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) |
| 51 | return 1; | 51 | return 1; |
| 52 | 52 | ||
| 53 | return sysctl_legacy_va_layout; | 53 | return sysctl_legacy_va_layout; |
| @@ -77,7 +77,7 @@ static unsigned long mmap_rnd(void) | |||
| 77 | 77 | ||
| 78 | static inline unsigned long mmap_base(void) | 78 | static inline unsigned long mmap_base(void) |
| 79 | { | 79 | { |
| 80 | unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; | 80 | unsigned long gap = rlimit(RLIMIT_STACK); |
| 81 | 81 | ||
| 82 | if (gap < MIN_GAP) | 82 | if (gap < MIN_GAP) |
| 83 | gap = MIN_GAP; | 83 | gap = MIN_GAP; |
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c index dbeb86ac90cd..2535828aa84b 100644 --- a/arch/powerpc/mm/mmu_context_hash64.c +++ b/arch/powerpc/mm/mmu_context_hash64.c | |||
| @@ -18,11 +18,13 @@ | |||
| 18 | #include <linux/mm.h> | 18 | #include <linux/mm.h> |
| 19 | #include <linux/spinlock.h> | 19 | #include <linux/spinlock.h> |
| 20 | #include <linux/idr.h> | 20 | #include <linux/idr.h> |
| 21 | #include <linux/module.h> | ||
| 22 | #include <linux/gfp.h> | ||
| 21 | 23 | ||
| 22 | #include <asm/mmu_context.h> | 24 | #include <asm/mmu_context.h> |
| 23 | 25 | ||
| 24 | static DEFINE_SPINLOCK(mmu_context_lock); | 26 | static DEFINE_SPINLOCK(mmu_context_lock); |
| 25 | static DEFINE_IDR(mmu_context_idr); | 27 | static DEFINE_IDA(mmu_context_ida); |
| 26 | 28 | ||
| 27 | /* | 29 | /* |
| 28 | * The proto-VSID space has 2^35 - 1 segments available for user mappings. | 30 | * The proto-VSID space has 2^35 - 1 segments available for user mappings. |
| @@ -32,17 +34,17 @@ static DEFINE_IDR(mmu_context_idr); | |||
| 32 | #define NO_CONTEXT 0 | 34 | #define NO_CONTEXT 0 |
| 33 | #define MAX_CONTEXT ((1UL << 19) - 1) | 35 | #define MAX_CONTEXT ((1UL << 19) - 1) |
| 34 | 36 | ||
| 35 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | 37 | int __init_new_context(void) |
| 36 | { | 38 | { |
| 37 | int index; | 39 | int index; |
| 38 | int err; | 40 | int err; |
| 39 | 41 | ||
| 40 | again: | 42 | again: |
| 41 | if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) | 43 | if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL)) |
| 42 | return -ENOMEM; | 44 | return -ENOMEM; |
| 43 | 45 | ||
| 44 | spin_lock(&mmu_context_lock); | 46 | spin_lock(&mmu_context_lock); |
| 45 | err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); | 47 | err = ida_get_new_above(&mmu_context_ida, 1, &index); |
| 46 | spin_unlock(&mmu_context_lock); | 48 | spin_unlock(&mmu_context_lock); |
| 47 | 49 | ||
| 48 | if (err == -EAGAIN) | 50 | if (err == -EAGAIN) |
| @@ -52,27 +54,46 @@ again: | |||
| 52 | 54 | ||
| 53 | if (index > MAX_CONTEXT) { | 55 | if (index > MAX_CONTEXT) { |
| 54 | spin_lock(&mmu_context_lock); | 56 | spin_lock(&mmu_context_lock); |
| 55 | idr_remove(&mmu_context_idr, index); | 57 | ida_remove(&mmu_context_ida, index); |
| 56 | spin_unlock(&mmu_context_lock); | 58 | spin_unlock(&mmu_context_lock); |
| 57 | return -ENOMEM; | 59 | return -ENOMEM; |
| 58 | } | 60 | } |
| 59 | 61 | ||
| 62 | return index; | ||
| 63 | } | ||
| 64 | EXPORT_SYMBOL_GPL(__init_new_context); | ||
| 65 | |||
| 66 | int init_new_context(struct task_struct *tsk, struct mm_struct *mm) | ||
| 67 | { | ||
| 68 | int index; | ||
| 69 | |||
| 70 | index = __init_new_context(); | ||
| 71 | if (index < 0) | ||
| 72 | return index; | ||
| 73 | |||
| 60 | /* The old code would re-promote on fork, we don't do that | 74 | /* The old code would re-promote on fork, we don't do that |
| 61 | * when using slices as it could cause problem promoting slices | 75 | * when using slices as it could cause problem promoting slices |
| 62 | * that have been forced down to 4K | 76 | * that have been forced down to 4K |
| 63 | */ | 77 | */ |
| 64 | if (slice_mm_new_context(mm)) | 78 | if (slice_mm_new_context(mm)) |
| 65 | slice_set_user_psize(mm, mmu_virtual_psize); | 79 | slice_set_user_psize(mm, mmu_virtual_psize); |
| 80 | subpage_prot_init_new_context(mm); | ||
| 66 | mm->context.id = index; | 81 | mm->context.id = index; |
| 67 | 82 | ||
| 68 | return 0; | 83 | return 0; |
| 69 | } | 84 | } |
| 70 | 85 | ||
| 71 | void destroy_context(struct mm_struct *mm) | 86 | void __destroy_context(int context_id) |
| 72 | { | 87 | { |
| 73 | spin_lock(&mmu_context_lock); | 88 | spin_lock(&mmu_context_lock); |
| 74 | idr_remove(&mmu_context_idr, mm->context.id); | 89 | ida_remove(&mmu_context_ida, context_id); |
| 75 | spin_unlock(&mmu_context_lock); | 90 | spin_unlock(&mmu_context_lock); |
| 91 | } | ||
| 92 | EXPORT_SYMBOL_GPL(__destroy_context); | ||
| 76 | 93 | ||
| 94 | void destroy_context(struct mm_struct *mm) | ||
| 95 | { | ||
| 96 | __destroy_context(mm->context.id); | ||
| 97 | subpage_prot_free(mm); | ||
| 77 | mm->context.id = NO_CONTEXT; | 98 | mm->context.id = NO_CONTEXT; |
| 78 | } | 99 | } |
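Splitting __init_new_context()/__destroy_context() out of the mm-based paths and exporting them (GPL) presumably lets other in-kernel users grab a hash MMU context id without owning a full mm_struct. A hypothetical caller would pair them like this:

    /* Hypothetical usage sketch -- the caller and its purpose are assumptions. */
    static int demo_grab_context(void)
    {
            int id = __init_new_context();

            if (id < 0)
                    return id;              /* -ENOMEM from the IDA allocator */

            /* ... use 'id' as a private MMU context ... */

            __destroy_context(id);
            return 0;
    }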
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index be4f34c30a0b..1f2d9ff09895 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c | |||
| @@ -47,6 +47,7 @@ | |||
| 47 | #include <linux/bootmem.h> | 47 | #include <linux/bootmem.h> |
| 48 | #include <linux/notifier.h> | 48 | #include <linux/notifier.h> |
| 49 | #include <linux/cpu.h> | 49 | #include <linux/cpu.h> |
| 50 | #include <linux/slab.h> | ||
| 50 | 51 | ||
| 51 | #include <asm/mmu_context.h> | 52 | #include <asm/mmu_context.h> |
| 52 | #include <asm/tlbflush.h> | 53 | #include <asm/tlbflush.h> |
| @@ -56,7 +57,7 @@ static unsigned int next_context, nr_free_contexts; | |||
| 56 | static unsigned long *context_map; | 57 | static unsigned long *context_map; |
| 57 | static unsigned long *stale_map[NR_CPUS]; | 58 | static unsigned long *stale_map[NR_CPUS]; |
| 58 | static struct mm_struct **context_mm; | 59 | static struct mm_struct **context_mm; |
| 59 | static DEFINE_SPINLOCK(context_lock); | 60 | static DEFINE_RAW_SPINLOCK(context_lock); |
| 60 | 61 | ||
| 61 | #define CTX_MAP_SIZE \ | 62 | #define CTX_MAP_SIZE \ |
| 62 | (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) | 63 | (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) |
| @@ -121,9 +122,9 @@ static unsigned int steal_context_smp(unsigned int id) | |||
| 121 | /* This will happen if you have more CPUs than available contexts, | 122 | /* This will happen if you have more CPUs than available contexts, |
| 122 | * all we can do here is wait a bit and try again | 123 | * all we can do here is wait a bit and try again |
| 123 | */ | 124 | */ |
| 124 | spin_unlock(&context_lock); | 125 | raw_spin_unlock(&context_lock); |
| 125 | cpu_relax(); | 126 | cpu_relax(); |
| 126 | spin_lock(&context_lock); | 127 | raw_spin_lock(&context_lock); |
| 127 | 128 | ||
| 128 | /* This will cause the caller to try again */ | 129 | /* This will cause the caller to try again */ |
| 129 | return MMU_NO_CONTEXT; | 130 | return MMU_NO_CONTEXT; |
| @@ -194,7 +195,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
| 194 | unsigned long *map; | 195 | unsigned long *map; |
| 195 | 196 | ||
| 196 | /* No lockless fast path .. yet */ | 197 | /* No lockless fast path .. yet */ |
| 197 | spin_lock(&context_lock); | 198 | raw_spin_lock(&context_lock); |
| 198 | 199 | ||
| 199 | pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", | 200 | pr_hard("[%d] activating context for mm @%p, active=%d, id=%d", |
| 200 | cpu, next, next->context.active, next->context.id); | 201 | cpu, next, next->context.active, next->context.id); |
| @@ -278,7 +279,7 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
| 278 | /* Flick the MMU and release lock */ | 279 | /* Flick the MMU and release lock */ |
| 279 | pr_hardcont(" -> %d\n", id); | 280 | pr_hardcont(" -> %d\n", id); |
| 280 | set_context(id, next->pgd); | 281 | set_context(id, next->pgd); |
| 281 | spin_unlock(&context_lock); | 282 | raw_spin_unlock(&context_lock); |
| 282 | } | 283 | } |
| 283 | 284 | ||
| 284 | /* | 285 | /* |
| @@ -307,7 +308,7 @@ void destroy_context(struct mm_struct *mm) | |||
| 307 | 308 | ||
| 308 | WARN_ON(mm->context.active != 0); | 309 | WARN_ON(mm->context.active != 0); |
| 309 | 310 | ||
| 310 | spin_lock_irqsave(&context_lock, flags); | 311 | raw_spin_lock_irqsave(&context_lock, flags); |
| 311 | id = mm->context.id; | 312 | id = mm->context.id; |
| 312 | if (id != MMU_NO_CONTEXT) { | 313 | if (id != MMU_NO_CONTEXT) { |
| 313 | __clear_bit(id, context_map); | 314 | __clear_bit(id, context_map); |
| @@ -318,7 +319,7 @@ void destroy_context(struct mm_struct *mm) | |||
| 318 | context_mm[id] = NULL; | 319 | context_mm[id] = NULL; |
| 319 | nr_free_contexts++; | 320 | nr_free_contexts++; |
| 320 | } | 321 | } |
| 321 | spin_unlock_irqrestore(&context_lock, flags); | 322 | raw_spin_unlock_irqrestore(&context_lock, flags); |
| 322 | } | 323 | } |
| 323 | 324 | ||
| 324 | #ifdef CONFIG_SMP | 325 | #ifdef CONFIG_SMP |
| @@ -353,7 +354,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, | |||
| 353 | read_lock(&tasklist_lock); | 354 | read_lock(&tasklist_lock); |
| 354 | for_each_process(p) { | 355 | for_each_process(p) { |
| 355 | if (p->mm) | 356 | if (p->mm) |
| 356 | cpu_mask_clear_cpu(cpu, mm_cpumask(p->mm)); | 357 | cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); |
| 357 | } | 358 | } |
| 358 | read_unlock(&tasklist_lock); | 359 | read_unlock(&tasklist_lock); |
| 359 | break; | 360 | break; |
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index d2e5321d5ea6..d49a77503e19 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h | |||
| @@ -98,23 +98,13 @@ extern void _tlbia(void); | |||
| 98 | 98 | ||
| 99 | #ifdef CONFIG_PPC32 | 99 | #ifdef CONFIG_PPC32 |
| 100 | 100 | ||
| 101 | struct tlbcam { | ||
| 102 | u32 MAS0; | ||
| 103 | u32 MAS1; | ||
| 104 | u32 MAS2; | ||
| 105 | u32 MAS3; | ||
| 106 | u32 MAS7; | ||
| 107 | }; | ||
| 108 | |||
| 109 | extern void mapin_ram(void); | 101 | extern void mapin_ram(void); |
| 110 | extern int map_page(unsigned long va, phys_addr_t pa, int flags); | 102 | extern int map_page(unsigned long va, phys_addr_t pa, int flags); |
| 111 | extern void setbat(int index, unsigned long virt, phys_addr_t phys, | 103 | extern void setbat(int index, unsigned long virt, phys_addr_t phys, |
| 112 | unsigned int size, int flags); | 104 | unsigned int size, int flags); |
| 113 | extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, | ||
| 114 | unsigned int size, int flags, unsigned int pid); | ||
| 115 | extern void invalidate_tlbcam_entry(int index); | ||
| 116 | 105 | ||
| 117 | extern int __map_without_bats; | 106 | extern int __map_without_bats; |
| 107 | extern int __allow_ioremap_reserved; | ||
| 118 | extern unsigned long ioremap_base; | 108 | extern unsigned long ioremap_base; |
| 119 | extern unsigned int rtas_data, rtas_size; | 109 | extern unsigned int rtas_data, rtas_size; |
| 120 | 110 | ||
| @@ -136,24 +126,32 @@ extern phys_addr_t total_lowmem; | |||
| 136 | extern phys_addr_t memstart_addr; | 126 | extern phys_addr_t memstart_addr; |
| 137 | extern phys_addr_t lowmem_end_addr; | 127 | extern phys_addr_t lowmem_end_addr; |
| 138 | 128 | ||
| 129 | #ifdef CONFIG_WII | ||
| 130 | extern unsigned long wii_hole_start; | ||
| 131 | extern unsigned long wii_hole_size; | ||
| 132 | |||
| 133 | extern unsigned long wii_mmu_mapin_mem2(unsigned long top); | ||
| 134 | extern void wii_memory_fixups(void); | ||
| 135 | #endif | ||
| 136 | |||
| 139 | /* ...and now those things that may be slightly different between processor | 137 | /* ...and now those things that may be slightly different between processor |
| 140 | * architectures. -- Dan | 138 | * architectures. -- Dan |
| 141 | */ | 139 | */ |
| 142 | #if defined(CONFIG_8xx) | 140 | #if defined(CONFIG_8xx) |
| 143 | #define MMU_init_hw() do { } while(0) | 141 | #define MMU_init_hw() do { } while(0) |
| 144 | #define mmu_mapin_ram() (0UL) | 142 | #define mmu_mapin_ram(top) (0UL) |
| 145 | 143 | ||
| 146 | #elif defined(CONFIG_4xx) | 144 | #elif defined(CONFIG_4xx) |
| 147 | extern void MMU_init_hw(void); | 145 | extern void MMU_init_hw(void); |
| 148 | extern unsigned long mmu_mapin_ram(void); | 146 | extern unsigned long mmu_mapin_ram(unsigned long top); |
| 149 | 147 | ||
| 150 | #elif defined(CONFIG_FSL_BOOKE) | 148 | #elif defined(CONFIG_FSL_BOOKE) |
| 151 | extern void MMU_init_hw(void); | 149 | extern void MMU_init_hw(void); |
| 152 | extern unsigned long mmu_mapin_ram(void); | 150 | extern unsigned long mmu_mapin_ram(unsigned long top); |
| 153 | extern void adjust_total_lowmem(void); | 151 | extern void adjust_total_lowmem(void); |
| 154 | 152 | ||
| 155 | #elif defined(CONFIG_PPC32) | 153 | #elif defined(CONFIG_PPC32) |
| 156 | /* anything 32-bit except 4xx or 8xx */ | 154 | /* anything 32-bit except 4xx or 8xx */ |
| 157 | extern void MMU_init_hw(void); | 155 | extern void MMU_init_hw(void); |
| 158 | extern unsigned long mmu_mapin_ram(void); | 156 | extern unsigned long mmu_mapin_ram(unsigned long top); |
| 159 | #endif | 157 | #endif |
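mmu_mapin_ram() now takes the highest address the caller wants covered and still returns how much it actually pinned with fixed translations; whatever is left over falls back to ordinary page tables. A rough sketch of that contract (not any particular platform's implementation, and the 16M chunk size is an assumption):

    /* Sketch only: cover [0, top) with fixed 16M translations where possible. */
    static unsigned long demo_mmu_mapin_ram(unsigned long top)
    {
            unsigned long mapped = 0;
            unsigned long chunk = 16UL << 20;       /* assumed pinned-entry size */

            while (top - mapped >= chunk) {
                    /* ... install a pinned TLB/BAT entry at 'mapped' ... */
                    mapped += chunk;
            }
            return mapped;  /* caller maps the remainder with normal page tables */
    }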
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b037d95eeadc..eaa7633515b7 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
| @@ -242,10 +242,11 @@ EXPORT_SYMBOL_GPL(of_node_to_nid); | |||
| 242 | */ | 242 | */ |
| 243 | static int __init find_min_common_depth(void) | 243 | static int __init find_min_common_depth(void) |
| 244 | { | 244 | { |
| 245 | int depth; | 245 | int depth, index; |
| 246 | const unsigned int *ref_points; | 246 | const unsigned int *ref_points; |
| 247 | struct device_node *rtas_root; | 247 | struct device_node *rtas_root; |
| 248 | unsigned int len; | 248 | unsigned int len; |
| 249 | struct device_node *options; | ||
| 249 | 250 | ||
| 250 | rtas_root = of_find_node_by_path("/rtas"); | 251 | rtas_root = of_find_node_by_path("/rtas"); |
| 251 | 252 | ||
| @@ -258,11 +259,23 @@ static int __init find_min_common_depth(void) | |||
| 258 | * configuration (should be all 0's) and the second is for a normal | 259 | * configuration (should be all 0's) and the second is for a normal |
| 259 | * NUMA configuration. | 260 | * NUMA configuration. |
| 260 | */ | 261 | */ |
| 262 | index = 1; | ||
| 261 | ref_points = of_get_property(rtas_root, | 263 | ref_points = of_get_property(rtas_root, |
| 262 | "ibm,associativity-reference-points", &len); | 264 | "ibm,associativity-reference-points", &len); |
| 263 | 265 | ||
| 266 | /* | ||
| 267 | * For type 1 affinity information we want the first field | ||
| 268 | */ | ||
| 269 | options = of_find_node_by_path("/options"); | ||
| 270 | if (options) { | ||
| 271 | const char *str; | ||
| 272 | str = of_get_property(options, "ibm,associativity-form", NULL); | ||
| 273 | if (str && !strcmp(str, "1")) | ||
| 274 | index = 0; | ||
| 275 | } | ||
| 276 | |||
| 264 | if ((len >= 2 * sizeof(unsigned int)) && ref_points) { | 277 | if ((len >= 2 * sizeof(unsigned int)) && ref_points) { |
| 265 | depth = ref_points[1]; | 278 | depth = ref_points[index]; |
| 266 | } else { | 279 | } else { |
| 267 | dbg("NUMA: ibm,associativity-reference-points not found.\n"); | 280 | dbg("NUMA: ibm,associativity-reference-points not found.\n"); |
| 268 | depth = -1; | 281 | depth = -1; |
| @@ -451,7 +464,7 @@ static int __cpuinit numa_setup_cpu(unsigned long lcpu) | |||
| 451 | nid = of_node_to_nid_single(cpu); | 464 | nid = of_node_to_nid_single(cpu); |
| 452 | 465 | ||
| 453 | if (nid < 0 || !node_online(nid)) | 466 | if (nid < 0 || !node_online(nid)) |
| 454 | nid = any_online_node(NODE_MASK_ALL); | 467 | nid = first_online_node; |
| 455 | out: | 468 | out: |
| 456 | map_cpu_to_node(lcpu, nid); | 469 | map_cpu_to_node(lcpu, nid); |
| 457 | 470 | ||
| @@ -1114,7 +1127,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) | |||
| 1114 | int nid, found = 0; | 1127 | int nid, found = 0; |
| 1115 | 1128 | ||
| 1116 | if (!numa_enabled || (min_common_depth < 0)) | 1129 | if (!numa_enabled || (min_common_depth < 0)) |
| 1117 | return any_online_node(NODE_MASK_ALL); | 1130 | return first_online_node; |
| 1118 | 1131 | ||
| 1119 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); | 1132 | memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); |
| 1120 | if (memory) { | 1133 | if (memory) { |
| @@ -1125,7 +1138,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr) | |||
| 1125 | } | 1138 | } |
| 1126 | 1139 | ||
| 1127 | if (nid < 0 || !node_online(nid)) | 1140 | if (nid < 0 || !node_online(nid)) |
| 1128 | nid = any_online_node(NODE_MASK_ALL); | 1141 | nid = first_online_node; |
| 1129 | 1142 | ||
| 1130 | if (NODE_DATA(nid)->node_spanned_pages) | 1143 | if (NODE_DATA(nid)->node_spanned_pages) |
| 1131 | return nid; | 1144 | return nid; |
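The replacement of any_online_node(NODE_MASK_ALL) with first_online_node is purely mechanical: both reduce to the lowest-numbered online node. A toy illustration over a made-up online-node bitmask:

```c
#include <stdio.h>

static unsigned long online_nodes = 0x0cUL;   /* pretend nodes 2 and 3 are online */

/* Lowest-numbered online node, i.e. what the fallback evaluates to. */
static int model_first_online_node(void)
{
	int nid;

	for (nid = 0; nid < (int)(8 * sizeof(online_nodes)); nid++)
		if (online_nodes & (1UL << nid))
			return nid;
	return -1;
}

int main(void)
{
	printf("fallback nid = %d\n", model_first_online_node());   /* prints 2 */
	return 0;
}
```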
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 53040931de32..ebc2f38eb381 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | */ | 22 | */ |
| 23 | 23 | ||
| 24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
| 25 | #include <linux/gfp.h> | ||
| 25 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
| 26 | #include <linux/init.h> | 27 | #include <linux/init.h> |
| 27 | #include <linux/percpu.h> | 28 | #include <linux/percpu.h> |
| @@ -49,12 +50,12 @@ struct pte_freelist_batch | |||
| 49 | { | 50 | { |
| 50 | struct rcu_head rcu; | 51 | struct rcu_head rcu; |
| 51 | unsigned int index; | 52 | unsigned int index; |
| 52 | pgtable_free_t tables[0]; | 53 | unsigned long tables[0]; |
| 53 | }; | 54 | }; |
| 54 | 55 | ||
| 55 | #define PTE_FREELIST_SIZE \ | 56 | #define PTE_FREELIST_SIZE \ |
| 56 | ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ | 57 | ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ |
| 57 | / sizeof(pgtable_free_t)) | 58 | / sizeof(unsigned long)) |
| 58 | 59 | ||
| 59 | static void pte_free_smp_sync(void *arg) | 60 | static void pte_free_smp_sync(void *arg) |
| 60 | { | 61 | { |
| @@ -64,13 +65,13 @@ static void pte_free_smp_sync(void *arg) | |||
| 64 | /* This is only called when we are critically out of memory | 65 | /* This is only called when we are critically out of memory |
| 65 | * (and fail to get a page in pte_free_tlb). | 66 | * (and fail to get a page in pte_free_tlb). |
| 66 | */ | 67 | */ |
| 67 | static void pgtable_free_now(pgtable_free_t pgf) | 68 | static void pgtable_free_now(void *table, unsigned shift) |
| 68 | { | 69 | { |
| 69 | pte_freelist_forced_free++; | 70 | pte_freelist_forced_free++; |
| 70 | 71 | ||
| 71 | smp_call_function(pte_free_smp_sync, NULL, 1); | 72 | smp_call_function(pte_free_smp_sync, NULL, 1); |
| 72 | 73 | ||
| 73 | pgtable_free(pgf); | 74 | pgtable_free(table, shift); |
| 74 | } | 75 | } |
| 75 | 76 | ||
| 76 | static void pte_free_rcu_callback(struct rcu_head *head) | 77 | static void pte_free_rcu_callback(struct rcu_head *head) |
| @@ -79,8 +80,12 @@ static void pte_free_rcu_callback(struct rcu_head *head) | |||
| 79 | container_of(head, struct pte_freelist_batch, rcu); | 80 | container_of(head, struct pte_freelist_batch, rcu); |
| 80 | unsigned int i; | 81 | unsigned int i; |
| 81 | 82 | ||
| 82 | for (i = 0; i < batch->index; i++) | 83 | for (i = 0; i < batch->index; i++) { |
| 83 | pgtable_free(batch->tables[i]); | 84 | void *table = (void *)(batch->tables[i] & ~MAX_PGTABLE_INDEX_SIZE); |
| 85 | unsigned shift = batch->tables[i] & MAX_PGTABLE_INDEX_SIZE; | ||
| 86 | |||
| 87 | pgtable_free(table, shift); | ||
| 88 | } | ||
| 84 | 89 | ||
| 85 | free_page((unsigned long)batch); | 90 | free_page((unsigned long)batch); |
| 86 | } | 91 | } |
| @@ -91,25 +96,28 @@ static void pte_free_submit(struct pte_freelist_batch *batch) | |||
| 91 | call_rcu(&batch->rcu, pte_free_rcu_callback); | 96 | call_rcu(&batch->rcu, pte_free_rcu_callback); |
| 92 | } | 97 | } |
| 93 | 98 | ||
| 94 | void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) | 99 | void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift) |
| 95 | { | 100 | { |
| 96 | /* This is safe since tlb_gather_mmu has disabled preemption */ | 101 | /* This is safe since tlb_gather_mmu has disabled preemption */ |
| 97 | struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); | 102 | struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur); |
| 103 | unsigned long pgf; | ||
| 98 | 104 | ||
| 99 | if (atomic_read(&tlb->mm->mm_users) < 2 || | 105 | if (atomic_read(&tlb->mm->mm_users) < 2 || |
| 100 | cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){ | 106 | cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){ |
| 101 | pgtable_free(pgf); | 107 | pgtable_free(table, shift); |
| 102 | return; | 108 | return; |
| 103 | } | 109 | } |
| 104 | 110 | ||
| 105 | if (*batchp == NULL) { | 111 | if (*batchp == NULL) { |
| 106 | *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); | 112 | *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); |
| 107 | if (*batchp == NULL) { | 113 | if (*batchp == NULL) { |
| 108 | pgtable_free_now(pgf); | 114 | pgtable_free_now(table, shift); |
| 109 | return; | 115 | return; |
| 110 | } | 116 | } |
| 111 | (*batchp)->index = 0; | 117 | (*batchp)->index = 0; |
| 112 | } | 118 | } |
| 119 | BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE); | ||
| 120 | pgf = (unsigned long)table | shift; | ||
| 113 | (*batchp)->tables[(*batchp)->index++] = pgf; | 121 | (*batchp)->tables[(*batchp)->index++] = pgf; |
| 114 | if ((*batchp)->index == PTE_FREELIST_SIZE) { | 122 | if ((*batchp)->index == PTE_FREELIST_SIZE) { |
| 115 | pte_free_submit(*batchp); | 123 | pte_free_submit(*batchp); |
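The freelist now stores a single unsigned long per entry: the table pointer with its index size packed into the low bits, which works because page-table pages are aligned far more strictly than MAX_PGTABLE_INDEX_SIZE. A small self-contained model of the encode/decode step; the mask value below is illustrative, not the kernel's.

```c
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_PGTABLE_INDEX_SIZE 0xfUL   /* low bits reserved for the shift */

static unsigned long pack(void *table, unsigned shift)
{
	assert(shift <= MAX_PGTABLE_INDEX_SIZE);
	assert(((unsigned long)table & MAX_PGTABLE_INDEX_SIZE) == 0);
	return (unsigned long)table | shift;
}

static void unpack(unsigned long pgf, void **table, unsigned *shift)
{
	*table = (void *)(pgf & ~MAX_PGTABLE_INDEX_SIZE);
	*shift = pgf & MAX_PGTABLE_INDEX_SIZE;
}

int main(void)
{
	void *page, *table;
	unsigned shift;

	/* aligned_alloc guarantees the low bits used for the shift are clear */
	page = aligned_alloc(4096, 4096);
	unpack(pack(page, 9), &table, &shift);
	printf("table=%p shift=%u\n", table, shift);
	free(page);
	return 0;
}
```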
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cb96cb2e17cc..b9243e7557ae 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c | |||
| @@ -26,6 +26,8 @@ | |||
| 26 | #include <linux/vmalloc.h> | 26 | #include <linux/vmalloc.h> |
| 27 | #include <linux/init.h> | 27 | #include <linux/init.h> |
| 28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
| 29 | #include <linux/lmb.h> | ||
| 30 | #include <linux/slab.h> | ||
| 29 | 31 | ||
| 30 | #include <asm/pgtable.h> | 32 | #include <asm/pgtable.h> |
| 31 | #include <asm/pgalloc.h> | 33 | #include <asm/pgalloc.h> |
| @@ -191,7 +193,8 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags, | |||
| 191 | * Don't allow anybody to remap normal RAM that we're using. | 193 | * Don't allow anybody to remap normal RAM that we're using. |
| 192 | * mem_init() sets high_memory so only do the check after that. | 194 | * mem_init() sets high_memory so only do the check after that. |
| 193 | */ | 195 | */ |
| 194 | if (mem_init_done && (p < virt_to_phys(high_memory))) { | 196 | if (mem_init_done && (p < virt_to_phys(high_memory)) && |
| 197 | !(__allow_ioremap_reserved && lmb_is_region_reserved(p, size))) { | ||
| 195 | printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n", | 198 | printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n", |
| 196 | (unsigned long long)p, __builtin_return_address(0)); | 199 | (unsigned long long)p, __builtin_return_address(0)); |
| 197 | return NULL; | 200 | return NULL; |
| @@ -283,18 +286,18 @@ int map_page(unsigned long va, phys_addr_t pa, int flags) | |||
| 283 | } | 286 | } |
| 284 | 287 | ||
| 285 | /* | 288 | /* |
| 286 | * Map in a big chunk of physical memory starting at PAGE_OFFSET. | 289 | * Map in a chunk of physical memory starting at start. |
| 287 | */ | 290 | */ |
| 288 | void __init mapin_ram(void) | 291 | void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) |
| 289 | { | 292 | { |
| 290 | unsigned long v, s, f; | 293 | unsigned long v, s, f; |
| 291 | phys_addr_t p; | 294 | phys_addr_t p; |
| 292 | int ktext; | 295 | int ktext; |
| 293 | 296 | ||
| 294 | s = mmu_mapin_ram(); | 297 | s = offset; |
| 295 | v = PAGE_OFFSET + s; | 298 | v = PAGE_OFFSET + s; |
| 296 | p = memstart_addr + s; | 299 | p = memstart_addr + s; |
| 297 | for (; s < total_lowmem; s += PAGE_SIZE) { | 300 | for (; s < top; s += PAGE_SIZE) { |
| 298 | ktext = ((char *) v >= _stext && (char *) v < etext); | 301 | ktext = ((char *) v >= _stext && (char *) v < etext); |
| 299 | f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL; | 302 | f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL; |
| 300 | map_page(v, p, f); | 303 | map_page(v, p, f); |
| @@ -307,6 +310,30 @@ void __init mapin_ram(void) | |||
| 307 | } | 310 | } |
| 308 | } | 311 | } |
| 309 | 312 | ||
| 313 | void __init mapin_ram(void) | ||
| 314 | { | ||
| 315 | unsigned long s, top; | ||
| 316 | |||
| 317 | #ifndef CONFIG_WII | ||
| 318 | top = total_lowmem; | ||
| 319 | s = mmu_mapin_ram(top); | ||
| 320 | __mapin_ram_chunk(s, top); | ||
| 321 | #else | ||
| 322 | if (!wii_hole_size) { | ||
| 323 | s = mmu_mapin_ram(total_lowmem); | ||
| 324 | __mapin_ram_chunk(s, total_lowmem); | ||
| 325 | } else { | ||
| 326 | top = wii_hole_start; | ||
| 327 | s = mmu_mapin_ram(top); | ||
| 328 | __mapin_ram_chunk(s, top); | ||
| 329 | |||
| 330 | top = lmb_end_of_DRAM(); | ||
| 331 | s = wii_mmu_mapin_mem2(top); | ||
| 332 | __mapin_ram_chunk(s, top); | ||
| 333 | } | ||
| 334 | #endif | ||
| 335 | } | ||
| 336 | |||
| 310 | /* Scan the real Linux page tables and return a PTE pointer for | 337 | /* Scan the real Linux page tables and return a PTE pointer for |
| 311 | * a virtual address in a context. | 338 | * a virtual address in a context. |
| 312 | * Returns true (1) if PTE was found, zero otherwise. The pointer to | 339 | * Returns true (1) if PTE was found, zero otherwise. The pointer to |
| @@ -356,7 +383,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot) | |||
| 356 | return 0; | 383 | return 0; |
| 357 | if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) | 384 | if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) |
| 358 | return -EINVAL; | 385 | return -EINVAL; |
| 359 | set_pte_at(&init_mm, address, kpte, mk_pte(page, prot)); | 386 | __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); |
| 360 | wmb(); | 387 | wmb(); |
| 361 | #ifdef CONFIG_PPC_STD_MMU | 388 | #ifdef CONFIG_PPC_STD_MMU |
| 362 | flush_hash_pages(0, address, pmd_val(*kpmd), 1); | 389 | flush_hash_pages(0, address, pmd_val(*kpmd), 1); |
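The mapin_ram() rework above maps lowmem as one or two chunks so that, on the Wii, the unusable hole between MEM1 and MEM2 is skipped rather than mapped. A rough userspace model of that control flow, ignoring the block-mapping step; all addresses, sizes and helper names are placeholders.

```c
#include <stdio.h>

static unsigned long total_lowmem = 0x08000000UL;  /* pretend 128 MB */
static unsigned long hole_start   = 0x01800000UL;  /* pretend MEM1 ends at 24 MB */
static unsigned long hole_size    = 0x02800000UL;  /* 0 would mean "no hole" */
static unsigned long end_of_dram  = 0x08000000UL;

static void map_chunk(unsigned long from, unsigned long to)
{
	printf("mapping [%#010lx, %#010lx)\n", from, to);
}

static void model_mapin_ram(void)
{
	if (!hole_size) {
		map_chunk(0, total_lowmem);
	} else {
		map_chunk(0, hole_start);                       /* MEM1, below the hole */
		map_chunk(hole_start + hole_size, end_of_dram); /* MEM2, above it */
	}
}

int main(void)
{
	model_mapin_ram();
	return 0;
}
```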
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 853d5565eed5..d95679a5fb29 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c | |||
| @@ -35,6 +35,7 @@ | |||
| 35 | #include <linux/init.h> | 35 | #include <linux/init.h> |
| 36 | #include <linux/bootmem.h> | 36 | #include <linux/bootmem.h> |
| 37 | #include <linux/lmb.h> | 37 | #include <linux/lmb.h> |
| 38 | #include <linux/slab.h> | ||
| 38 | 39 | ||
| 39 | #include <asm/pgalloc.h> | 40 | #include <asm/pgalloc.h> |
| 40 | #include <asm/page.h> | 41 | #include <asm/page.h> |
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 2d2a87e10154..f11c2cdcb0fe 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c | |||
| @@ -72,7 +72,7 @@ unsigned long p_mapped_by_bats(phys_addr_t pa) | |||
| 72 | return 0; | 72 | return 0; |
| 73 | } | 73 | } |
| 74 | 74 | ||
| 75 | unsigned long __init mmu_mapin_ram(void) | 75 | unsigned long __init mmu_mapin_ram(unsigned long top) |
| 76 | { | 76 | { |
| 77 | unsigned long tot, bl, done; | 77 | unsigned long tot, bl, done; |
| 78 | unsigned long max_size = (256<<20); | 78 | unsigned long max_size = (256<<20); |
| @@ -86,7 +86,7 @@ unsigned long __init mmu_mapin_ram(void) | |||
| 86 | 86 | ||
| 87 | /* Make sure we don't map a block larger than the | 87 | /* Make sure we don't map a block larger than the |
| 88 | smallest alignment of the physical address. */ | 88 | smallest alignment of the physical address. */ |
| 89 | tot = total_lowmem; | 89 | tot = top; |
| 90 | for (bl = 128<<10; bl < max_size; bl <<= 1) { | 90 | for (bl = 128<<10; bl < max_size; bl <<= 1) { |
| 91 | if (bl * 2 > tot) | 91 | if (bl * 2 > tot) |
| 92 | break; | 92 | break; |
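With the parameter change, the BAT sizing loop is bounded by the caller's top rather than total_lowmem. The loop effectively picks roughly the largest power-of-two block between 128 KB and 256 MB that still fits in the area to map; here is a standalone rendering of just that calculation (the helper name is made up).

```c
#include <stdio.h>

/* Largest BAT block size: roughly the biggest power of two between 128 KB
 * and 256 MB that does not exceed 'top' (mirrors the sizing loop above). */
static unsigned long bat_block_size(unsigned long top)
{
	unsigned long max_size = 256UL << 20;
	unsigned long bl;

	for (bl = 128UL << 10; bl < max_size; bl <<= 1)
		if (bl * 2 > top)
			break;
	return bl;
}

int main(void)
{
	printf("%lu MB lowmem -> %lu MB block\n",
	       (192UL << 20) >> 20, bat_block_size(192UL << 20) >> 20);
	return 0;
}
```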
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 4cafc0c33d0a..e4f8f1fc81a5 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c | |||
| @@ -10,7 +10,6 @@ | |||
| 10 | #include <linux/errno.h> | 10 | #include <linux/errno.h> |
| 11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
| 12 | #include <linux/gfp.h> | 12 | #include <linux/gfp.h> |
| 13 | #include <linux/slab.h> | ||
| 14 | #include <linux/types.h> | 13 | #include <linux/types.h> |
| 15 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
| 16 | #include <linux/hugetlb.h> | 15 | #include <linux/hugetlb.h> |
| @@ -24,9 +23,9 @@ | |||
| 24 | * Also makes sure that the subpage_prot_table structure is | 23 | * Also makes sure that the subpage_prot_table structure is |
| 25 | * reinitialized for the next user. | 24 | * reinitialized for the next user. |
| 26 | */ | 25 | */ |
| 27 | void subpage_prot_free(pgd_t *pgd) | 26 | void subpage_prot_free(struct mm_struct *mm) |
| 28 | { | 27 | { |
| 29 | struct subpage_prot_table *spt = pgd_subpage_prot(pgd); | 28 | struct subpage_prot_table *spt = &mm->context.spt; |
| 30 | unsigned long i, j, addr; | 29 | unsigned long i, j, addr; |
| 31 | u32 **p; | 30 | u32 **p; |
| 32 | 31 | ||
| @@ -51,6 +50,13 @@ void subpage_prot_free(pgd_t *pgd) | |||
| 51 | spt->maxaddr = 0; | 50 | spt->maxaddr = 0; |
| 52 | } | 51 | } |
| 53 | 52 | ||
| 53 | void subpage_prot_init_new_context(struct mm_struct *mm) | ||
| 54 | { | ||
| 55 | struct subpage_prot_table *spt = &mm->context.spt; | ||
| 56 | |||
| 57 | memset(spt, 0, sizeof(*spt)); | ||
| 58 | } | ||
| 59 | |||
| 54 | static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, | 60 | static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, |
| 55 | int npages) | 61 | int npages) |
| 56 | { | 62 | { |
| @@ -87,7 +93,7 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr, | |||
| 87 | static void subpage_prot_clear(unsigned long addr, unsigned long len) | 93 | static void subpage_prot_clear(unsigned long addr, unsigned long len) |
| 88 | { | 94 | { |
| 89 | struct mm_struct *mm = current->mm; | 95 | struct mm_struct *mm = current->mm; |
| 90 | struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd); | 96 | struct subpage_prot_table *spt = &mm->context.spt; |
| 91 | u32 **spm, *spp; | 97 | u32 **spm, *spp; |
| 92 | int i, nw; | 98 | int i, nw; |
| 93 | unsigned long next, limit; | 99 | unsigned long next, limit; |
| @@ -136,7 +142,7 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) | |||
| 136 | long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) | 142 | long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) |
| 137 | { | 143 | { |
| 138 | struct mm_struct *mm = current->mm; | 144 | struct mm_struct *mm = current->mm; |
| 139 | struct subpage_prot_table *spt = pgd_subpage_prot(mm->pgd); | 145 | struct subpage_prot_table *spt = &mm->context.spt; |
| 140 | u32 **spm, *spp; | 146 | u32 **spm, *spp; |
| 141 | int i, nw; | 147 | int i, nw; |
| 142 | unsigned long next, limit; | 148 | unsigned long next, limit; |
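The subpage-protection table is now embedded directly in the mm context instead of being hung off the pgd, so bringing up a new context only needs the structure zeroed, not allocated. A schematic of that pattern; the struct layout here is invented for the sketch.

```c
#include <stdio.h>
#include <string.h>

struct subpage_prot_table {          /* fields invented for the sketch */
	unsigned long maxaddr;
	unsigned int **protptrs;
};

struct mm_context {                  /* stand-in for mm->context */
	struct subpage_prot_table spt;
};

/* With the table embedded, initialising a new context is just a memset. */
static void model_subpage_prot_init_new_context(struct mm_context *ctx)
{
	memset(&ctx->spt, 0, sizeof(ctx->spt));
}

int main(void)
{
	struct mm_context ctx;

	model_subpage_prot_init_new_context(&ctx);
	printf("maxaddr after init: %lu\n", ctx.spt.maxaddr);
	return 0;
}
```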
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 2b2f35f6985e..1ec06576f619 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c | |||
| @@ -53,11 +53,6 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, | |||
| 53 | 53 | ||
| 54 | i = batch->index; | 54 | i = batch->index; |
| 55 | 55 | ||
| 56 | /* We mask the address for the base page size. Huge pages will | ||
| 57 | * have applied their own masking already | ||
| 58 | */ | ||
| 59 | addr &= PAGE_MASK; | ||
| 60 | |||
| 61 | /* Get page size (maybe move back to caller). | 56 | /* Get page size (maybe move back to caller). |
| 62 | * | 57 | * |
| 63 | * NOTE: when using special 64K mappings in 4K environment like | 58 | * NOTE: when using special 64K mappings in 4K environment like |
| @@ -68,12 +63,21 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, | |||
| 68 | if (huge) { | 63 | if (huge) { |
| 69 | #ifdef CONFIG_HUGETLB_PAGE | 64 | #ifdef CONFIG_HUGETLB_PAGE |
| 70 | psize = get_slice_psize(mm, addr); | 65 | psize = get_slice_psize(mm, addr); |
| 66 | /* Mask the address for the correct page size */ | ||
| 67 | addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1); | ||
| 71 | #else | 68 | #else |
| 72 | BUG(); | 69 | BUG(); |
| 73 | psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ | 70 | psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ |
| 74 | #endif | 71 | #endif |
| 75 | } else | 72 | } else { |
| 76 | psize = pte_pagesize_index(mm, addr, pte); | 73 | psize = pte_pagesize_index(mm, addr, pte); |
| 74 | /* Mask the address for the standard page size. If we | ||
| 75 | * have a 64k page kernel, but the hardware does not | ||
| 76 | * support 64k pages, this might be different from the | ||
| 77 | * hardware page size encoded in the slice table. */ | ||
| 78 | addr &= PAGE_MASK; | ||
| 79 | } | ||
| 80 | |||
| 77 | 81 | ||
| 78 | /* Build full vaddr */ | 82 | /* Build full vaddr */ |
| 79 | if (!is_kernel_addr(addr)) { | 83 | if (!is_kernel_addr(addr)) { |
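Rather than masking with PAGE_MASK before the page size is known, the flush path now masks the address with the boundary of the page size the mapping actually uses: the slice's huge-page shift for hugetlb, the normal PAGE_MASK otherwise. The operation itself is just clearing the low bits for a given shift; a tiny standalone check:

```c
#include <stdio.h>

/* Round an address down to the start of its page, given the page shift. */
static unsigned long mask_to_page(unsigned long addr, unsigned int shift)
{
	return addr & ~((1UL << shift) - 1);
}

int main(void)
{
	unsigned long addr = 0x12345678UL;

	printf("4K  page: %#lx\n", mask_to_page(addr, 12));  /* 0x12345000 */
	printf("16M page: %#lx\n", mask_to_page(addr, 24));  /* 0x12000000 */
	return 0;
}
```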
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index f288279e679d..8b04c54e596f 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Low leve TLB miss handlers for Book3E | 2 | * Low level TLB miss handlers for Book3E |
| 3 | * | 3 | * |
| 4 | * Copyright (C) 2008-2009 | 4 | * Copyright (C) 2008-2009 |
| 5 | * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. | 5 | * Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp. |
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 2fbc680c2c71..e81d5d67f834 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c | |||
| @@ -150,7 +150,7 @@ EXPORT_SYMBOL(local_flush_tlb_page); | |||
| 150 | */ | 150 | */ |
| 151 | #ifdef CONFIG_SMP | 151 | #ifdef CONFIG_SMP |
| 152 | 152 | ||
| 153 | static DEFINE_SPINLOCK(tlbivax_lock); | 153 | static DEFINE_RAW_SPINLOCK(tlbivax_lock); |
| 154 | 154 | ||
| 155 | static int mm_is_core_local(struct mm_struct *mm) | 155 | static int mm_is_core_local(struct mm_struct *mm) |
| 156 | { | 156 | { |
| @@ -232,10 +232,10 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, | |||
| 232 | if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { | 232 | if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) { |
| 233 | int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); | 233 | int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL); |
| 234 | if (lock) | 234 | if (lock) |
| 235 | spin_lock(&tlbivax_lock); | 235 | raw_spin_lock(&tlbivax_lock); |
| 236 | _tlbivax_bcast(vmaddr, pid, tsize, ind); | 236 | _tlbivax_bcast(vmaddr, pid, tsize, ind); |
| 237 | if (lock) | 237 | if (lock) |
| 238 | spin_unlock(&tlbivax_lock); | 238 | raw_spin_unlock(&tlbivax_lock); |
| 239 | goto bail; | 239 | goto bail; |
| 240 | } else { | 240 | } else { |
| 241 | struct tlb_flush_param p = { | 241 | struct tlb_flush_param p = { |
