author | Sage Weil <sage@inktank.com> | 2013-08-15 14:11:45 -0400
---|---|---
committer | Sage Weil <sage@inktank.com> | 2013-08-15 14:11:45 -0400
commit | ee3e542fec6e69bc9fb668698889a37d93950ddf | (patch)
tree | e74ee766a4764769ef1d3d45d266b4dea64101d3 | /arch/powerpc/mm
parent | fe2a801b50c0bb8039d627e5ae1fec249d10ff39 | (diff)
parent | f1d6e17f540af37bb1891480143669ba7636c4cf | (diff)
Merge remote-tracking branch 'linus/master' into testing
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/44x_mmu.c | 6
-rw-r--r-- | arch/powerpc/mm/Makefile | 8
-rw-r--r-- | arch/powerpc/mm/gup.c | 18
-rw-r--r-- | arch/powerpc/mm/hash_low_64.S | 21
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c | 207
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 67
-rw-r--r-- | arch/powerpc/mm/hugepage-hash64.c | 175
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-hash64.c | 2
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 301
-rw-r--r-- | arch/powerpc/mm/init_64.c | 9
-rw-r--r-- | arch/powerpc/mm/mem.c | 63
-rw-r--r-- | arch/powerpc/mm/mmap.c (renamed from arch/powerpc/mm/mmap_64.c) | 2
-rw-r--r-- | arch/powerpc/mm/mmu_context_nohash.c | 15
-rw-r--r-- | arch/powerpc/mm/numa.c | 71
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 8
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 414
-rw-r--r-- | arch/powerpc/mm/subpage-prot.c | 48
-rw-r--r-- | arch/powerpc/mm/tlb_hash64.c | 40
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 2
19 files changed, 1139 insertions, 338 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 2c9441ee6bb8..82b1ff759e26 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c | |||
@@ -41,7 +41,7 @@ int icache_44x_need_flush; | |||
41 | 41 | ||
42 | unsigned long tlb_47x_boltmap[1024/8]; | 42 | unsigned long tlb_47x_boltmap[1024/8]; |
43 | 43 | ||
44 | static void __cpuinit ppc44x_update_tlb_hwater(void) | 44 | static void ppc44x_update_tlb_hwater(void) |
45 | { | 45 | { |
46 | extern unsigned int tlb_44x_patch_hwater_D[]; | 46 | extern unsigned int tlb_44x_patch_hwater_D[]; |
47 | extern unsigned int tlb_44x_patch_hwater_I[]; | 47 | extern unsigned int tlb_44x_patch_hwater_I[]; |
@@ -134,7 +134,7 @@ static void __init ppc47x_update_boltmap(void) | |||
134 | /* | 134 | /* |
135 | * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU | 135 | * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU |
136 | */ | 136 | */ |
137 | static void __cpuinit ppc47x_pin_tlb(unsigned int virt, unsigned int phys) | 137 | static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys) |
138 | { | 138 | { |
139 | unsigned int rA; | 139 | unsigned int rA; |
140 | int bolted; | 140 | int bolted; |
@@ -229,7 +229,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, | |||
229 | } | 229 | } |
230 | 230 | ||
231 | #ifdef CONFIG_SMP | 231 | #ifdef CONFIG_SMP |
232 | void __cpuinit mmu_init_secondary(int cpu) | 232 | void mmu_init_secondary(int cpu) |
233 | { | 233 | { |
234 | unsigned long addr; | 234 | unsigned long addr; |
235 | unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); | 235 | unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1); |
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index cf16b5733eaa..51230ee6a407 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile | |||
@@ -6,17 +6,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror | |||
6 | 6 | ||
7 | ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) | 7 | ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) |
8 | 8 | ||
9 | obj-y := fault.o mem.o pgtable.o gup.o \ | 9 | obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ |
10 | init_$(CONFIG_WORD_SIZE).o \ | 10 | init_$(CONFIG_WORD_SIZE).o \ |
11 | pgtable_$(CONFIG_WORD_SIZE).o | 11 | pgtable_$(CONFIG_WORD_SIZE).o |
12 | obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ | 12 | obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ |
13 | tlb_nohash_low.o | 13 | tlb_nohash_low.o |
14 | obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o | 14 | obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o |
15 | obj-$(CONFIG_PPC64) += mmap_64.o | ||
16 | hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o | 15 | hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o |
17 | obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ | 16 | obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ |
18 | slb_low.o slb.o stab.o \ | 17 | slb_low.o slb.o stab.o \ |
19 | mmap_64.o $(hash64-y) | 18 | $(hash64-y) |
20 | obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o | 19 | obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o |
21 | obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ | 20 | obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ |
22 | tlb_hash$(CONFIG_WORD_SIZE).o \ | 21 | tlb_hash$(CONFIG_WORD_SIZE).o \ |
@@ -28,11 +27,12 @@ obj-$(CONFIG_44x) += 44x_mmu.o | |||
28 | obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o | 27 | obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o |
29 | obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o | 28 | obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o |
30 | obj-$(CONFIG_PPC_MM_SLICES) += slice.o | 29 | obj-$(CONFIG_PPC_MM_SLICES) += slice.o |
31 | ifeq ($(CONFIG_HUGETLB_PAGE),y) | ||
32 | obj-y += hugetlbpage.o | 30 | obj-y += hugetlbpage.o |
31 | ifeq ($(CONFIG_HUGETLB_PAGE),y) | ||
33 | obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o | 32 | obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o |
34 | obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o | 33 | obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o |
35 | endif | 34 | endif |
35 | obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o | ||
36 | obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o | 36 | obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o |
37 | obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o | 37 | obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o |
38 | obj-$(CONFIG_HIGHMEM) += highmem.o | 38 | obj-$(CONFIG_HIGHMEM) += highmem.o |
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index 4b921affa495..49822d90ea96 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c | |||
@@ -34,7 +34,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
34 | 34 | ||
35 | ptep = pte_offset_kernel(&pmd, addr); | 35 | ptep = pte_offset_kernel(&pmd, addr); |
36 | do { | 36 | do { |
37 | pte_t pte = *ptep; | 37 | pte_t pte = ACCESS_ONCE(*ptep); |
38 | struct page *page; | 38 | struct page *page; |
39 | 39 | ||
40 | if ((pte_val(pte) & mask) != result) | 40 | if ((pte_val(pte) & mask) != result) |
@@ -63,12 +63,18 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | |||
63 | 63 | ||
64 | pmdp = pmd_offset(&pud, addr); | 64 | pmdp = pmd_offset(&pud, addr); |
65 | do { | 65 | do { |
66 | pmd_t pmd = *pmdp; | 66 | pmd_t pmd = ACCESS_ONCE(*pmdp); |
67 | 67 | ||
68 | next = pmd_addr_end(addr, end); | 68 | next = pmd_addr_end(addr, end); |
69 | if (pmd_none(pmd)) | 69 | /* |
70 | * If we find a splitting transparent hugepage we | ||
71 | * return zero. That will result in taking the slow | ||
72 | * path which will call wait_split_huge_page() | ||
73 | * if the pmd is still in splitting state | ||
74 | */ | ||
75 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | ||
70 | return 0; | 76 | return 0; |
71 | if (pmd_huge(pmd)) { | 77 | if (pmd_huge(pmd) || pmd_large(pmd)) { |
72 | if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, | 78 | if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, |
73 | write, pages, nr)) | 79 | write, pages, nr)) |
74 | return 0; | 80 | return 0; |
@@ -91,7 +97,7 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | |||
91 | 97 | ||
92 | pudp = pud_offset(&pgd, addr); | 98 | pudp = pud_offset(&pgd, addr); |
93 | do { | 99 | do { |
94 | pud_t pud = *pudp; | 100 | pud_t pud = ACCESS_ONCE(*pudp); |
95 | 101 | ||
96 | next = pud_addr_end(addr, end); | 102 | next = pud_addr_end(addr, end); |
97 | if (pud_none(pud)) | 103 | if (pud_none(pud)) |
@@ -154,7 +160,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
154 | 160 | ||
155 | pgdp = pgd_offset(mm, addr); | 161 | pgdp = pgd_offset(mm, addr); |
156 | do { | 162 | do { |
157 | pgd_t pgd = *pgdp; | 163 | pgd_t pgd = ACCESS_ONCE(*pgdp); |
158 | 164 | ||
159 | pr_devel(" %016lx: normal pgd %p\n", addr, | 165 | pr_devel(" %016lx: normal pgd %p\n", addr, |
160 | (void *)pgd_val(pgd)); | 166 | (void *)pgd_val(pgd)); |
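The gup.c hunks above switch the fast walker to reading each table entry once with ACCESS_ONCE and bailing out to the slow path when a pmd is absent or a transparent hugepage is mid-split. Below is a minimal standalone sketch of that snapshot-and-bail pattern; it is not part of the commit, and the types and flag bits are toy stand-ins for the real powerpc definitions.

```c
#include <stdbool.h>
#include <stdint.h>

/* Toy stand-ins for the kernel's pmd type and software bits. */
typedef struct { uint64_t val; } pmd_t;
#define PMD_SPLITTING (1ULL << 1)   /* hypothetical _PAGE_SPLITTING analogue */

static bool pmd_none(pmd_t pmd)            { return pmd.val == 0; }
static bool pmd_trans_splitting(pmd_t pmd) { return (pmd.val & PMD_SPLITTING) != 0; }

/*
 * Lockless fast path: take one volatile snapshot of the entry and make
 * every later decision on that local copy, so a concurrent THP split or
 * collapse cannot change the value under us mid-check.
 */
int gup_pmd_fastpath(pmd_t *pmdp)
{
	pmd_t pmd = *(volatile pmd_t *)pmdp;   /* ACCESS_ONCE equivalent */

	if (pmd_none(pmd) || pmd_trans_splitting(pmd))
		return 0;   /* fall back to the slow path, which can wait_split_huge_page() */

	/* ... continue the walk using the stable local copy 'pmd' ... */
	return 1;
}
```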
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 0e980acae67c..d3cbda62857b 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S | |||
@@ -289,9 +289,10 @@ htab_modify_pte: | |||
289 | 289 | ||
290 | /* Call ppc_md.hpte_updatepp */ | 290 | /* Call ppc_md.hpte_updatepp */ |
291 | mr r5,r29 /* vpn */ | 291 | mr r5,r29 /* vpn */ |
292 | li r6,MMU_PAGE_4K /* page size */ | 292 | li r6,MMU_PAGE_4K /* base page size */ |
293 | ld r7,STK_PARAM(R9)(r1) /* segment size */ | 293 | li r7,MMU_PAGE_4K /* actual page size */ |
294 | ld r8,STK_PARAM(R8)(r1) /* get "local" param */ | 294 | ld r8,STK_PARAM(R9)(r1) /* segment size */ |
295 | ld r9,STK_PARAM(R8)(r1) /* get "local" param */ | ||
295 | _GLOBAL(htab_call_hpte_updatepp) | 296 | _GLOBAL(htab_call_hpte_updatepp) |
296 | bl . /* Patched by htab_finish_init() */ | 297 | bl . /* Patched by htab_finish_init() */ |
297 | 298 | ||
@@ -649,9 +650,10 @@ htab_modify_pte: | |||
649 | 650 | ||
650 | /* Call ppc_md.hpte_updatepp */ | 651 | /* Call ppc_md.hpte_updatepp */ |
651 | mr r5,r29 /* vpn */ | 652 | mr r5,r29 /* vpn */ |
652 | li r6,MMU_PAGE_4K /* page size */ | 653 | li r6,MMU_PAGE_4K /* base page size */ |
653 | ld r7,STK_PARAM(R9)(r1) /* segment size */ | 654 | li r7,MMU_PAGE_4K /* actual page size */ |
654 | ld r8,STK_PARAM(R8)(r1) /* get "local" param */ | 655 | ld r8,STK_PARAM(R9)(r1) /* segment size */ |
656 | ld r9,STK_PARAM(R8)(r1) /* get "local" param */ | ||
655 | _GLOBAL(htab_call_hpte_updatepp) | 657 | _GLOBAL(htab_call_hpte_updatepp) |
656 | bl . /* patched by htab_finish_init() */ | 658 | bl . /* patched by htab_finish_init() */ |
657 | 659 | ||
@@ -937,9 +939,10 @@ ht64_modify_pte: | |||
937 | 939 | ||
938 | /* Call ppc_md.hpte_updatepp */ | 940 | /* Call ppc_md.hpte_updatepp */ |
939 | mr r5,r29 /* vpn */ | 941 | mr r5,r29 /* vpn */ |
940 | li r6,MMU_PAGE_64K | 942 | li r6,MMU_PAGE_64K /* base page size */ |
941 | ld r7,STK_PARAM(R9)(r1) /* segment size */ | 943 | li r7,MMU_PAGE_64K /* actual page size */ |
942 | ld r8,STK_PARAM(R8)(r1) /* get "local" param */ | 944 | ld r8,STK_PARAM(R9)(r1) /* segment size */ |
945 | ld r9,STK_PARAM(R8)(r1) /* get "local" param */ | ||
943 | _GLOBAL(ht64_call_hpte_updatepp) | 946 | _GLOBAL(ht64_call_hpte_updatepp) |
944 | bl . /* patched by htab_finish_init() */ | 947 | bl . /* patched by htab_finish_init() */ |
945 | 948 | ||
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 4c122c3f1623..c33d939120c9 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c | |||
@@ -43,6 +43,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) | |||
43 | { | 43 | { |
44 | unsigned long va; | 44 | unsigned long va; |
45 | unsigned int penc; | 45 | unsigned int penc; |
46 | unsigned long sllp; | ||
46 | 47 | ||
47 | /* | 48 | /* |
48 | * We need 14 to 65 bits of va for a tlbie of 4K page | 49 | * We need 14 to 65 bits of va for a tlbie of 4K page |
@@ -64,7 +65,9 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) | |||
64 | /* clear out bits after (52) [0....52.....63] */ | 65 | /* clear out bits after (52) [0....52.....63] */ |
65 | va &= ~((1ul << (64 - 52)) - 1); | 66 | va &= ~((1ul << (64 - 52)) - 1); |
66 | va |= ssize << 8; | 67 | va |= ssize << 8; |
67 | va |= mmu_psize_defs[apsize].sllp << 6; | 68 | sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) | |
69 | ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4); | ||
70 | va |= sllp << 5; | ||
68 | asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) | 71 | asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2) |
69 | : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) | 72 | : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206) |
70 | : "memory"); | 73 | : "memory"); |
@@ -98,6 +101,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) | |||
98 | { | 101 | { |
99 | unsigned long va; | 102 | unsigned long va; |
100 | unsigned int penc; | 103 | unsigned int penc; |
104 | unsigned long sllp; | ||
101 | 105 | ||
102 | /* VPN_SHIFT can be at most 12 */ | 106 | /* VPN_SHIFT can be at most 12 */ |
103 | va = vpn << VPN_SHIFT; | 107 | va = vpn << VPN_SHIFT; |
@@ -113,7 +117,9 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) | |||
113 | /* clear out bits after(52) [0....52.....63] */ | 117 | /* clear out bits after(52) [0....52.....63] */ |
114 | va &= ~((1ul << (64 - 52)) - 1); | 118 | va &= ~((1ul << (64 - 52)) - 1); |
115 | va |= ssize << 8; | 119 | va |= ssize << 8; |
116 | va |= mmu_psize_defs[apsize].sllp << 6; | 120 | sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) | |
121 | ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4); | ||
122 | va |= sllp << 5; | ||
117 | asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" | 123 | asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)" |
118 | : : "r"(va) : "memory"); | 124 | : : "r"(va) : "memory"); |
119 | break; | 125 | break; |
@@ -273,61 +279,15 @@ static long native_hpte_remove(unsigned long hpte_group) | |||
273 | return i; | 279 | return i; |
274 | } | 280 | } |
275 | 281 | ||
276 | static inline int __hpte_actual_psize(unsigned int lp, int psize) | ||
277 | { | ||
278 | int i, shift; | ||
279 | unsigned int mask; | ||
280 | |||
281 | /* start from 1 ignoring MMU_PAGE_4K */ | ||
282 | for (i = 1; i < MMU_PAGE_COUNT; i++) { | ||
283 | |||
284 | /* invalid penc */ | ||
285 | if (mmu_psize_defs[psize].penc[i] == -1) | ||
286 | continue; | ||
287 | /* | ||
288 | * encoding bits per actual page size | ||
289 | * PTE LP actual page size | ||
290 | * rrrr rrrz >=8KB | ||
291 | * rrrr rrzz >=16KB | ||
292 | * rrrr rzzz >=32KB | ||
293 | * rrrr zzzz >=64KB | ||
294 | * ....... | ||
295 | */ | ||
296 | shift = mmu_psize_defs[i].shift - LP_SHIFT; | ||
297 | if (shift > LP_BITS) | ||
298 | shift = LP_BITS; | ||
299 | mask = (1 << shift) - 1; | ||
300 | if ((lp & mask) == mmu_psize_defs[psize].penc[i]) | ||
301 | return i; | ||
302 | } | ||
303 | return -1; | ||
304 | } | ||
305 | |||
306 | static inline int hpte_actual_psize(struct hash_pte *hptep, int psize) | ||
307 | { | ||
308 | /* Look at the 8 bit LP value */ | ||
309 | unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1); | ||
310 | |||
311 | if (!(hptep->v & HPTE_V_VALID)) | ||
312 | return -1; | ||
313 | |||
314 | /* First check if it is large page */ | ||
315 | if (!(hptep->v & HPTE_V_LARGE)) | ||
316 | return MMU_PAGE_4K; | ||
317 | |||
318 | return __hpte_actual_psize(lp, psize); | ||
319 | } | ||
320 | |||
321 | static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, | 282 | static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, |
322 | unsigned long vpn, int psize, int ssize, | 283 | unsigned long vpn, int bpsize, |
323 | int local) | 284 | int apsize, int ssize, int local) |
324 | { | 285 | { |
325 | struct hash_pte *hptep = htab_address + slot; | 286 | struct hash_pte *hptep = htab_address + slot; |
326 | unsigned long hpte_v, want_v; | 287 | unsigned long hpte_v, want_v; |
327 | int ret = 0; | 288 | int ret = 0; |
328 | int actual_psize; | ||
329 | 289 | ||
330 | want_v = hpte_encode_avpn(vpn, psize, ssize); | 290 | want_v = hpte_encode_avpn(vpn, bpsize, ssize); |
331 | 291 | ||
332 | DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", | 292 | DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)", |
333 | vpn, want_v & HPTE_V_AVPN, slot, newpp); | 293 | vpn, want_v & HPTE_V_AVPN, slot, newpp); |
@@ -335,7 +295,6 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, | |||
335 | native_lock_hpte(hptep); | 295 | native_lock_hpte(hptep); |
336 | 296 | ||
337 | hpte_v = hptep->v; | 297 | hpte_v = hptep->v; |
338 | actual_psize = hpte_actual_psize(hptep, psize); | ||
339 | /* | 298 | /* |
340 | * We need to invalidate the TLB always because hpte_remove doesn't do | 299 | * We need to invalidate the TLB always because hpte_remove doesn't do |
341 | * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less | 300 | * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less |
@@ -343,12 +302,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, | |||
343 | * (hpte_remove) because we assume the old translation is still | 302 | * (hpte_remove) because we assume the old translation is still |
344 | * technically "valid". | 303 | * technically "valid". |
345 | */ | 304 | */ |
346 | if (actual_psize < 0) { | 305 | if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) { |
347 | actual_psize = psize; | ||
348 | ret = -1; | ||
349 | goto err_out; | ||
350 | } | ||
351 | if (!HPTE_V_COMPARE(hpte_v, want_v)) { | ||
352 | DBG_LOW(" -> miss\n"); | 306 | DBG_LOW(" -> miss\n"); |
353 | ret = -1; | 307 | ret = -1; |
354 | } else { | 308 | } else { |
@@ -357,11 +311,10 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, | |||
357 | hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | | 311 | hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | |
358 | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); | 312 | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); |
359 | } | 313 | } |
360 | err_out: | ||
361 | native_unlock_hpte(hptep); | 314 | native_unlock_hpte(hptep); |
362 | 315 | ||
363 | /* Ensure it is out of the tlb too. */ | 316 | /* Ensure it is out of the tlb too. */ |
364 | tlbie(vpn, psize, actual_psize, ssize, local); | 317 | tlbie(vpn, bpsize, apsize, ssize, local); |
365 | 318 | ||
366 | return ret; | 319 | return ret; |
367 | } | 320 | } |
@@ -402,7 +355,6 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize) | |||
402 | static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, | 355 | static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, |
403 | int psize, int ssize) | 356 | int psize, int ssize) |
404 | { | 357 | { |
405 | int actual_psize; | ||
406 | unsigned long vpn; | 358 | unsigned long vpn; |
407 | unsigned long vsid; | 359 | unsigned long vsid; |
408 | long slot; | 360 | long slot; |
@@ -415,36 +367,33 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, | |||
415 | if (slot == -1) | 367 | if (slot == -1) |
416 | panic("could not find page to bolt\n"); | 368 | panic("could not find page to bolt\n"); |
417 | hptep = htab_address + slot; | 369 | hptep = htab_address + slot; |
418 | actual_psize = hpte_actual_psize(hptep, psize); | ||
419 | if (actual_psize < 0) | ||
420 | actual_psize = psize; | ||
421 | 370 | ||
422 | /* Update the HPTE */ | 371 | /* Update the HPTE */ |
423 | hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | | 372 | hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | |
424 | (newpp & (HPTE_R_PP | HPTE_R_N)); | 373 | (newpp & (HPTE_R_PP | HPTE_R_N)); |
425 | 374 | /* | |
426 | /* Ensure it is out of the tlb too. */ | 375 | * Ensure it is out of the tlb too. Bolted entries base and |
427 | tlbie(vpn, psize, actual_psize, ssize, 0); | 376 | * actual page size will be same. |
377 | */ | ||
378 | tlbie(vpn, psize, psize, ssize, 0); | ||
428 | } | 379 | } |
429 | 380 | ||
430 | static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, | 381 | static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, |
431 | int psize, int ssize, int local) | 382 | int bpsize, int apsize, int ssize, int local) |
432 | { | 383 | { |
433 | struct hash_pte *hptep = htab_address + slot; | 384 | struct hash_pte *hptep = htab_address + slot; |
434 | unsigned long hpte_v; | 385 | unsigned long hpte_v; |
435 | unsigned long want_v; | 386 | unsigned long want_v; |
436 | unsigned long flags; | 387 | unsigned long flags; |
437 | int actual_psize; | ||
438 | 388 | ||
439 | local_irq_save(flags); | 389 | local_irq_save(flags); |
440 | 390 | ||
441 | DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); | 391 | DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot); |
442 | 392 | ||
443 | want_v = hpte_encode_avpn(vpn, psize, ssize); | 393 | want_v = hpte_encode_avpn(vpn, bpsize, ssize); |
444 | native_lock_hpte(hptep); | 394 | native_lock_hpte(hptep); |
445 | hpte_v = hptep->v; | 395 | hpte_v = hptep->v; |
446 | 396 | ||
447 | actual_psize = hpte_actual_psize(hptep, psize); | ||
448 | /* | 397 | /* |
449 | * We need to invalidate the TLB always because hpte_remove doesn't do | 398 | * We need to invalidate the TLB always because hpte_remove doesn't do |
450 | * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less | 399 | * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less |
@@ -452,23 +401,120 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, | |||
452 | * (hpte_remove) because we assume the old translation is still | 401 | * (hpte_remove) because we assume the old translation is still |
453 | * technically "valid". | 402 | * technically "valid". |
454 | */ | 403 | */ |
455 | if (actual_psize < 0) { | 404 | if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) |
456 | actual_psize = psize; | ||
457 | native_unlock_hpte(hptep); | ||
458 | goto err_out; | ||
459 | } | ||
460 | if (!HPTE_V_COMPARE(hpte_v, want_v)) | ||
461 | native_unlock_hpte(hptep); | 405 | native_unlock_hpte(hptep); |
462 | else | 406 | else |
463 | /* Invalidate the hpte. NOTE: this also unlocks it */ | 407 | /* Invalidate the hpte. NOTE: this also unlocks it */ |
464 | hptep->v = 0; | 408 | hptep->v = 0; |
465 | 409 | ||
466 | err_out: | ||
467 | /* Invalidate the TLB */ | 410 | /* Invalidate the TLB */ |
468 | tlbie(vpn, psize, actual_psize, ssize, local); | 411 | tlbie(vpn, bpsize, apsize, ssize, local); |
412 | |||
413 | local_irq_restore(flags); | ||
414 | } | ||
415 | |||
416 | static void native_hugepage_invalidate(struct mm_struct *mm, | ||
417 | unsigned char *hpte_slot_array, | ||
418 | unsigned long addr, int psize) | ||
419 | { | ||
420 | int ssize = 0, i; | ||
421 | int lock_tlbie; | ||
422 | struct hash_pte *hptep; | ||
423 | int actual_psize = MMU_PAGE_16M; | ||
424 | unsigned int max_hpte_count, valid; | ||
425 | unsigned long flags, s_addr = addr; | ||
426 | unsigned long hpte_v, want_v, shift; | ||
427 | unsigned long hidx, vpn = 0, vsid, hash, slot; | ||
428 | |||
429 | shift = mmu_psize_defs[psize].shift; | ||
430 | max_hpte_count = 1U << (PMD_SHIFT - shift); | ||
431 | |||
432 | local_irq_save(flags); | ||
433 | for (i = 0; i < max_hpte_count; i++) { | ||
434 | valid = hpte_valid(hpte_slot_array, i); | ||
435 | if (!valid) | ||
436 | continue; | ||
437 | hidx = hpte_hash_index(hpte_slot_array, i); | ||
438 | |||
439 | /* get the vpn */ | ||
440 | addr = s_addr + (i * (1ul << shift)); | ||
441 | if (!is_kernel_addr(addr)) { | ||
442 | ssize = user_segment_size(addr); | ||
443 | vsid = get_vsid(mm->context.id, addr, ssize); | ||
444 | WARN_ON(vsid == 0); | ||
445 | } else { | ||
446 | vsid = get_kernel_vsid(addr, mmu_kernel_ssize); | ||
447 | ssize = mmu_kernel_ssize; | ||
448 | } | ||
449 | |||
450 | vpn = hpt_vpn(addr, vsid, ssize); | ||
451 | hash = hpt_hash(vpn, shift, ssize); | ||
452 | if (hidx & _PTEIDX_SECONDARY) | ||
453 | hash = ~hash; | ||
454 | |||
455 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
456 | slot += hidx & _PTEIDX_GROUP_IX; | ||
457 | |||
458 | hptep = htab_address + slot; | ||
459 | want_v = hpte_encode_avpn(vpn, psize, ssize); | ||
460 | native_lock_hpte(hptep); | ||
461 | hpte_v = hptep->v; | ||
462 | |||
463 | /* Even if we miss, we need to invalidate the TLB */ | ||
464 | if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) | ||
465 | native_unlock_hpte(hptep); | ||
466 | else | ||
467 | /* Invalidate the hpte. NOTE: this also unlocks it */ | ||
468 | hptep->v = 0; | ||
469 | } | ||
470 | /* | ||
471 | * Since this is a hugepage, we just need a single tlbie. | ||
472 | * use the last vpn. | ||
473 | */ | ||
474 | lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); | ||
475 | if (lock_tlbie) | ||
476 | raw_spin_lock(&native_tlbie_lock); | ||
477 | |||
478 | asm volatile("ptesync":::"memory"); | ||
479 | __tlbie(vpn, psize, actual_psize, ssize); | ||
480 | asm volatile("eieio; tlbsync; ptesync":::"memory"); | ||
481 | |||
482 | if (lock_tlbie) | ||
483 | raw_spin_unlock(&native_tlbie_lock); | ||
484 | |||
469 | local_irq_restore(flags); | 485 | local_irq_restore(flags); |
470 | } | 486 | } |
471 | 487 | ||
488 | static inline int __hpte_actual_psize(unsigned int lp, int psize) | ||
489 | { | ||
490 | int i, shift; | ||
491 | unsigned int mask; | ||
492 | |||
493 | /* start from 1 ignoring MMU_PAGE_4K */ | ||
494 | for (i = 1; i < MMU_PAGE_COUNT; i++) { | ||
495 | |||
496 | /* invalid penc */ | ||
497 | if (mmu_psize_defs[psize].penc[i] == -1) | ||
498 | continue; | ||
499 | /* | ||
500 | * encoding bits per actual page size | ||
501 | * PTE LP actual page size | ||
502 | * rrrr rrrz >=8KB | ||
503 | * rrrr rrzz >=16KB | ||
504 | * rrrr rzzz >=32KB | ||
505 | * rrrr zzzz >=64KB | ||
506 | * ....... | ||
507 | */ | ||
508 | shift = mmu_psize_defs[i].shift - LP_SHIFT; | ||
509 | if (shift > LP_BITS) | ||
510 | shift = LP_BITS; | ||
511 | mask = (1 << shift) - 1; | ||
512 | if ((lp & mask) == mmu_psize_defs[psize].penc[i]) | ||
513 | return i; | ||
514 | } | ||
515 | return -1; | ||
516 | } | ||
517 | |||
472 | static void hpte_decode(struct hash_pte *hpte, unsigned long slot, | 518 | static void hpte_decode(struct hash_pte *hpte, unsigned long slot, |
473 | int *psize, int *apsize, int *ssize, unsigned long *vpn) | 519 | int *psize, int *apsize, int *ssize, unsigned long *vpn) |
474 | { | 520 | { |
@@ -514,6 +560,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, | |||
514 | seg_off |= vpi << shift; | 560 | seg_off |= vpi << shift; |
515 | } | 561 | } |
516 | *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT; | 562 | *vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT; |
563 | break; | ||
517 | case MMU_SEGSIZE_1T: | 564 | case MMU_SEGSIZE_1T: |
518 | /* We only have 40 - 23 bits of seg_off in avpn */ | 565 | /* We only have 40 - 23 bits of seg_off in avpn */ |
519 | seg_off = (avpn & 0x1ffff) << 23; | 566 | seg_off = (avpn & 0x1ffff) << 23; |
@@ -523,6 +570,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, | |||
523 | seg_off |= vpi << shift; | 570 | seg_off |= vpi << shift; |
524 | } | 571 | } |
525 | *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT; | 572 | *vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT; |
573 | break; | ||
526 | default: | 574 | default: |
527 | *vpn = size = 0; | 575 | *vpn = size = 0; |
528 | } | 576 | } |
@@ -672,4 +720,5 @@ void __init hpte_init_native(void) | |||
672 | ppc_md.hpte_remove = native_hpte_remove; | 720 | ppc_md.hpte_remove = native_hpte_remove; |
673 | ppc_md.hpte_clear_all = native_hpte_clear; | 721 | ppc_md.hpte_clear_all = native_hpte_clear; |
674 | ppc_md.flush_hash_range = native_flush_hash_range; | 722 | ppc_md.flush_hash_range = native_flush_hash_range; |
723 | ppc_md.hugepage_invalidate = native_hugepage_invalidate; | ||
675 | } | 724 | } |
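The __hpte_actual_psize() helper that this file keeps (now used only on the hpte_decode() side) recovers the actual page size from an HPTE's LP field by masking it down to however many encoding bits each candidate size uses and comparing against the penc value registered for the base size. Below is a small self-contained model of that loop; the table contents are invented for illustration, and only the masking logic mirrors the kernel code.

```c
#include <stdio.h>

#define MMU_PAGE_COUNT 4
#define LP_SHIFT 12
#define LP_BITS  8

/* Toy page-size table; the shift/penc values here are made up. */
struct psize_def { int shift; int penc[MMU_PAGE_COUNT]; };

static const struct psize_def psize_defs[MMU_PAGE_COUNT] = {
	/* 4K  */ { 12, { -1, -1, -1, -1 } },
	/* 64K */ { 16, { -1,  1, -1, -1 } },
	/* 16M */ { 24, { -1, -1,  0, -1 } },
	/* 16G */ { 34, { -1, -1, -1,  2 } },
};

/* Same shape as __hpte_actual_psize(): try every candidate actual size,
 * keep only the LP bits that size encodes, and match them against the
 * penc value defined for the base page size. Returns the index or -1. */
static int actual_psize(unsigned int lp, int base_psize)
{
	for (int i = 1; i < MMU_PAGE_COUNT; i++) {
		if (psize_defs[base_psize].penc[i] == -1)
			continue;
		int shift = psize_defs[i].shift - LP_SHIFT;
		if (shift > LP_BITS)
			shift = LP_BITS;
		unsigned int mask = (1u << shift) - 1;
		if ((lp & mask) == (unsigned int)psize_defs[base_psize].penc[i])
			return i;
	}
	return -1;
}

int main(void)
{
	/* A 16M-base HPTE whose LP bits decode back to the 16M entry. */
	printf("actual psize index: %d\n", actual_psize(0x0, 2));
	return 0;
}
```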
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index e303a6d74e3a..6ecc38bd5b24 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c | |||
@@ -807,7 +807,7 @@ void __init early_init_mmu(void) | |||
807 | } | 807 | } |
808 | 808 | ||
809 | #ifdef CONFIG_SMP | 809 | #ifdef CONFIG_SMP |
810 | void __cpuinit early_init_mmu_secondary(void) | 810 | void early_init_mmu_secondary(void) |
811 | { | 811 | { |
812 | /* Initialize hash table for that CPU */ | 812 | /* Initialize hash table for that CPU */ |
813 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | 813 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
@@ -1050,13 +1050,26 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) | |||
1050 | goto bail; | 1050 | goto bail; |
1051 | } | 1051 | } |
1052 | 1052 | ||
1053 | #ifdef CONFIG_HUGETLB_PAGE | ||
1054 | if (hugeshift) { | 1053 | if (hugeshift) { |
1055 | rc = __hash_page_huge(ea, access, vsid, ptep, trap, local, | 1054 | if (pmd_trans_huge(*(pmd_t *)ptep)) |
1056 | ssize, hugeshift, psize); | 1055 | rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, |
1056 | trap, local, ssize, psize); | ||
1057 | #ifdef CONFIG_HUGETLB_PAGE | ||
1058 | else | ||
1059 | rc = __hash_page_huge(ea, access, vsid, ptep, trap, | ||
1060 | local, ssize, hugeshift, psize); | ||
1061 | #else | ||
1062 | else { | ||
1063 | /* | ||
1064 | * If we have a huge shift and it is not a transparent hugepage | ||
1065 | * while hugetlb is disabled, something is really wrong. | ||
1066 | */ | ||
1067 | rc = 1; | ||
1068 | WARN_ON(1); | ||
1069 | } | ||
1070 | #endif | ||
1057 | goto bail; | 1071 | goto bail; |
1058 | } | 1072 | } |
1059 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
1060 | 1073 | ||
1061 | #ifndef CONFIG_PPC_64K_PAGES | 1074 | #ifndef CONFIG_PPC_64K_PAGES |
1062 | DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); | 1075 | DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep)); |
@@ -1145,6 +1158,7 @@ EXPORT_SYMBOL_GPL(hash_page); | |||
1145 | void hash_preload(struct mm_struct *mm, unsigned long ea, | 1158 | void hash_preload(struct mm_struct *mm, unsigned long ea, |
1146 | unsigned long access, unsigned long trap) | 1159 | unsigned long access, unsigned long trap) |
1147 | { | 1160 | { |
1161 | int hugepage_shift; | ||
1148 | unsigned long vsid; | 1162 | unsigned long vsid; |
1149 | pgd_t *pgdir; | 1163 | pgd_t *pgdir; |
1150 | pte_t *ptep; | 1164 | pte_t *ptep; |
@@ -1166,10 +1180,27 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
1166 | pgdir = mm->pgd; | 1180 | pgdir = mm->pgd; |
1167 | if (pgdir == NULL) | 1181 | if (pgdir == NULL) |
1168 | return; | 1182 | return; |
1169 | ptep = find_linux_pte(pgdir, ea); | 1183 | |
1170 | if (!ptep) | 1184 | /* Get VSID */ |
1185 | ssize = user_segment_size(ea); | ||
1186 | vsid = get_vsid(mm->context.id, ea, ssize); | ||
1187 | if (!vsid) | ||
1171 | return; | 1188 | return; |
1189 | /* | ||
1190 | * Hash doesn't like irqs. Walking linux page table with irq disabled | ||
1191 | * saves us from holding multiple locks. | ||
1192 | */ | ||
1193 | local_irq_save(flags); | ||
1194 | |||
1195 | /* | ||
1196 | * THP pages use update_mmu_cache_pmd. We don't do | ||
1197 | * hash preload there. Hence can ignore THP here | ||
1198 | */ | ||
1199 | ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugepage_shift); | ||
1200 | if (!ptep) | ||
1201 | goto out_exit; | ||
1172 | 1202 | ||
1203 | WARN_ON(hugepage_shift); | ||
1173 | #ifdef CONFIG_PPC_64K_PAGES | 1204 | #ifdef CONFIG_PPC_64K_PAGES |
1174 | /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on | 1205 | /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on |
1175 | * a 64K kernel), then we don't preload, hash_page() will take | 1206 | * a 64K kernel), then we don't preload, hash_page() will take |
@@ -1178,18 +1209,9 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
1178 | * page size demotion here | 1209 | * page size demotion here |
1179 | */ | 1210 | */ |
1180 | if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) | 1211 | if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) |
1181 | return; | 1212 | goto out_exit; |
1182 | #endif /* CONFIG_PPC_64K_PAGES */ | 1213 | #endif /* CONFIG_PPC_64K_PAGES */ |
1183 | 1214 | ||
1184 | /* Get VSID */ | ||
1185 | ssize = user_segment_size(ea); | ||
1186 | vsid = get_vsid(mm->context.id, ea, ssize); | ||
1187 | if (!vsid) | ||
1188 | return; | ||
1189 | |||
1190 | /* Hash doesn't like irqs */ | ||
1191 | local_irq_save(flags); | ||
1192 | |||
1193 | /* Is that local to this CPU ? */ | 1215 | /* Is that local to this CPU ? */ |
1194 | if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) | 1216 | if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) |
1195 | local = 1; | 1217 | local = 1; |
@@ -1211,7 +1233,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, | |||
1211 | mm->context.user_psize, | 1233 | mm->context.user_psize, |
1212 | mm->context.user_psize, | 1234 | mm->context.user_psize, |
1213 | pte_val(*ptep)); | 1235 | pte_val(*ptep)); |
1214 | 1236 | out_exit: | |
1215 | local_irq_restore(flags); | 1237 | local_irq_restore(flags); |
1216 | } | 1238 | } |
1217 | 1239 | ||
@@ -1232,7 +1254,11 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, | |||
1232 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | 1254 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; |
1233 | slot += hidx & _PTEIDX_GROUP_IX; | 1255 | slot += hidx & _PTEIDX_GROUP_IX; |
1234 | DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); | 1256 | DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx); |
1235 | ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local); | 1257 | /* |
1258 | * We use same base page size and actual psize, because we don't | ||
1259 | * use these functions for hugepage | ||
1260 | */ | ||
1261 | ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local); | ||
1236 | } pte_iterate_hashed_end(); | 1262 | } pte_iterate_hashed_end(); |
1237 | 1263 | ||
1238 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM | 1264 | #ifdef CONFIG_PPC_TRANSACTIONAL_MEM |
@@ -1365,7 +1391,8 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) | |||
1365 | hash = ~hash; | 1391 | hash = ~hash; |
1366 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | 1392 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; |
1367 | slot += hidx & _PTEIDX_GROUP_IX; | 1393 | slot += hidx & _PTEIDX_GROUP_IX; |
1368 | ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_kernel_ssize, 0); | 1394 | ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize, |
1395 | mmu_kernel_ssize, 0); | ||
1369 | } | 1396 | } |
1370 | 1397 | ||
1371 | void kernel_map_pages(struct page *page, int numpages, int enable) | 1398 | void kernel_map_pages(struct page *page, int numpages, int enable) |
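The hash_page() hunk above replaces the single CONFIG_HUGETLB_PAGE branch with a three-way decision: a huge shift backed by a transparent hugepage goes to __hash_page_thp() (built unconditionally), a hugetlbfs mapping goes to __hash_page_huge(), and a huge shift with hugetlb compiled out is treated as a bug. The standalone restatement below compresses that control flow; the stub handlers and boolean flags are placeholders, not kernel APIs.

```c
#include <assert.h>
#include <stdio.h>

/* Placeholders standing in for __hash_page_thp() / __hash_page_huge(). */
static int hash_page_thp_stub(void)  { return 0; }
static int hash_page_huge_stub(void) { return 0; }

/*
 * Restatement of the new hugeshift dispatch in hash_page():
 * return 0 on success, non-zero to force a fault, mirroring the hunk.
 */
static int hugeshift_dispatch(int hugeshift, int is_trans_huge, int hugetlb_built)
{
	if (!hugeshift)
		return 0;                      /* regular base-page path */
	if (is_trans_huge)
		return hash_page_thp_stub();   /* THP: handled unconditionally */
	if (hugetlb_built)
		return hash_page_huge_stub();  /* hugetlbfs page */
	/* huge shift without THP and without hugetlb: really wrong (WARN_ON) */
	return 1;
}

int main(void)
{
	assert(hugeshift_dispatch(24, 0, 0) == 1);  /* the WARN path */
	printf("dispatch checks passed\n");
	return 0;
}
```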
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c new file mode 100644 index 000000000000..34de9e0cdc34 --- /dev/null +++ b/arch/powerpc/mm/hugepage-hash64.c | |||
@@ -0,0 +1,175 @@ | |||
1 | /* | ||
2 | * Copyright IBM Corporation, 2013 | ||
3 | * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms of version 2.1 of the GNU Lesser General Public License | ||
7 | * as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | /* | ||
16 | * PPC64 THP Support for hash based MMUs | ||
17 | */ | ||
18 | #include <linux/mm.h> | ||
19 | #include <asm/machdep.h> | ||
20 | |||
21 | int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, | ||
22 | pmd_t *pmdp, unsigned long trap, int local, int ssize, | ||
23 | unsigned int psize) | ||
24 | { | ||
25 | unsigned int index, valid; | ||
26 | unsigned char *hpte_slot_array; | ||
27 | unsigned long rflags, pa, hidx; | ||
28 | unsigned long old_pmd, new_pmd; | ||
29 | int ret, lpsize = MMU_PAGE_16M; | ||
30 | unsigned long vpn, hash, shift, slot; | ||
31 | |||
32 | /* | ||
33 | * atomically mark the linux large page PMD busy and dirty | ||
34 | */ | ||
35 | do { | ||
36 | old_pmd = pmd_val(*pmdp); | ||
37 | /* If PMD busy, retry the access */ | ||
38 | if (unlikely(old_pmd & _PAGE_BUSY)) | ||
39 | return 0; | ||
40 | /* If PMD is trans splitting retry the access */ | ||
41 | if (unlikely(old_pmd & _PAGE_SPLITTING)) | ||
42 | return 0; | ||
43 | /* If PMD permissions don't match, take page fault */ | ||
44 | if (unlikely(access & ~old_pmd)) | ||
45 | return 1; | ||
46 | /* | ||
47 | * Try to lock the PTE, add ACCESSED and DIRTY if it was | ||
48 | * a write access | ||
49 | */ | ||
50 | new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED; | ||
51 | if (access & _PAGE_RW) | ||
52 | new_pmd |= _PAGE_DIRTY; | ||
53 | } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp, | ||
54 | old_pmd, new_pmd)); | ||
55 | /* | ||
56 | * PP bits. _PAGE_USER is already PP bit 0x2, so we only | ||
57 | * need to add in 0x1 if it's a read-only user page | ||
58 | */ | ||
59 | rflags = new_pmd & _PAGE_USER; | ||
60 | if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) && | ||
61 | (new_pmd & _PAGE_DIRTY))) | ||
62 | rflags |= 0x1; | ||
63 | /* | ||
64 | * _PAGE_EXEC -> HW_NO_EXEC since it's inverted | ||
65 | */ | ||
66 | rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N); | ||
67 | |||
68 | #if 0 | ||
69 | if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) { | ||
70 | |||
71 | /* | ||
72 | * No CPU has hugepages but lacks no execute, so we | ||
73 | * don't need to worry about that case | ||
74 | */ | ||
75 | rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); | ||
76 | } | ||
77 | #endif | ||
78 | /* | ||
79 | * Find the slot index details for this ea, using base page size. | ||
80 | */ | ||
81 | shift = mmu_psize_defs[psize].shift; | ||
82 | index = (ea & ~HPAGE_PMD_MASK) >> shift; | ||
83 | BUG_ON(index >= 4096); | ||
84 | |||
85 | vpn = hpt_vpn(ea, vsid, ssize); | ||
86 | hash = hpt_hash(vpn, shift, ssize); | ||
87 | hpte_slot_array = get_hpte_slot_array(pmdp); | ||
88 | |||
89 | valid = hpte_valid(hpte_slot_array, index); | ||
90 | if (valid) { | ||
91 | /* update the hpte bits */ | ||
92 | hidx = hpte_hash_index(hpte_slot_array, index); | ||
93 | if (hidx & _PTEIDX_SECONDARY) | ||
94 | hash = ~hash; | ||
95 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
96 | slot += hidx & _PTEIDX_GROUP_IX; | ||
97 | |||
98 | ret = ppc_md.hpte_updatepp(slot, rflags, vpn, | ||
99 | psize, lpsize, ssize, local); | ||
100 | /* | ||
101 | * We failed to update, try to insert a new entry. | ||
102 | */ | ||
103 | if (ret == -1) { | ||
104 | /* | ||
105 | * large pte is marked busy, so we can be sure | ||
106 | * nobody is looking at hpte_slot_array. hence we can | ||
107 | * safely update this here. | ||
108 | */ | ||
109 | valid = 0; | ||
110 | new_pmd &= ~_PAGE_HPTEFLAGS; | ||
111 | hpte_slot_array[index] = 0; | ||
112 | } else | ||
113 | /* clear the busy bits and set the hash pte bits */ | ||
114 | new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
115 | } | ||
116 | |||
117 | if (!valid) { | ||
118 | unsigned long hpte_group; | ||
119 | |||
120 | /* insert new entry */ | ||
121 | pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; | ||
122 | repeat: | ||
123 | hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; | ||
124 | |||
125 | /* clear the busy bits and set the hash pte bits */ | ||
126 | new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; | ||
127 | |||
128 | /* Add in WIMG bits */ | ||
129 | rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | | ||
130 | _PAGE_COHERENT | _PAGE_GUARDED)); | ||
131 | |||
132 | /* Insert into the hash table, primary slot */ | ||
133 | slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, | ||
134 | psize, lpsize, ssize); | ||
135 | /* | ||
136 | * Primary is full, try the secondary | ||
137 | */ | ||
138 | if (unlikely(slot == -1)) { | ||
139 | hpte_group = ((~hash & htab_hash_mask) * | ||
140 | HPTES_PER_GROUP) & ~0x7UL; | ||
141 | slot = ppc_md.hpte_insert(hpte_group, vpn, pa, | ||
142 | rflags, HPTE_V_SECONDARY, | ||
143 | psize, lpsize, ssize); | ||
144 | if (slot == -1) { | ||
145 | if (mftb() & 0x1) | ||
146 | hpte_group = ((hash & htab_hash_mask) * | ||
147 | HPTES_PER_GROUP) & ~0x7UL; | ||
148 | |||
149 | ppc_md.hpte_remove(hpte_group); | ||
150 | goto repeat; | ||
151 | } | ||
152 | } | ||
153 | /* | ||
154 | * Hypervisor failure. Restore old pmd and return -1 | ||
155 | * similar to __hash_page_* | ||
156 | */ | ||
157 | if (unlikely(slot == -2)) { | ||
158 | *pmdp = __pmd(old_pmd); | ||
159 | hash_failure_debug(ea, access, vsid, trap, ssize, | ||
160 | psize, lpsize, old_pmd); | ||
161 | return -1; | ||
162 | } | ||
163 | /* | ||
164 | * large pte is marked busy, so we can be sure | ||
165 | * nobody is looking at hpte_slot_array. hence we can | ||
166 | * safely update this here. | ||
167 | */ | ||
168 | mark_hpte_slot_valid(hpte_slot_array, index, slot); | ||
169 | } | ||
170 | /* | ||
171 | * No need to use ldarx/stdcx here | ||
172 | */ | ||
173 | *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); | ||
174 | return 0; | ||
175 | } | ||
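The heart of the new __hash_page_thp() above is the cmpxchg loop that "locks" the PMD by setting _PAGE_BUSY (plus ACCESSED/DIRTY) before touching the HPTE slot array. The sketch below restates that loop with a GCC atomic builtin in place of __cmpxchg_u64(); the bit names are stand-ins, and the return codes follow the convention in the hunk (0 = retry the access, 1 = take a fault).

```c
#include <stdbool.h>
#include <stdint.h>

/* Illustrative software PTE bits; the real values live in powerpc headers. */
#define PMD_BUSY      (1ULL << 0)
#define PMD_SPLITTING (1ULL << 1)
#define PMD_ACCESSED  (1ULL << 2)
#define PMD_DIRTY     (1ULL << 3)
#define PMD_RW        (1ULL << 4)

/*
 * Mark the huge PMD busy (and accessed/dirty) atomically, like the
 * do/while loop at the top of __hash_page_thp(). Returns:
 *   0 - pmd busy or splitting, caller should retry the access
 *   1 - permissions don't match, caller should take a page fault
 *   2 - locked; *old_out holds the value we started from
 */
int thp_pmd_trylock(uint64_t *pmdp, uint64_t access, uint64_t *old_out)
{
	uint64_t old, new;

	do {
		old = *pmdp;
		if (old & PMD_BUSY)
			return 0;
		if (old & PMD_SPLITTING)
			return 0;
		if (access & ~old)
			return 1;
		new = old | PMD_BUSY | PMD_ACCESSED;
		if (access & PMD_RW)
			new |= PMD_DIRTY;
	} while (!__atomic_compare_exchange_n(pmdp, &old, new, false,
					      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
	*old_out = old;
	return 2;   /* caller clears PMD_BUSY once the HPTE state is updated */
}
```

The slot-insertion and hypervisor-failure handling that follows the lock in the real function is omitted here.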
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 0f1d94a1fb82..0b7fb6761015 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c | |||
@@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, | |||
81 | slot += (old_pte & _PAGE_F_GIX) >> 12; | 81 | slot += (old_pte & _PAGE_F_GIX) >> 12; |
82 | 82 | ||
83 | if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, | 83 | if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, |
84 | ssize, local) == -1) | 84 | mmu_psize, ssize, local) == -1) |
85 | old_pte &= ~_PAGE_HPTEFLAGS; | 85 | old_pte &= ~_PAGE_HPTEFLAGS; |
86 | } | 86 | } |
87 | 87 | ||
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 77fdd2cef33b..834ca8eb38f2 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c | |||
@@ -21,6 +21,9 @@ | |||
21 | #include <asm/pgalloc.h> | 21 | #include <asm/pgalloc.h> |
22 | #include <asm/tlb.h> | 22 | #include <asm/tlb.h> |
23 | #include <asm/setup.h> | 23 | #include <asm/setup.h> |
24 | #include <asm/hugetlb.h> | ||
25 | |||
26 | #ifdef CONFIG_HUGETLB_PAGE | ||
24 | 27 | ||
25 | #define PAGE_SHIFT_64K 16 | 28 | #define PAGE_SHIFT_64K 16 |
26 | #define PAGE_SHIFT_16M 24 | 29 | #define PAGE_SHIFT_16M 24 |
@@ -100,68 +103,9 @@ int pgd_huge(pgd_t pgd) | |||
100 | } | 103 | } |
101 | #endif | 104 | #endif |
102 | 105 | ||
103 | /* | ||
104 | * We have 4 cases for pgds and pmds: | ||
105 | * (1) invalid (all zeroes) | ||
106 | * (2) pointer to next table, as normal; bottom 6 bits == 0 | ||
107 | * (3) leaf pte for huge page, bottom two bits != 00 | ||
108 | * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table | ||
109 | */ | ||
110 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) | ||
111 | { | ||
112 | pgd_t *pg; | ||
113 | pud_t *pu; | ||
114 | pmd_t *pm; | ||
115 | pte_t *ret_pte; | ||
116 | hugepd_t *hpdp = NULL; | ||
117 | unsigned pdshift = PGDIR_SHIFT; | ||
118 | |||
119 | if (shift) | ||
120 | *shift = 0; | ||
121 | |||
122 | pg = pgdir + pgd_index(ea); | ||
123 | |||
124 | if (pgd_huge(*pg)) { | ||
125 | ret_pte = (pte_t *) pg; | ||
126 | goto out; | ||
127 | } else if (is_hugepd(pg)) | ||
128 | hpdp = (hugepd_t *)pg; | ||
129 | else if (!pgd_none(*pg)) { | ||
130 | pdshift = PUD_SHIFT; | ||
131 | pu = pud_offset(pg, ea); | ||
132 | |||
133 | if (pud_huge(*pu)) { | ||
134 | ret_pte = (pte_t *) pu; | ||
135 | goto out; | ||
136 | } else if (is_hugepd(pu)) | ||
137 | hpdp = (hugepd_t *)pu; | ||
138 | else if (!pud_none(*pu)) { | ||
139 | pdshift = PMD_SHIFT; | ||
140 | pm = pmd_offset(pu, ea); | ||
141 | |||
142 | if (pmd_huge(*pm)) { | ||
143 | ret_pte = (pte_t *) pm; | ||
144 | goto out; | ||
145 | } else if (is_hugepd(pm)) | ||
146 | hpdp = (hugepd_t *)pm; | ||
147 | else if (!pmd_none(*pm)) | ||
148 | return pte_offset_kernel(pm, ea); | ||
149 | } | ||
150 | } | ||
151 | if (!hpdp) | ||
152 | return NULL; | ||
153 | |||
154 | ret_pte = hugepte_offset(hpdp, ea, pdshift); | ||
155 | pdshift = hugepd_shift(*hpdp); | ||
156 | out: | ||
157 | if (shift) | ||
158 | *shift = pdshift; | ||
159 | return ret_pte; | ||
160 | } | ||
161 | EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); | ||
162 | |||
163 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | 106 | pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) |
164 | { | 107 | { |
108 | /* Only called for hugetlbfs pages, hence can ignore THP */ | ||
165 | return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); | 109 | return find_linux_pte_or_hugepte(mm->pgd, addr, NULL); |
166 | } | 110 | } |
167 | 111 | ||
@@ -357,7 +301,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) | |||
357 | int alloc_bootmem_huge_page(struct hstate *hstate) | 301 | int alloc_bootmem_huge_page(struct hstate *hstate) |
358 | { | 302 | { |
359 | struct huge_bootmem_page *m; | 303 | struct huge_bootmem_page *m; |
360 | int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); | 304 | int idx = shift_to_mmu_psize(huge_page_shift(hstate)); |
361 | int nr_gpages = gpage_freearray[idx].nr_gpages; | 305 | int nr_gpages = gpage_freearray[idx].nr_gpages; |
362 | 306 | ||
363 | if (nr_gpages == 0) | 307 | if (nr_gpages == 0) |
@@ -736,11 +680,14 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) | |||
736 | struct page *page; | 680 | struct page *page; |
737 | unsigned shift; | 681 | unsigned shift; |
738 | unsigned long mask; | 682 | unsigned long mask; |
739 | 683 | /* | |
684 | * Transparent hugepages are handled by generic code. We can skip them | ||
685 | * here. | ||
686 | */ | ||
740 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); | 687 | ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift); |
741 | 688 | ||
742 | /* Verify it is a huge page else bail. */ | 689 | /* Verify it is a huge page else bail. */ |
743 | if (!ptep || !shift) | 690 | if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep)) |
744 | return ERR_PTR(-EINVAL); | 691 | return ERR_PTR(-EINVAL); |
745 | 692 | ||
746 | mask = (1UL << shift) - 1; | 693 | mask = (1UL << shift) - 1; |
@@ -759,69 +706,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
759 | return NULL; | 706 | return NULL; |
760 | } | 707 | } |
761 | 708 | ||
762 | int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | ||
763 | unsigned long end, int write, struct page **pages, int *nr) | ||
764 | { | ||
765 | unsigned long mask; | ||
766 | unsigned long pte_end; | ||
767 | struct page *head, *page, *tail; | ||
768 | pte_t pte; | ||
769 | int refs; | ||
770 | |||
771 | pte_end = (addr + sz) & ~(sz-1); | ||
772 | if (pte_end < end) | ||
773 | end = pte_end; | ||
774 | |||
775 | pte = *ptep; | ||
776 | mask = _PAGE_PRESENT | _PAGE_USER; | ||
777 | if (write) | ||
778 | mask |= _PAGE_RW; | ||
779 | |||
780 | if ((pte_val(pte) & mask) != mask) | ||
781 | return 0; | ||
782 | |||
783 | /* hugepages are never "special" */ | ||
784 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
785 | |||
786 | refs = 0; | ||
787 | head = pte_page(pte); | ||
788 | |||
789 | page = head + ((addr & (sz-1)) >> PAGE_SHIFT); | ||
790 | tail = page; | ||
791 | do { | ||
792 | VM_BUG_ON(compound_head(page) != head); | ||
793 | pages[*nr] = page; | ||
794 | (*nr)++; | ||
795 | page++; | ||
796 | refs++; | ||
797 | } while (addr += PAGE_SIZE, addr != end); | ||
798 | |||
799 | if (!page_cache_add_speculative(head, refs)) { | ||
800 | *nr -= refs; | ||
801 | return 0; | ||
802 | } | ||
803 | |||
804 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
805 | /* Could be optimized better */ | ||
806 | *nr -= refs; | ||
807 | while (refs--) | ||
808 | put_page(head); | ||
809 | return 0; | ||
810 | } | ||
811 | |||
812 | /* | ||
813 | * Any tail page need their mapcount reference taken before we | ||
814 | * return. | ||
815 | */ | ||
816 | while (refs--) { | ||
817 | if (PageTail(tail)) | ||
818 | get_huge_page_tail(tail); | ||
819 | tail++; | ||
820 | } | ||
821 | |||
822 | return 1; | ||
823 | } | ||
824 | |||
825 | static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, | 709 | static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, |
826 | unsigned long sz) | 710 | unsigned long sz) |
827 | { | 711 | { |
@@ -1038,3 +922,168 @@ void flush_dcache_icache_hugepage(struct page *page) | |||
1038 | } | 922 | } |
1039 | } | 923 | } |
1040 | } | 924 | } |
925 | |||
926 | #endif /* CONFIG_HUGETLB_PAGE */ | ||
927 | |||
928 | /* | ||
929 | * We have 4 cases for pgds and pmds: | ||
930 | * (1) invalid (all zeroes) | ||
931 | * (2) pointer to next table, as normal; bottom 6 bits == 0 | ||
932 | * (3) leaf pte for huge page, bottom two bits != 00 | ||
933 | * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table | ||
934 | * | ||
935 | * So long as we atomically load page table pointers we are safe against teardown, | ||
936 | * we can follow the address down to the page and take a ref on it. | ||
937 | */ | ||
938 | |||
939 | pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) | ||
940 | { | ||
941 | pgd_t pgd, *pgdp; | ||
942 | pud_t pud, *pudp; | ||
943 | pmd_t pmd, *pmdp; | ||
944 | pte_t *ret_pte; | ||
945 | hugepd_t *hpdp = NULL; | ||
946 | unsigned pdshift = PGDIR_SHIFT; | ||
947 | |||
948 | if (shift) | ||
949 | *shift = 0; | ||
950 | |||
951 | pgdp = pgdir + pgd_index(ea); | ||
952 | pgd = ACCESS_ONCE(*pgdp); | ||
953 | /* | ||
954 | * Always operate on the local stack value. This makes sure the | ||
955 | * value doesn't get updated by a parallel THP split/collapse, | ||
956 | * page fault or a page unmap. The returned pte_t * is still not | ||
957 | * stable, so it must be re-checked there for the above conditions. | ||
958 | */ | ||
959 | if (pgd_none(pgd)) | ||
960 | return NULL; | ||
961 | else if (pgd_huge(pgd)) { | ||
962 | ret_pte = (pte_t *) pgdp; | ||
963 | goto out; | ||
964 | } else if (is_hugepd(&pgd)) | ||
965 | hpdp = (hugepd_t *)&pgd; | ||
966 | else { | ||
967 | /* | ||
968 | * Even if we end up with an unmap, the pgtable will not | ||
969 | * be freed, because we do an rcu free and here we are | ||
970 | * irq disabled | ||
971 | */ | ||
972 | pdshift = PUD_SHIFT; | ||
973 | pudp = pud_offset(&pgd, ea); | ||
974 | pud = ACCESS_ONCE(*pudp); | ||
975 | |||
976 | if (pud_none(pud)) | ||
977 | return NULL; | ||
978 | else if (pud_huge(pud)) { | ||
979 | ret_pte = (pte_t *) pudp; | ||
980 | goto out; | ||
981 | } else if (is_hugepd(&pud)) | ||
982 | hpdp = (hugepd_t *)&pud; | ||
983 | else { | ||
984 | pdshift = PMD_SHIFT; | ||
985 | pmdp = pmd_offset(&pud, ea); | ||
986 | pmd = ACCESS_ONCE(*pmdp); | ||
987 | /* | ||
988 | * A hugepage collapse is captured by pmd_none, because | ||
989 | * it marks the pmd none and does a hpte invalidate. | ||
990 | * | ||
991 | * A hugepage split is captured by pmd_trans_splitting | ||
992 | * because we mark the pmd trans splitting and do a | ||
993 | * hpte invalidate | ||
994 | * | ||
995 | */ | ||
996 | if (pmd_none(pmd) || pmd_trans_splitting(pmd)) | ||
997 | return NULL; | ||
998 | |||
999 | if (pmd_huge(pmd) || pmd_large(pmd)) { | ||
1000 | ret_pte = (pte_t *) pmdp; | ||
1001 | goto out; | ||
1002 | } else if (is_hugepd(&pmd)) | ||
1003 | hpdp = (hugepd_t *)&pmd; | ||
1004 | else | ||
1005 | return pte_offset_kernel(&pmd, ea); | ||
1006 | } | ||
1007 | } | ||
1008 | if (!hpdp) | ||
1009 | return NULL; | ||
1010 | |||
1011 | ret_pte = hugepte_offset(hpdp, ea, pdshift); | ||
1012 | pdshift = hugepd_shift(*hpdp); | ||
1013 | out: | ||
1014 | if (shift) | ||
1015 | *shift = pdshift; | ||
1016 | return ret_pte; | ||
1017 | } | ||
1018 | EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte); | ||
1019 | |||
1020 | int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, | ||
1021 | unsigned long end, int write, struct page **pages, int *nr) | ||
1022 | { | ||
1023 | unsigned long mask; | ||
1024 | unsigned long pte_end; | ||
1025 | struct page *head, *page, *tail; | ||
1026 | pte_t pte; | ||
1027 | int refs; | ||
1028 | |||
1029 | pte_end = (addr + sz) & ~(sz-1); | ||
1030 | if (pte_end < end) | ||
1031 | end = pte_end; | ||
1032 | |||
1033 | pte = ACCESS_ONCE(*ptep); | ||
1034 | mask = _PAGE_PRESENT | _PAGE_USER; | ||
1035 | if (write) | ||
1036 | mask |= _PAGE_RW; | ||
1037 | |||
1038 | if ((pte_val(pte) & mask) != mask) | ||
1039 | return 0; | ||
1040 | |||
1041 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
1042 | /* | ||
1043 | * check for splitting here | ||
1044 | */ | ||
1045 | if (pmd_trans_splitting(pte_pmd(pte))) | ||
1046 | return 0; | ||
1047 | #endif | ||
1048 | |||
1049 | /* hugepages are never "special" */ | ||
1050 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
1051 | |||
1052 | refs = 0; | ||
1053 | head = pte_page(pte); | ||
1054 | |||
1055 | page = head + ((addr & (sz-1)) >> PAGE_SHIFT); | ||
1056 | tail = page; | ||
1057 | do { | ||
1058 | VM_BUG_ON(compound_head(page) != head); | ||
1059 | pages[*nr] = page; | ||
1060 | (*nr)++; | ||
1061 | page++; | ||
1062 | refs++; | ||
1063 | } while (addr += PAGE_SIZE, addr != end); | ||
1064 | |||
1065 | if (!page_cache_add_speculative(head, refs)) { | ||
1066 | *nr -= refs; | ||
1067 | return 0; | ||
1068 | } | ||
1069 | |||
1070 | if (unlikely(pte_val(pte) != pte_val(*ptep))) { | ||
1071 | /* Could be optimized better */ | ||
1072 | *nr -= refs; | ||
1073 | while (refs--) | ||
1074 | put_page(head); | ||
1075 | return 0; | ||
1076 | } | ||
1077 | |||
1078 | /* | ||
1079 | * Any tail page need their mapcount reference taken before we | ||
1080 | * return. | ||
1081 | */ | ||
1082 | while (refs--) { | ||
1083 | if (PageTail(tail)) | ||
1084 | get_huge_page_tail(tail); | ||
1085 | tail++; | ||
1086 | } | ||
1087 | |||
1088 | return 1; | ||
1089 | } | ||
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index a90b9c458990..d0cd9e4c6837 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c | |||
@@ -88,7 +88,11 @@ static void pgd_ctor(void *addr) | |||
88 | 88 | ||
89 | static void pmd_ctor(void *addr) | 89 | static void pmd_ctor(void *addr) |
90 | { | 90 | { |
91 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
92 | memset(addr, 0, PMD_TABLE_SIZE * 2); | ||
93 | #else | ||
91 | memset(addr, 0, PMD_TABLE_SIZE); | 94 | memset(addr, 0, PMD_TABLE_SIZE); |
95 | #endif | ||
92 | } | 96 | } |
93 | 97 | ||
94 | struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; | 98 | struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; |
@@ -137,10 +141,9 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) | |||
137 | void pgtable_cache_init(void) | 141 | void pgtable_cache_init(void) |
138 | { | 142 | { |
139 | pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); | 143 | pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); |
140 | pgtable_cache_add(PMD_INDEX_SIZE, pmd_ctor); | 144 | pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); |
141 | if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_INDEX_SIZE)) | 145 | if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) |
142 | panic("Couldn't allocate pgtable caches"); | 146 | panic("Couldn't allocate pgtable caches"); |
143 | |||
144 | /* In all current configs, when the PUD index exists it's the | 147 | /* In all current configs, when the PUD index exists it's the |
145 | * same size as either the pgd or pmd index. Verify that the | 148 | * same size as either the pgd or pmd index. Verify that the |
146 | * initialization above has also created a PUD cache. This | 149 | * initialization above has also created a PUD cache. This |
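pmd_ctor() now clears twice PMD_TABLE_SIZE when THP is enabled. The natural reading, an assumption drawn from get_hpte_slot_array(pmdp) in the new hugepage-hash64.c rather than anything stated in this hunk, is that the PMD page carries extra per-entry THP tracking state after the ordinary entries, so the whole allocation has to start out zeroed. A trivial restatement:

```c
#include <string.h>

/* Restatement of the ctor sizing decision; 'thp_enabled' is a stand-in
 * for the CONFIG_TRANSPARENT_HUGEPAGE compile-time switch. */
static void pmd_ctor_sketch(void *addr, size_t pmd_table_size, int thp_enabled)
{
	size_t size = thp_enabled ? pmd_table_size * 2 : pmd_table_size;

	memset(addr, 0, size);   /* pmd entries plus the trailing THP state */
}
```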
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 0988a26e0413..7f4bea162026 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c | |||
@@ -299,47 +299,13 @@ void __init paging_init(void) | |||
299 | 299 | ||
300 | void __init mem_init(void) | 300 | void __init mem_init(void) |
301 | { | 301 | { |
302 | #ifdef CONFIG_NEED_MULTIPLE_NODES | ||
303 | int nid; | ||
304 | #endif | ||
305 | pg_data_t *pgdat; | ||
306 | unsigned long i; | ||
307 | struct page *page; | ||
308 | unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; | ||
309 | |||
310 | #ifdef CONFIG_SWIOTLB | 302 | #ifdef CONFIG_SWIOTLB |
311 | swiotlb_init(0); | 303 | swiotlb_init(0); |
312 | #endif | 304 | #endif |
313 | 305 | ||
314 | num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; | ||
315 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); | 306 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); |
316 | 307 | set_max_mapnr(max_pfn); | |
317 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 308 | free_all_bootmem(); |
318 | for_each_online_node(nid) { | ||
319 | if (NODE_DATA(nid)->node_spanned_pages != 0) { | ||
320 | printk("freeing bootmem node %d\n", nid); | ||
321 | totalram_pages += | ||
322 | free_all_bootmem_node(NODE_DATA(nid)); | ||
323 | } | ||
324 | } | ||
325 | #else | ||
326 | max_mapnr = max_pfn; | ||
327 | totalram_pages += free_all_bootmem(); | ||
328 | #endif | ||
329 | for_each_online_pgdat(pgdat) { | ||
330 | for (i = 0; i < pgdat->node_spanned_pages; i++) { | ||
331 | if (!pfn_valid(pgdat->node_start_pfn + i)) | ||
332 | continue; | ||
333 | page = pgdat_page_nr(pgdat, i); | ||
334 | if (PageReserved(page)) | ||
335 | reservedpages++; | ||
336 | } | ||
337 | } | ||
338 | |||
339 | codesize = (unsigned long)&_sdata - (unsigned long)&_stext; | ||
340 | datasize = (unsigned long)&_edata - (unsigned long)&_sdata; | ||
341 | initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; | ||
342 | bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; | ||
343 | 309 | ||
344 | #ifdef CONFIG_HIGHMEM | 310 | #ifdef CONFIG_HIGHMEM |
345 | { | 311 | { |
@@ -349,13 +315,9 @@ void __init mem_init(void) | |||
349 | for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { | 315 | for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { |
350 | phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; | 316 | phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT; |
351 | struct page *page = pfn_to_page(pfn); | 317 | struct page *page = pfn_to_page(pfn); |
352 | if (memblock_is_reserved(paddr)) | 318 | if (!memblock_is_reserved(paddr)) |
353 | continue; | 319 | free_highmem_page(page); |
354 | free_highmem_page(page); | ||
355 | reservedpages--; | ||
356 | } | 320 | } |
357 | printk(KERN_DEBUG "High memory: %luk\n", | ||
358 | totalhigh_pages << (PAGE_SHIFT-10)); | ||
359 | } | 321 | } |
360 | #endif /* CONFIG_HIGHMEM */ | 322 | #endif /* CONFIG_HIGHMEM */ |
361 | 323 | ||
@@ -368,16 +330,7 @@ void __init mem_init(void) | |||
368 | (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; | 330 | (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1; |
369 | #endif | 331 | #endif |
370 | 332 | ||
371 | printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " | 333 | mem_init_print_info(NULL); |
372 | "%luk reserved, %luk data, %luk bss, %luk init)\n", | ||
373 | nr_free_pages() << (PAGE_SHIFT-10), | ||
374 | num_physpages << (PAGE_SHIFT-10), | ||
375 | codesize >> 10, | ||
376 | reservedpages << (PAGE_SHIFT-10), | ||
377 | datasize >> 10, | ||
378 | bsssize >> 10, | ||
379 | initsize >> 10); | ||
380 | |||
381 | #ifdef CONFIG_PPC32 | 334 | #ifdef CONFIG_PPC32 |
382 | pr_info("Kernel virtual memory layout:\n"); | 335 | pr_info("Kernel virtual memory layout:\n"); |
383 | pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); | 336 | pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP); |
@@ -407,7 +360,7 @@ void free_initmem(void) | |||
407 | #ifdef CONFIG_BLK_DEV_INITRD | 360 | #ifdef CONFIG_BLK_DEV_INITRD |
408 | void __init free_initrd_mem(unsigned long start, unsigned long end) | 361 | void __init free_initrd_mem(unsigned long start, unsigned long end) |
409 | { | 362 | { |
410 | free_reserved_area(start, end, 0, "initrd"); | 363 | free_reserved_area((void *)start, (void *)end, -1, "initrd"); |
411 | } | 364 | } |
412 | #endif | 365 | #endif |
413 | 366 | ||
@@ -508,6 +461,10 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, | |||
508 | pte_t *ptep) | 461 | pte_t *ptep) |
509 | { | 462 | { |
510 | #ifdef CONFIG_PPC_STD_MMU | 463 | #ifdef CONFIG_PPC_STD_MMU |
464 | /* | ||
465 | * We don't need to worry about _PAGE_PRESENT here because we are | ||
466 | * called with either mm->page_table_lock held or ptl lock held | ||
467 | */ | ||
511 | unsigned long access = 0, trap; | 468 | unsigned long access = 0, trap; |
512 | 469 | ||
513 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ | 470 | /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ |
diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap.c index 67a42ed0d2fc..cb8bdbe4972f 100644 --- a/arch/powerpc/mm/mmap_64.c +++ b/arch/powerpc/mm/mmap.c | |||
@@ -92,10 +92,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm) | |||
92 | if (mmap_is_legacy()) { | 92 | if (mmap_is_legacy()) { |
93 | mm->mmap_base = TASK_UNMAPPED_BASE; | 93 | mm->mmap_base = TASK_UNMAPPED_BASE; |
94 | mm->get_unmapped_area = arch_get_unmapped_area; | 94 | mm->get_unmapped_area = arch_get_unmapped_area; |
95 | mm->unmap_area = arch_unmap_area; | ||
96 | } else { | 95 | } else { |
97 | mm->mmap_base = mmap_base(); | 96 | mm->mmap_base = mmap_base(); |
98 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | 97 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; |
99 | mm->unmap_area = arch_unmap_area_topdown; | ||
100 | } | 98 | } |
101 | } | 99 | } |
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index e779642c25e5..af3d78e19302 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c | |||
@@ -112,8 +112,10 @@ static unsigned int steal_context_smp(unsigned int id) | |||
112 | */ | 112 | */ |
113 | for_each_cpu(cpu, mm_cpumask(mm)) { | 113 | for_each_cpu(cpu, mm_cpumask(mm)) { |
114 | for (i = cpu_first_thread_sibling(cpu); | 114 | for (i = cpu_first_thread_sibling(cpu); |
115 | i <= cpu_last_thread_sibling(cpu); i++) | 115 | i <= cpu_last_thread_sibling(cpu); i++) { |
116 | __set_bit(id, stale_map[i]); | 116 | if (stale_map[i]) |
117 | __set_bit(id, stale_map[i]); | ||
118 | } | ||
117 | cpu = i - 1; | 119 | cpu = i - 1; |
118 | } | 120 | } |
119 | return id; | 121 | return id; |
@@ -272,7 +274,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) | |||
272 | /* XXX This clear should ultimately be part of local_flush_tlb_mm */ | 274 | /* XXX This clear should ultimately be part of local_flush_tlb_mm */ |
273 | for (i = cpu_first_thread_sibling(cpu); | 275 | for (i = cpu_first_thread_sibling(cpu); |
274 | i <= cpu_last_thread_sibling(cpu); i++) { | 276 | i <= cpu_last_thread_sibling(cpu); i++) { |
275 | __clear_bit(id, stale_map[i]); | 277 | if (stale_map[i]) |
278 | __clear_bit(id, stale_map[i]); | ||
276 | } | 279 | } |
277 | } | 280 | } |
278 | 281 | ||
@@ -329,8 +332,8 @@ void destroy_context(struct mm_struct *mm) | |||
329 | 332 | ||
330 | #ifdef CONFIG_SMP | 333 | #ifdef CONFIG_SMP |
331 | 334 | ||
332 | static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, | 335 | static int mmu_context_cpu_notify(struct notifier_block *self, |
333 | unsigned long action, void *hcpu) | 336 | unsigned long action, void *hcpu) |
334 | { | 337 | { |
335 | unsigned int cpu = (unsigned int)(long)hcpu; | 338 | unsigned int cpu = (unsigned int)(long)hcpu; |
336 | 339 | ||
@@ -363,7 +366,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, | |||
363 | return NOTIFY_OK; | 366 | return NOTIFY_OK; |
364 | } | 367 | } |
365 | 368 | ||
366 | static struct notifier_block __cpuinitdata mmu_context_cpu_nb = { | 369 | static struct notifier_block mmu_context_cpu_nb = { |
367 | .notifier_call = mmu_context_cpu_notify, | 370 | .notifier_call = mmu_context_cpu_notify, |
368 | }; | 371 | }; |
369 | 372 | ||
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 88c0425dc0a8..5850798826cd 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/seq_file.h> | 27 | #include <linux/seq_file.h> |
28 | #include <linux/uaccess.h> | 28 | #include <linux/uaccess.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <asm/cputhreads.h> | ||
30 | #include <asm/sparsemem.h> | 31 | #include <asm/sparsemem.h> |
31 | #include <asm/prom.h> | 32 | #include <asm/prom.h> |
32 | #include <asm/smp.h> | 33 | #include <asm/smp.h> |
@@ -516,7 +517,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, | |||
516 | * Figure out to which domain a cpu belongs and stick it there. | 517 | * Figure out to which domain a cpu belongs and stick it there. |
517 | * Return the id of the domain used. | 518 | * Return the id of the domain used. |
518 | */ | 519 | */ |
519 | static int __cpuinit numa_setup_cpu(unsigned long lcpu) | 520 | static int numa_setup_cpu(unsigned long lcpu) |
520 | { | 521 | { |
521 | int nid = 0; | 522 | int nid = 0; |
522 | struct device_node *cpu = of_get_cpu_node(lcpu, NULL); | 523 | struct device_node *cpu = of_get_cpu_node(lcpu, NULL); |
@@ -538,8 +539,7 @@ out: | |||
538 | return nid; | 539 | return nid; |
539 | } | 540 | } |
540 | 541 | ||
541 | static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, | 542 | static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, |
542 | unsigned long action, | ||
543 | void *hcpu) | 543 | void *hcpu) |
544 | { | 544 | { |
545 | unsigned long lcpu = (unsigned long)hcpu; | 545 | unsigned long lcpu = (unsigned long)hcpu; |
@@ -919,7 +919,7 @@ static void __init *careful_zallocation(int nid, unsigned long size, | |||
919 | return ret; | 919 | return ret; |
920 | } | 920 | } |
921 | 921 | ||
922 | static struct notifier_block __cpuinitdata ppc64_numa_nb = { | 922 | static struct notifier_block ppc64_numa_nb = { |
923 | .notifier_call = cpu_numa_callback, | 923 | .notifier_call = cpu_numa_callback, |
924 | .priority = 1 /* Must run before sched domains notifier. */ | 924 | .priority = 1 /* Must run before sched domains notifier. */ |
925 | }; | 925 | }; |
@@ -1319,7 +1319,8 @@ static int update_cpu_associativity_changes_mask(void) | |||
1319 | } | 1319 | } |
1320 | } | 1320 | } |
1321 | if (changed) { | 1321 | if (changed) { |
1322 | cpumask_set_cpu(cpu, changes); | 1322 | cpumask_or(changes, changes, cpu_sibling_mask(cpu)); |
1323 | cpu = cpu_last_thread_sibling(cpu); | ||
1323 | } | 1324 | } |
1324 | } | 1325 | } |
1325 | 1326 | ||
@@ -1427,17 +1428,15 @@ static int update_cpu_topology(void *data) | |||
1427 | if (!data) | 1428 | if (!data) |
1428 | return -EINVAL; | 1429 | return -EINVAL; |
1429 | 1430 | ||
1430 | cpu = get_cpu(); | 1431 | cpu = smp_processor_id(); |
1431 | 1432 | ||
1432 | for (update = data; update; update = update->next) { | 1433 | for (update = data; update; update = update->next) { |
1433 | if (cpu != update->cpu) | 1434 | if (cpu != update->cpu) |
1434 | continue; | 1435 | continue; |
1435 | 1436 | ||
1436 | unregister_cpu_under_node(update->cpu, update->old_nid); | ||
1437 | unmap_cpu_from_node(update->cpu); | 1437 | unmap_cpu_from_node(update->cpu); |
1438 | map_cpu_to_node(update->cpu, update->new_nid); | 1438 | map_cpu_to_node(update->cpu, update->new_nid); |
1439 | vdso_getcpu_init(); | 1439 | vdso_getcpu_init(); |
1440 | register_cpu_under_node(update->cpu, update->new_nid); | ||
1441 | } | 1440 | } |
1442 | 1441 | ||
1443 | return 0; | 1442 | return 0; |
@@ -1449,12 +1448,12 @@ static int update_cpu_topology(void *data) | |||
1449 | */ | 1448 | */ |
1450 | int arch_update_cpu_topology(void) | 1449 | int arch_update_cpu_topology(void) |
1451 | { | 1450 | { |
1452 | unsigned int cpu, changed = 0; | 1451 | unsigned int cpu, sibling, changed = 0; |
1453 | struct topology_update_data *updates, *ud; | 1452 | struct topology_update_data *updates, *ud; |
1454 | unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; | 1453 | unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0}; |
1455 | cpumask_t updated_cpus; | 1454 | cpumask_t updated_cpus; |
1456 | struct device *dev; | 1455 | struct device *dev; |
1457 | int weight, i = 0; | 1456 | int weight, new_nid, i = 0; |
1458 | 1457 | ||
1459 | weight = cpumask_weight(&cpu_associativity_changes_mask); | 1458 | weight = cpumask_weight(&cpu_associativity_changes_mask); |
1460 | if (!weight) | 1459 | if (!weight) |
@@ -1467,24 +1466,54 @@ int arch_update_cpu_topology(void) | |||
1467 | cpumask_clear(&updated_cpus); | 1466 | cpumask_clear(&updated_cpus); |
1468 | 1467 | ||
1469 | for_each_cpu(cpu, &cpu_associativity_changes_mask) { | 1468 | for_each_cpu(cpu, &cpu_associativity_changes_mask) { |
1470 | ud = &updates[i++]; | 1469 | /* |
1471 | ud->cpu = cpu; | 1470 | * If siblings aren't flagged for changes, updates list |
1472 | vphn_get_associativity(cpu, associativity); | 1471 | * will be too short. Skip on this update and set for next |
1473 | ud->new_nid = associativity_to_nid(associativity); | 1472 | * update. |
1474 | 1473 | */ | |
1475 | if (ud->new_nid < 0 || !node_online(ud->new_nid)) | 1474 | if (!cpumask_subset(cpu_sibling_mask(cpu), |
1476 | ud->new_nid = first_online_node; | 1475 | &cpu_associativity_changes_mask)) { |
1476 | pr_info("Sibling bits not set for associativity " | ||
1477 | "change, cpu%d\n", cpu); | ||
1478 | cpumask_or(&cpu_associativity_changes_mask, | ||
1479 | &cpu_associativity_changes_mask, | ||
1480 | cpu_sibling_mask(cpu)); | ||
1481 | cpu = cpu_last_thread_sibling(cpu); | ||
1482 | continue; | ||
1483 | } | ||
1477 | 1484 | ||
1478 | ud->old_nid = numa_cpu_lookup_table[cpu]; | 1485 | /* Use associativity from first thread for all siblings */ |
1479 | cpumask_set_cpu(cpu, &updated_cpus); | 1486 | vphn_get_associativity(cpu, associativity); |
1487 | new_nid = associativity_to_nid(associativity); | ||
1488 | if (new_nid < 0 || !node_online(new_nid)) | ||
1489 | new_nid = first_online_node; | ||
1490 | |||
1491 | if (new_nid == numa_cpu_lookup_table[cpu]) { | ||
1492 | cpumask_andnot(&cpu_associativity_changes_mask, | ||
1493 | &cpu_associativity_changes_mask, | ||
1494 | cpu_sibling_mask(cpu)); | ||
1495 | cpu = cpu_last_thread_sibling(cpu); | ||
1496 | continue; | ||
1497 | } | ||
1480 | 1498 | ||
1481 | if (i < weight) | 1499 | for_each_cpu(sibling, cpu_sibling_mask(cpu)) { |
1482 | ud->next = &updates[i]; | 1500 | ud = &updates[i++]; |
1501 | ud->cpu = sibling; | ||
1502 | ud->new_nid = new_nid; | ||
1503 | ud->old_nid = numa_cpu_lookup_table[sibling]; | ||
1504 | cpumask_set_cpu(sibling, &updated_cpus); | ||
1505 | if (i < weight) | ||
1506 | ud->next = &updates[i]; | ||
1507 | } | ||
1508 | cpu = cpu_last_thread_sibling(cpu); | ||
1483 | } | 1509 | } |
1484 | 1510 | ||
1485 | stop_machine(update_cpu_topology, &updates[0], &updated_cpus); | 1511 | stop_machine(update_cpu_topology, &updates[0], &updated_cpus); |
1486 | 1512 | ||
1487 | for (ud = &updates[0]; ud; ud = ud->next) { | 1513 | for (ud = &updates[0]; ud; ud = ud->next) { |
1514 | unregister_cpu_under_node(ud->cpu, ud->old_nid); | ||
1515 | register_cpu_under_node(ud->cpu, ud->new_nid); | ||
1516 | |||
1488 | dev = get_cpu_device(ud->cpu); | 1517 | dev = get_cpu_device(ud->cpu); |
1489 | if (dev) | 1518 | if (dev) |
1490 | kobject_uevent(&dev->kobj, KOBJ_CHANGE); | 1519 | kobject_uevent(&dev->kobj, KOBJ_CHANGE); |
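The reworked arch_update_cpu_topology() above resolves the associativity once per core (from its first thread) and then fans the resulting node out to every sibling, skipping ahead with cpu_last_thread_sibling(). A small stand-alone sketch of that "one lookup per core, apply to the whole sibling group" loop, using a fixed THREADS_PER_CORE and a toy lookup in place of cpu_sibling_mask() and vphn_get_associativity() (all values illustrative):

    #include <stdio.h>

    #define NR_CPUS          16
    #define THREADS_PER_CORE 4               /* assumed SMT width */

    static int node_of_cpu[NR_CPUS];          /* like numa_cpu_lookup_table[] */

    static int first_sibling(int cpu) { return cpu - (cpu % THREADS_PER_CORE); }
    static int last_sibling(int cpu)  { return first_sibling(cpu) + THREADS_PER_CORE - 1; }

    /* Pretend firmware lookup: which node does this core belong to now? */
    static int lookup_new_node(int cpu) { return (cpu / THREADS_PER_CORE) % 2; }

    static void update_topology(const int changed[NR_CPUS])
    {
        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
            if (!changed[cpu])
                continue;

            /* One lookup for the first thread, applied to all its siblings. */
            int new_nid = lookup_new_node(cpu);
            for (int sib = first_sibling(cpu); sib <= last_sibling(cpu); sib++)
                node_of_cpu[sib] = new_nid;

            cpu = last_sibling(cpu);          /* skip the rest of this core */
        }
    }

    int main(void)
    {
        int changed[NR_CPUS] = { [4] = 1, [5] = 1, [6] = 1, [7] = 1 };

        update_topology(changed);
        for (int cpu = 4; cpu < 8; cpu++)
            printf("cpu%d -> node %d\n", cpu, node_of_cpu[cpu]);
        return 0;
    }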
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 214130a4edc6..edda589795c3 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c | |||
@@ -235,6 +235,14 @@ void assert_pte_locked(struct mm_struct *mm, unsigned long addr) | |||
235 | pud = pud_offset(pgd, addr); | 235 | pud = pud_offset(pgd, addr); |
236 | BUG_ON(pud_none(*pud)); | 236 | BUG_ON(pud_none(*pud)); |
237 | pmd = pmd_offset(pud, addr); | 237 | pmd = pmd_offset(pud, addr); |
238 | /* | ||
239 | * khugepaged, to collapse normal pages to a hugepage, first sets | ||
240 | * the pmd to none to force page fault/gup to take mmap_sem. After | ||
241 | * the pmd is set to none, we do a pte_clear which does this | ||
242 | * assertion, so if we find the pmd none, return. | ||
243 | */ | ||
244 | if (pmd_none(*pmd)) | ||
245 | return; | ||
238 | BUG_ON(!pmd_present(*pmd)); | 246 | BUG_ON(!pmd_present(*pmd)); |
239 | assert_spin_locked(pte_lockptr(mm, pmd)); | 247 | assert_spin_locked(pte_lockptr(mm, pmd)); |
240 | } | 248 | } |
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index a854096e1023..536eec72c0f7 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c | |||
@@ -338,6 +338,19 @@ EXPORT_SYMBOL(iounmap); | |||
338 | EXPORT_SYMBOL(__iounmap); | 338 | EXPORT_SYMBOL(__iounmap); |
339 | EXPORT_SYMBOL(__iounmap_at); | 339 | EXPORT_SYMBOL(__iounmap_at); |
340 | 340 | ||
341 | /* | ||
342 | * For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags | ||
343 | * For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address. | ||
344 | */ | ||
345 | struct page *pmd_page(pmd_t pmd) | ||
346 | { | ||
347 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
348 | if (pmd_trans_huge(pmd)) | ||
349 | return pfn_to_page(pmd_pfn(pmd)); | ||
350 | #endif | ||
351 | return virt_to_page(pmd_page_vaddr(pmd)); | ||
352 | } | ||
353 | |||
341 | #ifdef CONFIG_PPC_64K_PAGES | 354 | #ifdef CONFIG_PPC_64K_PAGES |
342 | static pte_t *get_from_cache(struct mm_struct *mm) | 355 | static pte_t *get_from_cache(struct mm_struct *mm) |
343 | { | 356 | { |
@@ -455,3 +468,404 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) | |||
455 | } | 468 | } |
456 | #endif | 469 | #endif |
457 | #endif /* CONFIG_PPC_64K_PAGES */ | 470 | #endif /* CONFIG_PPC_64K_PAGES */ |
471 | |||
472 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
473 | |||
474 | /* | ||
475 | * This is called when relaxing access to a hugepage. It's also called in the page | ||
476 | * fault path when we don't hit any of the major fault cases, ie, a minor | ||
477 | * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have | ||
478 | * handled those two for us, we additionally deal with missing execute | ||
479 | * permission here on some processors | ||
480 | */ | ||
481 | int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, | ||
482 | pmd_t *pmdp, pmd_t entry, int dirty) | ||
483 | { | ||
484 | int changed; | ||
485 | #ifdef CONFIG_DEBUG_VM | ||
486 | WARN_ON(!pmd_trans_huge(*pmdp)); | ||
487 | assert_spin_locked(&vma->vm_mm->page_table_lock); | ||
488 | #endif | ||
489 | changed = !pmd_same(*(pmdp), entry); | ||
490 | if (changed) { | ||
491 | __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry)); | ||
492 | /* | ||
493 | * Since we are not supporting SW TLB systems, we don't | ||
494 | * have any thing similar to flush_tlb_page_nohash() | ||
495 | */ | ||
496 | } | ||
497 | return changed; | ||
498 | } | ||
499 | |||
500 | unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, | ||
501 | pmd_t *pmdp, unsigned long clr) | ||
502 | { | ||
503 | |||
504 | unsigned long old, tmp; | ||
505 | |||
506 | #ifdef CONFIG_DEBUG_VM | ||
507 | WARN_ON(!pmd_trans_huge(*pmdp)); | ||
508 | assert_spin_locked(&mm->page_table_lock); | ||
509 | #endif | ||
510 | |||
511 | #ifdef PTE_ATOMIC_UPDATES | ||
512 | __asm__ __volatile__( | ||
513 | "1: ldarx %0,0,%3\n\ | ||
514 | andi. %1,%0,%6\n\ | ||
515 | bne- 1b \n\ | ||
516 | andc %1,%0,%4 \n\ | ||
517 | stdcx. %1,0,%3 \n\ | ||
518 | bne- 1b" | ||
519 | : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) | ||
520 | : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY) | ||
521 | : "cc" ); | ||
522 | #else | ||
523 | old = pmd_val(*pmdp); | ||
524 | *pmdp = __pmd(old & ~clr); | ||
525 | #endif | ||
526 | if (old & _PAGE_HASHPTE) | ||
527 | hpte_do_hugepage_flush(mm, addr, pmdp); | ||
528 | return old; | ||
529 | } | ||
530 | |||
531 | pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, | ||
532 | pmd_t *pmdp) | ||
533 | { | ||
534 | pmd_t pmd; | ||
535 | |||
536 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
537 | if (pmd_trans_huge(*pmdp)) { | ||
538 | pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp); | ||
539 | } else { | ||
540 | /* | ||
541 | * khugepaged calls this for normal pmd | ||
542 | */ | ||
543 | pmd = *pmdp; | ||
544 | pmd_clear(pmdp); | ||
545 | /* | ||
546 | * Wait for all pending hash_page to finish. This is needed | ||
547 | * in case of subpage collapse. When we collapse normal pages | ||
548 | * to hugepage, we first clear the pmd, then invalidate all | ||
549 | * the PTE entries. The assumption here is that any low level | ||
550 | * page fault will see a none pmd and take the slow path that | ||
551 | * will wait on mmap_sem. But we could very well be in a | ||
552 | * hash_page with local ptep pointer value. Such a hash page | ||
553 | * can result in adding new HPTE entries for normal subpages. | ||
554 | * That means we could be modifying the page content as we | ||
555 | * copy them to a huge page. So wait for parallel hash_page | ||
556 | * to finish before invalidating HPTE entries. We can do this | ||
557 | * by sending an IPI to all the cpus and executing a dummy | ||
558 | * function there. | ||
559 | */ | ||
560 | kick_all_cpus_sync(); | ||
561 | /* | ||
562 | * Now invalidate the hpte entries in the range | ||
563 | * covered by pmd. This make sure we take a | ||
564 | * fault and will find the pmd as none, which will | ||
565 | * result in a major fault which takes mmap_sem and | ||
566 | * hence wait for collapse to complete. Without this | ||
567 | * the __collapse_huge_page_copy can result in copying | ||
568 | * the old content. | ||
569 | */ | ||
570 | flush_tlb_pmd_range(vma->vm_mm, &pmd, address); | ||
571 | } | ||
572 | return pmd; | ||
573 | } | ||
574 | |||
575 | int pmdp_test_and_clear_young(struct vm_area_struct *vma, | ||
576 | unsigned long address, pmd_t *pmdp) | ||
577 | { | ||
578 | return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp); | ||
579 | } | ||
580 | |||
581 | /* | ||
582 | * We currently remove entries from the hashtable regardless of whether | ||
583 | * the entry was young or dirty. The generic routines only flush if the | ||
584 | * entry was young or dirty which is not good enough. | ||
585 | * | ||
586 | * We should be more intelligent about this but for the moment we override | ||
587 | * these functions and force a tlb flush unconditionally | ||
588 | */ | ||
589 | int pmdp_clear_flush_young(struct vm_area_struct *vma, | ||
590 | unsigned long address, pmd_t *pmdp) | ||
591 | { | ||
592 | return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp); | ||
593 | } | ||
594 | |||
595 | /* | ||
596 | * We mark the pmd splitting and invalidate all the hpte | ||
597 | * entries for this hugepage. | ||
598 | */ | ||
599 | void pmdp_splitting_flush(struct vm_area_struct *vma, | ||
600 | unsigned long address, pmd_t *pmdp) | ||
601 | { | ||
602 | unsigned long old, tmp; | ||
603 | |||
604 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
605 | |||
606 | #ifdef CONFIG_DEBUG_VM | ||
607 | WARN_ON(!pmd_trans_huge(*pmdp)); | ||
608 | assert_spin_locked(&vma->vm_mm->page_table_lock); | ||
609 | #endif | ||
610 | |||
611 | #ifdef PTE_ATOMIC_UPDATES | ||
612 | |||
613 | __asm__ __volatile__( | ||
614 | "1: ldarx %0,0,%3\n\ | ||
615 | andi. %1,%0,%6\n\ | ||
616 | bne- 1b \n\ | ||
617 | ori %1,%0,%4 \n\ | ||
618 | stdcx. %1,0,%3 \n\ | ||
619 | bne- 1b" | ||
620 | : "=&r" (old), "=&r" (tmp), "=m" (*pmdp) | ||
621 | : "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY) | ||
622 | : "cc" ); | ||
623 | #else | ||
624 | old = pmd_val(*pmdp); | ||
625 | *pmdp = __pmd(old | _PAGE_SPLITTING); | ||
626 | #endif | ||
627 | /* | ||
628 | * If we didn't have the splitting flag set, go and flush the | ||
629 | * HPTE entries. | ||
630 | */ | ||
631 | if (!(old & _PAGE_SPLITTING)) { | ||
632 | /* We need to flush the hpte */ | ||
633 | if (old & _PAGE_HASHPTE) | ||
634 | hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); | ||
635 | } | ||
636 | } | ||
637 | |||
638 | /* | ||
639 | * We want to put the pgtable in pmd and use pgtable for tracking | ||
640 | * the base page size hptes | ||
641 | */ | ||
642 | void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, | ||
643 | pgtable_t pgtable) | ||
644 | { | ||
645 | pgtable_t *pgtable_slot; | ||
646 | assert_spin_locked(&mm->page_table_lock); | ||
647 | /* | ||
648 | * we store the pgtable in the second half of PMD | ||
649 | */ | ||
650 | pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; | ||
651 | *pgtable_slot = pgtable; | ||
652 | /* | ||
653 | * Expose the deposited pgtable to other cpus before we set | ||
654 | * the hugepage PTE at the pmd level. The hash fault code | ||
655 | * looks at the deposited pgtable to store hash index | ||
656 | * values. | ||
657 | */ | ||
658 | smp_wmb(); | ||
659 | } | ||
660 | |||
661 | pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) | ||
662 | { | ||
663 | pgtable_t pgtable; | ||
664 | pgtable_t *pgtable_slot; | ||
665 | |||
666 | assert_spin_locked(&mm->page_table_lock); | ||
667 | pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; | ||
668 | pgtable = *pgtable_slot; | ||
669 | /* | ||
670 | * Once we withdraw, mark the entry NULL. | ||
671 | */ | ||
672 | *pgtable_slot = NULL; | ||
673 | /* | ||
674 | * We store HPTE information in the deposited PTE fragment. | ||
675 | * zero out the content on withdraw. | ||
676 | */ | ||
677 | memset(pgtable, 0, PTE_FRAG_SIZE); | ||
678 | return pgtable; | ||
679 | } | ||
680 | |||
681 | /* | ||
682 | * set a new huge pmd. We should not be called for updating | ||
683 | * an existing pmd entry. That should go via pmd_hugepage_update. | ||
684 | */ | ||
685 | void set_pmd_at(struct mm_struct *mm, unsigned long addr, | ||
686 | pmd_t *pmdp, pmd_t pmd) | ||
687 | { | ||
688 | #ifdef CONFIG_DEBUG_VM | ||
689 | WARN_ON(!pmd_none(*pmdp)); | ||
690 | assert_spin_locked(&mm->page_table_lock); | ||
691 | WARN_ON(!pmd_trans_huge(pmd)); | ||
692 | #endif | ||
693 | return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); | ||
694 | } | ||
695 | |||
696 | void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, | ||
697 | pmd_t *pmdp) | ||
698 | { | ||
699 | pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT); | ||
700 | } | ||
701 | |||
702 | /* | ||
703 | * A linux hugepage PMD was changed and the corresponding hash table entries | ||
704 | * neesd to be flushed. | ||
705 | */ | ||
706 | void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, | ||
707 | pmd_t *pmdp) | ||
708 | { | ||
709 | int ssize, i; | ||
710 | unsigned long s_addr; | ||
711 | int max_hpte_count; | ||
712 | unsigned int psize, valid; | ||
713 | unsigned char *hpte_slot_array; | ||
714 | unsigned long hidx, vpn, vsid, hash, shift, slot; | ||
715 | |||
716 | /* | ||
717 | * Flush all the hptes mapping this hugepage | ||
718 | */ | ||
719 | s_addr = addr & HPAGE_PMD_MASK; | ||
720 | hpte_slot_array = get_hpte_slot_array(pmdp); | ||
721 | /* | ||
722 | * If we try to do a HUGE PTE update after a withdraw is done, | ||
723 | * we will find the below NULL. This happens when we do | ||
724 | * split_huge_page_pmd | ||
725 | */ | ||
726 | if (!hpte_slot_array) | ||
727 | return; | ||
728 | |||
729 | /* get the base page size */ | ||
730 | psize = get_slice_psize(mm, s_addr); | ||
731 | |||
732 | if (ppc_md.hugepage_invalidate) | ||
733 | return ppc_md.hugepage_invalidate(mm, hpte_slot_array, | ||
734 | s_addr, psize); | ||
735 | /* | ||
736 | * No bulk hpte removal support, invalidate each entry | ||
737 | */ | ||
738 | shift = mmu_psize_defs[psize].shift; | ||
739 | max_hpte_count = HPAGE_PMD_SIZE >> shift; | ||
740 | for (i = 0; i < max_hpte_count; i++) { | ||
741 | /* | ||
742 | * 8 bits per hpte entry | ||
743 | * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] | ||
744 | */ | ||
745 | valid = hpte_valid(hpte_slot_array, i); | ||
746 | if (!valid) | ||
747 | continue; | ||
748 | hidx = hpte_hash_index(hpte_slot_array, i); | ||
749 | |||
750 | /* get the vpn */ | ||
751 | addr = s_addr + (i * (1ul << shift)); | ||
752 | if (!is_kernel_addr(addr)) { | ||
753 | ssize = user_segment_size(addr); | ||
754 | vsid = get_vsid(mm->context.id, addr, ssize); | ||
755 | WARN_ON(vsid == 0); | ||
756 | } else { | ||
757 | vsid = get_kernel_vsid(addr, mmu_kernel_ssize); | ||
758 | ssize = mmu_kernel_ssize; | ||
759 | } | ||
760 | |||
761 | vpn = hpt_vpn(addr, vsid, ssize); | ||
762 | hash = hpt_hash(vpn, shift, ssize); | ||
763 | if (hidx & _PTEIDX_SECONDARY) | ||
764 | hash = ~hash; | ||
765 | |||
766 | slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; | ||
767 | slot += hidx & _PTEIDX_GROUP_IX; | ||
768 | ppc_md.hpte_invalidate(slot, vpn, psize, | ||
769 | MMU_PAGE_16M, ssize, 0); | ||
770 | } | ||
771 | } | ||
772 | |||
773 | static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) | ||
774 | { | ||
775 | pmd_val(pmd) |= pgprot_val(pgprot); | ||
776 | return pmd; | ||
777 | } | ||
778 | |||
779 | pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot) | ||
780 | { | ||
781 | pmd_t pmd; | ||
782 | /* | ||
783 | * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always | ||
784 | * set. We use this to check THP page at pmd level. | ||
785 | * leaf pte for huge page, bottom two bits != 00 | ||
786 | */ | ||
787 | pmd_val(pmd) = pfn << PTE_RPN_SHIFT; | ||
788 | pmd_val(pmd) |= _PAGE_THP_HUGE; | ||
789 | pmd = pmd_set_protbits(pmd, pgprot); | ||
790 | return pmd; | ||
791 | } | ||
792 | |||
793 | pmd_t mk_pmd(struct page *page, pgprot_t pgprot) | ||
794 | { | ||
795 | return pfn_pmd(page_to_pfn(page), pgprot); | ||
796 | } | ||
797 | |||
798 | pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) | ||
799 | { | ||
800 | |||
801 | pmd_val(pmd) &= _HPAGE_CHG_MASK; | ||
802 | pmd = pmd_set_protbits(pmd, newprot); | ||
803 | return pmd; | ||
804 | } | ||
805 | |||
806 | /* | ||
807 | * This is called at the end of handling a user page fault, when the | ||
808 | * fault has been handled by updating a HUGE PMD entry in the linux page tables. | ||
809 | * We use it to preload an HPTE into the hash table corresponding to | ||
810 | * the updated linux HUGE PMD entry. | ||
811 | */ | ||
812 | void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, | ||
813 | pmd_t *pmd) | ||
814 | { | ||
815 | return; | ||
816 | } | ||
817 | |||
818 | pmd_t pmdp_get_and_clear(struct mm_struct *mm, | ||
819 | unsigned long addr, pmd_t *pmdp) | ||
820 | { | ||
821 | pmd_t old_pmd; | ||
822 | pgtable_t pgtable; | ||
823 | unsigned long old; | ||
824 | pgtable_t *pgtable_slot; | ||
825 | |||
826 | old = pmd_hugepage_update(mm, addr, pmdp, ~0UL); | ||
827 | old_pmd = __pmd(old); | ||
828 | /* | ||
829 | * We have pmd == none and we are holding page_table_lock. | ||
830 | * So we can safely go and clear the pgtable hash | ||
831 | * index info. | ||
832 | */ | ||
833 | pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD; | ||
834 | pgtable = *pgtable_slot; | ||
835 | /* | ||
836 | * Let's zero out old valid and hash index details | ||
837 | * hash fault look at them. | ||
838 | */ | ||
839 | memset(pgtable, 0, PTE_FRAG_SIZE); | ||
840 | return old_pmd; | ||
841 | } | ||
842 | |||
843 | int has_transparent_hugepage(void) | ||
844 | { | ||
845 | if (!mmu_has_feature(MMU_FTR_16M_PAGE)) | ||
846 | return 0; | ||
847 | /* | ||
848 | * We support THP only if PMD_SIZE is 16MB. | ||
849 | */ | ||
850 | if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT) | ||
851 | return 0; | ||
852 | /* | ||
853 | * We need to make sure that we support 16MB hugepage in a segment | ||
854 | * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE | ||
855 | * of 64K. | ||
856 | */ | ||
857 | /* | ||
858 | * If we have 64K HPTE, we will be using that by default | ||
859 | */ | ||
860 | if (mmu_psize_defs[MMU_PAGE_64K].shift && | ||
861 | (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1)) | ||
862 | return 0; | ||
863 | /* | ||
864 | * Ok we only have 4K HPTE | ||
865 | */ | ||
866 | if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1) | ||
867 | return 0; | ||
868 | |||
869 | return 1; | ||
870 | } | ||
871 | #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ | ||
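hpte_do_hugepage_flush() above depends on two things: the per-hugepage slot array, where each base page gets one byte laid out as the comment describes ("000 | secondary group (1 bit) | hidx (3 bits) | valid bit"), and max_hpte_count = HPAGE_PMD_SIZE >> shift to bound the walk. A hedged sketch of decoding such an entry and sizing the walk; the byte layout is taken from that comment, the 16MB hugepage and 64K base page sizes are stated assumptions, and the helper names are mine:

    #include <stdio.h>

    #define HPAGE_PMD_SIZE   (16UL << 20)    /* 16MB hugepage, per the 16M psize */
    #define BASE_PAGE_SHIFT  16              /* assume a 64K base page size */

    /* Byte layout from the comment: 000 | secondary (1) | hidx (3) | valid (1). */
    static int slot_valid(unsigned char e)      { return e & 0x1; }
    static int slot_hidx(unsigned char e)       { return (e >> 1) & 0x7; }
    static int slot_secondary(unsigned char e)  { return (e >> 4) & 0x1; }

    int main(void)
    {
        unsigned long max_hpte_count = HPAGE_PMD_SIZE >> BASE_PAGE_SHIFT;
        unsigned char entry = 0x1b;          /* secondary=1, hidx=5, valid=1 */

        printf("subpages to scan: %lu\n", max_hpte_count);
        printf("valid=%d hidx=%d secondary=%d\n",
               slot_valid(entry), slot_hidx(entry), slot_secondary(entry));
        return 0;
    }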
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 7c415ddde948..aa74acb0fdfc 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c | |||
@@ -130,6 +130,53 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len) | |||
130 | up_write(&mm->mmap_sem); | 130 | up_write(&mm->mmap_sem); |
131 | } | 131 | } |
132 | 132 | ||
133 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
134 | static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr, | ||
135 | unsigned long end, struct mm_walk *walk) | ||
136 | { | ||
137 | struct vm_area_struct *vma = walk->private; | ||
138 | split_huge_page_pmd(vma, addr, pmd); | ||
139 | return 0; | ||
140 | } | ||
141 | |||
142 | static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, | ||
143 | unsigned long len) | ||
144 | { | ||
145 | struct vm_area_struct *vma; | ||
146 | struct mm_walk subpage_proto_walk = { | ||
147 | .mm = mm, | ||
148 | .pmd_entry = subpage_walk_pmd_entry, | ||
149 | }; | ||
150 | |||
151 | /* | ||
152 | * We don't try too hard, we just mark all the vmas in that range | ||
153 | * VM_NOHUGEPAGE and split them. | ||
154 | */ | ||
155 | vma = find_vma(mm, addr); | ||
156 | /* | ||
157 | * If the range falls in an unmapped region, just return | ||
158 | */ | ||
159 | if (vma && ((addr + len) <= vma->vm_start)) | ||
160 | return; | ||
161 | |||
162 | while (vma) { | ||
163 | if (vma->vm_start >= (addr + len)) | ||
164 | break; | ||
165 | vma->vm_flags |= VM_NOHUGEPAGE; | ||
166 | subpage_proto_walk.private = vma; | ||
167 | walk_page_range(vma->vm_start, vma->vm_end, | ||
168 | &subpage_proto_walk); | ||
169 | vma = vma->vm_next; | ||
170 | } | ||
171 | } | ||
172 | #else | ||
173 | static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, | ||
174 | unsigned long len) | ||
175 | { | ||
176 | return; | ||
177 | } | ||
178 | #endif | ||
179 | |||
133 | /* | 180 | /* |
134 | * Copy in a subpage protection map for an address range. | 181 | * Copy in a subpage protection map for an address range. |
135 | * The map has 2 bits per 4k subpage, so 32 bits per 64k page. | 182 | * The map has 2 bits per 4k subpage, so 32 bits per 64k page. |
@@ -168,6 +215,7 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) | |||
168 | return -EFAULT; | 215 | return -EFAULT; |
169 | 216 | ||
170 | down_write(&mm->mmap_sem); | 217 | down_write(&mm->mmap_sem); |
218 | subpage_mark_vma_nohuge(mm, addr, len); | ||
171 | for (limit = addr + len; addr < limit; addr = next) { | 219 | for (limit = addr + len; addr < limit; addr = next) { |
172 | next = pmd_addr_end(addr, limit); | 220 | next = pmd_addr_end(addr, limit); |
173 | err = -ENOMEM; | 221 | err = -ENOMEM; |
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index 023ec8a13f38..36e44b4260eb 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c | |||
@@ -183,12 +183,13 @@ void tlb_flush(struct mmu_gather *tlb) | |||
183 | * since 64K pages may overlap with other bridges when using 64K pages | 183 | * since 64K pages may overlap with other bridges when using 64K pages |
184 | * with 4K HW pages on IO space. | 184 | * with 4K HW pages on IO space. |
185 | * | 185 | * |
186 | * Because of that usage pattern, it's only available with CONFIG_HOTPLUG | 186 | * Because of that usage pattern, it is implemented for small size rather |
187 | * and is implemented for small size rather than speed. | 187 | * than speed. |
188 | */ | 188 | */ |
189 | void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, | 189 | void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, |
190 | unsigned long end) | 190 | unsigned long end) |
191 | { | 191 | { |
192 | int hugepage_shift; | ||
192 | unsigned long flags; | 193 | unsigned long flags; |
193 | 194 | ||
194 | start = _ALIGN_DOWN(start, PAGE_SIZE); | 195 | start = _ALIGN_DOWN(start, PAGE_SIZE); |
@@ -206,7 +207,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, | |||
206 | local_irq_save(flags); | 207 | local_irq_save(flags); |
207 | arch_enter_lazy_mmu_mode(); | 208 | arch_enter_lazy_mmu_mode(); |
208 | for (; start < end; start += PAGE_SIZE) { | 209 | for (; start < end; start += PAGE_SIZE) { |
209 | pte_t *ptep = find_linux_pte(mm->pgd, start); | 210 | pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start, |
211 | &hugepage_shift); | ||
210 | unsigned long pte; | 212 | unsigned long pte; |
211 | 213 | ||
212 | if (ptep == NULL) | 214 | if (ptep == NULL) |
@@ -214,7 +216,37 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, | |||
214 | pte = pte_val(*ptep); | 216 | pte = pte_val(*ptep); |
215 | if (!(pte & _PAGE_HASHPTE)) | 217 | if (!(pte & _PAGE_HASHPTE)) |
216 | continue; | 218 | continue; |
217 | hpte_need_flush(mm, start, ptep, pte, 0); | 219 | if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) |
220 | hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); | ||
221 | else | ||
222 | hpte_need_flush(mm, start, ptep, pte, 0); | ||
223 | } | ||
224 | arch_leave_lazy_mmu_mode(); | ||
225 | local_irq_restore(flags); | ||
226 | } | ||
227 | |||
228 | void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) | ||
229 | { | ||
230 | pte_t *pte; | ||
231 | pte_t *start_pte; | ||
232 | unsigned long flags; | ||
233 | |||
234 | addr = _ALIGN_DOWN(addr, PMD_SIZE); | ||
235 | /* Note: Normally, we should only ever use a batch within a | ||
236 | * PTE locked section. This violates the rule, but will work | ||
237 | * since we don't actually modify the PTEs, we just flush the | ||
238 | * hash while leaving the PTEs intact (including their reference | ||
239 | * to being hashed). This is not the most performance oriented | ||
240 | * way to do things but is fine for our needs here. | ||
241 | */ | ||
242 | local_irq_save(flags); | ||
243 | arch_enter_lazy_mmu_mode(); | ||
244 | start_pte = pte_offset_map(pmd, addr); | ||
245 | for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) { | ||
246 | unsigned long pteval = pte_val(*pte); | ||
247 | if (pteval & _PAGE_HASHPTE) | ||
248 | hpte_need_flush(mm, addr, pte, pteval, 0); | ||
249 | addr += PAGE_SIZE; | ||
218 | } | 250 | } |
219 | arch_leave_lazy_mmu_mode(); | 251 | arch_leave_lazy_mmu_mode(); |
220 | local_irq_restore(flags); | 252 | local_irq_restore(flags); |
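The new flush_tlb_pmd_range() above aligns the address down to PMD_SIZE and then visits all PTRS_PER_PTE entries under that pmd, flushing only the ones with _PAGE_HASHPTE set. A tiny sketch of that walk over a flat array standing in for the PTE page; the sizes and the flag bit are placeholders, not the kernel's real values:

    #include <stdio.h>

    #define PTRS_PER_PTE   256
    #define PAGE_SIZE      (64UL * 1024)
    #define PMD_SIZE       (PTRS_PER_PTE * PAGE_SIZE)
    #define PAGE_HASHPTE   0x400UL           /* placeholder flag bit */

    typedef unsigned long pte_t;

    static void hpte_flush(unsigned long addr, pte_t pte)
    {
        printf("flush hash entry for 0x%lx (pte 0x%lx)\n", addr, pte);
    }

    /* Walk every pte that sits under one pmd and flush the hashed ones. */
    static void flush_pmd_range(pte_t *pte_page, unsigned long addr)
    {
        addr &= ~(PMD_SIZE - 1);             /* _ALIGN_DOWN(addr, PMD_SIZE) */
        for (int i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) {
            if (pte_page[i] & PAGE_HASHPTE)
                hpte_flush(addr, pte_page[i]);
        }
    }

    int main(void)
    {
        pte_t ptes[PTRS_PER_PTE] = {0};

        ptes[3] = 0x1400;                    /* pretend this one was hashed */
        flush_pmd_range(ptes, 0x12345678);
        return 0;
    }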
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 6888cad5103d..41cd68dee681 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c | |||
@@ -648,7 +648,7 @@ void __init early_init_mmu(void) | |||
648 | __early_init_mmu(1); | 648 | __early_init_mmu(1); |
649 | } | 649 | } |
650 | 650 | ||
651 | void __cpuinit early_init_mmu_secondary(void) | 651 | void early_init_mmu_secondary(void) |
652 | { | 652 | { |
653 | __early_init_mmu(0); | 653 | __early_init_mmu(0); |
654 | } | 654 | } |