summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>2015-11-30 22:36:28 -0500
committerMichael Ellerman <mpe@ellerman.id.au>2015-12-13 23:19:05 -0500
commit3dfcb315d81e663bf70401de61940c1b4de2deea (patch)
treebc9d411e750e1f06c6aacf0b64cb1e4321fc7bca
parent26b6a3d9bb48f8b4624a62281bc2a295df3a8109 (diff)
powerpc/mm: make a separate copy for book3s
In this patch we do: cp pgtable-ppc32.h book3s/32/pgtable.h; cp pgtable-ppc64.h book3s/64/pgtable.h. This enables us to make further changes to the hash-specific config. We will change the page table format for 64bit hash in later patches. Acked-by: Scott Wood <scottwood@freescale.com> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h340
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h626
-rw-r--r--arch/powerpc/include/asm/book3s/pgtable.h10
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h2
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc32.h2
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h4
-rw-r--r--arch/powerpc/include/asm/pgtable.h4
7 files changed, 981 insertions, 7 deletions
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
new file mode 100644
index 000000000000..418d2fa3ac7d
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -0,0 +1,340 @@
1#ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H
2#define _ASM_POWERPC_BOOK3S_32_PGTABLE_H
3
4#include <asm-generic/pgtable-nopmd.h>
5
6#ifndef __ASSEMBLY__
7#include <linux/sched.h>
8#include <linux/threads.h>
9#include <asm/io.h> /* For sub-arch specific PPC_PIN_SIZE */
10
11extern unsigned long ioremap_bot;
12
13#ifdef CONFIG_44x
14extern int icache_44x_need_flush;
15#endif
16
17#endif /* __ASSEMBLY__ */
18
19/*
20 * The normal case is that PTEs are 32-bits and we have a 1-page
21 * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus
22 *
23 * For any >32-bit physical address platform, we can use the following
24 * two level page table layout where the pgdir is 8KB and the MS 13 bits
25 * are an index to the second level table. The combined pgdir/pmd first
26 * level has 2048 entries and the second level has 512 64-bit PTE entries.
27 * -Matt
28 */
29/* PGDIR_SHIFT determines what a top-level page table entry can map */
30#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT)
31#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
32#define PGDIR_MASK (~(PGDIR_SIZE-1))
33
34/*
35 * entries per page directory level: our page-table tree is two-level, so
36 * we don't really have any PMD directory.
37 */
38#ifndef __ASSEMBLY__
39#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT)
40#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT))
41#endif /* __ASSEMBLY__ */
42
43#define PTRS_PER_PTE (1 << PTE_SHIFT)
44#define PTRS_PER_PMD 1
45#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT))
46
47#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
48#define FIRST_USER_ADDRESS 0UL
49
50#define pte_ERROR(e) \
51 pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
52 (unsigned long long)pte_val(e))
53#define pgd_ERROR(e) \
54 pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
55
56/*
57 * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
58 * value (for now) on others, from where we can start layout kernel
59 * virtual space that goes below PKMAP and FIXMAP
60 */
61#ifdef CONFIG_HIGHMEM
62#define KVIRT_TOP PKMAP_BASE
63#else
64#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */
65#endif
66
67/*
68 * ioremap_bot starts at that address. Early ioremaps move down from there,
69 * until mem_init() at which point this becomes the top of the vmalloc
70 * and ioremap space
71 */
72#ifdef CONFIG_NOT_COHERENT_CACHE
73#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
74#else
75#define IOREMAP_TOP KVIRT_TOP
76#endif
77
78/*
79 * Just any arbitrary offset to the start of the vmalloc VM area: the
80 * current 16MB value just means that there will be a hole of up to 16MB after the
81 * physical memory until the kernel virtual memory starts. That means that
82 * any out-of-bounds memory accesses will hopefully be caught.
83 * The vmalloc() routines leaves a hole of 4kB between each vmalloced
84 * area for the same reason. ;)
85 *
86 * We no longer map larger than phys RAM with the BATs so we don't have
87 * to worry about the VMALLOC_OFFSET causing problems. We do have to worry
88 * about clashes between our early calls to ioremap() that start growing down
89 * from ioremap_base being run into the VM area allocations (growing upwards
90 * from VMALLOC_START). For this reason we have ioremap_bot to check when
91 * we actually run into our mappings setup in the early boot with the VM
92 * system. This really does become a problem for machines with good amounts
93 * of RAM. -- Cort
94 */
95#define VMALLOC_OFFSET (0x1000000) /* 16M */
96#ifdef PPC_PIN_SIZE
97#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
98#else
99#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
100#endif
101#define VMALLOC_END ioremap_bot
102
103/*
104 * Bits in a linux-style PTE. These match the bits in the
105 * (hardware-defined) PowerPC PTE as closely as possible.
106 */
107
108#if defined(CONFIG_40x)
109#include <asm/pte-40x.h>
110#elif defined(CONFIG_44x)
111#include <asm/pte-44x.h>
112#elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT)
113#include <asm/pte-book3e.h>
114#elif defined(CONFIG_FSL_BOOKE)
115#include <asm/pte-fsl-booke.h>
116#elif defined(CONFIG_8xx)
117#include <asm/pte-8xx.h>
118#else /* CONFIG_6xx */
119#include <asm/book3s/32/hash.h>
120#endif
121
122/* And here we include common definitions */
123#include <asm/pte-common.h>
124
125#ifndef __ASSEMBLY__
126
127#define pte_clear(mm, addr, ptep) \
128 do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
129
130#define pmd_none(pmd) (!pmd_val(pmd))
131#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
132#define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK)
133#define pmd_clear(pmdp) do { pmd_val(*(pmdp)) = 0; } while (0)
134
135/*
136 * When flushing the tlb entry for a page, we also need to flush the hash
137 * table entry. flush_hash_pages is assembler (for speed) in hashtable.S.
138 */
139extern int flush_hash_pages(unsigned context, unsigned long va,
140 unsigned long pmdval, int count);
141
142/* Add an HPTE to the hash table */
143extern void add_hash_page(unsigned context, unsigned long va,
144 unsigned long pmdval);
145
146/* Flush an entry from the TLB/hash table */
147extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
148 unsigned long address);
149
150/*
151 * PTE updates. This function is called whenever an existing
152 * valid PTE is updated. This does -not- include set_pte_at()
153 * which nowadays only sets a new PTE.
154 *
155 * Depending on the type of MMU, we may need to use atomic updates
156 * and the PTE may be either 32 or 64 bit wide. In the latter case,
157 * when using atomic updates, only the low part of the PTE is
158 * accessed atomically.
159 *
160 * In addition, on 44x, we also maintain a global flag indicating
161 * that an executable user mapping was modified, which is needed
162 * to properly flush the virtually tagged instruction cache of
163 * those implementations.
164 */
165#ifndef CONFIG_PTE_64BIT
166static inline unsigned long pte_update(pte_t *p,
167 unsigned long clr,
168 unsigned long set)
169{
170#ifdef PTE_ATOMIC_UPDATES
171 unsigned long old, tmp;
172
173 __asm__ __volatile__("\
1741: lwarx %0,0,%3\n\
175 andc %1,%0,%4\n\
176 or %1,%1,%5\n"
177 PPC405_ERR77(0,%3)
178" stwcx. %1,0,%3\n\
179 bne- 1b"
180 : "=&r" (old), "=&r" (tmp), "=m" (*p)
181 : "r" (p), "r" (clr), "r" (set), "m" (*p)
182 : "cc" );
183#else /* PTE_ATOMIC_UPDATES */
184 unsigned long old = pte_val(*p);
185 *p = __pte((old & ~clr) | set);
186#endif /* !PTE_ATOMIC_UPDATES */
187
188#ifdef CONFIG_44x
189 if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
190 icache_44x_need_flush = 1;
191#endif
192 return old;
193}
194#else /* CONFIG_PTE_64BIT */
195static inline unsigned long long pte_update(pte_t *p,
196 unsigned long clr,
197 unsigned long set)
198{
199#ifdef PTE_ATOMIC_UPDATES
200 unsigned long long old;
201 unsigned long tmp;
202
203 __asm__ __volatile__("\
2041: lwarx %L0,0,%4\n\
205 lwzx %0,0,%3\n\
206 andc %1,%L0,%5\n\
207 or %1,%1,%6\n"
208 PPC405_ERR77(0,%3)
209" stwcx. %1,0,%4\n\
210 bne- 1b"
211 : "=&r" (old), "=&r" (tmp), "=m" (*p)
212 : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p)
213 : "cc" );
214#else /* PTE_ATOMIC_UPDATES */
215 unsigned long long old = pte_val(*p);
216 *p = __pte((old & ~(unsigned long long)clr) | set);
217#endif /* !PTE_ATOMIC_UPDATES */
218
219#ifdef CONFIG_44x
220 if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
221 icache_44x_need_flush = 1;
222#endif
223 return old;
224}
225#endif /* CONFIG_PTE_64BIT */
226
227/*
228 * 2.6 calls this without flushing the TLB entry; this is wrong
229 * for our hash-based implementation, we fix that up here.
230 */
231#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
232static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
233{
234 unsigned long old;
235 old = pte_update(ptep, _PAGE_ACCESSED, 0);
236#if _PAGE_HASHPTE != 0
237 if (old & _PAGE_HASHPTE) {
238 unsigned long ptephys = __pa(ptep) & PAGE_MASK;
239 flush_hash_pages(context, addr, ptephys, 1);
240 }
241#endif
242 return (old & _PAGE_ACCESSED) != 0;
243}
244#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
245 __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep)
246
247#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
248static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
249 pte_t *ptep)
250{
251 return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
252}
253
254#define __HAVE_ARCH_PTEP_SET_WRPROTECT
255static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
256 pte_t *ptep)
257{
258 pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
259}
260static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
261 unsigned long addr, pte_t *ptep)
262{
263 ptep_set_wrprotect(mm, addr, ptep);
264}
265
266
267static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
268{
269 unsigned long set = pte_val(entry) &
270 (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
271 unsigned long clr = ~pte_val(entry) & _PAGE_RO;
272
273 pte_update(ptep, clr, set);
274}
275
276#define __HAVE_ARCH_PTE_SAME
277#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
278
279/*
280 * Note that on Book E processors, the pmd contains the kernel virtual
281 * (lowmem) address of the pte page. The physical address is less useful
282 * because everything runs with translation enabled (even the TLB miss
283 * handler). On everything else the pmd contains the physical address
284 * of the pte page. -- paulus
285 */
286#ifndef CONFIG_BOOKE
287#define pmd_page_vaddr(pmd) \
288 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
289#define pmd_page(pmd) \
290 pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
291#else
292#define pmd_page_vaddr(pmd) \
293 ((unsigned long) (pmd_val(pmd) & PAGE_MASK))
294#define pmd_page(pmd) \
295 pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
296#endif
297
298/* to find an entry in a kernel page-table-directory */
299#define pgd_offset_k(address) pgd_offset(&init_mm, address)
300
301/* to find an entry in a page-table-directory */
302#define pgd_index(address) ((address) >> PGDIR_SHIFT)
303#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
304
305/* Find an entry in the third-level page table.. */
306#define pte_index(address) \
307 (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
308#define pte_offset_kernel(dir, addr) \
309 ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
310#define pte_offset_map(dir, addr) \
311 ((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr))
312#define pte_unmap(pte) kunmap_atomic(pte)
313
314/*
315 * Encode and decode a swap entry.
316 * Note that the bits we use in a PTE for representing a swap entry
317 * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
318 * -- paulus
319 */
320#define __swp_type(entry) ((entry).val & 0x1f)
321#define __swp_offset(entry) ((entry).val >> 5)
322#define __swp_entry(type, offset) ((swp_entry_t) { (type) | ((offset) << 5) })
323#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
324#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
325
326#ifndef CONFIG_PPC_4K_PAGES
327void pgtable_cache_init(void);
328#else
329/*
330 * No page table caches to initialise
331 */
332#define pgtable_cache_init() do { } while (0)
333#endif
334
335extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
336 pmd_t **pmdp);
337
338#endif /* !__ASSEMBLY__ */
339
340#endif /* _ASM_POWERPC_BOOK3S_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
new file mode 100644
index 000000000000..cdd5284d9eaa
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -0,0 +1,626 @@
1#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
2#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
3/*
4 * This file contains the functions and defines necessary to modify and use
5 * the ppc64 hashed page table.
6 */
7
8#ifdef CONFIG_PPC_64K_PAGES
9#include <asm/pgtable-ppc64-64k.h>
10#else
11#include <asm/pgtable-ppc64-4k.h>
12#endif
13#include <asm/barrier.h>
14
15#define FIRST_USER_ADDRESS 0UL
16
17/*
18 * Size of EA range mapped by our pagetables.
19 */
20#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
21 PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
22#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
23
24#ifdef CONFIG_TRANSPARENT_HUGEPAGE
25#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1)
26#else
27#define PMD_CACHE_INDEX PMD_INDEX_SIZE
28#endif
29/*
30 * Define the address range of the kernel non-linear virtual area
31 */
32
33#ifdef CONFIG_PPC_BOOK3E
34#define KERN_VIRT_START ASM_CONST(0x8000000000000000)
35#else
36#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
37#endif
38#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000)
39
40/*
41 * The vmalloc space starts at the beginning of that region, and
42 * occupies half of it on hash CPUs and a quarter of it on Book3E
43 * (we keep a quarter for the virtual memmap)
44 */
45#define VMALLOC_START KERN_VIRT_START
46#ifdef CONFIG_PPC_BOOK3E
47#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 2)
48#else
49#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
50#endif
51#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
52
53/*
54 * The second half of the kernel virtual space is used for IO mappings,
55 * it's itself carved into the PIO region (ISA and PHB IO space) and
56 * the ioremap space
57 *
58 * ISA_IO_BASE = KERN_IO_START, 64K reserved area
59 * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
60 * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
61 */
62#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
63#define FULL_IO_SIZE 0x80000000ul
64#define ISA_IO_BASE (KERN_IO_START)
65#define ISA_IO_END (KERN_IO_START + 0x10000ul)
66#define PHB_IO_BASE (ISA_IO_END)
67#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
68#define IOREMAP_BASE (PHB_IO_END)
69#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
70
71
72/*
73 * Region IDs
74 */
75#define REGION_SHIFT 60UL
76#define REGION_MASK (0xfUL << REGION_SHIFT)
77#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
78
79#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START))
80#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
81#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
82#define USER_REGION_ID (0UL)
83
84/*
85 * Defines the address of the vmemmap area, in its own region on
86 * hash table CPUs and after the vmalloc space on Book3E
87 */
88#ifdef CONFIG_PPC_BOOK3E
89#define VMEMMAP_BASE VMALLOC_END
90#define VMEMMAP_END KERN_IO_START
91#else
92#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
93#endif
94#define vmemmap ((struct page *)VMEMMAP_BASE)
95
96
97/*
98 * Include the PTE bits definitions
99 */
100#ifdef CONFIG_PPC_BOOK3S
101#include <asm/book3s/64/hash.h>
102#else
103#include <asm/pte-book3e.h>
104#endif
105#include <asm/pte-common.h>
106
107#ifdef CONFIG_PPC_MM_SLICES
108#define HAVE_ARCH_UNMAPPED_AREA
109#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
110#endif /* CONFIG_PPC_MM_SLICES */
111
112#ifndef __ASSEMBLY__
113
114/*
115 * This is the default implementation of various PTE accessors, it's
116 * used in all cases except Book3S with 64K pages where we have a
117 * concept of sub-pages
118 */
119#ifndef __real_pte
120
121#ifdef CONFIG_STRICT_MM_TYPECHECKS
122#define __real_pte(e,p) ((real_pte_t){(e)})
123#define __rpte_to_pte(r) ((r).pte)
124#else
125#define __real_pte(e,p) (e)
126#define __rpte_to_pte(r) (__pte(r))
127#endif
128#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> 12)
129
130#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
131 do { \
132 index = 0; \
133 shift = mmu_psize_defs[psize].shift; \
134
135#define pte_iterate_hashed_end() } while(0)
136
137/*
138 * We expect this to be called only for user addresses or kernel virtual
139 * addresses other than the linear mapping.
140 */
141#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K
142
143#endif /* __real_pte */
144
145
146/* pte_clear moved to later in this file */
147
148#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
149#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
150
151#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval))
152#define pmd_none(pmd) (!pmd_val(pmd))
153#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
154 || (pmd_val(pmd) & PMD_BAD_BITS))
155#define pmd_present(pmd) (!pmd_none(pmd))
156#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
157#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
158extern struct page *pmd_page(pmd_t pmd);
159
160#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
161#define pud_none(pud) (!pud_val(pud))
162#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \
163 || (pud_val(pud) & PUD_BAD_BITS))
164#define pud_present(pud) (pud_val(pud) != 0)
165#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
166#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
167
168extern struct page *pud_page(pud_t pud);
169
170static inline pte_t pud_pte(pud_t pud)
171{
172 return __pte(pud_val(pud));
173}
174
175static inline pud_t pte_pud(pte_t pte)
176{
177 return __pud(pte_val(pte));
178}
179#define pud_write(pud) pte_write(pud_pte(pud))
180#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
181#define pgd_write(pgd) pte_write(pgd_pte(pgd))
182
183/*
184 * Find an entry in a page-table-directory. We combine the address region
185 * (the high order N bits) and the pgd portion of the address.
186 */
187#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
188
189#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
190
191#define pmd_offset(pudp,addr) \
192 (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
193
194#define pte_offset_kernel(dir,addr) \
195 (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
196
197#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
198#define pte_unmap(pte) do { } while(0)
199
200/* to find an entry in a kernel page-table-directory */
201/* This now only contains the vmalloc pages */
202#define pgd_offset_k(address) pgd_offset(&init_mm, address)
203extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
204 pte_t *ptep, unsigned long pte, int huge);
205
206/* Atomic PTE updates */
207static inline unsigned long pte_update(struct mm_struct *mm,
208 unsigned long addr,
209 pte_t *ptep, unsigned long clr,
210 unsigned long set,
211 int huge)
212{
213#ifdef PTE_ATOMIC_UPDATES
214 unsigned long old, tmp;
215
216 __asm__ __volatile__(
217 "1: ldarx %0,0,%3 # pte_update\n\
218 andi. %1,%0,%6\n\
219 bne- 1b \n\
220 andc %1,%0,%4 \n\
221 or %1,%1,%7\n\
222 stdcx. %1,0,%3 \n\
223 bne- 1b"
224 : "=&r" (old), "=&r" (tmp), "=m" (*ptep)
225 : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
226 : "cc" );
227#else
228 unsigned long old = pte_val(*ptep);
229 *ptep = __pte((old & ~clr) | set);
230#endif
231 /* huge pages use the old page table lock */
232 if (!huge)
233 assert_pte_locked(mm, addr);
234
235#ifdef CONFIG_PPC_STD_MMU_64
236 if (old & _PAGE_HASHPTE)
237 hpte_need_flush(mm, addr, ptep, old, huge);
238#endif
239
240 return old;
241}
242
243static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
244 unsigned long addr, pte_t *ptep)
245{
246 unsigned long old;
247
248 if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
249 return 0;
250 old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
251 return (old & _PAGE_ACCESSED) != 0;
252}
253#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
254#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
255({ \
256 int __r; \
257 __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
258 __r; \
259})
260
261#define __HAVE_ARCH_PTEP_SET_WRPROTECT
262static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
263 pte_t *ptep)
264{
265
266 if ((pte_val(*ptep) & _PAGE_RW) == 0)
267 return;
268
269 pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
270}
271
272static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
273 unsigned long addr, pte_t *ptep)
274{
275 if ((pte_val(*ptep) & _PAGE_RW) == 0)
276 return;
277
278 pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
279}
280
281/*
282 * We currently remove entries from the hashtable regardless of whether
283 * the entry was young or dirty. The generic routines only flush if the
284 * entry was young or dirty which is not good enough.
285 *
286 * We should be more intelligent about this but for the moment we override
287 * these functions and force a tlb flush unconditionally
288 */
289#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
290#define ptep_clear_flush_young(__vma, __address, __ptep) \
291({ \
292 int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
293 __ptep); \
294 __young; \
295})
296
297#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
298static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
299 unsigned long addr, pte_t *ptep)
300{
301 unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
302 return __pte(old);
303}
304
305static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
306 pte_t * ptep)
307{
308 pte_update(mm, addr, ptep, ~0UL, 0, 0);
309}
310
311
312/* Set the dirty and/or accessed bits atomically in a linux PTE, this
313 * function doesn't need to flush the hash entry
314 */
315static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
316{
317 unsigned long bits = pte_val(entry) &
318 (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
319
320#ifdef PTE_ATOMIC_UPDATES
321 unsigned long old, tmp;
322
323 __asm__ __volatile__(
324 "1: ldarx %0,0,%4\n\
325 andi. %1,%0,%6\n\
326 bne- 1b \n\
327 or %0,%3,%0\n\
328 stdcx. %0,0,%4\n\
329 bne- 1b"
330 :"=&r" (old), "=&r" (tmp), "=m" (*ptep)
331 :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
332 :"cc");
333#else
334 unsigned long old = pte_val(*ptep);
335 *ptep = __pte(old | bits);
336#endif
337}
338
339#define __HAVE_ARCH_PTE_SAME
340#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
341
342#define pte_ERROR(e) \
343 pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
344#define pmd_ERROR(e) \
345 pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
346#define pgd_ERROR(e) \
347 pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
348
/* Encode and decode a swap entry */
#define MAX_SWAPFILES_CHECK() do { \
	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
	/*							\
	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
	 * We filter HPTEFLAGS on set_pte.			\
	 * The type mask is derived from SWP_TYPE_BITS so that	\
	 * it cannot drift from the field width used below.	\
	 */							\
	BUILD_BUG_ON(_PAGE_HPTEFLAGS & \
		     (((1UL << SWP_TYPE_BITS) - 1) << _PAGE_BIT_SWAP_TYPE)); \
	} while (0)
/*
 * On a pte we don't need to handle RADIX_TREE_EXCEPTIONAL_SHIFT.
 *
 * Layout used by the macros below: the swap type occupies SWP_TYPE_BITS
 * bits starting at bit _PAGE_BIT_SWAP_TYPE, the swap offset starts at
 * bit PTE_RPN_SHIFT.
 */
#define SWP_TYPE_BITS 5
#define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
				& ((1UL << SWP_TYPE_BITS) - 1))
#define __swp_offset(x)		((x).val >> PTE_RPN_SHIFT)
#define __swp_entry(type, offset)	((swp_entry_t) { \
					((type) << _PAGE_BIT_SWAP_TYPE) \
					| ((offset) << PTE_RPN_SHIFT) })

/* A swap entry and its pte share the same bit layout: plain re-wrapping */
#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
#define __swp_entry_to_pte(x)		__pte((x).val)
371
372void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
373void pgtable_cache_init(void);
374#endif /* __ASSEMBLY__ */
375
376/*
377 * THP pages can't be special, so reuse the _PAGE_SPECIAL bit as _PAGE_SPLITTING.
378 */
379#define _PAGE_SPLITTING _PAGE_SPECIAL
380
381/*
382 * We need to differentiate between explicit huge page and THP huge
383 * page, since THP huge page also need to track real subpage details
384 */
385#define _PAGE_THP_HUGE _PAGE_4K_PFN
386
387/*
388 * set of bits not changed in pmd_modify.
389 */
390#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \
391 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
392 _PAGE_THP_HUGE)
393
394#ifndef __ASSEMBLY__
395/*
396 * The linux hugepage PMD now includes the pmd entries followed by the address
397 * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
398 * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
399 * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
400 * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
401 *
402 * The last three bits are intentionally left as zero. These memory locations
403 * are also used as normal page PTE pointers. So if we have any pointers
404 * left around while we collapse a hugepage, we need to make sure
405 * _PAGE_PRESENT bit of that is zero when we look at them
406 */
/*
 * Return 1 if the valid bit (bit 3) of slot @index in @hpte_slot_array is
 * set, 0 otherwise. The array is only read.
 */
static inline unsigned int hpte_valid(const unsigned char *hpte_slot_array, int index)
{
	return (hpte_slot_array[index] >> 3) & 0x1;
}
411
/*
 * Return the upper four bits of slot @index in @hpte_slot_array, i.e. the
 * value stored by mark_hpte_slot_valid() as hidx. The array is only read.
 */
static inline unsigned int hpte_hash_index(const unsigned char *hpte_slot_array,
					   int index)
{
	return hpte_slot_array[index] >> 4;
}
417
/*
 * Store @hidx in the upper four bits of slot @index and set the valid bit
 * (bit 3); the low three bits are written as zero. Bits of @hidx above the
 * low four do not fit in the one-byte slot and are dropped.
 */
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	/* The cast spells out the truncation to the one-byte slot */
	hpte_slot_array[index] = (unsigned char)((hidx << 4) | (0x1 << 3));
}
423
424struct page *realmode_pfn_to_page(unsigned long pfn);
425
426static inline char *get_hpte_slot_array(pmd_t *pmdp)
427{
428 /*
429 * The hpte hindex is stored in the pgtable whose address is in the
430 * second half of the PMD
431 *
432 * Order this load with the test for pmd_trans_huge in the caller
433 */
434 smp_rmb();
435 return *(char **)(pmdp + PTRS_PER_PMD);
436
437
438}
439
440#ifdef CONFIG_TRANSPARENT_HUGEPAGE
441extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
442 pmd_t *pmdp, unsigned long old_pmd);
443extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
444extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
445extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
446extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
447 pmd_t *pmdp, pmd_t pmd);
448extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
449 pmd_t *pmd);
450/*
451 *
452 * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
453 * page. The hugetlbfs page table walking and mangling paths are totally
454 * separated from the core VM paths and they're differentiated by
455 * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
456 *
457 * pmd_trans_huge() is defined as false at build time if
458 * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
459 * time in such case.
460 *
461 * For ppc64 we need to differentiate explicit hugepages from THP, because
462 * for THP we also track the subpage details at the pmd level. We don't do
463 * that for explicit huge pages.
464 *
465 */
466static inline int pmd_trans_huge(pmd_t pmd)
467{
468 /*
469 * leaf pte for huge page, bottom two bits != 00
470 */
471 return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
472}
473
474static inline int pmd_trans_splitting(pmd_t pmd)
475{
476 if (pmd_trans_huge(pmd))
477 return pmd_val(pmd) & _PAGE_SPLITTING;
478 return 0;
479}
480
481extern int has_transparent_hugepage(void);
482#else
483static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
484 unsigned long addr, pmd_t *pmdp,
485 unsigned long old_pmd)
486{
487
488 WARN(1, "%s called with THP disabled\n", __func__);
489}
490#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
491
492static inline int pmd_large(pmd_t pmd)
493{
494 /*
495 * leaf pte for huge page, bottom two bits != 00
496 */
497 return ((pmd_val(pmd) & 0x3) != 0x0);
498}
499
500static inline pte_t pmd_pte(pmd_t pmd)
501{
502 return __pte(pmd_val(pmd));
503}
504
505static inline pmd_t pte_pmd(pte_t pte)
506{
507 return __pmd(pte_val(pte));
508}
509
510static inline pte_t *pmdp_ptep(pmd_t *pmd)
511{
512 return (pte_t *)pmd;
513}
514
515#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
516#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
517#define pmd_young(pmd) pte_young(pmd_pte(pmd))
518#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
519#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
520#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
521#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
522#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
523
524#define __HAVE_ARCH_PMD_WRITE
525#define pmd_write(pmd) pte_write(pmd_pte(pmd))
526
527static inline pmd_t pmd_mkhuge(pmd_t pmd)
528{
529 /* Do nothing, mk_pmd() does this part. */
530 return pmd;
531}
532
533static inline pmd_t pmd_mknotpresent(pmd_t pmd)
534{
535 pmd_val(pmd) &= ~_PAGE_PRESENT;
536 return pmd;
537}
538
539static inline pmd_t pmd_mksplitting(pmd_t pmd)
540{
541 pmd_val(pmd) |= _PAGE_SPLITTING;
542 return pmd;
543}
544
545#define __HAVE_ARCH_PMD_SAME
546static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
547{
548 return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
549}
550
551#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
552extern int pmdp_set_access_flags(struct vm_area_struct *vma,
553 unsigned long address, pmd_t *pmdp,
554 pmd_t entry, int dirty);
555
556extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
557 unsigned long addr,
558 pmd_t *pmdp,
559 unsigned long clr,
560 unsigned long set);
561
562static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
563 unsigned long addr, pmd_t *pmdp)
564{
565 unsigned long old;
566
567 if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
568 return 0;
569 old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
570 return ((old & _PAGE_ACCESSED) != 0);
571}
572
573#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
574extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
575 unsigned long address, pmd_t *pmdp);
576#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
577extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
578 unsigned long address, pmd_t *pmdp);
579
580#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
581extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
582 unsigned long addr, pmd_t *pmdp);
583
584#define __HAVE_ARCH_PMDP_SET_WRPROTECT
585static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
586 pmd_t *pmdp)
587{
588
589 if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
590 return;
591
592 pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
593}
594
595#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
596extern void pmdp_splitting_flush(struct vm_area_struct *vma,
597 unsigned long address, pmd_t *pmdp);
598
599extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
600 unsigned long address, pmd_t *pmdp);
601#define pmdp_collapse_flush pmdp_collapse_flush
602
603#define __HAVE_ARCH_PGTABLE_DEPOSIT
604extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
605 pgtable_t pgtable);
606#define __HAVE_ARCH_PGTABLE_WITHDRAW
607extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
608
609#define __HAVE_ARCH_PMDP_INVALIDATE
610extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
611 pmd_t *pmdp);
612
#define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock;
/*
 * Always returns 1; both lock arguments are ignored.
 */
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
					 struct spinlock *old_pmd_ptl)
{
	/*
	 * Archs like ppc64 use pgtable to store per pmd
	 * specific information. So when we switch the pmd,
	 * we should also withdraw and deposit the pgtable
	 */
	return 1;
}
625#endif /* __ASSEMBLY__ */
626#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
new file mode 100644
index 000000000000..a8d8e5152bd4
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -0,0 +1,10 @@
1#ifndef _ASM_POWERPC_BOOK3S_PGTABLE_H
2#define _ASM_POWERPC_BOOK3S_PGTABLE_H
3
4#ifdef CONFIG_PPC64
5#include <asm/book3s/64/pgtable.h>
6#else
7#include <asm/book3s/32/pgtable.h>
8#endif
9
10#endif
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index ba3342bbdbda..7352d3f212df 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -21,7 +21,7 @@
21 * need for various slices related matters. Note that this isn't the 21 * need for various slices related matters. Note that this isn't the
22 * complete pgtable.h but only a portion of it. 22 * complete pgtable.h but only a portion of it.
23 */ 23 */
24#include <asm/pgtable-ppc64.h> 24#include <asm/book3s/64/pgtable.h>
25#include <asm/bug.h> 25#include <asm/bug.h>
26#include <asm/processor.h> 26#include <asm/processor.h>
27 27
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index 1a58a05be99c..aac6547b0823 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -115,8 +115,6 @@ extern int icache_44x_need_flush;
115#include <asm/pte-fsl-booke.h> 115#include <asm/pte-fsl-booke.h>
116#elif defined(CONFIG_8xx) 116#elif defined(CONFIG_8xx)
117#include <asm/pte-8xx.h> 117#include <asm/pte-8xx.h>
118#else /* CONFIG_6xx */
119#include <asm/book3s/32/hash.h>
120#endif 118#endif
121 119
122/* And here we include common definitions */ 120/* And here we include common definitions */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index b36a932abdfb..1ef0fea32e1e 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -97,11 +97,7 @@
97/* 97/*
98 * Include the PTE bits definitions 98 * Include the PTE bits definitions
99 */ 99 */
100#ifdef CONFIG_PPC_BOOK3S
101#include <asm/book3s/64/hash.h>
102#else
103#include <asm/pte-book3e.h> 100#include <asm/pte-book3e.h>
104#endif
105#include <asm/pte-common.h> 101#include <asm/pte-common.h>
106 102
107#ifdef CONFIG_PPC_MM_SLICES 103#ifdef CONFIG_PPC_MM_SLICES
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index b64b4212b71f..c304d0767919 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -13,11 +13,15 @@ struct mm_struct;
13 13
14#endif /* !__ASSEMBLY__ */ 14#endif /* !__ASSEMBLY__ */
15 15
16#ifdef CONFIG_PPC_BOOK3S
17#include <asm/book3s/pgtable.h>
18#else
16#if defined(CONFIG_PPC64) 19#if defined(CONFIG_PPC64)
17# include <asm/pgtable-ppc64.h> 20# include <asm/pgtable-ppc64.h>
18#else 21#else
19# include <asm/pgtable-ppc32.h> 22# include <asm/pgtable-ppc32.h>
20#endif 23#endif
24#endif /* !CONFIG_PPC_BOOK3S */
21 25
22/* 26/*
23 * We save the slot number & secondary bit in the second half of the 27 * We save the slot number & secondary bit in the second half of the