aboutsummaryrefslogtreecommitdiffstats
path: root/include/asm-ppc64
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2005-11-06 19:06:55 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-06 19:56:47 -0500
commit3c726f8dee6f55e96475574e9f645327e461884c (patch)
treef67c381e8f57959aa4a94bda4c68e24253cd8171 /include/asm-ppc64
parentf912696ab330bf539231d1f8032320f2a08b850f (diff)
[PATCH] ppc64: support 64k pages
Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/asm-ppc64')
-rw-r--r--include/asm-ppc64/mmu.h208
-rw-r--r--include/asm-ppc64/mmu_context.h15
-rw-r--r--include/asm-ppc64/paca.h13
-rw-r--r--include/asm-ppc64/page.h147
-rw-r--r--include/asm-ppc64/pgalloc.h47
-rw-r--r--include/asm-ppc64/pgtable-4k.h88
-rw-r--r--include/asm-ppc64/pgtable-64k.h87
-rw-r--r--include/asm-ppc64/pgtable.h160
-rw-r--r--include/asm-ppc64/prom.h8
-rw-r--r--include/asm-ppc64/system.h2
10 files changed, 532 insertions, 243 deletions
diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h
index e0505acb77d9..4c18a5cb69f5 100644
--- a/include/asm-ppc64/mmu.h
+++ b/include/asm-ppc64/mmu.h
@@ -48,13 +48,21 @@ extern char initial_stab[];
48 48
49/* Bits in the SLB VSID word */ 49/* Bits in the SLB VSID word */
50#define SLB_VSID_SHIFT 12 50#define SLB_VSID_SHIFT 12
51#define SLB_VSID_B ASM_CONST(0xc000000000000000)
52#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000)
53#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000)
51#define SLB_VSID_KS ASM_CONST(0x0000000000000800) 54#define SLB_VSID_KS ASM_CONST(0x0000000000000800)
52#define SLB_VSID_KP ASM_CONST(0x0000000000000400) 55#define SLB_VSID_KP ASM_CONST(0x0000000000000400)
53#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */ 56#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */
54#define SLB_VSID_L ASM_CONST(0x0000000000000100) /* largepage */ 57#define SLB_VSID_L ASM_CONST(0x0000000000000100)
55#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */ 58#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */
56#define SLB_VSID_LS ASM_CONST(0x0000000000000070) /* size of largepage */ 59#define SLB_VSID_LP ASM_CONST(0x0000000000000030)
57 60#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000)
61#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010)
62#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020)
63#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030)
64#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP)
65
58#define SLB_VSID_KERNEL (SLB_VSID_KP) 66#define SLB_VSID_KERNEL (SLB_VSID_KP)
59#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C) 67#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
60 68
@@ -69,6 +77,7 @@ extern char initial_stab[];
69#define HPTE_V_AVPN_SHIFT 7 77#define HPTE_V_AVPN_SHIFT 7
70#define HPTE_V_AVPN ASM_CONST(0xffffffffffffff80) 78#define HPTE_V_AVPN ASM_CONST(0xffffffffffffff80)
71#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT) 79#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
80#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & HPTE_V_AVPN))
72#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010) 81#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
73#define HPTE_V_LOCK ASM_CONST(0x0000000000000008) 82#define HPTE_V_LOCK ASM_CONST(0x0000000000000008)
74#define HPTE_V_LARGE ASM_CONST(0x0000000000000004) 83#define HPTE_V_LARGE ASM_CONST(0x0000000000000004)
@@ -81,6 +90,7 @@ extern char initial_stab[];
81#define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000) 90#define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000)
82#define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff) 91#define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff)
83#define HPTE_R_PP ASM_CONST(0x0000000000000003) 92#define HPTE_R_PP ASM_CONST(0x0000000000000003)
93#define HPTE_R_N ASM_CONST(0x0000000000000004)
84 94
85/* Values for PP (assumes Ks=0, Kp=1) */ 95/* Values for PP (assumes Ks=0, Kp=1) */
86/* pp0 will always be 0 for linux */ 96/* pp0 will always be 0 for linux */
@@ -99,100 +109,120 @@ typedef struct {
99extern hpte_t *htab_address; 109extern hpte_t *htab_address;
100extern unsigned long htab_hash_mask; 110extern unsigned long htab_hash_mask;
101 111
102static inline unsigned long hpt_hash(unsigned long vpn, int large) 112/*
113 * Page size definition
114 *
115 * shift : is the "PAGE_SHIFT" value for that page size
116 * sllp : is a bit mask with the value of SLB L || LP to be or'ed
117 * directly to a slbmte "vsid" value
118 * penc : is the HPTE encoding mask for the "LP" field:
119 *
120 */
121struct mmu_psize_def
103{ 122{
104 unsigned long vsid; 123 unsigned int shift; /* number of bits */
105 unsigned long page; 124 unsigned int penc; /* HPTE encoding */
106 125 unsigned int tlbiel; /* tlbiel supported for that page size */
107 if (large) { 126 unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
108 vsid = vpn >> 4; 127 unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
109 page = vpn & 0xf; 128};
110 } else {
111 vsid = vpn >> 16;
112 page = vpn & 0xffff;
113 }
114 129
115 return (vsid & 0x7fffffffffUL) ^ page; 130#endif /* __ASSEMBLY__ */
116}
117
118static inline void __tlbie(unsigned long va, int large)
119{
120 /* clear top 16 bits, non SLS segment */
121 va &= ~(0xffffULL << 48);
122
123 if (large) {
124 va &= HPAGE_MASK;
125 asm volatile("tlbie %0,1" : : "r"(va) : "memory");
126 } else {
127 va &= PAGE_MASK;
128 asm volatile("tlbie %0,0" : : "r"(va) : "memory");
129 }
130}
131 131
132static inline void tlbie(unsigned long va, int large) 132/*
133{ 133 * The kernel use the constants below to index in the page sizes array.
134 asm volatile("ptesync": : :"memory"); 134 * The use of fixed constants for this purpose is better for performances
135 __tlbie(va, large); 135 * of the low level hash refill handlers.
136 asm volatile("eieio; tlbsync; ptesync": : :"memory"); 136 *
137} 137 * A non supported page size has a "shift" field set to 0
138 *
139 * Any new page size being implemented can get a new entry in here. Whether
140 * the kernel will use it or not is a different matter though. The actual page
141 * size used by hugetlbfs is not defined here and may be made variable
142 */
138 143
139static inline void __tlbiel(unsigned long va) 144#define MMU_PAGE_4K 0 /* 4K */
140{ 145#define MMU_PAGE_64K 1 /* 64K */
141 /* clear top 16 bits, non SLS segment */ 146#define MMU_PAGE_64K_AP 2 /* 64K Admixed (in a 4K segment) */
142 va &= ~(0xffffULL << 48); 147#define MMU_PAGE_1M 3 /* 1M */
143 va &= PAGE_MASK; 148#define MMU_PAGE_16M 4 /* 16M */
144 149#define MMU_PAGE_16G 5 /* 16G */
145 /* 150#define MMU_PAGE_COUNT 6
146 * Thanks to Alan Modra we are now able to use machine specific
147 * assembly instructions (like tlbiel) by using the gas -many flag.
148 * However we have to support older toolchains so for the moment
149 * we hardwire it.
150 */
151#if 0
152 asm volatile("tlbiel %0" : : "r"(va) : "memory");
153#else
154 asm volatile(".long 0x7c000224 | (%0 << 11)" : : "r"(va) : "memory");
155#endif
156}
157 151
158static inline void tlbiel(unsigned long va) 152#ifndef __ASSEMBLY__
159{
160 asm volatile("ptesync": : :"memory");
161 __tlbiel(va);
162 asm volatile("ptesync": : :"memory");
163}
164 153
165static inline unsigned long slot2va(unsigned long hpte_v, unsigned long slot) 154/*
166{ 155 * The current system page sizes
167 unsigned long avpn = HPTE_V_AVPN_VAL(hpte_v); 156 */
168 unsigned long va; 157extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
158extern int mmu_linear_psize;
159extern int mmu_virtual_psize;
169 160
170 va = avpn << 23; 161#ifdef CONFIG_HUGETLB_PAGE
162/*
163 * The page size index of the huge pages for use by hugetlbfs
164 */
165extern int mmu_huge_psize;
171 166
172 if (! (hpte_v & HPTE_V_LARGE)) { 167#endif /* CONFIG_HUGETLB_PAGE */
173 unsigned long vpi, pteg;
174 168
175 pteg = slot / HPTES_PER_GROUP; 169/*
176 if (hpte_v & HPTE_V_SECONDARY) 170 * This function sets the AVPN and L fields of the HPTE appropriately
177 pteg = ~pteg; 171 * for the page size
172 */
173static inline unsigned long hpte_encode_v(unsigned long va, int psize)
174{
175 unsigned long v =
176 v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
177 v <<= HPTE_V_AVPN_SHIFT;
178 if (psize != MMU_PAGE_4K)
179 v |= HPTE_V_LARGE;
180 return v;
181}
178 182
179 vpi = ((va >> 28) ^ pteg) & htab_hash_mask; 183/*
184 * This function sets the ARPN, and LP fields of the HPTE appropriately
185 * for the page size. We assume the pa is already "clean" that is properly
186 * aligned for the requested page size
187 */
188static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
189{
190 unsigned long r;
180 191
181 va |= vpi << PAGE_SHIFT; 192 /* A 4K page needs no special encoding */
193 if (psize == MMU_PAGE_4K)
194 return pa & HPTE_R_RPN;
195 else {
196 unsigned int penc = mmu_psize_defs[psize].penc;
197 unsigned int shift = mmu_psize_defs[psize].shift;
198 return (pa & ~((1ul << shift) - 1)) | (penc << 12);
182 } 199 }
183 200 return r;
184 return va;
185} 201}
186 202
187/* 203/*
188 * Handle a fault by adding an HPTE. If the address can't be determined 204 * This hashes a virtual address for a 256Mb segment only for now
189 * to be valid via Linux page tables, return 1. If handled return 0
190 */ 205 */
191extern int __hash_page(unsigned long ea, unsigned long access, 206
192 unsigned long vsid, pte_t *ptep, unsigned long trap, 207static inline unsigned long hpt_hash(unsigned long va, unsigned int shift)
193 int local); 208{
209 return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift);
210}
211
212extern int __hash_page_4K(unsigned long ea, unsigned long access,
213 unsigned long vsid, pte_t *ptep, unsigned long trap,
214 unsigned int local);
215extern int __hash_page_64K(unsigned long ea, unsigned long access,
216 unsigned long vsid, pte_t *ptep, unsigned long trap,
217 unsigned int local);
218struct mm_struct;
219extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
220 unsigned long ea, unsigned long vsid, int local);
194 221
195extern void htab_finish_init(void); 222extern void htab_finish_init(void);
223extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
224 unsigned long pstart, unsigned long mode,
225 int psize);
196 226
197extern void hpte_init_native(void); 227extern void hpte_init_native(void);
198extern void hpte_init_lpar(void); 228extern void hpte_init_lpar(void);
@@ -200,17 +230,21 @@ extern void hpte_init_iSeries(void);
200 230
201extern long pSeries_lpar_hpte_insert(unsigned long hpte_group, 231extern long pSeries_lpar_hpte_insert(unsigned long hpte_group,
202 unsigned long va, unsigned long prpn, 232 unsigned long va, unsigned long prpn,
203 unsigned long vflags, 233 unsigned long rflags,
204 unsigned long rflags); 234 unsigned long vflags, int psize);
205extern long native_hpte_insert(unsigned long hpte_group, unsigned long va, 235
206 unsigned long prpn, 236extern long native_hpte_insert(unsigned long hpte_group,
207 unsigned long vflags, unsigned long rflags); 237 unsigned long va, unsigned long prpn,
238 unsigned long rflags,
239 unsigned long vflags, int psize);
208 240
209extern long iSeries_hpte_bolt_or_insert(unsigned long hpte_group, 241extern long iSeries_hpte_insert(unsigned long hpte_group,
210 unsigned long va, unsigned long prpn, 242 unsigned long va, unsigned long prpn,
211 unsigned long vflags, unsigned long rflags); 243 unsigned long rflags,
244 unsigned long vflags, int psize);
212 245
213extern void stabs_alloc(void); 246extern void stabs_alloc(void);
247extern void slb_initialize(void);
214 248
215#endif /* __ASSEMBLY__ */ 249#endif /* __ASSEMBLY__ */
216 250
diff --git a/include/asm-ppc64/mmu_context.h b/include/asm-ppc64/mmu_context.h
index 820dd729b895..4f512e9fa6b8 100644
--- a/include/asm-ppc64/mmu_context.h
+++ b/include/asm-ppc64/mmu_context.h
@@ -16,8 +16,16 @@
16 * 2 of the License, or (at your option) any later version. 16 * 2 of the License, or (at your option) any later version.
17 */ 17 */
18 18
19static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 19/*
20 * Getting into a kernel thread, there is no valid user segment, mark
21 * paca->pgdir NULL so that SLB miss on user addresses will fault
22 */
23static inline void enter_lazy_tlb(struct mm_struct *mm,
24 struct task_struct *tsk)
20{ 25{
26#ifdef CONFIG_PPC_64K_PAGES
27 get_paca()->pgdir = NULL;
28#endif /* CONFIG_PPC_64K_PAGES */
21} 29}
22 30
23#define NO_CONTEXT 0 31#define NO_CONTEXT 0
@@ -40,8 +48,13 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
40 cpu_set(smp_processor_id(), next->cpu_vm_mask); 48 cpu_set(smp_processor_id(), next->cpu_vm_mask);
41 49
42 /* No need to flush userspace segments if the mm doesnt change */ 50 /* No need to flush userspace segments if the mm doesnt change */
51#ifdef CONFIG_PPC_64K_PAGES
52 if (prev == next && get_paca()->pgdir == next->pgd)
53 return;
54#else
43 if (prev == next) 55 if (prev == next)
44 return; 56 return;
57#endif /* CONFIG_PPC_64K_PAGES */
45 58
46#ifdef CONFIG_ALTIVEC 59#ifdef CONFIG_ALTIVEC
47 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 60 if (cpu_has_feature(CPU_FTR_ALTIVEC))
diff --git a/include/asm-ppc64/paca.h b/include/asm-ppc64/paca.h
index f68fe91debaf..bccacd6aa93a 100644
--- a/include/asm-ppc64/paca.h
+++ b/include/asm-ppc64/paca.h
@@ -72,10 +72,15 @@ struct paca_struct {
72 /* 72 /*
73 * Now, starting in cacheline 2, the exception save areas 73 * Now, starting in cacheline 2, the exception save areas
74 */ 74 */
75 u64 exgen[8] __attribute__((aligned(0x80))); /* used for most interrupts/exceptions */ 75 /* used for most interrupts/exceptions */
76 u64 exmc[8]; /* used for machine checks */ 76 u64 exgen[10] __attribute__((aligned(0x80)));
77 u64 exslb[8]; /* used for SLB/segment table misses 77 u64 exmc[10]; /* used for machine checks */
78 * on the linear mapping */ 78 u64 exslb[10]; /* used for SLB/segment table misses
79 * on the linear mapping */
80#ifdef CONFIG_PPC_64K_PAGES
81 pgd_t *pgdir;
82#endif /* CONFIG_PPC_64K_PAGES */
83
79 mm_context_t context; 84 mm_context_t context;
80 u16 slb_cache[SLB_CACHE_ENTRIES]; 85 u16 slb_cache[SLB_CACHE_ENTRIES];
81 u16 slb_cache_ptr; 86 u16 slb_cache_ptr;
diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h
index d404431f0a9a..82ce187e5be8 100644
--- a/include/asm-ppc64/page.h
+++ b/include/asm-ppc64/page.h
@@ -13,32 +13,59 @@
13#include <linux/config.h> 13#include <linux/config.h>
14#include <asm/ppc_asm.h> /* for ASM_CONST */ 14#include <asm/ppc_asm.h> /* for ASM_CONST */
15 15
16/* PAGE_SHIFT determines the page size */ 16/*
17#define PAGE_SHIFT 12 17 * We support either 4k or 64k software page size. When using 64k pages
18#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) 18 * however, wether we are really supporting 64k pages in HW or not is
19#define PAGE_MASK (~(PAGE_SIZE-1)) 19 * irrelevant to those definitions. We always define HW_PAGE_SHIFT to 12
20 * as use of 64k pages remains a linux kernel specific, every notion of
21 * page number shared with the firmware, TCEs, iommu, etc... still assumes
22 * a page size of 4096.
23 */
24#ifdef CONFIG_PPC_64K_PAGES
25#define PAGE_SHIFT 16
26#else
27#define PAGE_SHIFT 12
28#endif
20 29
21#define SID_SHIFT 28 30#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
22#define SID_MASK 0xfffffffffUL 31#define PAGE_MASK (~(PAGE_SIZE-1))
23#define ESID_MASK 0xfffffffff0000000UL
24#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
25 32
26#define HPAGE_SHIFT 24 33/* HW_PAGE_SHIFT is always 4k pages */
27#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) 34#define HW_PAGE_SHIFT 12
28#define HPAGE_MASK (~(HPAGE_SIZE - 1)) 35#define HW_PAGE_SIZE (ASM_CONST(1) << HW_PAGE_SHIFT)
36#define HW_PAGE_MASK (~(HW_PAGE_SIZE-1))
29 37
30#ifdef CONFIG_HUGETLB_PAGE 38/* PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and
39 * HW_PAGE_SHIFT, that is 4k pages
40 */
41#define PAGE_FACTOR (PAGE_SHIFT - HW_PAGE_SHIFT)
42
43/* Segment size */
44#define SID_SHIFT 28
45#define SID_MASK 0xfffffffffUL
46#define ESID_MASK 0xfffffffff0000000UL
47#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK)
31 48
49/* Large pages size */
50
51#ifndef __ASSEMBLY__
52extern unsigned int HPAGE_SHIFT;
53#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
54#define HPAGE_MASK (~(HPAGE_SIZE - 1))
32#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) 55#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
56#endif /* __ASSEMBLY__ */
57
58#ifdef CONFIG_HUGETLB_PAGE
59
33 60
34#define HTLB_AREA_SHIFT 40 61#define HTLB_AREA_SHIFT 40
35#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) 62#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT)
36#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) 63#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT)
37 64
38#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ 65#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \
39 - (1U << GET_ESID(addr))) & 0xffff) 66 - (1U << GET_ESID(addr))) & 0xffff)
40#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ 67#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \
41 - (1U << GET_HTLB_AREA(addr))) & 0xffff) 68 - (1U << GET_HTLB_AREA(addr))) & 0xffff)
42 69
43#define ARCH_HAS_HUGEPAGE_ONLY_RANGE 70#define ARCH_HAS_HUGEPAGE_ONLY_RANGE
44#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE 71#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
@@ -114,7 +141,25 @@ static __inline__ void clear_page(void *addr)
114 : "ctr", "memory"); 141 : "ctr", "memory");
115} 142}
116 143
117extern void copy_page(void *to, void *from); 144extern void copy_4K_page(void *to, void *from);
145
146#ifdef CONFIG_PPC_64K_PAGES
147static inline void copy_page(void *to, void *from)
148{
149 unsigned int i;
150 for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) {
151 copy_4K_page(to, from);
152 to += 4096;
153 from += 4096;
154 }
155}
156#else /* CONFIG_PPC_64K_PAGES */
157static inline void copy_page(void *to, void *from)
158{
159 copy_4K_page(to, from);
160}
161#endif /* CONFIG_PPC_64K_PAGES */
162
118struct page; 163struct page;
119extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); 164extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
120extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p); 165extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct page *p);
@@ -124,43 +169,75 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag
124 * These are used to make use of C type-checking. 169 * These are used to make use of C type-checking.
125 * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. 170 * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b.
126 */ 171 */
127typedef struct { unsigned long pte; } pte_t;
128typedef struct { unsigned long pmd; } pmd_t;
129typedef struct { unsigned long pud; } pud_t;
130typedef struct { unsigned long pgd; } pgd_t;
131typedef struct { unsigned long pgprot; } pgprot_t;
132 172
173/* PTE level */
174typedef struct { unsigned long pte; } pte_t;
133#define pte_val(x) ((x).pte) 175#define pte_val(x) ((x).pte)
134#define pmd_val(x) ((x).pmd)
135#define pud_val(x) ((x).pud)
136#define pgd_val(x) ((x).pgd)
137#define pgprot_val(x) ((x).pgprot)
138
139#define __pte(x) ((pte_t) { (x) }) 176#define __pte(x) ((pte_t) { (x) })
177
178/* 64k pages additionally define a bigger "real PTE" type that gathers
179 * the "second half" part of the PTE for pseudo 64k pages
180 */
181#ifdef CONFIG_PPC_64K_PAGES
182typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
183#else
184typedef struct { pte_t pte; } real_pte_t;
185#endif
186
187/* PMD level */
188typedef struct { unsigned long pmd; } pmd_t;
189#define pmd_val(x) ((x).pmd)
140#define __pmd(x) ((pmd_t) { (x) }) 190#define __pmd(x) ((pmd_t) { (x) })
191
192/* PUD level exusts only on 4k pages */
193#ifndef CONFIG_PPC_64K_PAGES
194typedef struct { unsigned long pud; } pud_t;
195#define pud_val(x) ((x).pud)
141#define __pud(x) ((pud_t) { (x) }) 196#define __pud(x) ((pud_t) { (x) })
197#endif
198
199/* PGD level */
200typedef struct { unsigned long pgd; } pgd_t;
201#define pgd_val(x) ((x).pgd)
142#define __pgd(x) ((pgd_t) { (x) }) 202#define __pgd(x) ((pgd_t) { (x) })
203
204/* Page protection bits */
205typedef struct { unsigned long pgprot; } pgprot_t;
206#define pgprot_val(x) ((x).pgprot)
143#define __pgprot(x) ((pgprot_t) { (x) }) 207#define __pgprot(x) ((pgprot_t) { (x) })
144 208
145#else 209#else
210
146/* 211/*
147 * .. while these make it easier on the compiler 212 * .. while these make it easier on the compiler
148 */ 213 */
149typedef unsigned long pte_t;
150typedef unsigned long pmd_t;
151typedef unsigned long pud_t;
152typedef unsigned long pgd_t;
153typedef unsigned long pgprot_t;
154 214
215typedef unsigned long pte_t;
155#define pte_val(x) (x) 216#define pte_val(x) (x)
217#define __pte(x) (x)
218
219#ifdef CONFIG_PPC_64K_PAGES
220typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
221#else
222typedef unsigned long real_pte_t;
223#endif
224
225
226typedef unsigned long pmd_t;
156#define pmd_val(x) (x) 227#define pmd_val(x) (x)
228#define __pmd(x) (x)
229
230#ifndef CONFIG_PPC_64K_PAGES
231typedef unsigned long pud_t;
157#define pud_val(x) (x) 232#define pud_val(x) (x)
233#define __pud(x) (x)
234#endif
235
236typedef unsigned long pgd_t;
158#define pgd_val(x) (x) 237#define pgd_val(x) (x)
159#define pgprot_val(x) (x) 238#define pgprot_val(x) (x)
160 239
161#define __pte(x) (x) 240typedef unsigned long pgprot_t;
162#define __pmd(x) (x)
163#define __pud(x) (x)
164#define __pgd(x) (x) 241#define __pgd(x) (x)
165#define __pgprot(x) (x) 242#define __pgprot(x) (x)
166 243
diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h
index 26bc49c1108d..98da0e4262bd 100644
--- a/include/asm-ppc64/pgalloc.h
+++ b/include/asm-ppc64/pgalloc.h
@@ -8,10 +8,16 @@
8 8
9extern kmem_cache_t *pgtable_cache[]; 9extern kmem_cache_t *pgtable_cache[];
10 10
11#ifdef CONFIG_PPC_64K_PAGES
12#define PTE_CACHE_NUM 0
13#define PMD_CACHE_NUM 0
14#define PGD_CACHE_NUM 1
15#else
11#define PTE_CACHE_NUM 0 16#define PTE_CACHE_NUM 0
12#define PMD_CACHE_NUM 1 17#define PMD_CACHE_NUM 1
13#define PUD_CACHE_NUM 1 18#define PUD_CACHE_NUM 1
14#define PGD_CACHE_NUM 0 19#define PGD_CACHE_NUM 0
20#endif
15 21
16/* 22/*
17 * This program is free software; you can redistribute it and/or 23 * This program is free software; you can redistribute it and/or
@@ -30,6 +36,8 @@ static inline void pgd_free(pgd_t *pgd)
30 kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); 36 kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
31} 37}
32 38
39#ifndef CONFIG_PPC_64K_PAGES
40
33#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) 41#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
34 42
35static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) 43static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -43,7 +51,30 @@ static inline void pud_free(pud_t *pud)
43 kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); 51 kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
44} 52}
45 53
46#define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) 54static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
55{
56 pud_set(pud, (unsigned long)pmd);
57}
58
59#define pmd_populate(mm, pmd, pte_page) \
60 pmd_populate_kernel(mm, pmd, page_address(pte_page))
61#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
62
63
64#else /* CONFIG_PPC_64K_PAGES */
65
66#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
67
68static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
69 pte_t *pte)
70{
71 pmd_set(pmd, (unsigned long)pte);
72}
73
74#define pmd_populate(mm, pmd, pte_page) \
75 pmd_populate_kernel(mm, pmd, page_address(pte_page))
76
77#endif /* CONFIG_PPC_64K_PAGES */
47 78
48static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) 79static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
49{ 80{
@@ -56,17 +87,15 @@ static inline void pmd_free(pmd_t *pmd)
56 kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); 87 kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
57} 88}
58 89
59#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) 90static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
60#define pmd_populate(mm, pmd, pte_page) \ 91 unsigned long address)
61 pmd_populate_kernel(mm, pmd, page_address(pte_page))
62
63static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
64{ 92{
65 return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], 93 return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
66 GFP_KERNEL|__GFP_REPEAT); 94 GFP_KERNEL|__GFP_REPEAT);
67} 95}
68 96
69static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) 97static inline struct page *pte_alloc_one(struct mm_struct *mm,
98 unsigned long address)
70{ 99{
71 return virt_to_page(pte_alloc_one_kernel(mm, address)); 100 return virt_to_page(pte_alloc_one_kernel(mm, address));
72} 101}
@@ -103,7 +132,7 @@ static inline void pgtable_free(pgtable_free_t pgf)
103 kmem_cache_free(pgtable_cache[cachenum], p); 132 kmem_cache_free(pgtable_cache[cachenum], p);
104} 133}
105 134
106void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); 135extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
107 136
108#define __pte_free_tlb(tlb, ptepage) \ 137#define __pte_free_tlb(tlb, ptepage) \
109 pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ 138 pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
@@ -111,9 +140,11 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
111#define __pmd_free_tlb(tlb, pmd) \ 140#define __pmd_free_tlb(tlb, pmd) \
112 pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ 141 pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
113 PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) 142 PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
143#ifndef CONFIG_PPC_64K_PAGES
114#define __pud_free_tlb(tlb, pmd) \ 144#define __pud_free_tlb(tlb, pmd) \
115 pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ 145 pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
116 PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) 146 PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
147#endif /* CONFIG_PPC_64K_PAGES */
117 148
118#define check_pgt_cache() do { } while (0) 149#define check_pgt_cache() do { } while (0)
119 150
diff --git a/include/asm-ppc64/pgtable-4k.h b/include/asm-ppc64/pgtable-4k.h
new file mode 100644
index 000000000000..c883a2748558
--- /dev/null
+++ b/include/asm-ppc64/pgtable-4k.h
@@ -0,0 +1,88 @@
1/*
2 * Entries per page directory level. The PTE level must use a 64b record
3 * for each page table entry. The PMD and PGD level use a 32b record for
4 * each entry by assuming that each entry is page aligned.
5 */
6#define PTE_INDEX_SIZE 9
7#define PMD_INDEX_SIZE 7
8#define PUD_INDEX_SIZE 7
9#define PGD_INDEX_SIZE 9
10
11#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
12#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
13#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
14#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
15
16#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
17#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
18#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
19#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
20
21/* PMD_SHIFT determines what a second-level page table entry can map */
22#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
23#define PMD_SIZE (1UL << PMD_SHIFT)
24#define PMD_MASK (~(PMD_SIZE-1))
25
26/* PUD_SHIFT determines what a third-level page table entry can map */
27#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
28#define PUD_SIZE (1UL << PUD_SHIFT)
29#define PUD_MASK (~(PUD_SIZE-1))
30
31/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
32#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
33#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
34#define PGDIR_MASK (~(PGDIR_SIZE-1))
35
36/* PTE bits */
37#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
38#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
39#define _PAGE_F_SECOND _PAGE_SECONDARY
40#define _PAGE_F_GIX _PAGE_GROUP_IX
41
42/* PTE flags to conserve for HPTE identification */
43#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
44 _PAGE_SECONDARY | _PAGE_GROUP_IX)
45
46/* PAGE_MASK gives the right answer below, but only by accident */
47/* It should be preserving the high 48 bits and then specifically */
48/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
49#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
50 _PAGE_HPTEFLAGS)
51
52/* Bits to mask out from a PMD to get to the PTE page */
53#define PMD_MASKED_BITS 0
54/* Bits to mask out from a PUD to get to the PMD page */
55#define PUD_MASKED_BITS 0
56/* Bits to mask out from a PGD to get to the PUD page */
57#define PGD_MASKED_BITS 0
58
59/* shift to put page number into pte */
60#define PTE_RPN_SHIFT (17)
61
62#define __real_pte(e,p) ((real_pte_t)(e))
63#define __rpte_to_pte(r) (r)
64#define __rpte_to_hidx(r,index) (pte_val((r)) >> 12)
65
66#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
67 do { \
68 index = 0; \
69 shift = mmu_psize_defs[psize].shift; \
70
71#define pte_iterate_hashed_end() } while(0)
72
73/*
74 * 4-level page tables related bits
75 */
76
77#define pgd_none(pgd) (!pgd_val(pgd))
78#define pgd_bad(pgd) (pgd_val(pgd) == 0)
79#define pgd_present(pgd) (pgd_val(pgd) != 0)
80#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
81#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
82
83#define pud_offset(pgdp, addr) \
84 (((pud_t *) pgd_page(*(pgdp))) + \
85 (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
86
87#define pud_ERROR(e) \
88 printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e))
diff --git a/include/asm-ppc64/pgtable-64k.h b/include/asm-ppc64/pgtable-64k.h
new file mode 100644
index 000000000000..c5f437c86b3c
--- /dev/null
+++ b/include/asm-ppc64/pgtable-64k.h
@@ -0,0 +1,87 @@
1#include <asm-generic/pgtable-nopud.h>
2
3
4#define PTE_INDEX_SIZE 12
5#define PMD_INDEX_SIZE 12
6#define PUD_INDEX_SIZE 0
7#define PGD_INDEX_SIZE 4
8
9#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE)
10#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
11#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
12
13#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
14#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
15#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
16
17/* PMD_SHIFT determines what a second-level page table entry can map */
18#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
19#define PMD_SIZE (1UL << PMD_SHIFT)
20#define PMD_MASK (~(PMD_SIZE-1))
21
22/* PGDIR_SHIFT determines what a third-level page table entry can map */
23#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
24#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
25#define PGDIR_MASK (~(PGDIR_SIZE-1))
26
27/* Additional PTE bits (don't change without checking asm in hash_low.S) */
28#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */
29#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */
30#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */
31#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
32#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
33
34/* PTE flags to conserve for HPTE identification */
35#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_HPTE_SUB |\
36 _PAGE_COMBO)
37
38/* Shift to put page number into pte.
39 *
40 * That gives us a max RPN of 32 bits, which means a max of 48 bits
41 * of addressable physical space.
42 * We could get 3 more bits here by setting PTE_RPN_SHIFT to 29 but
43 * 32 makes PTEs more readable for debugging for now :)
44 */
45#define PTE_RPN_SHIFT (32)
46#define PTE_RPN_MAX (1UL << (64 - PTE_RPN_SHIFT))
47#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1))
48
49/* _PAGE_CHG_MASK masks of bits that are to be preserved accross
50 * pgprot changes
51 */
52#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
53 _PAGE_ACCESSED)
54
55/* Bits to mask out from a PMD to get to the PTE page */
56#define PMD_MASKED_BITS 0x1ff
57/* Bits to mask out from a PGD/PUD to get to the PMD page */
58#define PUD_MASKED_BITS 0x1ff
59
60#ifndef __ASSEMBLY__
61
62/* Manipulate "rpte" values */
63#define __real_pte(e,p) ((real_pte_t) { \
64 (e), pte_val(*((p) + PTRS_PER_PTE)) })
65#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
66 (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
67#define __rpte_to_pte(r) ((r).pte)
68#define __rpte_sub_valid(rpte, index) \
69 (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
70
71
72/* Trick: we set __end to va + 64k, which happens works for
73 * a 16M page as well as we want only one iteration
74 */
75#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
76 do { \
77 unsigned long __end = va + PAGE_SIZE; \
78 unsigned __split = (psize == MMU_PAGE_4K || \
79 psize == MMU_PAGE_64K_AP); \
80 shift = mmu_psize_defs[psize].shift; \
81 for (index = 0; va < __end; index++, va += (1 << shift)) { \
82 if (!__split || __rpte_sub_valid(rpte, index)) do { \
83
84#define pte_iterate_hashed_end() } while(0); } } while(0)
85
86
87#endif /* __ASSEMBLY__ */
diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
index 8c3f574046b6..fde93ec36abc 100644
--- a/include/asm-ppc64/pgtable.h
+++ b/include/asm-ppc64/pgtable.h
@@ -15,40 +15,11 @@
15#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#endif /* __ASSEMBLY__ */ 16#endif /* __ASSEMBLY__ */
17 17
18/* 18#ifdef CONFIG_PPC_64K_PAGES
19 * Entries per page directory level. The PTE level must use a 64b record 19#include <asm/pgtable-64k.h>
20 * for each page table entry. The PMD and PGD level use a 32b record for 20#else
21 * each entry by assuming that each entry is page aligned. 21#include <asm/pgtable-4k.h>
22 */ 22#endif
23#define PTE_INDEX_SIZE 9
24#define PMD_INDEX_SIZE 7
25#define PUD_INDEX_SIZE 7
26#define PGD_INDEX_SIZE 9
27
28#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
29#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
30#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
31#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
32
33#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
34#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
35#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
36#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
37
38/* PMD_SHIFT determines what a second-level page table entry can map */
39#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
40#define PMD_SIZE (1UL << PMD_SHIFT)
41#define PMD_MASK (~(PMD_SIZE-1))
42
43/* PUD_SHIFT determines what a third-level page table entry can map */
44#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
45#define PUD_SIZE (1UL << PUD_SHIFT)
46#define PUD_MASK (~(PUD_SIZE-1))
47
48/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
49#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
50#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
51#define PGDIR_MASK (~(PGDIR_SIZE-1))
52 23
53#define FIRST_USER_ADDRESS 0 24#define FIRST_USER_ADDRESS 0
54 25
@@ -75,8 +46,9 @@
75#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) 46#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
76 47
77/* 48/*
78 * Bits in a linux-style PTE. These match the bits in the 49 * Common bits in a linux-style PTE. These match the bits in the
79 * (hardware-defined) PowerPC PTE as closely as possible. 50 * (hardware-defined) PowerPC PTE as closely as possible. Additional
51 * bits may be defined in pgtable-*.h
80 */ 52 */
81#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */ 53#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
82#define _PAGE_USER 0x0002 /* matches one of the PP bits */ 54#define _PAGE_USER 0x0002 /* matches one of the PP bits */
@@ -91,15 +63,6 @@
91#define _PAGE_RW 0x0200 /* software: user write access allowed */ 63#define _PAGE_RW 0x0200 /* software: user write access allowed */
92#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */ 64#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
93#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */ 65#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
94#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
95#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
96#define _PAGE_HUGE 0x10000 /* 16MB page */
97/* Bits 0x7000 identify the index within an HPT Group */
98#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_SECONDARY | _PAGE_GROUP_IX)
99/* PAGE_MASK gives the right answer below, but only by accident */
100/* It should be preserving the high 48 bits and then specifically */
101/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
102#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HPTEFLAGS)
103 66
104#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT) 67#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT)
105 68
@@ -122,10 +85,10 @@
122#define PAGE_AGP __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE) 85#define PAGE_AGP __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE)
123#define HAVE_PAGE_AGP 86#define HAVE_PAGE_AGP
124 87
125/* 88/* PTEIDX nibble */
126 * This bit in a hardware PTE indicates that the page is *not* executable. 89#define _PTEIDX_SECONDARY 0x8
127 */ 90#define _PTEIDX_GROUP_IX 0x7
128#define HW_NO_EXEC _PAGE_EXEC 91
129 92
130/* 93/*
131 * POWER4 and newer have per page execute protection, older chips can only 94 * POWER4 and newer have per page execute protection, older chips can only
@@ -164,21 +127,10 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
164#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) 127#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
165#endif /* __ASSEMBLY__ */ 128#endif /* __ASSEMBLY__ */
166 129
167/* shift to put page number into pte */
168#define PTE_SHIFT (17)
169
170#ifdef CONFIG_HUGETLB_PAGE 130#ifdef CONFIG_HUGETLB_PAGE
171 131
172#ifndef __ASSEMBLY__
173int hash_huge_page(struct mm_struct *mm, unsigned long access,
174 unsigned long ea, unsigned long vsid, int local);
175#endif /* __ASSEMBLY__ */
176
177#define HAVE_ARCH_UNMAPPED_AREA 132#define HAVE_ARCH_UNMAPPED_AREA
178#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN 133#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
179#else
180
181#define hash_huge_page(mm,a,ea,vsid,local) -1
182 134
183#endif 135#endif
184 136
@@ -197,7 +149,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
197 pte_t pte; 149 pte_t pte;
198 150
199 151
200 pte_val(pte) = (pfn << PTE_SHIFT) | pgprot_val(pgprot); 152 pte_val(pte) = (pfn << PTE_RPN_SHIFT) | pgprot_val(pgprot);
201 return pte; 153 return pte;
202} 154}
203 155
@@ -209,30 +161,25 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
209 161
210/* pte_clear moved to later in this file */ 162/* pte_clear moved to later in this file */
211 163
212#define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) 164#define pte_pfn(x) ((unsigned long)((pte_val(x)>>PTE_RPN_SHIFT)))
213#define pte_page(x) pfn_to_page(pte_pfn(x)) 165#define pte_page(x) pfn_to_page(pte_pfn(x))
214 166
215#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) 167#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval))
216#define pmd_none(pmd) (!pmd_val(pmd)) 168#define pmd_none(pmd) (!pmd_val(pmd))
217#define pmd_bad(pmd) (pmd_val(pmd) == 0) 169#define pmd_bad(pmd) (pmd_val(pmd) == 0)
218#define pmd_present(pmd) (pmd_val(pmd) != 0) 170#define pmd_present(pmd) (pmd_val(pmd) != 0)
219#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) 171#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
220#define pmd_page_kernel(pmd) (pmd_val(pmd)) 172#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
221#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) 173#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
222 174
223#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp)) 175#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
224#define pud_none(pud) (!pud_val(pud)) 176#define pud_none(pud) (!pud_val(pud))
225#define pud_bad(pud) ((pud_val(pud)) == 0) 177#define pud_bad(pud) ((pud_val(pud)) == 0)
226#define pud_present(pud) (pud_val(pud) != 0) 178#define pud_present(pud) (pud_val(pud) != 0)
227#define pud_clear(pudp) (pud_val(*(pudp)) = 0) 179#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
228#define pud_page(pud) (pud_val(pud)) 180#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
229 181
230#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) 182#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
231#define pgd_none(pgd) (!pgd_val(pgd))
232#define pgd_bad(pgd) (pgd_val(pgd) == 0)
233#define pgd_present(pgd) (pgd_val(pgd) != 0)
234#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
235#define pgd_page(pgd) (pgd_val(pgd))
236 183
237/* 184/*
238 * Find an entry in a page-table-directory. We combine the address region 185 * Find an entry in a page-table-directory. We combine the address region
@@ -243,9 +190,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
243 190
244#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) 191#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
245 192
246#define pud_offset(pgdp, addr) \
247 (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
248
249#define pmd_offset(pudp,addr) \ 193#define pmd_offset(pudp,addr) \
250 (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) 194 (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
251 195
@@ -271,7 +215,6 @@ static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;}
271static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;} 215static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;}
272static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;} 216static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;}
273static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;} 217static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
274static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE;}
275 218
276static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; } 219static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
277static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; } 220static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
@@ -286,7 +229,6 @@ static inline pte_t pte_mkclean(pte_t pte) {
286 pte_val(pte) &= ~(_PAGE_DIRTY); return pte; } 229 pte_val(pte) &= ~(_PAGE_DIRTY); return pte; }
287static inline pte_t pte_mkold(pte_t pte) { 230static inline pte_t pte_mkold(pte_t pte) {
288 pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } 231 pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
289
290static inline pte_t pte_mkread(pte_t pte) { 232static inline pte_t pte_mkread(pte_t pte) {
291 pte_val(pte) |= _PAGE_USER; return pte; } 233 pte_val(pte) |= _PAGE_USER; return pte; }
292static inline pte_t pte_mkexec(pte_t pte) { 234static inline pte_t pte_mkexec(pte_t pte) {
@@ -298,7 +240,7 @@ static inline pte_t pte_mkdirty(pte_t pte) {
298static inline pte_t pte_mkyoung(pte_t pte) { 240static inline pte_t pte_mkyoung(pte_t pte) {
299 pte_val(pte) |= _PAGE_ACCESSED; return pte; } 241 pte_val(pte) |= _PAGE_ACCESSED; return pte; }
300static inline pte_t pte_mkhuge(pte_t pte) { 242static inline pte_t pte_mkhuge(pte_t pte) {
301 pte_val(pte) |= _PAGE_HUGE; return pte; } 243 return pte; }
302 244
303/* Atomic PTE updates */ 245/* Atomic PTE updates */
304static inline unsigned long pte_update(pte_t *p, unsigned long clr) 246static inline unsigned long pte_update(pte_t *p, unsigned long clr)
@@ -321,11 +263,13 @@ static inline unsigned long pte_update(pte_t *p, unsigned long clr)
321/* PTE updating functions, this function puts the PTE in the 263/* PTE updating functions, this function puts the PTE in the
322 * batch, doesn't actually triggers the hash flush immediately, 264 * batch, doesn't actually triggers the hash flush immediately,
323 * you need to call flush_tlb_pending() to do that. 265 * you need to call flush_tlb_pending() to do that.
266 * Pass -1 for "normal" size (4K or 64K)
324 */ 267 */
325extern void hpte_update(struct mm_struct *mm, unsigned long addr, unsigned long pte, 268extern void hpte_update(struct mm_struct *mm, unsigned long addr,
326 int wrprot); 269 pte_t *ptep, unsigned long pte, int huge);
327 270
328static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 271static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
272 unsigned long addr, pte_t *ptep)
329{ 273{
330 unsigned long old; 274 unsigned long old;
331 275
@@ -333,7 +277,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned lon
333 return 0; 277 return 0;
334 old = pte_update(ptep, _PAGE_ACCESSED); 278 old = pte_update(ptep, _PAGE_ACCESSED);
335 if (old & _PAGE_HASHPTE) { 279 if (old & _PAGE_HASHPTE) {
336 hpte_update(mm, addr, old, 0); 280 hpte_update(mm, addr, ptep, old, 0);
337 flush_tlb_pending(); 281 flush_tlb_pending();
338 } 282 }
339 return (old & _PAGE_ACCESSED) != 0; 283 return (old & _PAGE_ACCESSED) != 0;
@@ -351,7 +295,8 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned lon
351 * moment we always flush but we need to fix hpte_update and test if the 295 * moment we always flush but we need to fix hpte_update and test if the
352 * optimisation is worth it. 296 * optimisation is worth it.
353 */ 297 */
354static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 298static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm,
299 unsigned long addr, pte_t *ptep)
355{ 300{
356 unsigned long old; 301 unsigned long old;
357 302
@@ -359,7 +304,7 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned lon
359 return 0; 304 return 0;
360 old = pte_update(ptep, _PAGE_DIRTY); 305 old = pte_update(ptep, _PAGE_DIRTY);
361 if (old & _PAGE_HASHPTE) 306 if (old & _PAGE_HASHPTE)
362 hpte_update(mm, addr, old, 0); 307 hpte_update(mm, addr, ptep, old, 0);
363 return (old & _PAGE_DIRTY) != 0; 308 return (old & _PAGE_DIRTY) != 0;
364} 309}
365#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY 310#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
@@ -371,7 +316,8 @@ static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned lon
371}) 316})
372 317
373#define __HAVE_ARCH_PTEP_SET_WRPROTECT 318#define __HAVE_ARCH_PTEP_SET_WRPROTECT
374static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 319static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
320 pte_t *ptep)
375{ 321{
376 unsigned long old; 322 unsigned long old;
377 323
@@ -379,7 +325,7 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
379 return; 325 return;
380 old = pte_update(ptep, _PAGE_RW); 326 old = pte_update(ptep, _PAGE_RW);
381 if (old & _PAGE_HASHPTE) 327 if (old & _PAGE_HASHPTE)
382 hpte_update(mm, addr, old, 0); 328 hpte_update(mm, addr, ptep, old, 0);
383} 329}
384 330
385/* 331/*
@@ -408,21 +354,23 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
408}) 354})
409 355
410#define __HAVE_ARCH_PTEP_GET_AND_CLEAR 356#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
411static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) 357static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
358 unsigned long addr, pte_t *ptep)
412{ 359{
413 unsigned long old = pte_update(ptep, ~0UL); 360 unsigned long old = pte_update(ptep, ~0UL);
414 361
415 if (old & _PAGE_HASHPTE) 362 if (old & _PAGE_HASHPTE)
416 hpte_update(mm, addr, old, 0); 363 hpte_update(mm, addr, ptep, old, 0);
417 return __pte(old); 364 return __pte(old);
418} 365}
419 366
420static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t * ptep) 367static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
368 pte_t * ptep)
421{ 369{
422 unsigned long old = pte_update(ptep, ~0UL); 370 unsigned long old = pte_update(ptep, ~0UL);
423 371
424 if (old & _PAGE_HASHPTE) 372 if (old & _PAGE_HASHPTE)
425 hpte_update(mm, addr, old, 0); 373 hpte_update(mm, addr, ptep, old, 0);
426} 374}
427 375
428/* 376/*
@@ -435,7 +383,14 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
435 pte_clear(mm, addr, ptep); 383 pte_clear(mm, addr, ptep);
436 flush_tlb_pending(); 384 flush_tlb_pending();
437 } 385 }
438 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); 386 pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
387
388#ifdef CONFIG_PPC_64K_PAGES
389 if (mmu_virtual_psize != MMU_PAGE_64K)
390 pte = __pte(pte_val(pte) | _PAGE_COMBO);
391#endif /* CONFIG_PPC_64K_PAGES */
392
393 *ptep = pte;
439} 394}
440 395
441/* Set the dirty and/or accessed bits atomically in a linux PTE, this 396/* Set the dirty and/or accessed bits atomically in a linux PTE, this
@@ -482,8 +437,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
482 printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) 437 printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
483#define pmd_ERROR(e) \ 438#define pmd_ERROR(e) \
484 printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) 439 printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
485#define pud_ERROR(e) \
486 printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
487#define pgd_ERROR(e) \ 440#define pgd_ERROR(e) \
488 printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) 441 printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
489 442
@@ -509,12 +462,12 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
509/* Encode and de-code a swap entry */ 462/* Encode and de-code a swap entry */
510#define __swp_type(entry) (((entry).val >> 1) & 0x3f) 463#define __swp_type(entry) (((entry).val >> 1) & 0x3f)
511#define __swp_offset(entry) ((entry).val >> 8) 464#define __swp_offset(entry) ((entry).val >> 8)
512#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) 465#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
513#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> PTE_SHIFT }) 466#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
514#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_SHIFT }) 467#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
515#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT) 468#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT)
516#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_SHIFT)|_PAGE_FILE}) 469#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
517#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_SHIFT) 470#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
518 471
519/* 472/*
520 * kern_addr_valid is intended to indicate whether an address is a valid 473 * kern_addr_valid is intended to indicate whether an address is a valid
@@ -532,29 +485,22 @@ void pgtable_cache_init(void);
532/* 485/*
533 * find_linux_pte returns the address of a linux pte for a given 486 * find_linux_pte returns the address of a linux pte for a given
534 * effective address and directory. If not found, it returns zero. 487 * effective address and directory. If not found, it returns zero.
535 */ 488 */static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
536static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
537{ 489{
538 pgd_t *pg; 490 pgd_t *pg;
539 pud_t *pu; 491 pud_t *pu;
540 pmd_t *pm; 492 pmd_t *pm;
541 pte_t *pt = NULL; 493 pte_t *pt = NULL;
542 pte_t pte;
543 494
544 pg = pgdir + pgd_index(ea); 495 pg = pgdir + pgd_index(ea);
545 if (!pgd_none(*pg)) { 496 if (!pgd_none(*pg)) {
546 pu = pud_offset(pg, ea); 497 pu = pud_offset(pg, ea);
547 if (!pud_none(*pu)) { 498 if (!pud_none(*pu)) {
548 pm = pmd_offset(pu, ea); 499 pm = pmd_offset(pu, ea);
549 if (pmd_present(*pm)) { 500 if (pmd_present(*pm))
550 pt = pte_offset_kernel(pm, ea); 501 pt = pte_offset_kernel(pm, ea);
551 pte = *pt;
552 if (!pte_present(pte))
553 pt = NULL;
554 }
555 } 502 }
556 } 503 }
557
558 return pt; 504 return pt;
559} 505}
560 506
diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h
index e8d0d2ab4c0f..bdb47174ff0e 100644
--- a/include/asm-ppc64/prom.h
+++ b/include/asm-ppc64/prom.h
@@ -188,6 +188,14 @@ extern struct device_node *of_get_next_child(const struct device_node *node,
188extern struct device_node *of_node_get(struct device_node *node); 188extern struct device_node *of_node_get(struct device_node *node);
189extern void of_node_put(struct device_node *node); 189extern void of_node_put(struct device_node *node);
190 190
191/* For scanning the flat device-tree at boot time */
192int __init of_scan_flat_dt(int (*it)(unsigned long node,
193 const char *uname, int depth,
194 void *data),
195 void *data);
196void* __init of_get_flat_dt_prop(unsigned long node, const char *name,
197 unsigned long *size);
198
191/* For updating the device tree at runtime */ 199/* For updating the device tree at runtime */
192extern void of_attach_node(struct device_node *); 200extern void of_attach_node(struct device_node *);
193extern void of_detach_node(const struct device_node *); 201extern void of_detach_node(const struct device_node *);
diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h
index 99b8ca52f101..0cdd66c9f4b7 100644
--- a/include/asm-ppc64/system.h
+++ b/include/asm-ppc64/system.h
@@ -248,7 +248,7 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
248} 248}
249 249
250static __inline__ unsigned long 250static __inline__ unsigned long
251__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new) 251__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
252{ 252{
253 unsigned long prev; 253 unsigned long prev;
254 254