aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author H. Peter Anvin <hpa@zytor.com> 2008-04-17 11:40:45 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-04-17 11:40:45 -0400
commit fe770bf0310d90b3b033c19044d45b7de5f2041c (patch)
tree 5d4ca3147f569020201095189e0c43543d1d7d09
parent 926e5392ba8a388ae32ca0d2714cc2c73945c609 (diff)
x86: clean up the page table dumper and add 32-bit support
Clean up the page table dumper (fix boundary conditions, table-driven address ranges, some formatting changes since it is no longer using the kernel log but a separate virtual file), and generalize to 32 bits.

[ mingo@elte.hu: x86: fix the pagetable dumper ]

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- arch/x86/Kconfig.debug 2
-rw-r--r-- arch/x86/mm/Makefile 2
-rw-r--r-- arch/x86/mm/dump_pagetables.c 301
3 files changed, 179 insertions, 126 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index cb7002eca88..7ce8e702566 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -56,7 +56,7 @@ config DEBUG_PER_CPU_MAPS
56 56
57config X86_PTDUMP 57config X86_PTDUMP
58 bool "Export kernel pagetable layout to userspace via debugfs" 58 bool "Export kernel pagetable layout to userspace via debugfs"
59 depends on X86_64 59 depends on DEBUG_KERNEL
60 select DEBUG_FS 60 select DEBUG_FS
61 help 61 help
62 Say Y here if you want to show the kernel pagetable layout in a 62 Say Y here if you want to show the kernel pagetable layout in a
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 28632f42ca6..9ab9889863f 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -3,6 +3,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o
3obj-$(CONFIG_X86_32) += pgtable_32.o 3obj-$(CONFIG_X86_32) += pgtable_32.o
4 4
5obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 5obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
6obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
6 7
7obj-$(CONFIG_HIGHMEM) += highmem_32.o 8obj-$(CONFIG_HIGHMEM) += highmem_32.o
8 9
@@ -12,5 +13,4 @@ else
12obj-$(CONFIG_NUMA) += numa_64.o 13obj-$(CONFIG_NUMA) += numa_64.o
13obj-$(CONFIG_K8_NUMA) += k8topology_64.o 14obj-$(CONFIG_K8_NUMA) += k8topology_64.o
14obj-$(CONFIG_ACPI_NUMA) += srat_64.o 15obj-$(CONFIG_ACPI_NUMA) += srat_64.o
15obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
16endif 16endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 5e7f6430c27..6d840338f80 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -12,9 +12,10 @@
12 * of the License. 12 * of the License.
13 */ 13 */
14 14
15#include <linux/debugfs.h>
16#include <linux/mm.h>
15#include <linux/module.h> 17#include <linux/module.h>
16#include <linux/seq_file.h> 18#include <linux/seq_file.h>
17#include <linux/debugfs.h>
18 19
19#include <asm/pgtable.h> 20#include <asm/pgtable.h>
20 21
@@ -28,73 +29,107 @@ struct pg_state {
28 pgprot_t current_prot; 29 pgprot_t current_prot;
29 unsigned long start_address; 30 unsigned long start_address;
30 unsigned long current_address; 31 unsigned long current_address;
31 int printed_vmalloc; 32 const struct addr_marker *marker;
32 int printed_modules;
33 int printed_vmemmap;
34 int printed_highmap;
35}; 33};
36 34
37/* Multipliers for offsets within the PTEs */ 35struct addr_marker {
38#define LEVEL_4_MULT (PAGE_SIZE) 36 unsigned long start_address;
39#define LEVEL_3_MULT (512UL * LEVEL_4_MULT) 37 const char *name;
40#define LEVEL_2_MULT (512UL * LEVEL_3_MULT) 38};
41#define LEVEL_1_MULT (512UL * LEVEL_2_MULT) 39
40/* Address space markers hints */
41static struct addr_marker address_markers[] = {
42 { 0, "User Space" },
43#ifdef CONFIG_X86_64
44 { 0x8000000000000000UL, "Kernel Space" },
45 { 0xffff810000000000UL, "Low Kernel Mapping" },
46 { VMALLOC_START, "vmalloc() Area" },
47 { MODULES_VADDR, "Modules" },
48 { MODULES_END, "End Modules" },
49 { VMEMMAP_START, "Vmemmap" },
50 { __START_KERNEL_map, "High Kernel Mapping" },
51#else
52 { PAGE_OFFSET, "Kernel Mapping" },
53 { 0/* VMALLOC_START */, "vmalloc() Area" },
54 { 0/*VMALLOC_END*/, "vmalloc() End" },
55# ifdef CONFIG_HIGHMEM
56 { 0/*PKMAP_BASE*/, "Persisent kmap() Area" },
57# endif
58 { 0/*FIXADDR_START*/, "Fixmap Area" },
59#endif
60 { -1, NULL } /* End of list */
61};
42 62
63/* Multipliers for offsets within the PTEs */
64#define PTE_LEVEL_MULT (PAGE_SIZE)
65#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
66#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
67#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
43 68
44/* 69/*
45 * Print a readable form of a pgprot_t to the seq_file 70 * Print a readable form of a pgprot_t to the seq_file
46 */ 71 */
47static void printk_prot(struct seq_file *m, pgprot_t prot, int level) 72static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
48{ 73{
49 unsigned long pr = pgprot_val(prot); 74 pgprotval_t pr = pgprot_val(prot);
50 75 static const char * const level_name[] =
51 if (pr & _PAGE_USER) 76 { "cr3", "pgd", "pud", "pmd", "pte" };
52 seq_printf(m, "USR "); 77
53 else 78 if (!pgprot_val(prot)) {
54 seq_printf(m, " "); 79 /* Not present */
55 if (pr & _PAGE_RW) 80 seq_printf(m, " ");
56 seq_printf(m, "RW "); 81 } else {
57 else 82 if (pr & _PAGE_USER)
58 seq_printf(m, "ro "); 83 seq_printf(m, "USR ");
59 if (pr & _PAGE_PWT)
60 seq_printf(m, "PWT ");
61 else
62 seq_printf(m, " ");
63 if (pr & _PAGE_PCD)
64 seq_printf(m, "PCD ");
65 else
66 seq_printf(m, " ");
67
68 /* Bit 9 has a different meaning on level 3 vs 4 */
69 if (level <= 3) {
70 if (pr & _PAGE_PSE)
71 seq_printf(m, "PSE ");
72 else 84 else
73 seq_printf(m, " "); 85 seq_printf(m, " ");
74 } else { 86 if (pr & _PAGE_RW)
75 if (pr & _PAGE_PAT) 87 seq_printf(m, "RW ");
76 seq_printf(m, "pat "); 88 else
89 seq_printf(m, "ro ");
90 if (pr & _PAGE_PWT)
91 seq_printf(m, "PWT ");
92 else
93 seq_printf(m, " ");
94 if (pr & _PAGE_PCD)
95 seq_printf(m, "PCD ");
77 else 96 else
78 seq_printf(m, " "); 97 seq_printf(m, " ");
98
99 /* Bit 9 has a different meaning on level 3 vs 4 */
100 if (level <= 3) {
101 if (pr & _PAGE_PSE)
102 seq_printf(m, "PSE ");
103 else
104 seq_printf(m, " ");
105 } else {
106 if (pr & _PAGE_PAT)
107 seq_printf(m, "pat ");
108 else
109 seq_printf(m, " ");
110 }
111 if (pr & _PAGE_GLOBAL)
112 seq_printf(m, "GLB ");
113 else
114 seq_printf(m, " ");
115 if (pr & _PAGE_NX)
116 seq_printf(m, "NX ");
117 else
118 seq_printf(m, "x ");
79 } 119 }
80 if (pr & _PAGE_GLOBAL) 120 seq_printf(m, "%s\n", level_name[level]);
81 seq_printf(m, "GLB ");
82 else
83 seq_printf(m, " ");
84 if (pr & _PAGE_NX)
85 seq_printf(m, "NX ");
86 else
87 seq_printf(m, "x ");
88} 121}
89 122
90/* 123/*
91 * Sign-extend the 48 bit address to 64 bit 124 * On 64 bits, sign-extend the 48 bit address to 64 bit
92 */ 125 */
93static unsigned long sign_extend(unsigned long u) 126static unsigned long normalize_addr(unsigned long u)
94{ 127{
95 if (u>>47) 128#ifdef CONFIG_X86_64
96 u = u | (0xffffUL << 48); 129 return (signed long)(u << 16) >> 16;
130#else
97 return u; 131 return u;
132#endif
98} 133}
99 134
100/* 135/*
@@ -103,81 +138,62 @@ static unsigned long sign_extend(unsigned long u)
103 * print what we collected so far. 138 * print what we collected so far.
104 */ 139 */
105static void note_page(struct seq_file *m, struct pg_state *st, 140static void note_page(struct seq_file *m, struct pg_state *st,
106 pgprot_t new_prot, int level) 141 pgprot_t new_prot, int level)
107{ 142{
108 unsigned long prot, cur; 143 pgprotval_t prot, cur;
144 static const char units[] = "KMGTPE";
109 145
110 /* 146 /*
111 * If we have a "break" in the series, we need to flush the state that 147 * If we have a "break" in the series, we need to flush the state that
112 * we have now. "break" is either changing perms or a different level. 148 * we have now. "break" is either changing perms, levels or
149 * address space marker.
113 */ 150 */
114 prot = pgprot_val(new_prot) & ~(PTE_MASK); 151 prot = pgprot_val(new_prot) & ~(PTE_MASK);
115 cur = pgprot_val(st->current_prot) & ~(PTE_MASK); 152 cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
116 153
117 if ((prot != cur || level != st->level) && 154 if (!st->level) {
118 st->current_address != st->start_address) { 155 /* First entry */
119 char unit = 'K'; 156 st->current_prot = new_prot;
157 st->level = level;
158 st->marker = address_markers;
159 seq_printf(m, "---[ %s ]---\n", st->marker->name);
160 } else if (prot != cur || level != st->level ||
161 st->current_address >= st->marker[1].start_address) {
162 const char *unit = units;
120 unsigned long delta; 163 unsigned long delta;
121 164
122 /* 165 /*
123 * We print markers for special areas of address space,
124 * such as the start of vmalloc space etc.
125 * This helps in the interpretation.
126 */
127 if (!st->printed_vmalloc &&
128 st->start_address >= VMALLOC_START) {
129 seq_printf(m, "---[ VMALLOC SPACE ]---\n");
130 st->printed_vmalloc = 1;
131 }
132 if (!st->printed_modules &&
133 st->start_address >= MODULES_VADDR) {
134 seq_printf(m, "---[ MODULES SPACE ]---\n");
135 st->printed_modules = 1;
136 }
137 if (st->printed_modules < 2 &&
138 st->start_address >= MODULES_END) {
139 seq_printf(m, "---[ END MODULES SPACE ]---\n");
140 st->printed_modules = 2;
141 }
142 if (!st->printed_vmemmap &&
143 st->start_address >= VMEMMAP_START) {
144 seq_printf(m, "---[ VMMEMMAP SPACE ]---\n");
145 st->printed_vmemmap = 1;
146 }
147 if (!st->printed_highmap &&
148 st->start_address >= __START_KERNEL_map) {
149 seq_printf(m, "---[ HIGH KERNEL MAPPING ]---\n");
150 st->printed_highmap = 1;
151 }
152
153 /*
154 * Now print the actual finished series 166 * Now print the actual finished series
155 */ 167 */
156 seq_printf(m, "[ %016lx - %016lx ", 168 seq_printf(m, "0x%p-0x%p ",
157 st->start_address, st->current_address); 169 (void *)st->start_address,
170 (void *)st->current_address);
158 171
159 delta = (st->current_address - st->start_address) >> 10; 172 delta = (st->current_address - st->start_address) >> 10;
160 if ((delta & 1023) == 0) { 173 while (!(delta & 1023) && unit[1]) {
161 delta = delta >> 10; 174 delta >>= 10;
162 unit = 'M'; 175 unit++;
163 } 176 }
164 if (pgprot_val(st->current_prot)) { 177 seq_printf(m, "%9lu%c ", delta, *unit);
165 seq_printf(m, "Size %9lu%cb ", delta, unit); 178 printk_prot(m, st->current_prot, st->level);
166 printk_prot(m, st->current_prot, st->level); 179
167 seq_printf(m, "L%i]\n", st->level); 180 /*
168 } else { 181 * We print markers for special areas of address space,
169 /* don't print protections on non-present memory */ 182 * such as the start of vmalloc space etc.
170 seq_printf(m, "%14lu%cb", delta, unit); 183 * This helps in the interpretation.
171 seq_printf(m, " L%i]\n", 184 */
172 st->level); 185 if (st->current_address >= st->marker[1].start_address) {
186 st->marker++;
187 seq_printf(m, "---[ %s ]---\n", st->marker->name);
173 } 188 }
189
174 st->start_address = st->current_address; 190 st->start_address = st->current_address;
175 st->current_prot = new_prot; 191 st->current_prot = new_prot;
176 st->level = level; 192 st->level = level;
177 }; 193 }
178} 194}
179 195
180static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr, 196static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
181 unsigned long P) 197 unsigned long P)
182{ 198{
183 int i; 199 int i;
@@ -187,14 +203,15 @@ static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
187 for (i = 0; i < PTRS_PER_PTE; i++) { 203 for (i = 0; i < PTRS_PER_PTE; i++) {
188 pgprot_t prot = pte_pgprot(*start); 204 pgprot_t prot = pte_pgprot(*start);
189 205
190 st->current_address = sign_extend(P + i * LEVEL_4_MULT); 206 st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
191 note_page(m, st, prot, 4); 207 note_page(m, st, prot, 4);
192 start++; 208 start++;
193 } 209 }
194} 210}
195 211
212#if PTRS_PER_PMD > 1
196 213
197static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr, 214static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
198 unsigned long P) 215 unsigned long P)
199{ 216{
200 int i; 217 int i;
@@ -202,25 +219,30 @@ static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,
202 219
203 start = (pmd_t *) pud_page_vaddr(addr); 220 start = (pmd_t *) pud_page_vaddr(addr);
204 for (i = 0; i < PTRS_PER_PMD; i++) { 221 for (i = 0; i < PTRS_PER_PMD; i++) {
205 st->current_address = sign_extend(P + i * LEVEL_3_MULT); 222 st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
206 if (!pmd_none(*start)) { 223 if (!pmd_none(*start)) {
207 unsigned long prot; 224 pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;
208 225
209 prot = pmd_val(*start) & ~(PTE_MASK); 226 if (pmd_large(*start) || !pmd_present(*start))
210 /* Deal with 2Mb pages */
211 if (pmd_large(*start))
212 note_page(m, st, __pgprot(prot), 3); 227 note_page(m, st, __pgprot(prot), 3);
213 else 228 else
214 walk_level_4(m, st, *start, 229 walk_pte_level(m, st, *start,
215 P + i * LEVEL_3_MULT); 230 P + i * PMD_LEVEL_MULT);
216 } else 231 } else
217 note_page(m, st, __pgprot(0), 3); 232 note_page(m, st, __pgprot(0), 3);
218 start++; 233 start++;
219 } 234 }
220} 235}
221 236
237#else
238#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
239#define pud_large(a) pmd_large(__pmd(pud_val(a)))
240#define pud_none(a) pmd_none(__pmd(pud_val(a)))
241#endif
222 242
223static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr, 243#if PTRS_PER_PUD > 1
244
245static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
224 unsigned long P) 246 unsigned long P)
225{ 247{
226 int i; 248 int i;
@@ -229,16 +251,15 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
229 start = (pud_t *) pgd_page_vaddr(addr); 251 start = (pud_t *) pgd_page_vaddr(addr);
230 252
231 for (i = 0; i < PTRS_PER_PUD; i++) { 253 for (i = 0; i < PTRS_PER_PUD; i++) {
254 st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
232 if (!pud_none(*start)) { 255 if (!pud_none(*start)) {
233 unsigned long prot; 256 pgprotval_t prot = pud_val(*start) & ~PTE_MASK;
234 257
235 prot = pud_val(*start) & ~(PTE_MASK); 258 if (pud_large(*start) || !pud_present(*start))
236 /* Deal with 1Gb pages */
237 if (pud_large(*start))
238 note_page(m, st, __pgprot(prot), 2); 259 note_page(m, st, __pgprot(prot), 2);
239 else 260 else
240 walk_level_3(m, st, *start, 261 walk_pmd_level(m, st, *start,
241 P + i * LEVEL_2_MULT); 262 P + i * PUD_LEVEL_MULT);
242 } else 263 } else
243 note_page(m, st, __pgprot(0), 2); 264 note_page(m, st, __pgprot(0), 2);
244 265
@@ -246,28 +267,48 @@ static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
246 } 267 }
247} 268}
248 269
249static void walk_level_1(struct seq_file *m) 270#else
271#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
272#define pgd_large(a) pud_large(__pud(pgd_val(a)))
273#define pgd_none(a) pud_none(__pud(pgd_val(a)))
274#endif
275
276static void walk_pgd_level(struct seq_file *m)
250{ 277{
278#ifdef CONFIG_X86_64
251 pgd_t *start = (pgd_t *) &init_level4_pgt; 279 pgd_t *start = (pgd_t *) &init_level4_pgt;
280#else
281 pgd_t *start = swapper_pg_dir;
282#endif
252 int i; 283 int i;
253 struct pg_state st; 284 struct pg_state st;
254 285
255 memset(&st, 0, sizeof(st)); 286 memset(&st, 0, sizeof(st));
256 st.level = 1;
257 287
258 for (i = 0; i < PTRS_PER_PGD; i++) { 288 for (i = 0; i < PTRS_PER_PGD; i++) {
259 if (!pgd_none(*start)) 289 st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
260 walk_level_2(m, &st, *start, i * LEVEL_1_MULT); 290 if (!pgd_none(*start)) {
261 else 291 pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;
292
293 if (pgd_large(*start) || !pgd_present(*start))
294 note_page(m, &st, __pgprot(prot), 1);
295 else
296 walk_pud_level(m, &st, *start,
297 i * PGD_LEVEL_MULT);
298 } else
262 note_page(m, &st, __pgprot(0), 1); 299 note_page(m, &st, __pgprot(0), 1);
300
263 start++; 301 start++;
264 } 302 }
303
304 /* Flush out the last page */
305 st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
306 note_page(m, &st, __pgprot(0), 0);
265} 307}
266 308
267static int ptdump_show(struct seq_file *m, void *v) 309static int ptdump_show(struct seq_file *m, void *v)
268{ 310{
269 seq_puts(m, "Kernel pagetable dump\n"); 311 walk_pgd_level(m);
270 walk_level_1(m);
271 return 0; 312 return 0;
272} 313}
273 314
@@ -287,6 +328,18 @@ int pt_dump_init(void)
287{ 328{
288 struct dentry *pe; 329 struct dentry *pe;
289 330
331#ifdef CONFIG_X86_32
332 /* Not a compile-time constant on x86-32 */
333 address_markers[2].start_address = VMALLOC_START;
334 address_markers[3].start_address = VMALLOC_END;
335# ifdef CONFIG_HIGHMEM
336 address_markers[4].start_address = PKMAP_BASE;
337 address_markers[5].start_address = FIXADDR_START;
338# else
339 address_markers[4].start_address = FIXADDR_START;
340# endif
341#endif
342
290 pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, 343 pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
291 &ptdump_fops); 344 &ptdump_fops);
292 if (!pe) 345 if (!pe)