aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorArjan van de Ven <arjan@linux.intel.com>2008-04-17 11:40:45 -0400
committerIngo Molnar <mingo@elte.hu>2008-04-17 11:40:45 -0400
commit926e5392ba8a388ae32ca0d2714cc2c73945c609 (patch)
tree2718b50b8b66a3614f47d3246b080ee8511b299e /arch
parent2596e0fae094be9354b29ddb17e6326a18012e8c (diff)
x86: add code to dump the (kernel) page tables for visual inspection by kernel developers
This patch adds code to the kernel to have an (optional) kernel_page_tables debugfs file that basically dumps the kernel pagetables; this allows us kernel developers to verify that nothing fishy is going on and that the various mappings are set up correctly. This was quite useful in finding various change_page_attr() bugs, and is very likely to be useful in the future as well. Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> Cc: mingo@elte.hu Cc: tglx@tglx.de Cc: hpa@zytor.com Signed-off-by: Ingo Molnar <mingo@elte.hu> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/Kconfig.debug12
-rw-r--r--arch/x86/mm/Makefile1
-rw-r--r--arch/x86/mm/dump_pagetables.c301
3 files changed, 314 insertions, 0 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 702eb39901ca..cb7002eca887 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -54,6 +54,18 @@ config DEBUG_PER_CPU_MAPS
54 54
55 Say N if unsure. 55 Say N if unsure.
56 56
57config X86_PTDUMP
58 bool "Export kernel pagetable layout to userspace via debugfs"
59 depends on X86_64
60 select DEBUG_FS
61 help
62 Say Y here if you want to show the kernel pagetable layout in a
63 debugfs file. This information is only useful for kernel developers
64 who are working in architecture specific areas of the kernel.
65 It is probably not a good idea to enable this feature in a production
66 kernel.
67 If in doubt, say "N"
68
57config DEBUG_RODATA 69config DEBUG_RODATA
58 bool "Write protect kernel read-only data structures" 70 bool "Write protect kernel read-only data structures"
59 default y 71 default y
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 8e81660604bc..28632f42ca66 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -12,4 +12,5 @@ else
12obj-$(CONFIG_NUMA) += numa_64.o 12obj-$(CONFIG_NUMA) += numa_64.o
13obj-$(CONFIG_K8_NUMA) += k8topology_64.o 13obj-$(CONFIG_K8_NUMA) += k8topology_64.o
14obj-$(CONFIG_ACPI_NUMA) += srat_64.o 14obj-$(CONFIG_ACPI_NUMA) += srat_64.o
15obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
15endif 16endif
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
new file mode 100644
index 000000000000..5e7f6430c27e
--- /dev/null
+++ b/arch/x86/mm/dump_pagetables.c
@@ -0,0 +1,301 @@
1/*
2 * Debug helper to dump the current kernel pagetables of the system
3 * so that we can see what the various memory ranges are set to.
4 *
5 * (C) Copyright 2008 Intel Corporation
6 *
7 * Author: Arjan van de Ven <arjan@linux.intel.com>
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; version 2
12 * of the License.
13 */
14
15#include <linux/module.h>
16#include <linux/seq_file.h>
17#include <linux/debugfs.h>
18
19#include <asm/pgtable.h>
20
21/*
22 * The dumper groups pagetable entries of the same type into one, and for
23 * that it needs to keep some state when walking, and flush this state
24 * when a "break" in the continuity is found.
25 */
26struct pg_state {
27 int level;
28 pgprot_t current_prot;
29 unsigned long start_address;
30 unsigned long current_address;
31 int printed_vmalloc;
32 int printed_modules;
33 int printed_vmemmap;
34 int printed_highmap;
35};
36
/*
 * Amount of virtual address space spanned by one entry at each level of
 * the walk.  The walkers add "index * LEVEL_n_MULT" to the base address
 * of a table to get the address an entry maps.  Each level up covers 512
 * times as much as the level below it.
 */
#define LEVEL_4_MULT (PAGE_SIZE)		/* one PTE */
#define LEVEL_3_MULT (512UL * LEVEL_4_MULT)	/* one PMD entry */
#define LEVEL_2_MULT (512UL * LEVEL_3_MULT)	/* one PUD entry */
#define LEVEL_1_MULT (512UL * LEVEL_2_MULT)	/* one PGD entry */
42
43
44/*
45 * Print a readable form of a pgprot_t to the seq_file
46 */
47static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
48{
49 unsigned long pr = pgprot_val(prot);
50
51 if (pr & _PAGE_USER)
52 seq_printf(m, "USR ");
53 else
54 seq_printf(m, " ");
55 if (pr & _PAGE_RW)
56 seq_printf(m, "RW ");
57 else
58 seq_printf(m, "ro ");
59 if (pr & _PAGE_PWT)
60 seq_printf(m, "PWT ");
61 else
62 seq_printf(m, " ");
63 if (pr & _PAGE_PCD)
64 seq_printf(m, "PCD ");
65 else
66 seq_printf(m, " ");
67
68 /* Bit 9 has a different meaning on level 3 vs 4 */
69 if (level <= 3) {
70 if (pr & _PAGE_PSE)
71 seq_printf(m, "PSE ");
72 else
73 seq_printf(m, " ");
74 } else {
75 if (pr & _PAGE_PAT)
76 seq_printf(m, "pat ");
77 else
78 seq_printf(m, " ");
79 }
80 if (pr & _PAGE_GLOBAL)
81 seq_printf(m, "GLB ");
82 else
83 seq_printf(m, " ");
84 if (pr & _PAGE_NX)
85 seq_printf(m, "NX ");
86 else
87 seq_printf(m, "x ");
88}
89
/*
 * Turn a 48 bit virtual address into its 64 bit form: when bit 47 (or
 * anything above it) is set, the top 16 bits are filled with ones;
 * otherwise the value is returned untouched.
 */
static unsigned long sign_extend(unsigned long u)
{
	const unsigned long upper_bits = 0xffffUL << 48;

	return (u >> 47) ? (u | upper_bits) : u;
}
99
100/*
101 * This function gets called on a break in a continuous series
102 * of PTE entries; the next one is different so we need to
103 * print what we collected so far.
104 */
105static void note_page(struct seq_file *m, struct pg_state *st,
106 pgprot_t new_prot, int level)
107{
108 unsigned long prot, cur;
109
110 /*
111 * If we have a "break" in the series, we need to flush the state that
112 * we have now. "break" is either changing perms or a different level.
113 */
114 prot = pgprot_val(new_prot) & ~(PTE_MASK);
115 cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
116
117 if ((prot != cur || level != st->level) &&
118 st->current_address != st->start_address) {
119 char unit = 'K';
120 unsigned long delta;
121
122 /*
123 * We print markers for special areas of address space,
124 * such as the start of vmalloc space etc.
125 * This helps in the interpretation.
126 */
127 if (!st->printed_vmalloc &&
128 st->start_address >= VMALLOC_START) {
129 seq_printf(m, "---[ VMALLOC SPACE ]---\n");
130 st->printed_vmalloc = 1;
131 }
132 if (!st->printed_modules &&
133 st->start_address >= MODULES_VADDR) {
134 seq_printf(m, "---[ MODULES SPACE ]---\n");
135 st->printed_modules = 1;
136 }
137 if (st->printed_modules < 2 &&
138 st->start_address >= MODULES_END) {
139 seq_printf(m, "---[ END MODULES SPACE ]---\n");
140 st->printed_modules = 2;
141 }
142 if (!st->printed_vmemmap &&
143 st->start_address >= VMEMMAP_START) {
144 seq_printf(m, "---[ VMMEMMAP SPACE ]---\n");
145 st->printed_vmemmap = 1;
146 }
147 if (!st->printed_highmap &&
148 st->start_address >= __START_KERNEL_map) {
149 seq_printf(m, "---[ HIGH KERNEL MAPPING ]---\n");
150 st->printed_highmap = 1;
151 }
152
153 /*
154 * Now print the actual finished series
155 */
156 seq_printf(m, "[ %016lx - %016lx ",
157 st->start_address, st->current_address);
158
159 delta = (st->current_address - st->start_address) >> 10;
160 if ((delta & 1023) == 0) {
161 delta = delta >> 10;
162 unit = 'M';
163 }
164 if (pgprot_val(st->current_prot)) {
165 seq_printf(m, "Size %9lu%cb ", delta, unit);
166 printk_prot(m, st->current_prot, st->level);
167 seq_printf(m, "L%i]\n", st->level);
168 } else {
169 /* don't print protections on non-present memory */
170 seq_printf(m, "%14lu%cb", delta, unit);
171 seq_printf(m, " L%i]\n",
172 st->level);
173 }
174 st->start_address = st->current_address;
175 st->current_prot = new_prot;
176 st->level = level;
177 };
178}
179
180static void walk_level_4(struct seq_file *m, struct pg_state *st, pmd_t addr,
181 unsigned long P)
182{
183 int i;
184 pte_t *start;
185
186 start = (pte_t *) pmd_page_vaddr(addr);
187 for (i = 0; i < PTRS_PER_PTE; i++) {
188 pgprot_t prot = pte_pgprot(*start);
189
190 st->current_address = sign_extend(P + i * LEVEL_4_MULT);
191 note_page(m, st, prot, 4);
192 start++;
193 }
194}
195
196
197static void walk_level_3(struct seq_file *m, struct pg_state *st, pud_t addr,
198 unsigned long P)
199{
200 int i;
201 pmd_t *start;
202
203 start = (pmd_t *) pud_page_vaddr(addr);
204 for (i = 0; i < PTRS_PER_PMD; i++) {
205 st->current_address = sign_extend(P + i * LEVEL_3_MULT);
206 if (!pmd_none(*start)) {
207 unsigned long prot;
208
209 prot = pmd_val(*start) & ~(PTE_MASK);
210 /* Deal with 2Mb pages */
211 if (pmd_large(*start))
212 note_page(m, st, __pgprot(prot), 3);
213 else
214 walk_level_4(m, st, *start,
215 P + i * LEVEL_3_MULT);
216 } else
217 note_page(m, st, __pgprot(0), 3);
218 start++;
219 }
220}
221
222
223static void walk_level_2(struct seq_file *m, struct pg_state *st, pgd_t addr,
224 unsigned long P)
225{
226 int i;
227 pud_t *start;
228
229 start = (pud_t *) pgd_page_vaddr(addr);
230
231 for (i = 0; i < PTRS_PER_PUD; i++) {
232 if (!pud_none(*start)) {
233 unsigned long prot;
234
235 prot = pud_val(*start) & ~(PTE_MASK);
236 /* Deal with 1Gb pages */
237 if (pud_large(*start))
238 note_page(m, st, __pgprot(prot), 2);
239 else
240 walk_level_3(m, st, *start,
241 P + i * LEVEL_2_MULT);
242 } else
243 note_page(m, st, __pgprot(0), 2);
244
245 start++;
246 }
247}
248
249static void walk_level_1(struct seq_file *m)
250{
251 pgd_t *start = (pgd_t *) &init_level4_pgt;
252 int i;
253 struct pg_state st;
254
255 memset(&st, 0, sizeof(st));
256 st.level = 1;
257
258 for (i = 0; i < PTRS_PER_PGD; i++) {
259 if (!pgd_none(*start))
260 walk_level_2(m, &st, *start, i * LEVEL_1_MULT);
261 else
262 note_page(m, &st, __pgprot(0), 1);
263 start++;
264 }
265}
266
/*
 * seq_file "show" callback: write a title line followed by the complete
 * page table dump.  @v is unused.  Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	seq_puts(m, "Kernel pagetable dump\n");

	walk_level_1(m);

	return 0;
}
273
274static int ptdump_open(struct inode *inode, struct file *filp)
275{
276 return single_open(filp, ptdump_show, NULL);
277}
278
279static const struct file_operations ptdump_fops = {
280 .open = ptdump_open,
281 .read = seq_read,
282 .llseek = seq_lseek,
283 .release = single_release,
284};
285
286int pt_dump_init(void)
287{
288 struct dentry *pe;
289
290 pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
291 &ptdump_fops);
292 if (!pe)
293 return -ENOMEM;
294
295 return 0;
296}
297
298__initcall(pt_dump_init);
299MODULE_LICENSE("GPL");
300MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
301MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");