diff options
author | Heiko Carstens <heiko.carstens@de.ibm.com> | 2012-10-04 08:46:12 -0400 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2012-10-09 08:16:58 -0400 |
commit | e76e82d772522b05ed93228478d2a4460754b6a4 (patch) | |
tree | ea31d76fc3103518c1802d135e71266feabd70f1 /arch/s390/mm | |
parent | 51eee033dca3d6dc81febc5a69f30b964f3bddf3 (diff) |
s390/mm: add page table dumper
This is more or less the same as the x86 page table dumper which was
merged four years ago: 926e5392 "x86: add code to dump the (kernel)
page tables for visual inspection by kernel developers".
We add a file at /sys/kernel/debug/kernel_page_tables for debugging
purposes so it's quite easy to see the kernel page table layout and
possible odd mappings:
---[ Identity Mapping ]---
0x0000000000000000-0x0000000000100000 1M PTE RW
---[ Kernel Image Start ]---
0x0000000000100000-0x0000000000800000 7M PMD RO
0x0000000000800000-0x00000000008a9000 676K PTE RO
0x00000000008a9000-0x0000000000900000 348K PTE RW
0x0000000000900000-0x0000000001500000 12M PMD RW
---[ Kernel Image End ]---
0x0000000001500000-0x0000000280000000 10219M PMD RW
0x0000000280000000-0x000003d280000000 3904G PUD I
---[ vmemmap Area ]---
0x000003d280000000-0x000003d288c00000 140M PTE RW
0x000003d288c00000-0x000003d300000000 1908M PMD I
0x000003d300000000-0x000003e000000000 52G PUD I
---[ vmalloc Area ]---
0x000003e000000000-0x000003e000009000 36K PTE RW
0x000003e000009000-0x000003e0000ee000 916K PTE I
0x000003e0000ee000-0x000003e000146000 352K PTE RW
0x000003e000146000-0x000003e000200000 744K PTE I
0x000003e000200000-0x000003e080000000 2046M PMD I
0x000003e080000000-0x0000040000000000 126G PUD I
This usually only makes sense for kernel developers. The output
with CONFIG_DEBUG_PAGEALLOC is not very helpful because of the
huge number of mapped-out pages; however, I decided for the time
being not to add a !DEBUG_PAGEALLOC dependency.
Maybe it's helpful for somebody even with that option.
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/s390/mm/dump_pagetables.c | 219 |
2 files changed, 220 insertions, 0 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 0f5536b0c1a1..1bea6d1f55ab 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile | |||
@@ -7,3 +7,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ | |||
7 | obj-$(CONFIG_CMM) += cmm.o | 7 | obj-$(CONFIG_CMM) += cmm.o |
8 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | 8 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o |
9 | obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o | 9 | obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o |
10 | obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o | ||
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c new file mode 100644 index 000000000000..cd1c62d160ed --- /dev/null +++ b/arch/s390/mm/dump_pagetables.c | |||
@@ -0,0 +1,219 @@ | |||
1 | #include <linux/seq_file.h> | ||
2 | #include <linux/debugfs.h> | ||
3 | #include <linux/module.h> | ||
4 | #include <linux/mm.h> | ||
5 | #include <asm/sections.h> | ||
6 | #include <asm/pgtable.h> | ||
7 | |||
/*
 * Exclusive upper bound of the virtual address range that is dumped.
 * Set once by pt_dump_init() and used as a stop condition by every
 * walk_*_level() loop.
 */
static unsigned long max_addr;
9 | |||
/*
 * A named boundary in the kernel address space. When the dump reaches
 * start_address, note_page() prints name as a "---[ name ]---" header.
 */
struct addr_marker {
	unsigned long	start_address;
	const char	*name;
};
14 | |||
/* Indices into address_markers[]; the order matches the table below. */
enum address_markers_idx {
	IDENTITY_NR,
	KERNEL_START_NR,
	KERNEL_END_NR,
	VMEMMAP_NR,
	VMALLOC_NR,
};
22 | |||
23 | static struct addr_marker address_markers[] = { | ||
24 | [IDENTITY_NR] = {0, "Identity Mapping"}, | ||
25 | [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"}, | ||
26 | [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"}, | ||
27 | [VMEMMAP_NR] = {0, "vmemmap Area"}, | ||
28 | [VMALLOC_NR] = {0, "vmalloc Area"}, | ||
29 | { -1, NULL } | ||
30 | }; | ||
31 | |||
/*
 * Running state of one page table dump.
 *
 * level:           table level of the range being accumulated; 0 means
 *                  no entry has been noted yet
 * current_prot:    protection bits shared by the accumulated range
 * start_address:   first address of the accumulated range
 * current_address: address of the entry currently being visited
 * marker:          last address_markers[] entry whose header was printed
 */
struct pg_state {
	int			level;
	unsigned int		current_prot;
	unsigned long		start_address;
	unsigned long		current_address;
	const struct addr_marker *marker;
};
39 | |||
40 | static void print_prot(struct seq_file *m, unsigned int pr, int level) | ||
41 | { | ||
42 | static const char * const level_name[] = | ||
43 | { "ASCE", "PGD", "PUD", "PMD", "PTE" }; | ||
44 | |||
45 | seq_printf(m, "%s ", level_name[level]); | ||
46 | if (pr & _PAGE_INVALID) | ||
47 | seq_printf(m, "I\n"); | ||
48 | else | ||
49 | seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW"); | ||
50 | } | ||
51 | |||
52 | static void note_page(struct seq_file *m, struct pg_state *st, | ||
53 | unsigned int new_prot, int level) | ||
54 | { | ||
55 | static const char units[] = "KMGTPE"; | ||
56 | int width = sizeof(unsigned long) * 2; | ||
57 | const char *unit = units; | ||
58 | unsigned int prot, cur; | ||
59 | unsigned long delta; | ||
60 | |||
61 | /* | ||
62 | * If we have a "break" in the series, we need to flush the state | ||
63 | * that we have now. "break" is either changing perms, levels or | ||
64 | * address space marker. | ||
65 | */ | ||
66 | prot = new_prot; | ||
67 | cur = st->current_prot; | ||
68 | |||
69 | if (!st->level) { | ||
70 | /* First entry */ | ||
71 | st->current_prot = new_prot; | ||
72 | st->level = level; | ||
73 | st->marker = address_markers; | ||
74 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | ||
75 | } else if (prot != cur || level != st->level || | ||
76 | st->current_address >= st->marker[1].start_address) { | ||
77 | /* Print the actual finished series */ | ||
78 | seq_printf(m, "0x%0*lx-0x%0*lx", | ||
79 | width, st->start_address, | ||
80 | width, st->current_address); | ||
81 | delta = (st->current_address - st->start_address) >> 10; | ||
82 | while (!(delta & 0x3ff) && unit[1]) { | ||
83 | delta >>= 10; | ||
84 | unit++; | ||
85 | } | ||
86 | seq_printf(m, "%9lu%c ", delta, *unit); | ||
87 | print_prot(m, st->current_prot, st->level); | ||
88 | if (st->current_address >= st->marker[1].start_address) { | ||
89 | st->marker++; | ||
90 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | ||
91 | } | ||
92 | st->start_address = st->current_address; | ||
93 | st->current_prot = new_prot; | ||
94 | st->level = level; | ||
95 | } | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * The actual page table walker functions. In order to keep the implementation | ||
100 | * of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO | ||
101 | * flags to note_page() if a region, segment or page table entry is invalid or | ||
102 | * read-only. | ||
103 | * After all it's just a hint that the current level being walked contains an | ||
104 | * invalid or read-only entry. | ||
105 | */ | ||
106 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, | ||
107 | pmd_t *pmd, unsigned long addr) | ||
108 | { | ||
109 | unsigned int prot; | ||
110 | pte_t *pte; | ||
111 | int i; | ||
112 | |||
113 | for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { | ||
114 | st->current_address = addr; | ||
115 | pte = pte_offset_kernel(pmd, addr); | ||
116 | prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID); | ||
117 | note_page(m, st, prot, 4); | ||
118 | addr += PAGE_SIZE; | ||
119 | } | ||
120 | } | ||
121 | |||
122 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, | ||
123 | pud_t *pud, unsigned long addr) | ||
124 | { | ||
125 | unsigned int prot; | ||
126 | pmd_t *pmd; | ||
127 | int i; | ||
128 | |||
129 | for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) { | ||
130 | st->current_address = addr; | ||
131 | pmd = pmd_offset(pud, addr); | ||
132 | if (!pmd_none(*pmd)) { | ||
133 | if (pmd_large(*pmd)) { | ||
134 | prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO; | ||
135 | note_page(m, st, prot, 3); | ||
136 | } else | ||
137 | walk_pte_level(m, st, pmd, addr); | ||
138 | } else | ||
139 | note_page(m, st, _PAGE_INVALID, 3); | ||
140 | addr += PMD_SIZE; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, | ||
145 | pgd_t *pgd, unsigned long addr) | ||
146 | { | ||
147 | pud_t *pud; | ||
148 | int i; | ||
149 | |||
150 | for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { | ||
151 | st->current_address = addr; | ||
152 | pud = pud_offset(pgd, addr); | ||
153 | if (!pud_none(*pud)) | ||
154 | walk_pmd_level(m, st, pud, addr); | ||
155 | else | ||
156 | note_page(m, st, _PAGE_INVALID, 2); | ||
157 | addr += PUD_SIZE; | ||
158 | } | ||
159 | } | ||
160 | |||
161 | static void walk_pgd_level(struct seq_file *m) | ||
162 | { | ||
163 | unsigned long addr = 0; | ||
164 | struct pg_state st; | ||
165 | pgd_t *pgd; | ||
166 | int i; | ||
167 | |||
168 | memset(&st, 0, sizeof(st)); | ||
169 | for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) { | ||
170 | st.current_address = addr; | ||
171 | pgd = pgd_offset_k(addr); | ||
172 | if (!pgd_none(*pgd)) | ||
173 | walk_pud_level(m, &st, pgd, addr); | ||
174 | else | ||
175 | note_page(m, &st, _PAGE_INVALID, 1); | ||
176 | addr += PGDIR_SIZE; | ||
177 | } | ||
178 | /* Flush out the last page */ | ||
179 | st.current_address = max_addr; | ||
180 | note_page(m, &st, 0, 0); | ||
181 | } | ||
182 | |||
/*
 * seq_file show callback: emit the complete page table dump into @m.
 * @v is not used. Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);

	return 0;
}
188 | |||
189 | static int ptdump_open(struct inode *inode, struct file *filp) | ||
190 | { | ||
191 | return single_open(filp, ptdump_show, NULL); | ||
192 | } | ||
193 | |||
194 | static const struct file_operations ptdump_fops = { | ||
195 | .open = ptdump_open, | ||
196 | .read = seq_read, | ||
197 | .llseek = seq_lseek, | ||
198 | .release = single_release, | ||
199 | }; | ||
200 | |||
201 | static int pt_dump_init(void) | ||
202 | { | ||
203 | /* | ||
204 | * Figure out the maximum virtual address being accessible with the | ||
205 | * kernel ASCE. We need this to keep the page table walker functions | ||
206 | * from accessing non-existent entries. | ||
207 | */ | ||
208 | #ifdef CONFIG_64BIT | ||
209 | max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; | ||
210 | max_addr = 1UL << (max_addr * 11 + 31); | ||
211 | #else | ||
212 | max_addr = 1UL << 31; | ||
213 | #endif | ||
214 | address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; | ||
215 | address_markers[VMALLOC_NR].start_address = VMALLOC_START; | ||
216 | debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); | ||
217 | return 0; | ||
218 | } | ||
219 | device_initcall(pt_dump_init); | ||