Commit | Line | Data |
---|---|---|
926e5392 AV |
1 | /* |
2 | * Debug helper to dump the current kernel pagetables of the system | |
3 | * so that we can see what the various memory ranges are set to. | |
4 | * | |
5 | * (C) Copyright 2008 Intel Corporation | |
6 | * | |
7 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | ||
fe770bf0 PA |
15 | #include <linux/debugfs.h> |
16 | #include <linux/mm.h> | |
926e5392 AV |
17 | #include <linux/module.h> |
18 | #include <linux/seq_file.h> | |
926e5392 AV |
19 | |
20 | #include <asm/pgtable.h> | |
21 | ||
22 | /* | |
23 | * The dumper groups pagetable entries of the same type into one, and for | |
24 | * that it needs to keep some state when walking, and flush this state | |
25 | * when a "break" in the continuity is found. | |
26 | */ | |
27 | struct pg_state { | |
28 | int level; | |
29 | pgprot_t current_prot; | |
30 | unsigned long start_address; | |
31 | unsigned long current_address; | |
fe770bf0 | 32 | const struct addr_marker *marker; |
3891a04a | 33 | unsigned long lines; |
ef6bea6d | 34 | bool to_dmesg; |
926e5392 AV |
35 | }; |
36 | ||
fe770bf0 PA |
/*
 * Describes one named region of the virtual address space.
 */
struct addr_marker {
	/* first virtual address belonging to the region */
	unsigned long start_address;
	/* heading printed when the walker enters the region */
	const char *name;
	/* cap on the lines printed for the region; 0 means no cap */
	unsigned long max_lines;
};
42 | ||
92851e2f AS |
/*
 * Symbolic indices into address_markers[].  The enumerator order must
 * mirror the order of the initializers in address_markers[] below.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,
#ifdef CONFIG_X86_64
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
# ifdef CONFIG_X86_ESPFIX64
	ESPFIX_START_NR,
# endif
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
#else
	VMALLOC_START_NR,
	VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
# endif
	FIXADDR_START_NR,
#endif
};
67 | ||
fe770bf0 PA |
68 | /* Address space markers hints */ |
69 | static struct addr_marker address_markers[] = { | |
70 | { 0, "User Space" }, | |
71 | #ifdef CONFIG_X86_64 | |
72 | { 0x8000000000000000UL, "Kernel Space" }, | |
684eb016 | 73 | { PAGE_OFFSET, "Low Kernel Mapping" }, |
fe770bf0 | 74 | { VMALLOC_START, "vmalloc() Area" }, |
fe770bf0 | 75 | { VMEMMAP_START, "Vmemmap" }, |
8a5a5d15 | 76 | # ifdef CONFIG_X86_ESPFIX64 |
3891a04a | 77 | { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, |
8a5a5d15 | 78 | # endif |
fe770bf0 | 79 | { __START_KERNEL_map, "High Kernel Mapping" }, |
9a79cf9c YL |
80 | { MODULES_VADDR, "Modules" }, |
81 | { MODULES_END, "End Modules" }, | |
fe770bf0 PA |
82 | #else |
83 | { PAGE_OFFSET, "Kernel Mapping" }, | |
84 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
85 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | |
86 | # ifdef CONFIG_HIGHMEM | |
87 | { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, | |
88 | # endif | |
89 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | |
90 | #endif | |
91 | { -1, NULL } /* End of list */ | |
92 | }; | |
926e5392 | 93 | |
fe770bf0 PA |
/*
 * Address stride of one table entry at each level: the walkers compute
 * an entry's address as base + index * <LEVEL>_LEVEL_MULT.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTE_LEVEL_MULT * PTRS_PER_PTE)
#define PUD_LEVEL_MULT (PMD_LEVEL_MULT * PTRS_PER_PMD)
#define PGD_LEVEL_MULT (PUD_LEVEL_MULT * PTRS_PER_PUD)
926e5392 | 99 | |
ef6bea6d BP |
/*
 * Start a new output record: goes to the kernel log at KERN_INFO when
 * to_dmesg is set, otherwise into seq_file m (silently dropped if m is
 * NULL).
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
108 | ||
/*
 * Continue the current output record: uses KERN_CONT when to_dmesg is
 * set, otherwise appends to seq_file m (silently dropped if m is NULL).
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
117 | ||
926e5392 AV |
118 | /* |
119 | * Print a readable form of a pgprot_t to the seq_file | |
120 | */ | |
ef6bea6d | 121 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) |
926e5392 | 122 | { |
fe770bf0 PA |
123 | pgprotval_t pr = pgprot_val(prot); |
124 | static const char * const level_name[] = | |
125 | { "cr3", "pgd", "pud", "pmd", "pte" }; | |
126 | ||
127 | if (!pgprot_val(prot)) { | |
128 | /* Not present */ | |
ef6bea6d | 129 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
130 | } else { |
131 | if (pr & _PAGE_USER) | |
ef6bea6d | 132 | pt_dump_cont_printf(m, dmsg, "USR "); |
926e5392 | 133 | else |
ef6bea6d | 134 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 135 | if (pr & _PAGE_RW) |
ef6bea6d | 136 | pt_dump_cont_printf(m, dmsg, "RW "); |
fe770bf0 | 137 | else |
ef6bea6d | 138 | pt_dump_cont_printf(m, dmsg, "ro "); |
fe770bf0 | 139 | if (pr & _PAGE_PWT) |
ef6bea6d | 140 | pt_dump_cont_printf(m, dmsg, "PWT "); |
fe770bf0 | 141 | else |
ef6bea6d | 142 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 143 | if (pr & _PAGE_PCD) |
ef6bea6d | 144 | pt_dump_cont_printf(m, dmsg, "PCD "); |
926e5392 | 145 | else |
ef6bea6d | 146 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
147 | |
148 | /* Bit 9 has a different meaning on level 3 vs 4 */ | |
149 | if (level <= 3) { | |
150 | if (pr & _PAGE_PSE) | |
ef6bea6d | 151 | pt_dump_cont_printf(m, dmsg, "PSE "); |
fe770bf0 | 152 | else |
ef6bea6d | 153 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
154 | } else { |
155 | if (pr & _PAGE_PAT) | |
ef6bea6d | 156 | pt_dump_cont_printf(m, dmsg, "pat "); |
fe770bf0 | 157 | else |
ef6bea6d | 158 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
159 | } |
160 | if (pr & _PAGE_GLOBAL) | |
ef6bea6d | 161 | pt_dump_cont_printf(m, dmsg, "GLB "); |
fe770bf0 | 162 | else |
ef6bea6d | 163 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 164 | if (pr & _PAGE_NX) |
ef6bea6d | 165 | pt_dump_cont_printf(m, dmsg, "NX "); |
fe770bf0 | 166 | else |
ef6bea6d | 167 | pt_dump_cont_printf(m, dmsg, "x "); |
926e5392 | 168 | } |
ef6bea6d | 169 | pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); |
926e5392 AV |
170 | } |
171 | ||
/*
 * Turn a raw table-walk offset into a printable virtual address.
 *
 * On 64 bits the low 48 bits are sign-extended to the full 64 bit
 * width; on 32 bits the value is returned untouched.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	/* Push bit 47 into the sign position, then shift it back down */
	signed long shifted = (signed long)(u << 16);

	return shifted >> 16;
#else
	return u;
#endif
}
183 | ||
184 | /* | |
185 | * This function gets called on a break in a continuous series | |
186 | * of PTE entries; the next one is different so we need to | |
187 | * print what we collected so far. | |
188 | */ | |
189 | static void note_page(struct seq_file *m, struct pg_state *st, | |
fe770bf0 | 190 | pgprot_t new_prot, int level) |
926e5392 | 191 | { |
fe770bf0 | 192 | pgprotval_t prot, cur; |
3891a04a | 193 | static const char units[] = "BKMGTPE"; |
926e5392 AV |
194 | |
195 | /* | |
196 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
197 | * we have now. "break" is either changing perms, levels or |
198 | * address space marker. | |
926e5392 | 199 | */ |
27990eac JF |
200 | prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; |
201 | cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; | |
926e5392 | 202 | |
fe770bf0 PA |
203 | if (!st->level) { |
204 | /* First entry */ | |
205 | st->current_prot = new_prot; | |
206 | st->level = level; | |
207 | st->marker = address_markers; | |
3891a04a | 208 | st->lines = 0; |
ef6bea6d BP |
209 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
210 | st->marker->name); | |
fe770bf0 PA |
211 | } else if (prot != cur || level != st->level || |
212 | st->current_address >= st->marker[1].start_address) { | |
213 | const char *unit = units; | |
926e5392 | 214 | unsigned long delta; |
6424fb38 | 215 | int width = sizeof(unsigned long) * 2; |
926e5392 | 216 | |
926e5392 AV |
217 | /* |
218 | * Now print the actual finished series | |
219 | */ | |
3891a04a PA |
220 | if (!st->marker->max_lines || |
221 | st->lines < st->marker->max_lines) { | |
222 | pt_dump_seq_printf(m, st->to_dmesg, | |
223 | "0x%0*lx-0x%0*lx ", | |
224 | width, st->start_address, | |
225 | width, st->current_address); | |
926e5392 | 226 | |
3891a04a PA |
227 | delta = st->current_address - st->start_address; |
228 | while (!(delta & 1023) && unit[1]) { | |
229 | delta >>= 10; | |
230 | unit++; | |
231 | } | |
232 | pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", | |
233 | delta, *unit); | |
234 | printk_prot(m, st->current_prot, st->level, | |
235 | st->to_dmesg); | |
926e5392 | 236 | } |
3891a04a | 237 | st->lines++; |
fe770bf0 PA |
238 | |
239 | /* | |
240 | * We print markers for special areas of address space, | |
241 | * such as the start of vmalloc space etc. | |
242 | * This helps in the interpretation. | |
243 | */ | |
244 | if (st->current_address >= st->marker[1].start_address) { | |
3891a04a PA |
245 | if (st->marker->max_lines && |
246 | st->lines > st->marker->max_lines) { | |
247 | unsigned long nskip = | |
248 | st->lines - st->marker->max_lines; | |
249 | pt_dump_seq_printf(m, st->to_dmesg, | |
250 | "... %lu entr%s skipped ... \n", | |
251 | nskip, | |
252 | nskip == 1 ? "y" : "ies"); | |
253 | } | |
fe770bf0 | 254 | st->marker++; |
3891a04a | 255 | st->lines = 0; |
ef6bea6d BP |
256 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
257 | st->marker->name); | |
926e5392 | 258 | } |
fe770bf0 | 259 | |
926e5392 AV |
260 | st->start_address = st->current_address; |
261 | st->current_prot = new_prot; | |
262 | st->level = level; | |
fe770bf0 | 263 | } |
926e5392 AV |
264 | } |
265 | ||
fe770bf0 | 266 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, |
926e5392 AV |
267 | unsigned long P) |
268 | { | |
269 | int i; | |
270 | pte_t *start; | |
271 | ||
272 | start = (pte_t *) pmd_page_vaddr(addr); | |
273 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
274 | pgprot_t prot = pte_pgprot(*start); | |
275 | ||
fe770bf0 | 276 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
926e5392 AV |
277 | note_page(m, st, prot, 4); |
278 | start++; | |
279 | } | |
280 | } | |
281 | ||
#if PTRS_PER_PMD > 1

/*
 * Visit every PMD of the table that @addr points to.  Empty, large and
 * non-present entries are reported to note_page() as level 3 entries;
 * any other entry is descended into with walk_pte_level().  @P is the
 * (un-normalized) virtual address covered by the first PMD.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
			   unsigned long P)
{
	pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PMD; idx++, pmd++) {
		unsigned long base = P + idx * PMD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
			continue;
		}

		if (pmd_large(*pmd) || !pmd_present(*pmd)) {
			pgprotval_t prot = pmd_val(*pmd) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(prot), 3);
		} else {
			walk_pte_level(m, st, *pmd, base);
		}
	}
}

#else
/* Single-entry PMD level: treat the PUD entry as if it were a PMD */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
#endif
926e5392 | 312 | |
fe770bf0 PA |
#if PTRS_PER_PUD > 1

/*
 * Visit every PUD of the table that @addr points to.  Empty, large and
 * non-present entries are reported to note_page() as level 2 entries;
 * any other entry is descended into with walk_pmd_level().  @P is the
 * (un-normalized) virtual address covered by the first PUD.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
			   unsigned long P)
{
	pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
	int idx;

	for (idx = 0; idx < PTRS_PER_PUD; idx++, pud++) {
		unsigned long base = P + idx * PUD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pud_none(*pud)) {
			note_page(m, st, __pgprot(0), 2);
			continue;
		}

		if (pud_large(*pud) || !pud_present(*pud)) {
			pgprotval_t prot = pud_val(*pud) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(prot), 2);
		} else {
			walk_pmd_level(m, st, *pud, base);
		}
	}
}

#else
/* Single-entry PUD level: treat the PGD entry as if it were a PUD */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
345 | ||
ef6bea6d | 346 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) |
926e5392 | 347 | { |
fe770bf0 | 348 | #ifdef CONFIG_X86_64 |
926e5392 | 349 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
fe770bf0 PA |
350 | #else |
351 | pgd_t *start = swapper_pg_dir; | |
352 | #endif | |
926e5392 | 353 | int i; |
ef6bea6d | 354 | struct pg_state st = {}; |
926e5392 | 355 | |
ef6bea6d BP |
356 | if (pgd) { |
357 | start = pgd; | |
358 | st.to_dmesg = true; | |
359 | } | |
926e5392 AV |
360 | |
361 | for (i = 0; i < PTRS_PER_PGD; i++) { | |
fe770bf0 PA |
362 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
363 | if (!pgd_none(*start)) { | |
77be1fab | 364 | pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; |
fe770bf0 PA |
365 | |
366 | if (pgd_large(*start) || !pgd_present(*start)) | |
367 | note_page(m, &st, __pgprot(prot), 1); | |
368 | else | |
369 | walk_pud_level(m, &st, *start, | |
370 | i * PGD_LEVEL_MULT); | |
371 | } else | |
926e5392 | 372 | note_page(m, &st, __pgprot(0), 1); |
fe770bf0 | 373 | |
926e5392 AV |
374 | start++; |
375 | } | |
fe770bf0 PA |
376 | |
377 | /* Flush out the last page */ | |
378 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
379 | note_page(m, &st, __pgprot(0), 0); | |
926e5392 AV |
380 | } |
381 | ||
382 | static int ptdump_show(struct seq_file *m, void *v) | |
383 | { | |
ef6bea6d | 384 | ptdump_walk_pgd_level(m, NULL); |
926e5392 AV |
385 | return 0; |
386 | } | |
387 | ||
388 | static int ptdump_open(struct inode *inode, struct file *filp) | |
389 | { | |
390 | return single_open(filp, ptdump_show, NULL); | |
391 | } | |
392 | ||
393 | static const struct file_operations ptdump_fops = { | |
394 | .open = ptdump_open, | |
395 | .read = seq_read, | |
396 | .llseek = seq_lseek, | |
397 | .release = single_release, | |
398 | }; | |
399 | ||
a4928cff | 400 | static int pt_dump_init(void) |
926e5392 AV |
401 | { |
402 | struct dentry *pe; | |
403 | ||
fe770bf0 PA |
404 | #ifdef CONFIG_X86_32 |
405 | /* Not a compile-time constant on x86-32 */ | |
92851e2f AS |
406 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
407 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | |
fe770bf0 | 408 | # ifdef CONFIG_HIGHMEM |
92851e2f | 409 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
fe770bf0 | 410 | # endif |
92851e2f | 411 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; |
fe770bf0 PA |
412 | #endif |
413 | ||
926e5392 AV |
414 | pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, |
415 | &ptdump_fops); | |
416 | if (!pe) | |
417 | return -ENOMEM; | |
418 | ||
419 | return 0; | |
420 | } | |
421 | ||
422 | __initcall(pt_dump_init); | |
423 | MODULE_LICENSE("GPL"); | |
424 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | |
425 | MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); |