Commit | Line | Data |
---|---|---|
926e5392 AV |
1 | /* |
2 | * Debug helper to dump the current kernel pagetables of the system | |
3 | * so that we can see what the various memory ranges are set to. | |
4 | * | |
5 | * (C) Copyright 2008 Intel Corporation | |
6 | * | |
7 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | ||
fe770bf0 PA |
15 | #include <linux/debugfs.h> |
16 | #include <linux/mm.h> | |
926e5392 AV |
17 | #include <linux/module.h> |
18 | #include <linux/seq_file.h> | |
926e5392 AV |
19 | |
20 | #include <asm/pgtable.h> | |
21 | ||
22 | /* | |
23 | * The dumper groups pagetable entries of the same type into one, and for | |
24 | * that it needs to keep some state when walking, and flush this state | |
25 | * when a "break" in the continuity is found. | |
26 | */ | |
27 | struct pg_state { | |
28 | int level; | |
29 | pgprot_t current_prot; | |
30 | unsigned long start_address; | |
31 | unsigned long current_address; | |
fe770bf0 | 32 | const struct addr_marker *marker; |
3891a04a | 33 | unsigned long lines; |
ef6bea6d | 34 | bool to_dmesg; |
926e5392 AV |
35 | }; |
36 | ||
fe770bf0 PA |
/* One labelled region of the virtual address space. */
struct addr_marker {
	/* First address belonging to the region */
	unsigned long start_address;
	/* Label printed in the "---[ name ]---" separator */
	const char *name;
	/* Cap on lines printed for the region; 0 means no cap */
	unsigned long max_lines;
};
42 | ||
92851e2f AS |
/*
 * Indices into address_markers[].  The order here must match the order of
 * the entries in that table.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	KERNEL_SPACE_NR,		/* second entry on both 32 and 64 bit */
#ifdef CONFIG_X86_64
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
	ESPFIX_START_NR,
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
#else
	VMALLOC_START_NR,
	VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
# endif
	FIXADDR_START_NR,
#endif
};
65 | ||
fe770bf0 PA |
66 | /* Address space markers hints */ |
67 | static struct addr_marker address_markers[] = { | |
68 | { 0, "User Space" }, | |
69 | #ifdef CONFIG_X86_64 | |
70 | { 0x8000000000000000UL, "Kernel Space" }, | |
684eb016 | 71 | { PAGE_OFFSET, "Low Kernel Mapping" }, |
fe770bf0 | 72 | { VMALLOC_START, "vmalloc() Area" }, |
fe770bf0 | 73 | { VMEMMAP_START, "Vmemmap" }, |
3891a04a | 74 | { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, |
fe770bf0 | 75 | { __START_KERNEL_map, "High Kernel Mapping" }, |
9a79cf9c YL |
76 | { MODULES_VADDR, "Modules" }, |
77 | { MODULES_END, "End Modules" }, | |
fe770bf0 PA |
78 | #else |
79 | { PAGE_OFFSET, "Kernel Mapping" }, | |
80 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
81 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | |
82 | # ifdef CONFIG_HIGHMEM | |
83 | { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, | |
84 | # endif | |
85 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | |
86 | #endif | |
87 | { -1, NULL } /* End of list */ | |
88 | }; | |
926e5392 | 89 | |
fe770bf0 PA |
/*
 * Per-level strides: a table index times the matching multiplier gives
 * the address offset of that entry from the start of its table's range.
 * Each level's stride is the one below times that table's entry count.
 */
#define PTE_LEVEL_MULT	(PAGE_SIZE)
#define PMD_LEVEL_MULT	(PTRS_PER_PTE * PTE_LEVEL_MULT)
#define PUD_LEVEL_MULT	(PTRS_PER_PMD * PMD_LEVEL_MULT)
#define PGD_LEVEL_MULT	(PTRS_PER_PUD * PUD_LEVEL_MULT)
926e5392 | 95 | |
ef6bea6d BP |
/*
 * Start a new output line: printk() at KERN_INFO when to_dmesg is set,
 * otherwise seq_printf() to m.  Nothing is emitted if to_dmesg is clear
 * and m is NULL.  fmt must be a string literal.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
104 | ||
/*
 * Continue the current output line: printk() with KERN_CONT when to_dmesg
 * is set, otherwise seq_printf() to m.  Nothing is emitted if to_dmesg is
 * clear and m is NULL.  fmt must be a string literal.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
({								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
})
113 | ||
926e5392 AV |
114 | /* |
115 | * Print a readable form of a pgprot_t to the seq_file | |
116 | */ | |
ef6bea6d | 117 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) |
926e5392 | 118 | { |
fe770bf0 PA |
119 | pgprotval_t pr = pgprot_val(prot); |
120 | static const char * const level_name[] = | |
121 | { "cr3", "pgd", "pud", "pmd", "pte" }; | |
122 | ||
123 | if (!pgprot_val(prot)) { | |
124 | /* Not present */ | |
ef6bea6d | 125 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
126 | } else { |
127 | if (pr & _PAGE_USER) | |
ef6bea6d | 128 | pt_dump_cont_printf(m, dmsg, "USR "); |
926e5392 | 129 | else |
ef6bea6d | 130 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 131 | if (pr & _PAGE_RW) |
ef6bea6d | 132 | pt_dump_cont_printf(m, dmsg, "RW "); |
fe770bf0 | 133 | else |
ef6bea6d | 134 | pt_dump_cont_printf(m, dmsg, "ro "); |
fe770bf0 | 135 | if (pr & _PAGE_PWT) |
ef6bea6d | 136 | pt_dump_cont_printf(m, dmsg, "PWT "); |
fe770bf0 | 137 | else |
ef6bea6d | 138 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 139 | if (pr & _PAGE_PCD) |
ef6bea6d | 140 | pt_dump_cont_printf(m, dmsg, "PCD "); |
926e5392 | 141 | else |
ef6bea6d | 142 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
143 | |
144 | /* Bit 9 has a different meaning on level 3 vs 4 */ | |
145 | if (level <= 3) { | |
146 | if (pr & _PAGE_PSE) | |
ef6bea6d | 147 | pt_dump_cont_printf(m, dmsg, "PSE "); |
fe770bf0 | 148 | else |
ef6bea6d | 149 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
150 | } else { |
151 | if (pr & _PAGE_PAT) | |
ef6bea6d | 152 | pt_dump_cont_printf(m, dmsg, "pat "); |
fe770bf0 | 153 | else |
ef6bea6d | 154 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
155 | } |
156 | if (pr & _PAGE_GLOBAL) | |
ef6bea6d | 157 | pt_dump_cont_printf(m, dmsg, "GLB "); |
fe770bf0 | 158 | else |
ef6bea6d | 159 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 160 | if (pr & _PAGE_NX) |
ef6bea6d | 161 | pt_dump_cont_printf(m, dmsg, "NX "); |
fe770bf0 | 162 | else |
ef6bea6d | 163 | pt_dump_cont_printf(m, dmsg, "x "); |
926e5392 | 164 | } |
ef6bea6d | 165 | pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); |
926e5392 AV |
166 | } |
167 | ||
/*
 * On 64-bit builds, sign-extend a 48-bit virtual address to 64 bits.
 * Other builds return the address unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifndef CONFIG_X86_64
	return u;
#else
	/*
	 * Push bit 47 up into the sign bit, then shift back as a signed
	 * value so that bit is copied into the upper 16 bits.
	 */
	long shifted = (long)(u << 16);

	return shifted >> 16;
#endif
}
179 | ||
180 | /* | |
181 | * This function gets called on a break in a continuous series | |
182 | * of PTE entries; the next one is different so we need to | |
183 | * print what we collected so far. | |
184 | */ | |
185 | static void note_page(struct seq_file *m, struct pg_state *st, | |
fe770bf0 | 186 | pgprot_t new_prot, int level) |
926e5392 | 187 | { |
fe770bf0 | 188 | pgprotval_t prot, cur; |
3891a04a | 189 | static const char units[] = "BKMGTPE"; |
926e5392 AV |
190 | |
191 | /* | |
192 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
193 | * we have now. "break" is either changing perms, levels or |
194 | * address space marker. | |
926e5392 | 195 | */ |
27990eac JF |
196 | prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; |
197 | cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; | |
926e5392 | 198 | |
fe770bf0 PA |
199 | if (!st->level) { |
200 | /* First entry */ | |
201 | st->current_prot = new_prot; | |
202 | st->level = level; | |
203 | st->marker = address_markers; | |
3891a04a | 204 | st->lines = 0; |
ef6bea6d BP |
205 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
206 | st->marker->name); | |
fe770bf0 PA |
207 | } else if (prot != cur || level != st->level || |
208 | st->current_address >= st->marker[1].start_address) { | |
209 | const char *unit = units; | |
926e5392 | 210 | unsigned long delta; |
6424fb38 | 211 | int width = sizeof(unsigned long) * 2; |
926e5392 | 212 | |
926e5392 AV |
213 | /* |
214 | * Now print the actual finished series | |
215 | */ | |
3891a04a PA |
216 | if (!st->marker->max_lines || |
217 | st->lines < st->marker->max_lines) { | |
218 | pt_dump_seq_printf(m, st->to_dmesg, | |
219 | "0x%0*lx-0x%0*lx ", | |
220 | width, st->start_address, | |
221 | width, st->current_address); | |
926e5392 | 222 | |
3891a04a PA |
223 | delta = st->current_address - st->start_address; |
224 | while (!(delta & 1023) && unit[1]) { | |
225 | delta >>= 10; | |
226 | unit++; | |
227 | } | |
228 | pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", | |
229 | delta, *unit); | |
230 | printk_prot(m, st->current_prot, st->level, | |
231 | st->to_dmesg); | |
926e5392 | 232 | } |
3891a04a | 233 | st->lines++; |
fe770bf0 PA |
234 | |
235 | /* | |
236 | * We print markers for special areas of address space, | |
237 | * such as the start of vmalloc space etc. | |
238 | * This helps in the interpretation. | |
239 | */ | |
240 | if (st->current_address >= st->marker[1].start_address) { | |
3891a04a PA |
241 | if (st->marker->max_lines && |
242 | st->lines > st->marker->max_lines) { | |
243 | unsigned long nskip = | |
244 | st->lines - st->marker->max_lines; | |
245 | pt_dump_seq_printf(m, st->to_dmesg, | |
246 | "... %lu entr%s skipped ... \n", | |
247 | nskip, | |
248 | nskip == 1 ? "y" : "ies"); | |
249 | } | |
fe770bf0 | 250 | st->marker++; |
3891a04a | 251 | st->lines = 0; |
ef6bea6d BP |
252 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
253 | st->marker->name); | |
926e5392 | 254 | } |
fe770bf0 | 255 | |
926e5392 AV |
256 | st->start_address = st->current_address; |
257 | st->current_prot = new_prot; | |
258 | st->level = level; | |
fe770bf0 | 259 | } |
926e5392 AV |
260 | } |
261 | ||
fe770bf0 | 262 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, |
926e5392 AV |
263 | unsigned long P) |
264 | { | |
265 | int i; | |
266 | pte_t *start; | |
267 | ||
268 | start = (pte_t *) pmd_page_vaddr(addr); | |
269 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
270 | pgprot_t prot = pte_pgprot(*start); | |
271 | ||
fe770bf0 | 272 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
926e5392 AV |
273 | note_page(m, st, prot, 4); |
274 | start++; | |
275 | } | |
276 | } | |
277 | ||
fe770bf0 | 278 | #if PTRS_PER_PMD > 1 |
926e5392 | 279 | |
fe770bf0 | 280 | static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, |
926e5392 AV |
281 | unsigned long P) |
282 | { | |
283 | int i; | |
284 | pmd_t *start; | |
285 | ||
286 | start = (pmd_t *) pud_page_vaddr(addr); | |
287 | for (i = 0; i < PTRS_PER_PMD; i++) { | |
fe770bf0 | 288 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); |
926e5392 | 289 | if (!pmd_none(*start)) { |
77be1fab | 290 | pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK; |
926e5392 | 291 | |
fe770bf0 | 292 | if (pmd_large(*start) || !pmd_present(*start)) |
926e5392 AV |
293 | note_page(m, st, __pgprot(prot), 3); |
294 | else | |
fe770bf0 PA |
295 | walk_pte_level(m, st, *start, |
296 | P + i * PMD_LEVEL_MULT); | |
926e5392 AV |
297 | } else |
298 | note_page(m, st, __pgprot(0), 3); | |
299 | start++; | |
300 | } | |
301 | } | |
302 | ||
fe770bf0 PA |
303 | #else |
/*
 * PTRS_PER_PMD is not greater than 1 here, so there is no separate pmd
 * table to walk: reinterpret the pud entry as a pmd entry, descend
 * directly into the pte table and reuse the pmd predicates.
 */
#define walk_pmd_level(m,s,a,p)	walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a)		pmd_large(__pmd(pud_val(a)))
#define pud_none(a)		pmd_none(__pmd(pud_val(a)))
307 | #endif | |
926e5392 | 308 | |
fe770bf0 PA |
309 | #if PTRS_PER_PUD > 1 |
310 | ||
311 | static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, | |
926e5392 AV |
312 | unsigned long P) |
313 | { | |
314 | int i; | |
315 | pud_t *start; | |
316 | ||
317 | start = (pud_t *) pgd_page_vaddr(addr); | |
318 | ||
319 | for (i = 0; i < PTRS_PER_PUD; i++) { | |
fe770bf0 | 320 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); |
926e5392 | 321 | if (!pud_none(*start)) { |
77be1fab | 322 | pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK; |
926e5392 | 323 | |
fe770bf0 | 324 | if (pud_large(*start) || !pud_present(*start)) |
926e5392 AV |
325 | note_page(m, st, __pgprot(prot), 2); |
326 | else | |
fe770bf0 PA |
327 | walk_pmd_level(m, st, *start, |
328 | P + i * PUD_LEVEL_MULT); | |
926e5392 AV |
329 | } else |
330 | note_page(m, st, __pgprot(0), 2); | |
331 | ||
332 | start++; | |
333 | } | |
334 | } | |
335 | ||
fe770bf0 PA |
336 | #else |
/*
 * PTRS_PER_PUD is not greater than 1 here, so there is no separate pud
 * table to walk: reinterpret the pgd entry as a pud entry, descend
 * directly to the pmd level and reuse the pud predicates.
 */
#define walk_pud_level(m,s,a,p)	walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a)		pud_large(__pud(pgd_val(a)))
#define pgd_none(a)		pud_none(__pud(pgd_val(a)))
340 | #endif | |
341 | ||
ef6bea6d | 342 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) |
926e5392 | 343 | { |
fe770bf0 | 344 | #ifdef CONFIG_X86_64 |
926e5392 | 345 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
fe770bf0 PA |
346 | #else |
347 | pgd_t *start = swapper_pg_dir; | |
348 | #endif | |
926e5392 | 349 | int i; |
ef6bea6d | 350 | struct pg_state st = {}; |
926e5392 | 351 | |
ef6bea6d BP |
352 | if (pgd) { |
353 | start = pgd; | |
354 | st.to_dmesg = true; | |
355 | } | |
926e5392 AV |
356 | |
357 | for (i = 0; i < PTRS_PER_PGD; i++) { | |
fe770bf0 PA |
358 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
359 | if (!pgd_none(*start)) { | |
77be1fab | 360 | pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; |
fe770bf0 PA |
361 | |
362 | if (pgd_large(*start) || !pgd_present(*start)) | |
363 | note_page(m, &st, __pgprot(prot), 1); | |
364 | else | |
365 | walk_pud_level(m, &st, *start, | |
366 | i * PGD_LEVEL_MULT); | |
367 | } else | |
926e5392 | 368 | note_page(m, &st, __pgprot(0), 1); |
fe770bf0 | 369 | |
926e5392 AV |
370 | start++; |
371 | } | |
fe770bf0 PA |
372 | |
373 | /* Flush out the last page */ | |
374 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
375 | note_page(m, &st, __pgprot(0), 0); | |
926e5392 AV |
376 | } |
377 | ||
/*
 * seq_file show callback: dump the kernel's own page tables into @m.
 * @v is unused.  Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	/* A NULL pgd selects the kernel's tables and seq_file output */
	ptdump_walk_pgd_level(m, NULL);

	return 0;
}
383 | ||
384 | static int ptdump_open(struct inode *inode, struct file *filp) | |
385 | { | |
386 | return single_open(filp, ptdump_show, NULL); | |
387 | } | |
388 | ||
389 | static const struct file_operations ptdump_fops = { | |
390 | .open = ptdump_open, | |
391 | .read = seq_read, | |
392 | .llseek = seq_lseek, | |
393 | .release = single_release, | |
394 | }; | |
395 | ||
a4928cff | 396 | static int pt_dump_init(void) |
926e5392 AV |
397 | { |
398 | struct dentry *pe; | |
399 | ||
fe770bf0 PA |
400 | #ifdef CONFIG_X86_32 |
401 | /* Not a compile-time constant on x86-32 */ | |
92851e2f AS |
402 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
403 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | |
fe770bf0 | 404 | # ifdef CONFIG_HIGHMEM |
92851e2f | 405 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
fe770bf0 | 406 | # endif |
92851e2f | 407 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; |
fe770bf0 PA |
408 | #endif |
409 | ||
926e5392 AV |
410 | pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, |
411 | &ptdump_fops); | |
412 | if (!pe) | |
413 | return -ENOMEM; | |
414 | ||
415 | return 0; | |
416 | } | |
417 | ||
418 | __initcall(pt_dump_init); | |
419 | MODULE_LICENSE("GPL"); | |
420 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | |
421 | MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); |