Commit | Line | Data |
---|---|---|
926e5392 AV |
1 | /* |
2 | * Debug helper to dump the current kernel pagetables of the system | |
3 | * so that we can see what the various memory ranges are set to. | |
4 | * | |
5 | * (C) Copyright 2008 Intel Corporation | |
6 | * | |
7 | * Author: Arjan van de Ven <arjan@linux.intel.com> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | ||
fe770bf0 PA |
15 | #include <linux/debugfs.h> |
16 | #include <linux/mm.h> | |
926e5392 AV |
17 | #include <linux/module.h> |
18 | #include <linux/seq_file.h> | |
926e5392 AV |
19 | |
20 | #include <asm/pgtable.h> | |
21 | ||
22 | /* | |
23 | * The dumper groups pagetable entries of the same type into one, and for | |
24 | * that it needs to keep some state when walking, and flush this state | |
25 | * when a "break" in the continuity is found. | |
26 | */ | |
27 | struct pg_state { | |
28 | int level; | |
29 | pgprot_t current_prot; | |
30 | unsigned long start_address; | |
31 | unsigned long current_address; | |
fe770bf0 | 32 | const struct addr_marker *marker; |
ef6bea6d | 33 | bool to_dmesg; |
926e5392 AV |
34 | }; |
35 | ||
/* A named boundary in the virtual address space. */
struct addr_marker {
	/* Address from which this marker applies */
	unsigned long start_address;
	/* Label printed in the "---[ ... ]---" separator line */
	const char *name;
};
40 | ||
/*
 * Indices for address_markers; the order here must stay in sync with the
 * order of the entries in address_markers below.
 */
enum address_markers_idx {
	USER_SPACE_NR = 0,
	/* Both the 64-bit and the 32-bit layout continue with the kernel space */
	KERNEL_SPACE_NR,
#ifdef CONFIG_X86_64
	LOW_KERNEL_NR,
	VMALLOC_START_NR,
	VMEMMAP_START_NR,
	HIGH_KERNEL_NR,
	MODULES_VADDR_NR,
	MODULES_END_NR,
#else
	VMALLOC_START_NR,
	VMALLOC_END_NR,
# ifdef CONFIG_HIGHMEM
	PKMAP_BASE_NR,
# endif
	FIXADDR_START_NR,
#endif
};
62 | ||
fe770bf0 PA |
63 | /* Address space markers hints */ |
64 | static struct addr_marker address_markers[] = { | |
65 | { 0, "User Space" }, | |
66 | #ifdef CONFIG_X86_64 | |
67 | { 0x8000000000000000UL, "Kernel Space" }, | |
684eb016 | 68 | { PAGE_OFFSET, "Low Kernel Mapping" }, |
fe770bf0 | 69 | { VMALLOC_START, "vmalloc() Area" }, |
fe770bf0 PA |
70 | { VMEMMAP_START, "Vmemmap" }, |
71 | { __START_KERNEL_map, "High Kernel Mapping" }, | |
9a79cf9c YL |
72 | { MODULES_VADDR, "Modules" }, |
73 | { MODULES_END, "End Modules" }, | |
fe770bf0 PA |
74 | #else |
75 | { PAGE_OFFSET, "Kernel Mapping" }, | |
76 | { 0/* VMALLOC_START */, "vmalloc() Area" }, | |
77 | { 0/*VMALLOC_END*/, "vmalloc() End" }, | |
78 | # ifdef CONFIG_HIGHMEM | |
79 | { 0/*PKMAP_BASE*/, "Persisent kmap() Area" }, | |
80 | # endif | |
81 | { 0/*FIXADDR_START*/, "Fixmap Area" }, | |
82 | #endif | |
83 | { -1, NULL } /* End of list */ | |
84 | }; | |
926e5392 | 85 | |
/*
 * Address stride between two consecutive entries of a table at each
 * pagetable level.  Each level's stride is the stride of the level below
 * it multiplied by the number of entries in that lower-level table.
 */
#define PTE_LEVEL_MULT (PAGE_SIZE)
#define PMD_LEVEL_MULT (PTE_LEVEL_MULT * PTRS_PER_PTE)
#define PUD_LEVEL_MULT (PMD_LEVEL_MULT * PTRS_PER_PMD)
#define PGD_LEVEL_MULT (PUD_LEVEL_MULT * PTRS_PER_PUD)
926e5392 | 91 | |
/*
 * Print a formatted message to one of two sinks.  When to_dmesg is set the
 * message is logged with printk() at KERN_INFO; otherwise it is written to
 * the seq_file m, or dropped if m is NULL.
 */
#define pt_dump_seq_printf(m, to_dmesg, fmt, args...)		\
do {								\
	if (to_dmesg) {						\
		printk(KERN_INFO fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
} while (0)
100 | ||
/*
 * Same two sinks as pt_dump_seq_printf(), but the printk() path uses
 * KERN_CONT so the text continues the log line already in progress.  The
 * seq_file path is written as-is, or dropped if m is NULL.
 */
#define pt_dump_cont_printf(m, to_dmesg, fmt, args...)		\
do {								\
	if (to_dmesg) {						\
		printk(KERN_CONT fmt, ##args);			\
	} else if (m) {						\
		seq_printf(m, fmt, ##args);			\
	}							\
} while (0)
109 | ||
926e5392 AV |
110 | /* |
111 | * Print a readable form of a pgprot_t to the seq_file | |
112 | */ | |
ef6bea6d | 113 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) |
926e5392 | 114 | { |
fe770bf0 PA |
115 | pgprotval_t pr = pgprot_val(prot); |
116 | static const char * const level_name[] = | |
117 | { "cr3", "pgd", "pud", "pmd", "pte" }; | |
118 | ||
119 | if (!pgprot_val(prot)) { | |
120 | /* Not present */ | |
ef6bea6d | 121 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
122 | } else { |
123 | if (pr & _PAGE_USER) | |
ef6bea6d | 124 | pt_dump_cont_printf(m, dmsg, "USR "); |
926e5392 | 125 | else |
ef6bea6d | 126 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 127 | if (pr & _PAGE_RW) |
ef6bea6d | 128 | pt_dump_cont_printf(m, dmsg, "RW "); |
fe770bf0 | 129 | else |
ef6bea6d | 130 | pt_dump_cont_printf(m, dmsg, "ro "); |
fe770bf0 | 131 | if (pr & _PAGE_PWT) |
ef6bea6d | 132 | pt_dump_cont_printf(m, dmsg, "PWT "); |
fe770bf0 | 133 | else |
ef6bea6d | 134 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 135 | if (pr & _PAGE_PCD) |
ef6bea6d | 136 | pt_dump_cont_printf(m, dmsg, "PCD "); |
926e5392 | 137 | else |
ef6bea6d | 138 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
139 | |
140 | /* Bit 9 has a different meaning on level 3 vs 4 */ | |
141 | if (level <= 3) { | |
142 | if (pr & _PAGE_PSE) | |
ef6bea6d | 143 | pt_dump_cont_printf(m, dmsg, "PSE "); |
fe770bf0 | 144 | else |
ef6bea6d | 145 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
146 | } else { |
147 | if (pr & _PAGE_PAT) | |
ef6bea6d | 148 | pt_dump_cont_printf(m, dmsg, "pat "); |
fe770bf0 | 149 | else |
ef6bea6d | 150 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 PA |
151 | } |
152 | if (pr & _PAGE_GLOBAL) | |
ef6bea6d | 153 | pt_dump_cont_printf(m, dmsg, "GLB "); |
fe770bf0 | 154 | else |
ef6bea6d | 155 | pt_dump_cont_printf(m, dmsg, " "); |
fe770bf0 | 156 | if (pr & _PAGE_NX) |
ef6bea6d | 157 | pt_dump_cont_printf(m, dmsg, "NX "); |
fe770bf0 | 158 | else |
ef6bea6d | 159 | pt_dump_cont_printf(m, dmsg, "x "); |
926e5392 | 160 | } |
ef6bea6d | 161 | pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); |
926e5392 AV |
162 | } |
163 | ||
/*
 * On 64 bits, sign-extend the 48 bit address to 64 bit: the value is
 * shifted up so that bit 47 becomes the sign bit and then shifted back
 * down as a signed quantity.  On 32 bits the address is returned unchanged.
 */
static unsigned long normalize_addr(unsigned long u)
{
#ifdef CONFIG_X86_64
	signed long shifted = (signed long)(u << 16);

	return shifted >> 16;
#else
	return u;
#endif
}
175 | ||
176 | /* | |
177 | * This function gets called on a break in a continuous series | |
178 | * of PTE entries; the next one is different so we need to | |
179 | * print what we collected so far. | |
180 | */ | |
181 | static void note_page(struct seq_file *m, struct pg_state *st, | |
fe770bf0 | 182 | pgprot_t new_prot, int level) |
926e5392 | 183 | { |
fe770bf0 PA |
184 | pgprotval_t prot, cur; |
185 | static const char units[] = "KMGTPE"; | |
926e5392 AV |
186 | |
187 | /* | |
188 | * If we have a "break" in the series, we need to flush the state that | |
fe770bf0 PA |
189 | * we have now. "break" is either changing perms, levels or |
190 | * address space marker. | |
926e5392 | 191 | */ |
27990eac JF |
192 | prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; |
193 | cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; | |
926e5392 | 194 | |
fe770bf0 PA |
195 | if (!st->level) { |
196 | /* First entry */ | |
197 | st->current_prot = new_prot; | |
198 | st->level = level; | |
199 | st->marker = address_markers; | |
ef6bea6d BP |
200 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
201 | st->marker->name); | |
fe770bf0 PA |
202 | } else if (prot != cur || level != st->level || |
203 | st->current_address >= st->marker[1].start_address) { | |
204 | const char *unit = units; | |
926e5392 | 205 | unsigned long delta; |
6424fb38 | 206 | int width = sizeof(unsigned long) * 2; |
926e5392 | 207 | |
926e5392 AV |
208 | /* |
209 | * Now print the actual finished series | |
210 | */ | |
ef6bea6d BP |
211 | pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", |
212 | width, st->start_address, | |
213 | width, st->current_address); | |
926e5392 AV |
214 | |
215 | delta = (st->current_address - st->start_address) >> 10; | |
fe770bf0 PA |
216 | while (!(delta & 1023) && unit[1]) { |
217 | delta >>= 10; | |
218 | unit++; | |
926e5392 | 219 | } |
ef6bea6d BP |
220 | pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit); |
221 | printk_prot(m, st->current_prot, st->level, st->to_dmesg); | |
fe770bf0 PA |
222 | |
223 | /* | |
224 | * We print markers for special areas of address space, | |
225 | * such as the start of vmalloc space etc. | |
226 | * This helps in the interpretation. | |
227 | */ | |
228 | if (st->current_address >= st->marker[1].start_address) { | |
229 | st->marker++; | |
ef6bea6d BP |
230 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
231 | st->marker->name); | |
926e5392 | 232 | } |
fe770bf0 | 233 | |
926e5392 AV |
234 | st->start_address = st->current_address; |
235 | st->current_prot = new_prot; | |
236 | st->level = level; | |
fe770bf0 | 237 | } |
926e5392 AV |
238 | } |
239 | ||
fe770bf0 | 240 | static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr, |
926e5392 AV |
241 | unsigned long P) |
242 | { | |
243 | int i; | |
244 | pte_t *start; | |
245 | ||
246 | start = (pte_t *) pmd_page_vaddr(addr); | |
247 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
248 | pgprot_t prot = pte_pgprot(*start); | |
249 | ||
fe770bf0 | 250 | st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT); |
926e5392 AV |
251 | note_page(m, st, prot, 4); |
252 | start++; | |
253 | } | |
254 | } | |
255 | ||
#if PTRS_PER_PMD > 1

/*
 * Visit every entry of the PMD table that the pud entry @addr points to.
 * Empty entries, large entries and non-present entries are reported to
 * note_page() as level 3 entries; every other entry is descended into via
 * walk_pte_level().  @P is the (not yet normalized) address mapped by the
 * first entry of the table.
 */
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
			   unsigned long P)
{
	pmd_t *pmd = (pmd_t *) pud_page_vaddr(addr);
	int i;

	for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
		unsigned long base = P + i * PMD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pmd_none(*pmd)) {
			note_page(m, st, __pgprot(0), 3);
			continue;
		}

		if (pmd_large(*pmd) || !pmd_present(*pmd)) {
			pgprotval_t prot = pmd_val(*pmd) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(prot), 3);
		} else {
			walk_pte_level(m, st, *pmd, base);
		}
	}
}

#else
/* Only one entry per PMD table: treat the pud entry as the pmd entry */
#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
#define pud_large(a) pmd_large(__pmd(pud_val(a)))
#define pud_none(a) pmd_none(__pmd(pud_val(a)))
#endif
926e5392 | 286 | |
#if PTRS_PER_PUD > 1

/*
 * Visit every entry of the PUD table that the pgd entry @addr points to.
 * Empty entries, large entries and non-present entries are reported to
 * note_page() as level 2 entries; every other entry is descended into via
 * walk_pmd_level().  @P is the (not yet normalized) address mapped by the
 * first entry of the table.
 */
static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
			   unsigned long P)
{
	pud_t *pud = (pud_t *) pgd_page_vaddr(addr);
	int i;

	for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
		unsigned long base = P + i * PUD_LEVEL_MULT;

		st->current_address = normalize_addr(base);

		if (pud_none(*pud)) {
			note_page(m, st, __pgprot(0), 2);
			continue;
		}

		if (pud_large(*pud) || !pud_present(*pud)) {
			pgprotval_t prot = pud_val(*pud) & PTE_FLAGS_MASK;

			note_page(m, st, __pgprot(prot), 2);
		} else {
			walk_pmd_level(m, st, *pud, base);
		}
	}
}

#else
/* Only one entry per PUD table: treat the pgd entry as the pud entry */
#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
#define pgd_large(a) pud_large(__pud(pgd_val(a)))
#define pgd_none(a) pud_none(__pud(pgd_val(a)))
#endif
319 | ||
ef6bea6d | 320 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) |
926e5392 | 321 | { |
fe770bf0 | 322 | #ifdef CONFIG_X86_64 |
926e5392 | 323 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
fe770bf0 PA |
324 | #else |
325 | pgd_t *start = swapper_pg_dir; | |
326 | #endif | |
926e5392 | 327 | int i; |
ef6bea6d | 328 | struct pg_state st = {}; |
926e5392 | 329 | |
ef6bea6d BP |
330 | if (pgd) { |
331 | start = pgd; | |
332 | st.to_dmesg = true; | |
333 | } | |
926e5392 AV |
334 | |
335 | for (i = 0; i < PTRS_PER_PGD; i++) { | |
fe770bf0 PA |
336 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
337 | if (!pgd_none(*start)) { | |
77be1fab | 338 | pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; |
fe770bf0 PA |
339 | |
340 | if (pgd_large(*start) || !pgd_present(*start)) | |
341 | note_page(m, &st, __pgprot(prot), 1); | |
342 | else | |
343 | walk_pud_level(m, &st, *start, | |
344 | i * PGD_LEVEL_MULT); | |
345 | } else | |
926e5392 | 346 | note_page(m, &st, __pgprot(0), 1); |
fe770bf0 | 347 | |
926e5392 AV |
348 | start++; |
349 | } | |
fe770bf0 PA |
350 | |
351 | /* Flush out the last page */ | |
352 | st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT); | |
353 | note_page(m, &st, __pgprot(0), 0); | |
926e5392 AV |
354 | } |
355 | ||
/*
 * seq_file show callback: dump the pagetables into @m.  @v is unused.
 * Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	/* NULL pgd: walk the kernel's own table and print into the seq_file */
	ptdump_walk_pgd_level(m, NULL);

	return 0;
}
361 | ||
362 | static int ptdump_open(struct inode *inode, struct file *filp) | |
363 | { | |
364 | return single_open(filp, ptdump_show, NULL); | |
365 | } | |
366 | ||
367 | static const struct file_operations ptdump_fops = { | |
368 | .open = ptdump_open, | |
369 | .read = seq_read, | |
370 | .llseek = seq_lseek, | |
371 | .release = single_release, | |
372 | }; | |
373 | ||
a4928cff | 374 | static int pt_dump_init(void) |
926e5392 AV |
375 | { |
376 | struct dentry *pe; | |
377 | ||
fe770bf0 PA |
378 | #ifdef CONFIG_X86_32 |
379 | /* Not a compile-time constant on x86-32 */ | |
92851e2f AS |
380 | address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; |
381 | address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; | |
fe770bf0 | 382 | # ifdef CONFIG_HIGHMEM |
92851e2f | 383 | address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; |
fe770bf0 | 384 | # endif |
92851e2f | 385 | address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; |
fe770bf0 PA |
386 | #endif |
387 | ||
926e5392 AV |
388 | pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, |
389 | &ptdump_fops); | |
390 | if (!pe) | |
391 | return -ENOMEM; | |
392 | ||
393 | return 0; | |
394 | } | |
395 | ||
396 | __initcall(pt_dump_init); | |
397 | MODULE_LICENSE("GPL"); | |
398 | MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); | |
399 | MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables"); |