x86: add is_f00f_bug helper to fault_32|64.c
arch/x86/mm/fault_32.c
/*
 * Copyright (C) 1995 Linus Torvalds
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/bootmem.h>		/* for max_low_pfn */
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>

#include <asm/system.h>
#include <asm/desc.h>
#include <asm/segment.h>

/*
 * Page fault error code bits
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
#define PF_PROT		(1<<0)
#define PF_WRITE	(1<<1)
#define PF_USER		(1<<2)
#define PF_RSVD		(1<<3)
#define PF_INSTR	(1<<4)
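
/*
 * Example decode, derived from the bit definitions above: error_code 0x07
 * (PF_PROT|PF_WRITE|PF_USER) is a user-mode write that hit a present page
 * lacking write permission, while 0x04 (PF_USER) is a user-mode read of a
 * not-present page.
 */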

static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode_vm(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
#else
	return 0;
#endif
}

/*
 * X86_32
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * X86_64
 * Sometimes the CPU reports invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * Opcode checker based on code by Richard Brunner
 */
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
		       unsigned long error_code)
{
	unsigned char *instr;
	int scan_more = 1;
	int prefetch = 0;
	unsigned char *max_instr;

#ifdef CONFIG_X86_32
	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & PF_INSTR))
			return 0;
	} else {
		return 0;
	}
#else
	/* If it was an exec fault, ignore it. */
	if (error_code & PF_INSTR)
		return 0;
#endif

	instr = (unsigned char *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
		return 0;
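
	/*
	 * Scan at most 15 bytes (the maximum x86 instruction length) of the
	 * opcode stream: keep skipping over valid prefix bytes and report a
	 * prefetch only if they are followed by 0x0F 0x0D or 0x0F 0x18.
	 */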
	while (scan_more && instr < max_instr) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/*
			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
			 * In X86_64 long mode, the CPU will signal invalid
			 * opcode if some of these prefixes are present so
			 * X86_64 will never get here anyway
			 */
			scan_more = ((instr_lo & 7) == 0x6);
			break;
#ifdef CONFIG_X86_64
		case 0x40:
			/*
			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
			 * Need to figure out under what instruction mode the
			 * instruction was issued. Could check the LDT for lm,
			 * but for now it's good enough to assume that long
			 * mode only uses well known segments or kernel.
			 */
			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
			break;
#endif
		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;
		case 0xF0:
			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;
		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;

			if (probe_kernel_address(instr, opcode))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;
		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}

static void force_sig_info_fault(int si_signo, int si_code,
				 unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

void dump_pagetable(unsigned long address)
{
	__typeof__(pte_val(__pte(0))) page;
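
	/*
	 * Walk the page tables by hand: read the top-level table's physical
	 * address from CR3 and follow each level through its __va() alias.
	 * 'page' has the type of a pte value, so under PAE it is a 64-bit
	 * quantity wide enough to hold any table entry.
	 */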
	page = read_cr3();
	page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
#ifdef CONFIG_X86_PAE
	printk("*pdpt = %016Lx ", page);
	if ((page >> PAGE_SHIFT) < max_low_pfn
	    && page & _PAGE_PRESENT) {
		page &= PAGE_MASK;
		page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
		                                         & (PTRS_PER_PMD - 1)];
		printk(KERN_CONT "*pde = %016Lx ", page);
		page &= ~_PAGE_NX;
	}
#else
	printk("*pde = %08lx ", page);
#endif

	/*
	 * We must not directly access the pte in the highpte
	 * case if the page table is located in highmem.
	 * And let's rather not kmap-atomic the pte, just in case
	 * it's allocated already.
	 */
	if ((page >> PAGE_SHIFT) < max_low_pfn
	    && (page & _PAGE_PRESENT)
	    && !(page & _PAGE_PSE)) {
		page &= PAGE_MASK;
		page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
		                                         & (PTRS_PER_PTE - 1)];
		printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page);
	}

	printk("\n");
}
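
/*
 * Copy the kernel-space pmd entry for 'address' from the reference page
 * table (init_mm.pgd) into the given page directory, and return the
 * reference pmd, or NULL if the reference entry is not populated yet.
 */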
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	/*
	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
	 * and redundant with the set_pmd() on non-PAE. As would
	 * set_pud.
	 */

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;
	if (!pmd_present(*pmd)) {
		set_pmd(pmd, *pmd_k);
		arch_flush_lazy_mmu_mode();
	} else
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
	return pmd_k;
}

#ifdef CONFIG_X86_64
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
#endif

/*
 * Workaround for K8 erratum #93 & buggy BIOS.
 * BIOS SMM functions are required to use a specific workaround
 * to avoid corruption of the 64bit RIP register on C stepping K8.
 * Many BIOSes that didn't get tested properly miss this.
 * The OS sees this as a page fault with the upper 32 bits of RIP cleared.
 * Try to work around it here.
 * Note we only handle faults in kernel here.
 * Does nothing for X86_32.
 */
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
	static int warned;
	if (address != regs->ip)
		return 0;
	if ((address >> 32) != 0)
		return 0;
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		if (!warned) {
			printk(errata93_warning);
			warned = 1;
		}
		regs->ip = address;
		return 1;
	}
#endif
	return 0;
}

void do_invalid_op(struct pt_regs *, unsigned long);
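
/*
 * Pentium F00F bug: on affected CPUs the kernel maps the IDT through a
 * read-only alias, so the "invalid opcode with locked CMPXCHG8B" lockup
 * surfaces here as a page fault on the IDT instead of hanging the CPU.
 * Descriptors are 8 bytes each, so a fault on entry 6 corresponds to the
 * invalid-opcode vector and is forwarded to do_invalid_op().
 */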
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
	unsigned long nr;
	/*
	 * Pentium F0 0F C7 C8 bug workaround.
	 */
	if (boot_cpu_data.f00f_bug) {
		nr = (address - idt_descr.address) >> 3;

		if (nr == 6) {
			do_invalid_op(regs, 0);
			return 1;
		}
	}
#endif
	return 0;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 *
 * This assumes no large pages in there.
 */
static inline int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
	return 0;
#else
	pgd_t *pgd, *pgd_ref;
	pud_t *pud, *pud_ref;
	pmd_t *pmd, *pmd_ref;
	pte_t *pte, *pte_ref;

	/*
	 * Copy kernel mappings over when needed. This can also
	 * happen within a race in page table update. In the latter
	 * case just flush.
	 */

	pgd = pgd_offset(current->mm ?: &init_mm, address);
	pgd_ref = pgd_offset_k(address);
	if (pgd_none(*pgd_ref))
		return -1;
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);
	else
		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

	/*
	 * Below here mismatches are bugs because these lower tables
	 * are shared.
	 */

	pud = pud_offset(pgd, address);
	pud_ref = pud_offset(pgd_ref, address);
	if (pud_none(*pud_ref))
		return -1;
	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
		BUG();
	pmd = pmd_offset(pud, address);
	pmd_ref = pmd_offset(pud_ref, address);
	if (pmd_none(*pmd_ref))
		return -1;
	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
		BUG();
	pte_ref = pte_offset_kernel(pmd_ref, address);
	if (!pte_present(*pte_ref))
		return -1;
	pte = pte_offset_kernel(pmd, address);
	/*
	 * Don't use pte_page here, because the mappings can point
	 * outside mem_map, and the NUMA hash lookup cannot handle
	 * that.
	 */
	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
		BUG();
	return 0;
#endif
}

int show_unhandled_signals = 1;

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
	int write, si_code;
	int fault;

	/*
	 * We can fault from pretty much anywhere, with unknown IRQ state.
	 */
	trace_hardirqs_fixup();

	tsk = current;
	mm = tsk->mm;
	prefetchw(&mm->mmap_sem);

	/* get the address */
	address = read_cr2();

	si_code = SEGV_MAPERR;

	if (notify_page_fault(regs))
		return;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 9) == 0.
	 */
	if (unlikely(address >= TASK_SIZE)) {
		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
		    vmalloc_fault(address) >= 0)
			return;
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}

	/*
	 * It's safe to allow irqs after cr2 has been saved and the vmalloc
	 * fault has been handled.
	 */
	if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
		local_irq_enable();

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (in_atomic() || !mm)
		goto bad_area_nosemaphore;

	/*
	 * When running in the kernel we expect faults to occur only to
	 * addresses in user space. All other faults represent errors in the
	 * kernel and should generate an OOPS. Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space. Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source. If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if ((error_code & PF_USER) == 0 &&
		    !search_exception_tables(regs->ip))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & PF_USER) {
		/*
		 * Accessing the stack below %sp is always a bug.
		 * The large cushion allows instructions like enter
		 * and pusha to work. ("enter $65535,$31" pushes
		 * 32 pointers and then decrements %sp by 65535.)
		 */
		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	write = 0;
	switch (error_code & (PF_PROT|PF_WRITE)) {
	default:	/* 3: write, present */
		/* fall through */
	case PF_WRITE:		/* write, not present */
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		write++;
		break;
	case PF_PROT:		/* read, present */
		goto bad_area;
	case 0:			/* read, not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

 survive:
	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

#ifdef CONFIG_X86_32
	/*
	 * Did it hit the DOS screen memory VA from vm86 mode?
	 */
	if (v8086_mode(regs)) {
		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
		if (bit < 32)
			tsk->thread.screen_bitmap |= 1 << bit;
	}
#endif
	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & PF_USER) {
		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		/*
		 * Valid to do another page fault here because this one came
		 * from user space.
		 */
		if (is_prefetch(regs, address, error_code))
			return;

		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
		    printk_ratelimit()) {
			printk(
#ifdef CONFIG_X86_32
			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
#else
			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
#endif
			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			tsk->comm, task_pid_nr(tsk), address, regs->ip,
			regs->sp, error_code);
			print_vma_addr(" in ", regs->ip);
			printk("\n");
		}
		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;
		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
		return;
	}

	if (is_f00f_bug(regs, address))
		return;

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Valid to do another page fault here, because if this fault
	 * had been triggered by is_prefetch fixup_exception would have
	 * handled it.
	 */
	if (is_prefetch(regs, address, error_code))
		return;

	if (is_errata93(regs, address))
		return;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	bust_spinlocks(1);

	if (oops_may_print()) {

#ifdef CONFIG_X86_PAE
		if (error_code & PF_INSTR) {
			int level;
			pte_t *pte = lookup_address(address, &level);

			if (pte && pte_present(*pte) && !pte_exec(*pte))
				printk(KERN_CRIT "kernel tried to execute "
					"NX-protected page - exploit attempt? "
					"(uid: %d)\n", current->uid);
		}
#endif
		if (address < PAGE_SIZE)
			printk(KERN_ALERT "BUG: unable to handle kernel NULL "
					"pointer dereference");
		else
			printk(KERN_ALERT "BUG: unable to handle kernel paging"
					" request");
		printk(" at virtual address %08lx\n", address);
		printk(KERN_ALERT "printing ip: %08lx ", regs->ip);

		dump_pagetable(address);
	}

	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
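	/*
	 * Never take down the global init task from here: drop the
	 * semaphore, let other tasks make progress, then retry the fault.
	 */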
	if (is_global_init(tsk)) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (error_code & PF_USER)
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & PF_USER))
		goto no_context;

	/* User space => ok to do another page fault */
	if (is_prefetch(regs, address, error_code))
		return;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}
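
/*
 * Propagate the kernel's vmalloc-area mappings from the reference page
 * table into every page directory on pgd_list.  This is a no-op when the
 * kernel pmd is shared (SHARED_KERNEL_PMD).
 */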
void vmalloc_sync_all(void)
{
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * if undone).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = TASK_SIZE;
	unsigned long address;

	if (SHARED_KERNEL_PMD)
		return;

	BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
	for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			unsigned long flags;
			struct page *page;

			spin_lock_irqsave(&pgd_lock, flags);
			for (page = pgd_list; page; page =
					(struct page *)page->index)
				if (!vmalloc_sync_one(page_address(page),
						      address)) {
					BUG_ON(page != pgd_list);
					break;
				}
			spin_unlock_irqrestore(&pgd_lock, flags);
			if (!page)
				set_bit(pgd_index(address), insync);
		}
		if (address == start && test_bit(pgd_index(address), insync))
			start = address + PGDIR_SIZE;
	}
}