x86: add is_f00f_bug helper to fault_32|64.c
[net-next-2.6.git] arch/x86/mm/fault_64.c
/*
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>

#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>

/*
 * Page fault error code bits
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
#define PF_PROT		(1<<0)
#define PF_WRITE	(1<<1)
#define PF_USER		(1<<2)
#define PF_RSVD		(1<<3)
#define PF_INSTR	(1<<4)

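/*
 * Give any registered kprobe a chance to handle the fault first; returns
 * nonzero if a kprobe fault handler claimed it (vector 14 == page fault).
 */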
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
#else
	return 0;
#endif
}

/*
 * X86_32
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * X86_64
 * Sometimes the CPU reports invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * Opcode checker based on code by Richard Brunner
 */
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
		       unsigned long error_code)
{
	unsigned char *instr;
	int scan_more = 1;
	int prefetch = 0;
	unsigned char *max_instr;

#ifdef CONFIG_X86_32
	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & PF_INSTR))
			return 0;
	} else {
		return 0;
	}
#else
	/* If it was an exec fault, ignore it */
	if (error_code & PF_INSTR)
		return 0;
#endif

	instr = (unsigned char *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
		return 0;

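	/*
	 * Scan up to the 15-byte maximum x86 instruction length, skipping
	 * over valid prefix bytes, to see whether the faulting instruction
	 * really is a prefetch (opcode 0x0F 0x0D or 0x0F 0x18).
	 */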
	while (scan_more && instr < max_instr) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/*
			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
			 * In X86_64 long mode, the CPU will signal invalid
			 * opcode if some of these prefixes are present so
			 * X86_64 will never get here anyway
			 */
			scan_more = ((instr_lo & 7) == 0x6);
			break;
#ifdef CONFIG_X86_64
		case 0x40:
			/*
			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
			 * Need to figure out under what instruction mode the
			 * instruction was issued. Could check the LDT for lm,
			 * but for now it's good enough to assume that long
			 * mode only uses well known segments or kernel.
			 */
			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
			break;
#endif
		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;
		case 0xF0:
			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;
		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;

			if (probe_kernel_address(instr, opcode))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;
		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}

static void force_sig_info_fault(int si_signo, int si_code,
				 unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

static int bad_address(void *p)
{
	unsigned long dummy;
	return probe_kernel_address((unsigned long *)p, dummy);
}

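/*
 * Walk the page tables for 'address' and print the entry found at each
 * level (PGD/PUD/PMD/PTE), stopping at the first entry that is not
 * present (or at a large PMD).
 */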
void dump_pagetable(unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = (pgd_t *)read_cr3();

	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
	pgd += pgd_index(address);
	if (bad_address(pgd)) goto bad;
	printk("PGD %lx ", pgd_val(*pgd));
	if (!pgd_present(*pgd)) goto ret;

	pud = pud_offset(pgd, address);
	if (bad_address(pud)) goto bad;
	printk("PUD %lx ", pud_val(*pud));
	if (!pud_present(*pud)) goto ret;

	pmd = pmd_offset(pud, address);
	if (bad_address(pmd)) goto bad;
	printk("PMD %lx ", pmd_val(*pmd));
	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;

	pte = pte_offset_kernel(pmd, address);
	if (bad_address(pte)) goto bad;
	printk("PTE %lx", pte_val(*pte));
ret:
	printk("\n");
	return;
bad:
	printk("BAD\n");
}

#ifdef CONFIG_X86_64
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
#endif

/* Workaround for K8 erratum #93 & buggy BIOS.
   BIOS SMM functions are required to use a specific workaround
   to avoid corruption of the 64bit RIP register on C stepping K8.
   A lot of BIOS that didn't get tested properly miss this.
   The OS sees this as a page fault with the upper 32bits of RIP cleared.
   Try to work around it here.
   Note we only handle faults in kernel here.
   Does nothing for X86_32
 */
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
	static int warned;
	if (address != regs->ip)
		return 0;
	if ((address >> 32) != 0)
		return 0;
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		if (!warned) {
			printk(errata93_warning);
			warned = 1;
		}
		regs->ip = address;
		return 1;
	}
#endif
	return 0;
}

void do_invalid_op(struct pt_regs *, unsigned long);

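/*
 * Pentium F00F workaround: the IDT is mapped read-only on affected CPUs,
 * so the lockup-triggering "F0 0F C7 C8" sequence raises a page fault on
 * the IDT instead of hanging the machine.  If the faulting address lands
 * on IDT entry 6 (invalid opcode), hand it to do_invalid_op().
 */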
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
	unsigned long nr;
	/*
	 * Pentium F0 0F C7 C8 bug workaround.
	 */
	if (boot_cpu_data.f00f_bug) {
		nr = (address - idt_descr.address) >> 3;

		if (nr == 6) {
			do_invalid_op(regs, 0);
			return 1;
		}
	}
#endif
	return 0;
}

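/*
 * Reserved-bit page faults (PF_RSVD) mean a corrupted page table entry;
 * dump the page table walk and oops.
 */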
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
				 unsigned long error_code)
{
	unsigned long flags = oops_begin();
	struct task_struct *tsk;

	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
	       current->comm, address);
	dump_pagetable(address);
	tsk = current;
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Bad pagetable", regs, error_code))
		regs = NULL;
	oops_end(flags, regs, SIGKILL);
}

/*
 * Handle a fault on the vmalloc area
 *
 * This assumes no large pages in there.
 */
static int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
	return 0;
#else
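	/*
	 * On x86_64 a vmalloc-area fault normally means this mm's pgd is
	 * missing a top-level entry that was added to init_mm after this
	 * pgd was created; copy it from the reference page table.
	 */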
	pgd_t *pgd, *pgd_ref;
	pud_t *pud, *pud_ref;
	pmd_t *pmd, *pmd_ref;
	pte_t *pte, *pte_ref;

	/* Copy kernel mappings over when needed. This can also
	   happen within a race in page table update. In the latter
	   case just flush. */

	pgd = pgd_offset(current->mm ?: &init_mm, address);
	pgd_ref = pgd_offset_k(address);
	if (pgd_none(*pgd_ref))
		return -1;
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);
	else
		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

	/* Below here mismatches are bugs because these lower tables
	   are shared */

	pud = pud_offset(pgd, address);
	pud_ref = pud_offset(pgd_ref, address);
	if (pud_none(*pud_ref))
		return -1;
	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
		BUG();
	pmd = pmd_offset(pud, address);
	pmd_ref = pmd_offset(pud_ref, address);
	if (pmd_none(*pmd_ref))
		return -1;
	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
		BUG();
	pte_ref = pte_offset_kernel(pmd_ref, address);
	if (!pte_present(*pte_ref))
		return -1;
	pte = pte_offset_kernel(pmd, address);
	/* Don't use pte_page here, because the mappings can point
	   outside mem_map, and the NUMA hash lookup cannot handle
	   that. */
	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
		BUG();
	return 0;
#endif
}

int show_unhandled_signals = 1;

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
	int write, fault;
	unsigned long flags;
	int si_code;

	/*
	 * We can fault from pretty much anywhere, with unknown IRQ state.
	 */
	trace_hardirqs_fixup();

	tsk = current;
	mm = tsk->mm;
	prefetchw(&mm->mmap_sem);

	/* get the address */
	address = read_cr2();

	si_code = SEGV_MAPERR;

	if (notify_page_fault(regs))
		return;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 9) == 0.
	 */
	if (unlikely(address >= TASK_SIZE64)) {
		/*
		 * Don't check for the module range here: its PML4
		 * is always initialized because it's shared with the main
		 * kernel text. Only vmalloc may need PML4 syncups.
		 */
		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
		      ((address >= VMALLOC_START && address < VMALLOC_END))) {
			if (vmalloc_fault(address) >= 0)
				return;
		}
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}

	if (likely(regs->flags & X86_EFLAGS_IF))
		local_irq_enable();

	if (unlikely(error_code & PF_RSVD))
		pgtable_bad(address, regs, error_code);

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (unlikely(in_atomic() || !mm))
		goto bad_area_nosemaphore;

	/*
	 * User-mode registers count as a user access even for any
	 * potential system fault or CPU buglet.
	 */
	if (user_mode_vm(regs))
		error_code |= PF_USER;

 again:
	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space. All other faults represent errors in the
	 * kernel and should generate an OOPS. Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space. Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space, if we cannot we then validate the
	 * source. If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if ((error_code & PF_USER) == 0 &&
		    !search_exception_tables(regs->ip))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & PF_USER) {
		/*
		 * Accessing the stack below %sp is always a bug.
		 * The large cushion allows instructions like enter
		 * and pusha to work. ("enter $65535,$31" pushes
		 * 32 pointers and then decrements %sp by 65535.)
		 */
		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	write = 0;
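	/*
	 * Check the access rights against the vma: PF_WRITE means a write
	 * access, PF_PROT means the page was present but its protections
	 * were violated.
	 */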
	switch (error_code & (PF_PROT|PF_WRITE)) {
	default:	/* 3: write, present */
		/* fall through */
	case PF_WRITE:		/* write, not present */
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		write++;
		break;
	case PF_PROT:		/* read, present */
		goto bad_area;
	case 0:			/* read, not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

#ifdef CONFIG_X86_32
	/*
	 * Did it hit the DOS screen memory VA from vm86 mode?
	 */
	if (v8086_mode(regs)) {
		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
		if (bit < 32)
			tsk->thread.screen_bitmap |= 1 << bit;
	}
#endif
	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & PF_USER) {

		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		if (is_prefetch(regs, address, error_code))
			return;

		/* Work around K8 erratum #100: K8 in compat mode
		   occasionally jumps to illegal addresses >4GB. We
		   catch this here in the page fault handler because
		   these addresses are not reachable. Just detect this
		   case and return. Any code segment in LDT is
		   compatibility mode. */
		if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
		    (address >> 32))
			return;

		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
		    printk_ratelimit()) {
			printk(
#ifdef CONFIG_X86_32
			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
#else
			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
#endif
			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			tsk->comm, task_pid_nr(tsk), address, regs->ip,
			regs->sp, error_code);
			print_vma_addr(" in ", regs->ip);
			printk("\n");
		}

		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;

		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
		return;
	}

	if (is_f00f_bug(regs, address))
		return;

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Hall of shame of CPU/BIOS bugs.
	 */

	if (is_prefetch(regs, address, error_code))
		return;

	if (is_errata93(regs, address))
		return;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	flags = oops_begin();

	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request");
	printk(" at %016lx RIP: \n" KERN_ALERT, address);
	printk_address(regs->ip, 1);
	dump_pagetable(address);
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Oops", regs, error_code))
		regs = NULL;
	/* Executive summary in case the body of the oops scrolled away */
	printk(KERN_EMERG "CR2: %016lx\n", address);
	oops_end(flags, regs, SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(current)) {
		yield();
		goto again;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (error_code & PF_USER)
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & PF_USER))
		goto no_context;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
	return;
}

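/*
 * pgd_lock protects pgd_list, the list of page-table roots that
 * vmalloc_sync_all() walks when propagating kernel PGD entries.
 */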
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);

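/*
 * Make sure every pgd on pgd_list contains the vmalloc-area PGD entries
 * that are present in the reference (init_mm) page table.
 */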
void vmalloc_sync_all(void)
{
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * if undone).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = VMALLOC_START & PGDIR_MASK;
	unsigned long address;

	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			const pgd_t *pgd_ref = pgd_offset_k(address);
			struct page *page;

			if (pgd_none(*pgd_ref))
				continue;
			spin_lock(&pgd_lock);
			list_for_each_entry(page, &pgd_list, lru) {
				pgd_t *pgd;
				pgd = (pgd_t *)page_address(page) + pgd_index(address);
				if (pgd_none(*pgd))
					set_pgd(pgd, *pgd_ref);
				else
					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
			}
			spin_unlock(&pgd_lock);
			set_bit(pgd_index(address), insync);
		}
		if (address == start)
			start = address + PGDIR_SIZE;
	}
	/* Check that there is no need to do the same for the modules area. */
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
		       (__START_KERNEL & PGDIR_MASK)));
716}