arch/x86/mm/fault_64.c
/*
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>

#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>

/*
 * Page fault error code bits
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
#define PF_PROT		(1<<0)
#define PF_WRITE	(1<<1)
#define PF_USER		(1<<2)
#define PF_RSVD		(1<<3)
#define PF_INSTR	(1<<4)
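
/*
 * For example, error_code == (PF_PROT|PF_WRITE|PF_USER) is a user-mode
 * write to a page that is present but whose protections forbid the access.
 */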

static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
#else
	return 0;
#endif
}

/*
 * X86_32
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * X86_64
 * Sometimes the CPU reports invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * Opcode checker based on code by Richard Brunner
 */
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
		       unsigned long error_code)
{
	unsigned char *instr;
	int scan_more = 1;
	int prefetch = 0;
	unsigned char *max_instr;

#ifdef CONFIG_X86_32
	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & PF_INSTR))
			return 0;
	} else {
		return 0;
	}
#else
	/* If it was an exec fault, ignore it. */
	if (error_code & PF_INSTR)
		return 0;
#endif

	instr = (unsigned char *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
		return 0;

	while (scan_more && instr < max_instr) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/*
			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
			 * In X86_64 long mode, the CPU will signal invalid
			 * opcode if some of these prefixes are present so
			 * X86_64 will never get here anyway
			 */
			scan_more = ((instr_lo & 7) == 0x6);
			break;
#ifdef CONFIG_X86_64
		case 0x40:
			/*
			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
			 * Need to figure out under what instruction mode the
			 * instruction was issued. Could check the LDT for lm,
			 * but for now it's good enough to assume that long
			 * mode only uses well known segments or kernel.
			 */
			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
			break;
#endif
		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;
		case 0xF0:
			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;
		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;

			if (probe_kernel_address(instr, opcode))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;
		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}

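/*
 * Fill in a siginfo for the fault at @address and send @si_signo to @tsk.
 */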
static void force_sig_info_fault(int si_signo, int si_code,
				 unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

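/*
 * Returns non-zero if the page-table entry at @p cannot be read safely,
 * i.e. probe_kernel_address() faulted.
 */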
static int bad_address(void *p)
{
	unsigned long dummy;
	return probe_kernel_address((unsigned long *)p, dummy);
}

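/*
 * Print the page-table entries (PGD/PUD/PMD/PTE) mapping @address,
 * stopping at the first level that is not present or is a large page.
 */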
void dump_pagetable(unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = (pgd_t *)read_cr3();

	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
	pgd += pgd_index(address);
	if (bad_address(pgd)) goto bad;
	printk("PGD %lx ", pgd_val(*pgd));
	if (!pgd_present(*pgd)) goto ret;

	pud = pud_offset(pgd, address);
	if (bad_address(pud)) goto bad;
	printk("PUD %lx ", pud_val(*pud));
	if (!pud_present(*pud)) goto ret;

	pmd = pmd_offset(pud, address);
	if (bad_address(pmd)) goto bad;
	printk("PMD %lx ", pmd_val(*pmd));
	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;

	pte = pte_offset_kernel(pmd, address);
	if (bad_address(pte)) goto bad;
	printk("PTE %lx", pte_val(*pte));
ret:
	printk("\n");
	return;
bad:
	printk("BAD\n");
}

#ifdef CONFIG_X86_64
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
#endif

/*
 * Workaround for K8 erratum #93 & buggy BIOS.
 * BIOS SMM functions are required to use a specific workaround
 * to avoid corruption of the 64bit RIP register on C stepping K8.
 * A lot of BIOSes that didn't get tested properly miss this.
 * The OS sees this as a page fault with the upper 32 bits of RIP cleared.
 * Try to work around it here.
 * Note we only handle faults in kernel here.
 * Does nothing on X86_32.
 */
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
	static int warned;
	if (address != regs->ip)
		return 0;
	if ((address >> 32) != 0)
		return 0;
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		if (!warned) {
			printk(errata93_warning);
			warned = 1;
		}
		regs->ip = address;
		return 1;
	}
#endif
	return 0;
}

/*
 * Work around K8 erratum #100: K8 in compat mode occasionally jumps to
 * illegal addresses >4GB.  We catch this in the page fault handler because
 * these addresses are not reachable.  Just detect this case and return.
 * Any code segment in LDT is compatibility mode.
 */
static int is_errata100(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
	    (address >> 32))
		return 1;
#endif
	return 0;
}

void do_invalid_op(struct pt_regs *, unsigned long);

static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
	unsigned long nr;
	/*
	 * Pentium F0 0F C7 C8 bug workaround.
	 */
	if (boot_cpu_data.f00f_bug) {
		nr = (address - idt_descr.address) >> 3;

		if (nr == 6) {
			do_invalid_op(regs, 0);
			return 1;
		}
	}
#endif
	return 0;
}

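/*
 * Print the "Unable to handle kernel ..." banner, the faulting RIP and
 * the page-table walk for the faulting address.
 */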
static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
			    unsigned long address)
{
	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request");
	printk(" at %016lx RIP: \n" KERN_ALERT, address);
	printk_address(regs->ip, 1);
	dump_pagetable(address);
}

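/*
 * A reserved bit was set in a page-table entry, i.e. the page tables are
 * corrupted.  Report the corruption and die with an oops.
 */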
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
				 unsigned long error_code)
{
	unsigned long flags = oops_begin();
	struct task_struct *tsk;

	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
	       current->comm, address);
	dump_pagetable(address);
	tsk = current;
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Bad pagetable", regs, error_code))
		regs = NULL;
	oops_end(flags, regs, SIGKILL);
}

/*
 * Handle a fault on the vmalloc area
 *
 * This assumes no large pages in there.
 */
static int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
	return 0;
#else
	pgd_t *pgd, *pgd_ref;
	pud_t *pud, *pud_ref;
	pmd_t *pmd, *pmd_ref;
	pte_t *pte, *pte_ref;

	/*
	 * Copy kernel mappings over when needed. This can also
	 * happen within a race in page table update. In the latter
	 * case just flush.
	 */
	pgd = pgd_offset(current->mm ?: &init_mm, address);
	pgd_ref = pgd_offset_k(address);
	if (pgd_none(*pgd_ref))
		return -1;
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);
	else
		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

	/*
	 * Below here mismatches are bugs because these lower tables
	 * are shared.
	 */
	pud = pud_offset(pgd, address);
	pud_ref = pud_offset(pgd_ref, address);
	if (pud_none(*pud_ref))
		return -1;
	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
		BUG();
	pmd = pmd_offset(pud, address);
	pmd_ref = pmd_offset(pud_ref, address);
	if (pmd_none(*pmd_ref))
		return -1;
	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
		BUG();
	pte_ref = pte_offset_kernel(pmd_ref, address);
	if (!pte_present(*pte_ref))
		return -1;
	pte = pte_offset_kernel(pmd, address);
	/*
	 * Don't use pte_page here, because the mappings can point
	 * outside mem_map, and the NUMA hash lookup cannot handle
	 * that.
	 */
	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
		BUG();
	return 0;
#endif
}

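/*
 * When non-zero, unhandled user-space faults are logged (rate-limited)
 * before SIGSEGV is delivered; see the bad_area_nosemaphore path in
 * do_page_fault().
 */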
int show_unhandled_signals = 1;

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
	int write, fault;
	unsigned long flags;
	int si_code;

	/*
	 * We can fault from pretty much anywhere, with unknown IRQ state.
	 */
	trace_hardirqs_fixup();

	tsk = current;
	mm = tsk->mm;
	prefetchw(&mm->mmap_sem);

	/* get the address */
	address = read_cr2();

	si_code = SEGV_MAPERR;

	if (notify_page_fault(regs))
		return;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 9) == 0.
	 */
	if (unlikely(address >= TASK_SIZE64)) {
		/*
		 * Don't check for the module range here: its PML4
		 * is always initialized because it's shared with the main
		 * kernel text. Only vmalloc may need PML4 syncups.
		 */
		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
		    ((address >= VMALLOC_START && address < VMALLOC_END))) {
			if (vmalloc_fault(address) >= 0)
				return;
		}
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}

	if (likely(regs->flags & X86_EFLAGS_IF))
		local_irq_enable();

	if (unlikely(error_code & PF_RSVD))
		pgtable_bad(address, regs, error_code);

	/*
	 * If we're in an interrupt, have no user context or are running in an
	 * atomic region then we must not take the fault.
	 */
	if (unlikely(in_atomic() || !mm))
		goto bad_area_nosemaphore;

	/*
	 * User-mode registers count as a user access even for any
	 * potential system fault or CPU buglet.
	 */
	if (user_mode_vm(regs))
		error_code |= PF_USER;

again:
	/* When running in the kernel we expect faults to occur only to
	 * addresses in user space. All other faults represent errors in the
	 * kernel and should generate an OOPS. Unfortunately, in the case of an
	 * erroneous fault occurring in a code path which already holds mmap_sem
	 * we will deadlock attempting to validate the fault against the
	 * address space. Luckily the kernel only validly references user
	 * space from well defined areas of code, which are listed in the
	 * exceptions table.
	 *
	 * As the vast majority of faults will be valid we will only perform
	 * the source reference check when there is a possibility of a deadlock.
	 * Attempt to lock the address space; if we cannot, we then validate the
	 * source. If this is invalid we can skip the address space check,
	 * thus avoiding the deadlock.
	 */
	if (!down_read_trylock(&mm->mmap_sem)) {
		if ((error_code & PF_USER) == 0 &&
		    !search_exception_tables(regs->ip))
			goto bad_area_nosemaphore;
		down_read(&mm->mmap_sem);
	}

	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (likely(vma->vm_start <= address))
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (error_code & PF_USER) {
		/*
		 * Accessing the stack below %sp is always a bug.
		 * The large cushion allows instructions like enter
		 * and pusha to work. ("enter $65535,$31" pushes
		 * 32 pointers and then decrements %sp by 65535.)
		 */
		if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
			goto bad_area;
	}
	if (expand_stack(vma, address))
		goto bad_area;
/*
 * Ok, we have a good vm_area for this memory access, so
 * we can handle it..
 */
good_area:
	si_code = SEGV_ACCERR;
	write = 0;
	switch (error_code & (PF_PROT|PF_WRITE)) {
	default:	/* 3: write, present */
		/* fall through */
	case PF_WRITE:		/* write, not present */
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
		write++;
		break;
	case PF_PROT:		/* read, present */
		goto bad_area;
	case 0:			/* read, not present */
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, write);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
	if (fault & VM_FAULT_MAJOR)
		tsk->maj_flt++;
	else
		tsk->min_flt++;

#ifdef CONFIG_X86_32
	/*
	 * Did it hit the DOS screen memory VA from vm86 mode?
	 */
	if (v8086_mode(regs)) {
		unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
		if (bit < 32)
			tsk->thread.screen_bitmap |= 1 << bit;
	}
#endif
	up_read(&mm->mmap_sem);
	return;

/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & PF_USER) {
		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		if (is_prefetch(regs, address, error_code))
			return;

		if (is_errata100(regs, address))
			return;

		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
		    printk_ratelimit()) {
			printk(
#ifdef CONFIG_X86_32
			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
#else
			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
#endif
			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			tsk->comm, task_pid_nr(tsk), address, regs->ip,
			regs->sp, error_code);
			print_vma_addr(" in ", regs->ip);
			printk("\n");
		}

		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;

		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
		return;
	}

	if (is_f00f_bug(regs, address))
		return;

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Hall of shame of CPU/BIOS bugs.
	 */

	if (is_prefetch(regs, address, error_code))
		return;

	if (is_errata93(regs, address))
		return;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	flags = oops_begin();

	show_fault_oops(regs, error_code, address);

	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Oops", regs, error_code))
		regs = NULL;
	/* Executive summary in case the body of the oops scrolled away */
	printk(KERN_EMERG "CR2: %016lx\n", address);
	oops_end(flags, regs, SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(current)) {
		yield();
		goto again;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (error_code & PF_USER)
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & PF_USER))
		goto no_context;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
	return;
}

DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);

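/*
 * Sync the vmalloc-area PGD entries of every page table on pgd_list with
 * the reference kernel page table (init_mm).
 */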
void vmalloc_sync_all(void)
{
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * if undone).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = VMALLOC_START & PGDIR_MASK;
	unsigned long address;

	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			const pgd_t *pgd_ref = pgd_offset_k(address);
			struct page *page;

			if (pgd_none(*pgd_ref))
				continue;
			spin_lock(&pgd_lock);
			list_for_each_entry(page, &pgd_list, lru) {
				pgd_t *pgd;
				pgd = (pgd_t *)page_address(page) + pgd_index(address);
				if (pgd_none(*pgd))
					set_pgd(pgd, *pgd_ref);
				else
					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
			}
			spin_unlock(&pgd_lock);
			set_bit(pgd_index(address), insync);
		}
		if (address == start)
			start = address + PGDIR_SIZE;
	}
	/* Check that there is no need to do the same for the modules area. */
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
		       (__START_KERNEL & PGDIR_MASK)));
}