/*
 * Copyright (C) 1995 Linus Torvalds
 * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/tty.h>
#include <linux/vt_kern.h>		/* For unblank_screen() */
#include <linux/compiler.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>

#include <asm/system.h>
#include <asm/pgalloc.h>
#include <asm/smp.h>
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm-generic/sections.h>

/*
 * Page fault error code bits
 *	bit 0 == 0 means no page found, 1 means protection fault
 *	bit 1 == 0 means read, 1 means write
 *	bit 2 == 0 means kernel, 1 means user-mode
 *	bit 3 == 1 means use of reserved bit detected
 *	bit 4 == 1 means fault was an instruction fetch
 */
#define PF_PROT		(1<<0)
#define PF_WRITE	(1<<1)
#define PF_USER		(1<<2)
#define PF_RSVD		(1<<3)
#define PF_INSTR	(1<<4)

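/*
 * Give any registered kprobes a chance to handle the fault before the
 * normal page-fault path runs; returns non-zero if a kprobe handled it.
 */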
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
	int ret = 0;

	/* kprobe_running() needs smp_processor_id() */
	if (!user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, 14))
			ret = 1;
		preempt_enable();
	}

	return ret;
#else
	return 0;
#endif
}

/*
 * X86_32
 * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * X86_64
 * Sometimes the CPU reports invalid exceptions on prefetch.
 * Check that here and ignore it.
 *
 * Opcode checker based on code by Richard Brunner
 */
static int is_prefetch(struct pt_regs *regs, unsigned long addr,
		       unsigned long error_code)
{
	unsigned char *instr;
	int scan_more = 1;
	int prefetch = 0;
	unsigned char *max_instr;

#ifdef CONFIG_X86_32
	if (unlikely(boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
		     boot_cpu_data.x86 >= 6)) {
		/* Catch an obscure case of prefetch inside an NX page. */
		if (nx_enabled && (error_code & PF_INSTR))
			return 0;
	} else {
		return 0;
	}
#else
	/* If it was an exec fault, ignore it */
	if (error_code & PF_INSTR)
		return 0;
#endif

	instr = (unsigned char *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

	if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
		return 0;

	while (scan_more && instr < max_instr) {
		unsigned char opcode;
		unsigned char instr_hi;
		unsigned char instr_lo;

		if (probe_kernel_address(instr, opcode))
			break;

		instr_hi = opcode & 0xf0;
		instr_lo = opcode & 0x0f;
		instr++;

		switch (instr_hi) {
		case 0x20:
		case 0x30:
			/*
			 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
			 * In X86_64 long mode, the CPU will signal invalid
			 * opcode if some of these prefixes are present so
			 * X86_64 will never get here anyway
			 */
			scan_more = ((instr_lo & 7) == 0x6);
			break;
#ifdef CONFIG_X86_64
		case 0x40:
			/*
			 * In AMD64 long mode 0x40..0x4F are valid REX prefixes
			 * Need to figure out under what instruction mode the
			 * instruction was issued. Could check the LDT for lm,
			 * but for now it's good enough to assume that long
			 * mode only uses well known segments or kernel.
			 */
			scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
			break;
#endif
		case 0x60:
			/* 0x64 thru 0x67 are valid prefixes in all modes. */
			scan_more = (instr_lo & 0xC) == 0x4;
			break;
		case 0xF0:
			/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
			scan_more = !instr_lo || (instr_lo>>1) == 1;
			break;
		case 0x00:
			/* Prefetch instruction is 0x0F0D or 0x0F18 */
			scan_more = 0;

			if (probe_kernel_address(instr, opcode))
				break;
			prefetch = (instr_lo == 0xF) &&
				(opcode == 0x0D || opcode == 0x18);
			break;
		default:
			scan_more = 0;
			break;
		}
	}
	return prefetch;
}

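/*
 * Fill in a siginfo for the faulting address and deliver the signal to the
 * given task.
 */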
static void force_sig_info_fault(int si_signo, int si_code,
				 unsigned long address, struct task_struct *tsk)
{
	siginfo_t info;

	info.si_signo = si_signo;
	info.si_errno = 0;
	info.si_code = si_code;
	info.si_addr = (void __user *)address;
	force_sig_info(si_signo, &info, tsk);
}

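/* Returns non-zero if the page-table entry at @p cannot be read safely. */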
static int bad_address(void *p)
{
	unsigned long dummy;
	return probe_kernel_address((unsigned long *)p, dummy);
}

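/*
 * Walk the page tables for @address and print each level (PGD, PUD, PMD,
 * PTE), stopping early at a missing or unreadable entry or at a large PMD.
 */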
void dump_pagetable(unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = (pgd_t *)read_cr3();

	pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
	pgd += pgd_index(address);
	if (bad_address(pgd)) goto bad;
	printk("PGD %lx ", pgd_val(*pgd));
	if (!pgd_present(*pgd)) goto ret;

	pud = pud_offset(pgd, address);
	if (bad_address(pud)) goto bad;
	printk("PUD %lx ", pud_val(*pud));
	if (!pud_present(*pud)) goto ret;

	pmd = pmd_offset(pud, address);
	if (bad_address(pmd)) goto bad;
	printk("PMD %lx ", pmd_val(*pmd));
	if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;

	pte = pte_offset_kernel(pmd, address);
	if (bad_address(pte)) goto bad;
	printk("PTE %lx", pte_val(*pte));
ret:
	printk("\n");
	return;
bad:
	printk("BAD\n");
}

#ifdef CONFIG_X86_64
static const char errata93_warning[] =
KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
KERN_ERR "******* Please consider a BIOS update.\n"
KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
#endif

/* Workaround for K8 erratum #93 & buggy BIOS.
   BIOS SMM functions are required to use a specific workaround
   to avoid corruption of the 64bit RIP register on C stepping K8.
   A lot of BIOSes that didn't get tested properly miss this.
   The OS sees this as a page fault with the upper 32bits of RIP cleared.
   Try to work around it here.
   Note that we only handle kernel faults here.
   Does nothing on X86_32.
 */
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
	static int warned;
	if (address != regs->ip)
		return 0;
	if ((address >> 32) != 0)
		return 0;
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		if (!warned) {
			printk(errata93_warning);
			warned = 1;
		}
		regs->ip = address;
		return 1;
	}
#endif
	return 0;
}

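/*
 * A reserved-bit fault means the page tables themselves are corrupted:
 * report the address, dump the page-table walk and oops, killing the task.
 */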
static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
				 unsigned long error_code)
{
	unsigned long flags = oops_begin();
	struct task_struct *tsk;

	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
	       current->comm, address);
	dump_pagetable(address);
	tsk = current;
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Bad pagetable", regs, error_code))
		regs = NULL;
	oops_end(flags, regs, SIGKILL);
}

/*
 * Handle a fault on the vmalloc area
 *
 * This assumes no large pages in there.
 */
static int vmalloc_fault(unsigned long address)
{
#ifdef CONFIG_X86_32
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;
	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;
	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;
	return 0;
#else
	pgd_t *pgd, *pgd_ref;
	pud_t *pud, *pud_ref;
	pmd_t *pmd, *pmd_ref;
	pte_t *pte, *pte_ref;

	/* Copy kernel mappings over when needed. This can also
	   happen within a race in page table update. In the latter
	   case just flush. */

	pgd = pgd_offset(current->mm ?: &init_mm, address);
	pgd_ref = pgd_offset_k(address);
	if (pgd_none(*pgd_ref))
		return -1;
	if (pgd_none(*pgd))
		set_pgd(pgd, *pgd_ref);
	else
		BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));

	/* Below here mismatches are bugs because these lower tables
	   are shared */

	pud = pud_offset(pgd, address);
	pud_ref = pud_offset(pgd_ref, address);
	if (pud_none(*pud_ref))
		return -1;
	if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
		BUG();
	pmd = pmd_offset(pud, address);
	pmd_ref = pmd_offset(pud_ref, address);
	if (pmd_none(*pmd_ref))
		return -1;
	if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
		BUG();
	pte_ref = pte_offset_kernel(pmd_ref, address);
	if (!pte_present(*pte_ref))
		return -1;
	pte = pte_offset_kernel(pmd, address);
	/* Don't use pte_page here, because the mappings can point
	   outside mem_map, and the NUMA hash lookup cannot handle
	   that. */
	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
		BUG();
	return 0;
#endif
}

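/* Non-zero means unhandled user-space faults are logged (rate-limited below). */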
int show_unhandled_signals = 1;

/*
 * This routine handles page faults. It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long error_code)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	unsigned long address;
	int write, fault;
	unsigned long flags;
	int si_code;

	/*
	 * We can fault from pretty much anywhere, with unknown IRQ state.
	 */
	trace_hardirqs_fixup();

	tsk = current;
	mm = tsk->mm;
	prefetchw(&mm->mmap_sem);

	/* get the address */
	address = read_cr2();

	si_code = SEGV_MAPERR;

	if (notify_page_fault(regs))
		return;

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 *
	 * This verifies that the fault happens in kernel space
	 * (error_code & 4) == 0, and that the fault was not a
	 * protection error (error_code & 9) == 0.
	 */
	if (unlikely(address >= TASK_SIZE64)) {
		/*
		 * Don't check for the module range here: its PML4
		 * is always initialized because it's shared with the main
		 * kernel text. Only vmalloc may need PML4 syncups.
		 */
		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
		    ((address >= VMALLOC_START && address < VMALLOC_END))) {
			if (vmalloc_fault(address) >= 0)
				return;
		}
		/*
		 * Don't take the mm semaphore here. If we fixup a prefetch
		 * fault we could otherwise deadlock.
		 */
		goto bad_area_nosemaphore;
	}

65ea5b03 416 if (likely(regs->flags & X86_EFLAGS_IF))
8c914cb7
JB
417 local_irq_enable();
418
66c58156 419 if (unlikely(error_code & PF_RSVD))
1da177e4
LT
420 pgtable_bad(address, regs, error_code);
421
422 /*
33cb5243
HH
423 * If we're in an interrupt, have no user context or are running in an
424 * atomic region then we must not take the fault.
1da177e4
LT
425 */
426 if (unlikely(in_atomic() || !mm))
427 goto bad_area_nosemaphore;
428
dbe3ed1c
LT
429 /*
430 * User-mode registers count as a user access even for any
431 * potential system fault or CPU buglet.
432 */
433 if (user_mode_vm(regs))
434 error_code |= PF_USER;
435
1da177e4
LT
436 again:
437 /* When running in the kernel we expect faults to occur only to
438 * addresses in user space. All other faults represent errors in the
676b1855 439 * kernel and should generate an OOPS. Unfortunately, in the case of an
80f7228b 440 * erroneous fault occurring in a code path which already holds mmap_sem
1da177e4
LT
441 * we will deadlock attempting to validate the fault against the
442 * address space. Luckily the kernel only validly references user
443 * space from well defined areas of code, which are listed in the
444 * exceptions table.
445 *
446 * As the vast majority of faults will be valid we will only perform
676b1855 447 * the source reference check when there is a possibility of a deadlock.
1da177e4
LT
448 * Attempt to lock the address space, if we cannot we then validate the
449 * source. If this is invalid we can skip the address space check,
450 * thus avoiding the deadlock.
451 */
452 if (!down_read_trylock(&mm->mmap_sem)) {
66c58156 453 if ((error_code & PF_USER) == 0 &&
65ea5b03 454 !search_exception_tables(regs->ip))
1da177e4
LT
455 goto bad_area_nosemaphore;
456 down_read(&mm->mmap_sem);
457 }
458
459 vma = find_vma(mm, address);
460 if (!vma)
461 goto bad_area;
462 if (likely(vma->vm_start <= address))
463 goto good_area;
464 if (!(vma->vm_flags & VM_GROWSDOWN))
465 goto bad_area;
33cb5243 466 if (error_code & PF_USER) {
6f4d368e
HH
467 /*
468 * Accessing the stack below %sp is always a bug.
469 * The large cushion allows instructions like enter
470 * and pusha to work. ("enter $65535,$31" pushes
471 * 32 pointers and then decrements %sp by 65535.)
03fdc2c2 472 */
65ea5b03 473 if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
1da177e4
LT
474 goto bad_area;
475 }
476 if (expand_stack(vma, address))
477 goto bad_area;
478/*
479 * Ok, we have a good vm_area for this memory access, so
480 * we can handle it..
481 */
482good_area:
c4aba4a8 483 si_code = SEGV_ACCERR;
1da177e4 484 write = 0;
66c58156 485 switch (error_code & (PF_PROT|PF_WRITE)) {
33cb5243
HH
486 default: /* 3: write, present */
487 /* fall through */
488 case PF_WRITE: /* write, not present */
489 if (!(vma->vm_flags & VM_WRITE))
490 goto bad_area;
491 write++;
492 break;
493 case PF_PROT: /* read, present */
494 goto bad_area;
495 case 0: /* read, not present */
496 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
1da177e4 497 goto bad_area;
1da177e4
LT
498 }
499
500 /*
501 * If for any reason at all we couldn't handle the fault,
502 * make sure we exit gracefully rather than endlessly redo
503 * the fault.
504 */
83c54070
NP
505 fault = handle_mm_fault(mm, vma, address, write);
506 if (unlikely(fault & VM_FAULT_ERROR)) {
507 if (fault & VM_FAULT_OOM)
508 goto out_of_memory;
509 else if (fault & VM_FAULT_SIGBUS)
510 goto do_sigbus;
511 BUG();
1da177e4 512 }
83c54070
NP
513 if (fault & VM_FAULT_MAJOR)
514 tsk->maj_flt++;
515 else
516 tsk->min_flt++;
1da177e4
LT
517 up_read(&mm->mmap_sem);
518 return;
519
/*
 * Something tried to access memory that isn't in our memory map..
 * Fix it, but check if it's kernel or user first..
 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	/* User mode accesses just cause a SIGSEGV */
	if (error_code & PF_USER) {

		/*
		 * It's possible to have interrupts off here.
		 */
		local_irq_enable();

		if (is_prefetch(regs, address, error_code))
			return;

		/* Work around K8 erratum #100: K8 in compat mode
		   occasionally jumps to illegal addresses >4GB. We
		   catch this here in the page fault handler because
		   these addresses are not reachable. Just detect this
		   case and return. Any code segment in LDT is
		   compatibility mode. */
		if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
		    (address >> 32))
			return;

		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
		    printk_ratelimit()) {
			printk(
#ifdef CONFIG_X86_32
			"%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
#else
			"%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx\n",
#endif
			task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
			tsk->comm, task_pid_nr(tsk), address, regs->ip,
			regs->sp, error_code);
		}

		tsk->thread.cr2 = address;
		/* Kernel addresses are always protection faults */
		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
		tsk->thread.trap_no = 14;

		force_sig_info_fault(SIGSEGV, si_code, address, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	/*
	 * Hall of shame of CPU/BIOS bugs.
	 */

	if (is_prefetch(regs, address, error_code))
		return;

	if (is_errata93(regs, address))
		return;

/*
 * Oops. The kernel tried to access some bad page. We'll have to
 * terminate things with extreme prejudice.
 */

	flags = oops_begin();

	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
	else
		printk(KERN_ALERT "Unable to handle kernel paging request");
	printk(" at %016lx RIP: \n" KERN_ALERT, address);
	printk_address(regs->ip, regs->bp);
	dump_pagetable(address);
	tsk->thread.cr2 = address;
	tsk->thread.trap_no = 14;
	tsk->thread.error_code = error_code;
	if (__die("Oops", regs, error_code))
		regs = NULL;
	/* Executive summary in case the body of the oops scrolled away */
	printk(KERN_EMERG "CR2: %016lx\n", address);
	oops_end(flags, regs, SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (is_global_init(current)) {
		yield();
		goto again;
	}
	printk("VM: killing process %s\n", tsk->comm);
	if (error_code & PF_USER)
		do_group_exit(SIGKILL);
	goto no_context;

do_sigbus:
	up_read(&mm->mmap_sem);

	/* Kernel mode? Handle exceptions or die */
	if (!(error_code & PF_USER))
		goto no_context;

	tsk->thread.cr2 = address;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = 14;
	force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
	return;
}

DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);

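/*
 * Propagate the vmalloc-area PGD entries of the reference page table
 * (init_mm) into every pgd on pgd_list.
 */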
void vmalloc_sync_all(void)
{
	/*
	 * Note that races in the updates of insync and start aren't
	 * problematic: insync can only get set bits added, and updates to
	 * start are only improving performance (without affecting correctness
	 * if undone).
	 */
	static DECLARE_BITMAP(insync, PTRS_PER_PGD);
	static unsigned long start = VMALLOC_START & PGDIR_MASK;
	unsigned long address;

	for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
		if (!test_bit(pgd_index(address), insync)) {
			const pgd_t *pgd_ref = pgd_offset_k(address);
			struct page *page;

			if (pgd_none(*pgd_ref))
				continue;
			spin_lock(&pgd_lock);
			list_for_each_entry(page, &pgd_list, lru) {
				pgd_t *pgd;
				pgd = (pgd_t *)page_address(page) + pgd_index(address);
				if (pgd_none(*pgd))
					set_pgd(pgd, *pgd_ref);
				else
					BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
			}
			spin_unlock(&pgd_lock);
			set_bit(pgd_index(address), insync);
		}
		if (address == start)
			start = address + PGDIR_SIZE;
	}
	/* Check that there is no need to do the same for the modules area. */
	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
		       (__START_KERNEL & PGDIR_MASK)));
}