]> bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - arch/i386/mm/fault.c
Fix lazy mode vmalloc synchronization for paravirt
[net-next-2.6.git] / arch / i386 / mm / fault.c
index b8c4e259fc8b7929e5ebeb1ea028c9d25a033646..fcb38e7f35434113896abc3caad887c9aad665d1 100644 (file)
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/tty.h>
 #include <linux/vt_kern.h>             /* For unblank_screen() */
 #include <linux/highmem.h>
+#include <linux/bootmem.h>             /* for max_low_pfn */
+#include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
+#include <linux/kdebug.h>
 
 #include <asm/system.h>
 #include <asm/desc.h>
-#include <asm/kdebug.h>
 #include <asm/segment.h>
 
 extern void die(const char *,struct pt_regs *,long);
@@ -248,9 +249,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
        pmd_k = pmd_offset(pud_k, address);
        if (!pmd_present(*pmd_k))
                return NULL;
-       if (!pmd_present(*pmd))
+       if (!pmd_present(*pmd)) {
                set_pmd(pmd, *pmd_k);
-       else
+               arch_flush_lazy_mmu_mode();
+       } else
                BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
        return pmd_k;
 }
@@ -282,6 +284,8 @@ static inline int vmalloc_fault(unsigned long address)
        return 0;
 }
 
+int show_unhandled_signals = 1;
+
 /*
  * This routine handles page faults.  It determines the address,
  * and the problem, and then passes it off to one of the appropriate
@@ -301,8 +305,8 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
        struct mm_struct *mm;
        struct vm_area_struct * vma;
        unsigned long address;
-       unsigned long page;
        int write, si_code;
+       int fault;
 
        /* get the address */
         address = read_cr2();
@@ -422,20 +426,18 @@ good_area:
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
-       switch (handle_mm_fault(mm, vma, address, write)) {
-               case VM_FAULT_MINOR:
-                       tsk->min_flt++;
-                       break;
-               case VM_FAULT_MAJOR:
-                       tsk->maj_flt++;
-                       break;
-               case VM_FAULT_SIGBUS:
-                       goto do_sigbus;
-               case VM_FAULT_OOM:
+       fault = handle_mm_fault(mm, vma, address, write);
+       if (unlikely(fault & VM_FAULT_ERROR)) {
+               if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
-               default:
-                       BUG();
+               else if (fault & VM_FAULT_SIGBUS)
+                       goto do_sigbus;
+               BUG();
        }
+       if (fault & VM_FAULT_MAJOR)
+               tsk->maj_flt++;
+       else
+               tsk->min_flt++;
 
        /*
         * Did it hit the DOS screen memory VA from vm86 mode?
@@ -458,6 +460,11 @@ bad_area:
 bad_area_nosemaphore:
        /* User mode accesses just cause a SIGSEGV */
        if (error_code & 4) {
+               /*
+                * It's possible to have interrupts off here.
+                */
+               local_irq_enable();
+
                /* 
                 * Valid to do another page fault here because this one came 
                 * from user space.
@@ -465,6 +472,14 @@ bad_area_nosemaphore:
                if (is_prefetch(regs, address, error_code))
                        return;
 
+               if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+                   printk_ratelimit()) {
+                       printk("%s%s[%d]: segfault at %08lx eip %08lx "
+                           "esp %08lx error %lx\n",
+                           tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
+                           tsk->comm, tsk->pid, address, regs->eip,
+                           regs->esp, error_code);
+               }
                tsk->thread.cr2 = address;
                /* Kernel addresses are always protection faults */
                tsk->thread.error_code = error_code | (address >= TASK_SIZE);
@@ -510,7 +525,9 @@ no_context:
        bust_spinlocks(1);
 
        if (oops_may_print()) {
-       #ifdef CONFIG_X86_PAE
+               __typeof__(pte_val(__pte(0))) page;
+
+#ifdef CONFIG_X86_PAE
                if (error_code & 16) {
                        pte_t *pte = lookup_address(address);
 
@@ -519,7 +536,7 @@ no_context:
                                        "NX-protected page - exploit attempt? "
                                        "(uid: %d)\n", current->uid);
                }
-       #endif
+#endif
                if (address < PAGE_SIZE)
                        printk(KERN_ALERT "BUG: unable to handle kernel NULL "
                                        "pointer dereference");
@@ -529,25 +546,38 @@ no_context:
                printk(" at virtual address %08lx\n",address);
                printk(KERN_ALERT " printing eip:\n");
                printk("%08lx\n", regs->eip);
-       }
-       page = read_cr3();
-       page = ((unsigned long *) __va(page))[address >> 22];
-       if (oops_may_print())
+
+               page = read_cr3();
+               page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+#ifdef CONFIG_X86_PAE
+               printk(KERN_ALERT "*pdpt = %016Lx\n", page);
+               if ((page >> PAGE_SHIFT) < max_low_pfn
+                   && page & _PAGE_PRESENT) {
+                       page &= PAGE_MASK;
+                       page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
+                                                                & (PTRS_PER_PMD - 1)];
+                       printk(KERN_ALERT "*pde = %016Lx\n", page);
+                       page &= ~_PAGE_NX;
+               }
+#else
                printk(KERN_ALERT "*pde = %08lx\n", page);
-       /*
-        * We must not directly access the pte in the highpte
-        * case, the page table might be allocated in highmem.
-        * And lets rather not kmap-atomic the pte, just in case
-        * it's allocated already.
-        */
-#ifndef CONFIG_HIGHPTE
-       if ((page & 1) && oops_may_print()) {
-               page &= PAGE_MASK;
-               address &= 0x003ff000;
-               page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
-               printk(KERN_ALERT "*pte = %08lx\n", page);
-       }
 #endif
+
+               /*
+                * We must not directly access the pte in the highpte
+                * case if the page table is located in highmem.
+                * And let's rather not kmap-atomic the pte, just in case
+                * it's allocated already.
+                */
+               if ((page >> PAGE_SHIFT) < max_low_pfn
+                   && (page & _PAGE_PRESENT)) {
+                       page &= PAGE_MASK;
+                       page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
+                                                                & (PTRS_PER_PTE - 1)];
+                       printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page);
+               }
+       }
+
        tsk->thread.cr2 = address;
        tsk->thread.trap_no = 14;
        tsk->thread.error_code = error_code;
@@ -588,7 +618,6 @@ do_sigbus:
        force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
 }
 
-#ifndef CONFIG_X86_PAE
 void vmalloc_sync_all(void)
 {
        /*
@@ -601,6 +630,9 @@ void vmalloc_sync_all(void)
        static unsigned long start = TASK_SIZE;
        unsigned long address;
 
+       if (SHARED_KERNEL_PMD)
+               return;
+
        BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
        for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
                if (!test_bit(pgd_index(address), insync)) {
@@ -623,4 +655,3 @@ void vmalloc_sync_all(void)
                        start = address + PGDIR_SIZE;
        }
 }
-#endif