]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/ppc/mm/fault.c
pid namespaces: define is_global_init() and is_container_init()
[net-next-2.6.git] / arch / ppc / mm / fault.c
CommitLineData
1da177e4 1/*
1da177e4
LT
2 * PowerPC version
3 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
4 *
5 * Derived from "arch/i386/mm/fault.c"
6 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
7 *
8 * Modified by Cort Dougan and Paul Mackerras.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
1da177e4
LT
16#include <linux/signal.h>
17#include <linux/sched.h>
18#include <linux/kernel.h>
19#include <linux/errno.h>
20#include <linux/string.h>
21#include <linux/types.h>
22#include <linux/ptrace.h>
23#include <linux/mman.h>
24#include <linux/mm.h>
25#include <linux/interrupt.h>
26#include <linux/highmem.h>
27#include <linux/module.h>
28
29#include <asm/page.h>
30#include <asm/pgtable.h>
31#include <asm/mmu.h>
32#include <asm/mmu_context.h>
33#include <asm/system.h>
34#include <asm/uaccess.h>
35#include <asm/tlbflush.h>
36
37#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
38extern void (*debugger)(struct pt_regs *);
39extern void (*debugger_fault_handler)(struct pt_regs *);
40extern int (*debugger_dabr_match)(struct pt_regs *);
41int debugger_kernel_faults = 1;
42#endif
43
44unsigned long htab_reloads; /* updated by hashtable.S:hash_page() */
45unsigned long htab_evicts; /* updated by hashtable.S:hash_page() */
46unsigned long htab_preloads; /* updated by hashtable.S:add_hash_page() */
47unsigned long pte_misses; /* updated by do_page_fault() */
48unsigned long pte_errors; /* updated by do_page_fault() */
49unsigned int probingmem;
50
51/*
52 * Check whether the instruction at regs->nip is a store using
53 * an update addressing form which will update r1.
54 */
55static int store_updates_sp(struct pt_regs *regs)
56{
57 unsigned int inst;
58
59 if (get_user(inst, (unsigned int __user *)regs->nip))
60 return 0;
61 /* check for 1 in the rA field */
62 if (((inst >> 16) & 0x1f) != 1)
63 return 0;
64 /* check major opcode */
65 switch (inst >> 26) {
66 case 37: /* stwu */
67 case 39: /* stbu */
68 case 45: /* sthu */
69 case 53: /* stfsu */
70 case 55: /* stfdu */
71 return 1;
72 case 31:
73 /* check minor opcode */
74 switch ((inst >> 1) & 0x3ff) {
75 case 183: /* stwux */
76 case 247: /* stbux */
77 case 439: /* sthux */
78 case 695: /* stfsux */
79 case 759: /* stfdux */
80 return 1;
81 }
82 }
83 return 0;
84}
85
86/*
87 * For 600- and 800-family processors, the error_code parameter is DSISR
88 * for a data fault, SRR1 for an instruction fault. For 400-family processors
89 * the error_code parameter is ESR for a data fault, 0 for an instruction
90 * fault.
91 */
92int do_page_fault(struct pt_regs *regs, unsigned long address,
93 unsigned long error_code)
94{
95 struct vm_area_struct * vma;
96 struct mm_struct *mm = current->mm;
97 siginfo_t info;
98 int code = SEGV_MAPERR;
83c54070 99 int fault;
1da177e4
LT
100#if defined(CONFIG_4xx) || defined (CONFIG_BOOKE)
101 int is_write = error_code & ESR_DST;
102#else
103 int is_write = 0;
104
105 /*
106 * Fortunately the bit assignments in SRR1 for an instruction
107 * fault and DSISR for a data fault are mostly the same for the
108 * bits we are interested in. But there are some bits which
109 * indicate errors in DSISR but can validly be set in SRR1.
110 */
111 if (TRAP(regs) == 0x400)
112 error_code &= 0x48200000;
113 else
114 is_write = error_code & 0x02000000;
115#endif /* CONFIG_4xx || CONFIG_BOOKE */
116
117#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
118 if (debugger_fault_handler && TRAP(regs) == 0x300) {
119 debugger_fault_handler(regs);
120 return 0;
121 }
122#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
123 if (error_code & 0x00400000) {
124 /* DABR match */
125 if (debugger_dabr_match(regs))
126 return 0;
127 }
128#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/
129#endif /* CONFIG_XMON || CONFIG_KGDB */
130
131 if (in_atomic() || mm == NULL)
132 return SIGSEGV;
133
134 down_read(&mm->mmap_sem);
135 vma = find_vma(mm, address);
136 if (!vma)
137 goto bad_area;
138 if (vma->vm_start <= address)
139 goto good_area;
140 if (!(vma->vm_flags & VM_GROWSDOWN))
141 goto bad_area;
142 if (!is_write)
143 goto bad_area;
144
145 /*
146 * N.B. The rs6000/xcoff ABI allows programs to access up to
147 * a few hundred bytes below the stack pointer.
148 * The kernel signal delivery code writes up to about 1.5kB
149 * below the stack pointer (r1) before decrementing it.
150 * The exec code can write slightly over 640kB to the stack
151 * before setting the user r1. Thus we allow the stack to
152 * expand to 1MB without further checks.
153 */
154 if (address + 0x100000 < vma->vm_end) {
155 /* get user regs even if this fault is in kernel mode */
156 struct pt_regs *uregs = current->thread.regs;
157 if (uregs == NULL)
158 goto bad_area;
159
160 /*
161 * A user-mode access to an address a long way below
162 * the stack pointer is only valid if the instruction
163 * is one which would update the stack pointer to the
164 * address accessed if the instruction completed,
165 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
166 * (or the byte, halfword, float or double forms).
167 *
168 * If we don't check this then any write to the area
169 * between the last mapped region and the stack will
170 * expand the stack rather than segfaulting.
171 */
172 if (address + 2048 < uregs->gpr[1]
173 && (!user_mode(regs) || !store_updates_sp(regs)))
174 goto bad_area;
175 }
176 if (expand_stack(vma, address))
177 goto bad_area;
178
179good_area:
180 code = SEGV_ACCERR;
181#if defined(CONFIG_6xx)
182 if (error_code & 0x95700000)
183 /* an error such as lwarx to I/O controller space,
184 address matching DABR, eciwx, etc. */
185 goto bad_area;
186#endif /* CONFIG_6xx */
187#if defined(CONFIG_8xx)
188 /* The MPC8xx seems to always set 0x80000000, which is
189 * "undefined". Of those that can be set, this is the only
190 * one which seems bad.
191 */
192 if (error_code & 0x10000000)
193 /* Guarded storage error. */
194 goto bad_area;
195#endif /* CONFIG_8xx */
196
197 /* a write */
198 if (is_write) {
199 if (!(vma->vm_flags & VM_WRITE))
200 goto bad_area;
201#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
202 /* an exec - 4xx/Book-E allows for per-page execute permission */
203 } else if (TRAP(regs) == 0x400) {
204 pte_t *ptep;
bab70a4a 205 pmd_t *pmdp;
1da177e4
LT
206
207#if 0
208 /* It would be nice to actually enforce the VM execute
209 permission on CPUs which can do so, but far too
210 much stuff in userspace doesn't get the permissions
211 right, so we let any page be executed for now. */
212 if (! (vma->vm_flags & VM_EXEC))
213 goto bad_area;
214#endif
215
216 /* Since 4xx/Book-E supports per-page execute permission,
217 * we lazily flush dcache to icache. */
218 ptep = NULL;
bab70a4a
ES
219 if (get_pteptr(mm, address, &ptep, &pmdp)) {
220 spinlock_t *ptl = pte_lockptr(mm, pmdp);
221 spin_lock(ptl);
222 if (pte_present(*ptep)) {
223 struct page *page = pte_page(*ptep);
1da177e4 224
bab70a4a
ES
225 if (!test_bit(PG_arch_1, &page->flags)) {
226 flush_dcache_icache_page(page);
227 set_bit(PG_arch_1, &page->flags);
228 }
229 pte_update(ptep, 0, _PAGE_HWEXEC);
230 _tlbie(address);
231 pte_unmap_unlock(ptep, ptl);
232 up_read(&mm->mmap_sem);
233 return 0;
1da177e4 234 }
bab70a4a 235 pte_unmap_unlock(ptep, ptl);
1da177e4 236 }
1da177e4
LT
237#endif
238 /* a read */
239 } else {
240 /* protection fault */
241 if (error_code & 0x08000000)
242 goto bad_area;
df67b3da 243 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
1da177e4
LT
244 goto bad_area;
245 }
246
247 /*
248 * If for any reason at all we couldn't handle the fault,
249 * make sure we exit gracefully rather than endlessly redo
250 * the fault.
251 */
252 survive:
83c54070
NP
253 fault = handle_mm_fault(mm, vma, address, is_write);
254 if (unlikely(fault & VM_FAULT_ERROR)) {
255 if (fault & VM_FAULT_OOM)
256 goto out_of_memory;
257 else if (fault & VM_FAULT_SIGBUS)
258 goto do_sigbus;
1da177e4
LT
259 BUG();
260 }
83c54070
NP
261 if (fault & VM_FAULT_MAJOR)
262 current->maj_flt++;
263 else
264 current->min_flt++;
1da177e4
LT
265
266 up_read(&mm->mmap_sem);
267 /*
268 * keep track of tlb+htab misses that are good addrs but
269 * just need pte's created via handle_mm_fault()
270 * -- Cort
271 */
272 pte_misses++;
273 return 0;
274
275bad_area:
276 up_read(&mm->mmap_sem);
277 pte_errors++;
278
279 /* User mode accesses cause a SIGSEGV */
280 if (user_mode(regs)) {
bb0bb3b6 281 _exception(SIGSEGV, regs, code, address);
1da177e4
LT
282 return 0;
283 }
284
285 return SIGSEGV;
286
287/*
288 * We ran out of memory, or some other thing happened to us that made
289 * us unable to handle the page fault gracefully.
290 */
291out_of_memory:
292 up_read(&mm->mmap_sem);
b460cbc5 293 if (is_global_init(current)) {
1da177e4
LT
294 yield();
295 down_read(&mm->mmap_sem);
296 goto survive;
297 }
298 printk("VM: killing process %s\n", current->comm);
299 if (user_mode(regs))
dcca2bde 300 do_group_exit(SIGKILL);
1da177e4
LT
301 return SIGKILL;
302
303do_sigbus:
304 up_read(&mm->mmap_sem);
305 info.si_signo = SIGBUS;
306 info.si_errno = 0;
307 info.si_code = BUS_ADRERR;
308 info.si_addr = (void __user *)address;
309 force_sig_info (SIGBUS, &info, current);
310 if (!user_mode(regs))
311 return SIGBUS;
312 return 0;
313}
314
315/*
316 * bad_page_fault is called when we have a bad access from the kernel.
317 * It is called from the DSI and ISI handlers in head.S and from some
318 * of the procedures in traps.c.
319 */
320void
321bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
322{
323 const struct exception_table_entry *entry;
324
325 /* Are we prepared to handle this fault? */
326 if ((entry = search_exception_tables(regs->nip)) != NULL) {
327 regs->nip = entry->fixup;
328 return;
329 }
330
331 /* kernel has accessed a bad area */
332#if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
333 if (debugger_kernel_faults)
334 debugger(regs);
335#endif
336 die("kernel access of bad area", regs, sig);
337}
338
339#ifdef CONFIG_8xx
340
341/* The pgtable.h claims some functions generically exist, but I
342 * can't find them......
343 */
344pte_t *va_to_pte(unsigned long address)
345{
346 pgd_t *dir;
347 pmd_t *pmd;
348 pte_t *pte;
349
350 if (address < TASK_SIZE)
351 return NULL;
352
353 dir = pgd_offset(&init_mm, address);
354 if (dir) {
355 pmd = pmd_offset(dir, address & PAGE_MASK);
356 if (pmd && pmd_present(*pmd)) {
357 pte = pte_offset_kernel(pmd, address & PAGE_MASK);
358 if (pte && pte_present(*pte))
359 return(pte);
360 }
361 }
362 return NULL;
363}
364
365unsigned long va_to_phys(unsigned long address)
366{
367 pte_t *pte;
368
369 pte = va_to_pte(address);
370 if (pte)
371 return(((unsigned long)(pte_val(*pte)) & PAGE_MASK) | (address & ~(PAGE_MASK)));
372 return (0);
373}
374
375void
376print_8xx_pte(struct mm_struct *mm, unsigned long addr)
377{
378 pgd_t * pgd;
379 pmd_t * pmd;
380 pte_t * pte;
381
382 printk(" pte @ 0x%8lx: ", addr);
383 pgd = pgd_offset(mm, addr & PAGE_MASK);
384 if (pgd) {
385 pmd = pmd_offset(pgd, addr & PAGE_MASK);
386 if (pmd && pmd_present(*pmd)) {
387 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
388 if (pte) {
389 printk(" (0x%08lx)->(0x%08lx)->0x%08lx\n",
390 (long)pgd, (long)pte, (long)pte_val(*pte));
391#define pp ((long)pte_val(*pte))
392 printk(" RPN: %05lx PP: %lx SPS: %lx SH: %lx "
393 "CI: %lx v: %lx\n",
394 pp>>12, /* rpn */
395 (pp>>10)&3, /* pp */
396 (pp>>3)&1, /* small */
397 (pp>>2)&1, /* shared */
398 (pp>>1)&1, /* cache inhibit */
399 pp&1 /* valid */
400 );
401#undef pp
402 }
403 else {
404 printk("no pte\n");
405 }
406 }
407 else {
408 printk("no pmd\n");
409 }
410 }
411 else {
412 printk("no pgd\n");
413 }
414}
415
416int
417get_8xx_pte(struct mm_struct *mm, unsigned long addr)
418{
419 pgd_t * pgd;
420 pmd_t * pmd;
421 pte_t * pte;
422 int retval = 0;
423
424 pgd = pgd_offset(mm, addr & PAGE_MASK);
425 if (pgd) {
426 pmd = pmd_offset(pgd, addr & PAGE_MASK);
427 if (pmd && pmd_present(*pmd)) {
428 pte = pte_offset_kernel(pmd, addr & PAGE_MASK);
429 if (pte) {
430 retval = (int)pte_val(*pte);
431 }
432 }
433 }
434 return(retval);
435}
436#endif /* CONFIG_8xx */