]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/x86_64/ia32/ia32_aout.c
[PATCH] Avoiding mmap fragmentation
[net-next-2.6.git] / arch / x86_64 / ia32 / ia32_aout.c
CommitLineData
1da177e4
LT
/*
 *  a.out loader for x86-64
 *
 *  Copyright (C) 1991, 1992, 1996  Linus Torvalds
 *  Hacked together by Andi Kleen
 */
7
8#include <linux/module.h>
9
10#include <linux/time.h>
11#include <linux/kernel.h>
12#include <linux/mm.h>
13#include <linux/mman.h>
14#include <linux/a.out.h>
15#include <linux/errno.h>
16#include <linux/signal.h>
17#include <linux/string.h>
18#include <linux/fs.h>
19#include <linux/file.h>
20#include <linux/stat.h>
21#include <linux/fcntl.h>
22#include <linux/ptrace.h>
23#include <linux/user.h>
24#include <linux/slab.h>
25#include <linux/binfmts.h>
26#include <linux/personality.h>
27#include <linux/init.h>
28
29#include <asm/system.h>
30#include <asm/uaccess.h>
31#include <asm/pgalloc.h>
32#include <asm/cacheflush.h>
33#include <asm/user32.h>
34#include <asm/ia32.h>
35
36#undef WARN_OLD
37#undef CORE_DUMP /* probably broken */
38
/* Declared here, defined outside this file; called by load_aout_binary(). */
extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
				unsigned long stack_top,
				int exec_stack);

/* Forward declarations: both loaders are referenced by aout_format. */
static int load_aout_library(struct file *);
static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
44
45#if CORE_DUMP
46static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file);
47
48/*
49 * fill in the user structure for a core dump..
50 */
51static void dump_thread32(struct pt_regs * regs, struct user32 * dump)
52{
53 u32 fs,gs;
54
55/* changed the size calculations - should hopefully work better. lbt */
56 dump->magic = CMAGIC;
57 dump->start_code = 0;
58 dump->start_stack = regs->rsp & ~(PAGE_SIZE - 1);
59 dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
60 dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
61 dump->u_dsize -= dump->u_tsize;
62 dump->u_ssize = 0;
63 dump->u_debugreg[0] = current->thread.debugreg0;
64 dump->u_debugreg[1] = current->thread.debugreg1;
65 dump->u_debugreg[2] = current->thread.debugreg2;
66 dump->u_debugreg[3] = current->thread.debugreg3;
67 dump->u_debugreg[4] = 0;
68 dump->u_debugreg[5] = 0;
69 dump->u_debugreg[6] = current->thread.debugreg6;
70 dump->u_debugreg[7] = current->thread.debugreg7;
71
72 if (dump->start_stack < 0xc0000000)
73 dump->u_ssize = ((unsigned long) (0xc0000000 - dump->start_stack)) >> PAGE_SHIFT;
74
75 dump->regs.ebx = regs->rbx;
76 dump->regs.ecx = regs->rcx;
77 dump->regs.edx = regs->rdx;
78 dump->regs.esi = regs->rsi;
79 dump->regs.edi = regs->rdi;
80 dump->regs.ebp = regs->rbp;
81 dump->regs.eax = regs->rax;
82 dump->regs.ds = current->thread.ds;
83 dump->regs.es = current->thread.es;
84 asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs;
85 asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs;
86 dump->regs.orig_eax = regs->orig_rax;
87 dump->regs.eip = regs->rip;
88 dump->regs.cs = regs->cs;
89 dump->regs.eflags = regs->eflags;
90 dump->regs.esp = regs->rsp;
91 dump->regs.ss = regs->ss;
92
93#if 1 /* FIXME */
94 dump->u_fpvalid = 0;
95#else
96 dump->u_fpvalid = dump_fpu (regs, &dump->i387);
97#endif
98}
99
100#endif
101
102static struct linux_binfmt aout_format = {
103 .module = THIS_MODULE,
104 .load_binary = load_aout_binary,
105 .load_shlib = load_aout_library,
106#if CORE_DUMP
107 .core_dump = aout_core_dump,
108#endif
109 .min_coredump = PAGE_SIZE
110};
111
112static void set_brk(unsigned long start, unsigned long end)
113{
114 start = PAGE_ALIGN(start);
115 end = PAGE_ALIGN(end);
116 if (end <= start)
117 return;
118 down_write(&current->mm->mmap_sem);
119 do_brk(start, end - start);
120 up_write(&current->mm->mmap_sem);
121}
122
123#if CORE_DUMP
124/*
125 * These are the only things you should do on a core-file: use only these
126 * macros to write out all the necessary info.
127 */
128
129static int dump_write(struct file *file, const void *addr, int nr)
130{
131 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
132}
133
/*
 * Both macros expect a `struct file *file' variable and an `end_coredump'
 * label in the calling function.  They are wrapped in do { } while (0) so
 * that each use is a single statement and cannot capture a following
 * `else' of the caller.
 */

/* Write nr bytes at addr; bail out to end_coredump on a short/failed write. */
#define DUMP_WRITE(addr, nr)						\
	do {								\
		if (!dump_write(file, (void *)(addr), (nr)))		\
			goto end_coredump;				\
	} while (0)

/*
 * Position the core file at offset: through ->llseek() when the file
 * provides one (bailing out to end_coredump if the resulting position
 * differs), otherwise by setting f_pos directly.
 */
#define DUMP_SEEK(offset)						\
	do {								\
		if (file->f_op->llseek) {				\
			if (file->f_op->llseek(file, (offset), 0) != (offset)) \
				goto end_coredump;			\
		} else {						\
			file->f_pos = (offset);				\
		}							\
	} while (0)
143
144/*
145 * Routine writes a core dump image in the current directory.
146 * Currently only a stub-function.
147 *
148 * Note that setuid/setgid files won't make a core-dump if the uid/gid
149 * changed due to the set[u|g]id. It's enforced by the "current->mm->dumpable"
150 * field, which also makes sure the core-dumps won't be recursive if the
151 * dumping of the process results in another error..
152 */
153
154static int aout_core_dump(long signr, struct pt_regs * regs, struct file *file)
155{
156 mm_segment_t fs;
157 int has_dumped = 0;
158 unsigned long dump_start, dump_size;
159 struct user32 dump;
160# define START_DATA(u) (u.u_tsize << PAGE_SHIFT)
161# define START_STACK(u) (u.start_stack)
162
163 fs = get_fs();
164 set_fs(KERNEL_DS);
165 has_dumped = 1;
166 current->flags |= PF_DUMPCORE;
167 strncpy(dump.u_comm, current->comm, sizeof(current->comm));
168 dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) - ((unsigned long)(&dump)));
169 dump.signal = signr;
170 dump_thread32(regs, &dump);
171
172/* If the size of the dump file exceeds the rlimit, then see what would happen
173 if we wrote the stack, but not the data area. */
174 if ((dump.u_dsize+dump.u_ssize+1) * PAGE_SIZE >
175 current->signal->rlim[RLIMIT_CORE].rlim_cur)
176 dump.u_dsize = 0;
177
178/* Make sure we have enough room to write the stack and data areas. */
179 if ((dump.u_ssize+1) * PAGE_SIZE >
180 current->signal->rlim[RLIMIT_CORE].rlim_cur)
181 dump.u_ssize = 0;
182
183/* make sure we actually have a data and stack area to dump */
184 set_fs(USER_DS);
185 if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_DATA(dump), dump.u_dsize << PAGE_SHIFT))
186 dump.u_dsize = 0;
187 if (!access_ok(VERIFY_READ, (void *) (unsigned long)START_STACK(dump), dump.u_ssize << PAGE_SHIFT))
188 dump.u_ssize = 0;
189
190 set_fs(KERNEL_DS);
191/* struct user */
192 DUMP_WRITE(&dump,sizeof(dump));
193/* Now dump all of the user data. Include malloced stuff as well */
194 DUMP_SEEK(PAGE_SIZE);
195/* now we start writing out the user space info */
196 set_fs(USER_DS);
197/* Dump the data area */
198 if (dump.u_dsize != 0) {
199 dump_start = START_DATA(dump);
200 dump_size = dump.u_dsize << PAGE_SHIFT;
201 DUMP_WRITE(dump_start,dump_size);
202 }
203/* Now prepare to dump the stack area */
204 if (dump.u_ssize != 0) {
205 dump_start = START_STACK(dump);
206 dump_size = dump.u_ssize << PAGE_SHIFT;
207 DUMP_WRITE(dump_start,dump_size);
208 }
209/* Finally dump the task struct. Not be used by gdb, but could be useful */
210 set_fs(KERNEL_DS);
211 DUMP_WRITE(current,sizeof(*current));
212end_coredump:
213 set_fs(fs);
214 return has_dumped;
215}
216#endif
217
218/*
219 * create_aout_tables() parses the env- and arg-strings in new user
220 * memory and creates the pointer tables from them, and puts their
221 * addresses on the "stack", returning the new stack pointer value.
222 */
223static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
224{
225 u32 __user *argv;
226 u32 __user *envp;
227 u32 __user *sp;
228 int argc = bprm->argc;
229 int envc = bprm->envc;
230
231 sp = (u32 __user *) ((-(unsigned long)sizeof(u32)) & (unsigned long) p);
232 sp -= envc+1;
233 envp = sp;
234 sp -= argc+1;
235 argv = sp;
236 put_user((unsigned long) envp,--sp);
237 put_user((unsigned long) argv,--sp);
238 put_user(argc,--sp);
239 current->mm->arg_start = (unsigned long) p;
240 while (argc-->0) {
241 char c;
242 put_user((u32)(unsigned long)p,argv++);
243 do {
244 get_user(c,p++);
245 } while (c);
246 }
247 put_user(NULL,argv);
248 current->mm->arg_end = current->mm->env_start = (unsigned long) p;
249 while (envc-->0) {
250 char c;
251 put_user((u32)(unsigned long)p,envp++);
252 do {
253 get_user(c,p++);
254 } while (c);
255 }
256 put_user(NULL,envp);
257 current->mm->env_end = (unsigned long) p;
258 return sp;
259}
260
261/*
262 * These are the functions used to load a.out style executables and shared
263 * libraries. There is no binary dependent code anywhere else.
264 */
265
266static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
267{
268 struct exec ex;
269 unsigned long error;
270 unsigned long fd_offset;
271 unsigned long rlim;
272 int retval;
273
274 ex = *((struct exec *) bprm->buf); /* exec-header */
275 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
276 N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
277 N_TRSIZE(ex) || N_DRSIZE(ex) ||
278 i_size_read(bprm->file->f_dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
279 return -ENOEXEC;
280 }
281
282 fd_offset = N_TXTOFF(ex);
283
284 /* Check initial limits. This avoids letting people circumvent
285 * size limits imposed on them by creating programs with large
286 * arrays in the data or bss.
287 */
288 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
289 if (rlim >= RLIM_INFINITY)
290 rlim = ~0;
291 if (ex.a_data + ex.a_bss > rlim)
292 return -ENOMEM;
293
294 /* Flush all traces of the currently running executable */
295 retval = flush_old_exec(bprm);
296 if (retval)
297 return retval;
298
299 regs->cs = __USER32_CS;
300 regs->r8 = regs->r9 = regs->r10 = regs->r11 = regs->r12 =
301 regs->r13 = regs->r14 = regs->r15 = 0;
302
303 /* OK, This is the point of no return */
304 set_personality(PER_LINUX);
305 set_thread_flag(TIF_IA32);
306 clear_thread_flag(TIF_ABI_PENDING);
307
308 current->mm->end_code = ex.a_text +
309 (current->mm->start_code = N_TXTADDR(ex));
310 current->mm->end_data = ex.a_data +
311 (current->mm->start_data = N_DATADDR(ex));
312 current->mm->brk = ex.a_bss +
313 (current->mm->start_brk = N_BSSADDR(ex));
314 current->mm->free_area_cache = TASK_UNMAPPED_BASE;
1363c3cd 315 current->mm->cached_hole_size = 0;
1da177e4
LT
316
317 set_mm_counter(current->mm, rss, 0);
318 current->mm->mmap = NULL;
319 compute_creds(bprm);
320 current->flags &= ~PF_FORKNOEXEC;
321
322 if (N_MAGIC(ex) == OMAGIC) {
323 unsigned long text_addr, map_size;
324 loff_t pos;
325
326 text_addr = N_TXTADDR(ex);
327
328 pos = 32;
329 map_size = ex.a_text+ex.a_data;
330
331 down_write(&current->mm->mmap_sem);
332 error = do_brk(text_addr & PAGE_MASK, map_size);
333 up_write(&current->mm->mmap_sem);
334
335 if (error != (text_addr & PAGE_MASK)) {
336 send_sig(SIGKILL, current, 0);
337 return error;
338 }
339
340 error = bprm->file->f_op->read(bprm->file, (char *)text_addr,
341 ex.a_text+ex.a_data, &pos);
342 if ((signed long)error < 0) {
343 send_sig(SIGKILL, current, 0);
344 return error;
345 }
346
347 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
348 } else {
349#ifdef WARN_OLD
350 static unsigned long error_time, error_time2;
351 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
352 (N_MAGIC(ex) != NMAGIC) && (jiffies-error_time2) > 5*HZ)
353 {
354 printk(KERN_NOTICE "executable not page aligned\n");
355 error_time2 = jiffies;
356 }
357
358 if ((fd_offset & ~PAGE_MASK) != 0 &&
359 (jiffies-error_time) > 5*HZ)
360 {
361 printk(KERN_WARNING
362 "fd_offset is not page aligned. Please convert program: %s\n",
363 bprm->file->f_dentry->d_name.name);
364 error_time = jiffies;
365 }
366#endif
367
368 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
369 loff_t pos = fd_offset;
370 down_write(&current->mm->mmap_sem);
371 do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
372 up_write(&current->mm->mmap_sem);
373 bprm->file->f_op->read(bprm->file,(char *)N_TXTADDR(ex),
374 ex.a_text+ex.a_data, &pos);
375 flush_icache_range((unsigned long) N_TXTADDR(ex),
376 (unsigned long) N_TXTADDR(ex) +
377 ex.a_text+ex.a_data);
378 goto beyond_if;
379 }
380
381 down_write(&current->mm->mmap_sem);
382 error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
383 PROT_READ | PROT_EXEC,
384 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
385 fd_offset);
386 up_write(&current->mm->mmap_sem);
387
388 if (error != N_TXTADDR(ex)) {
389 send_sig(SIGKILL, current, 0);
390 return error;
391 }
392
393 down_write(&current->mm->mmap_sem);
394 error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
395 PROT_READ | PROT_WRITE | PROT_EXEC,
396 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT,
397 fd_offset + ex.a_text);
398 up_write(&current->mm->mmap_sem);
399 if (error != N_DATADDR(ex)) {
400 send_sig(SIGKILL, current, 0);
401 return error;
402 }
403 }
404beyond_if:
405 set_binfmt(&aout_format);
406
407 set_brk(current->mm->start_brk, current->mm->brk);
408
409 retval = ia32_setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
410 if (retval < 0) {
411 /* Someone check-me: is this error path enough? */
412 send_sig(SIGKILL, current, 0);
413 return retval;
414 }
415
416 current->mm->start_stack =
417 (unsigned long)create_aout_tables((char __user *)bprm->p, bprm);
418 /* start thread */
419 asm volatile("movl %0,%%fs" :: "r" (0)); \
420 asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS));
421 load_gs_index(0);
422 (regs)->rip = ex.a_entry;
423 (regs)->rsp = current->mm->start_stack;
424 (regs)->eflags = 0x200;
425 (regs)->cs = __USER32_CS;
426 (regs)->ss = __USER32_DS;
427 set_fs(USER_DS);
428 if (unlikely(current->ptrace & PT_PTRACED)) {
429 if (current->ptrace & PT_TRACE_EXEC)
430 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
431 else
432 send_sig(SIGTRAP, current, 0);
433 }
434 return 0;
435}
436
437static int load_aout_library(struct file *file)
438{
439 struct inode * inode;
440 unsigned long bss, start_addr, len;
441 unsigned long error;
442 int retval;
443 struct exec ex;
444
445 inode = file->f_dentry->d_inode;
446
447 retval = -ENOEXEC;
448 error = kernel_read(file, 0, (char *) &ex, sizeof(ex));
449 if (error != sizeof(ex))
450 goto out;
451
452 /* We come in here for the regular a.out style of shared libraries */
453 if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
454 N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
455 i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
456 goto out;
457 }
458
459 if (N_FLAGS(ex))
460 goto out;
461
462 /* For QMAGIC, the starting address is 0x20 into the page. We mask
463 this off to get the starting address for the page */
464
465 start_addr = ex.a_entry & 0xfffff000;
466
467 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
468 loff_t pos = N_TXTOFF(ex);
469
470#ifdef WARN_OLD
471 static unsigned long error_time;
472 if ((jiffies-error_time) > 5*HZ)
473 {
474 printk(KERN_WARNING
475 "N_TXTOFF is not page aligned. Please convert library: %s\n",
476 file->f_dentry->d_name.name);
477 error_time = jiffies;
478 }
479#endif
480 down_write(&current->mm->mmap_sem);
481 do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
482 up_write(&current->mm->mmap_sem);
483
484 file->f_op->read(file, (char *)start_addr,
485 ex.a_text + ex.a_data, &pos);
486 flush_icache_range((unsigned long) start_addr,
487 (unsigned long) start_addr + ex.a_text + ex.a_data);
488
489 retval = 0;
490 goto out;
491 }
492 /* Now use mmap to map the library into memory. */
493 down_write(&current->mm->mmap_sem);
494 error = do_mmap(file, start_addr, ex.a_text + ex.a_data,
495 PROT_READ | PROT_WRITE | PROT_EXEC,
496 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT,
497 N_TXTOFF(ex));
498 up_write(&current->mm->mmap_sem);
499 retval = error;
500 if (error != start_addr)
501 goto out;
502
503 len = PAGE_ALIGN(ex.a_text + ex.a_data);
504 bss = ex.a_text + ex.a_data + ex.a_bss;
505 if (bss > len) {
506 down_write(&current->mm->mmap_sem);
507 error = do_brk(start_addr + len, bss - len);
508 up_write(&current->mm->mmap_sem);
509 retval = error;
510 if (error != start_addr + len)
511 goto out;
512 }
513 retval = 0;
514out:
515 return retval;
516}
517
518static int __init init_aout_binfmt(void)
519{
520 return register_binfmt(&aout_format);
521}
522
523static void __exit exit_aout_binfmt(void)
524{
525 unregister_binfmt(&aout_format);
526}
527
528module_init(init_aout_binfmt);
529module_exit(exit_aout_binfmt);
530MODULE_LICENSE("GPL");