fs: introduce vfs_path_lookup
[deliverable/linux.git] / fs / binfmt_elf.c
1 /*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/stat.h>
16 #include <linux/time.h>
17 #include <linux/mm.h>
18 #include <linux/mman.h>
19 #include <linux/a.out.h>
20 #include <linux/errno.h>
21 #include <linux/signal.h>
22 #include <linux/binfmts.h>
23 #include <linux/string.h>
24 #include <linux/file.h>
25 #include <linux/fcntl.h>
26 #include <linux/ptrace.h>
27 #include <linux/slab.h>
28 #include <linux/shm.h>
29 #include <linux/personality.h>
30 #include <linux/elfcore.h>
31 #include <linux/init.h>
32 #include <linux/highuid.h>
33 #include <linux/smp.h>
34 #include <linux/compiler.h>
35 #include <linux/highmem.h>
36 #include <linux/pagemap.h>
37 #include <linux/security.h>
38 #include <linux/syscalls.h>
39 #include <linux/random.h>
40 #include <linux/elf.h>
41 #include <linux/utsname.h>
42 #include <asm/uaccess.h>
43 #include <asm/param.h>
44 #include <asm/page.h>
45
/*
 * Forward declarations: the two loader entry points referenced by
 * elf_format below, and the segment-mapping helper used by both the
 * binary and the interpreter loaders.
 */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);
49
/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file);
#else
#define elf_core_dump	NULL
#endif

/*
 * ELF_MIN_ALIGN is the granularity used for all ELF segment rounding in
 * this file: the larger of the ELF executable page size and the kernel
 * page size.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS	0
#endif

/*
 * ELF_PAGESTART:  round an address down to an ELF_MIN_ALIGN boundary.
 * ELF_PAGEOFFSET: offset of an address within its ELF_MIN_ALIGN unit.
 * ELF_PAGEALIGN:  round an address up to an ELF_MIN_ALIGN boundary.
 *
 * Both rounding masks are widened to unsigned long before being
 * complemented so that the upper address bits survive even when
 * ELF_MIN_ALIGN has a narrower unsigned type.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
	(((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
73
74 static struct linux_binfmt elf_format = {
75 .module = THIS_MODULE,
76 .load_binary = load_elf_binary,
77 .load_shlib = load_elf_library,
78 .core_dump = elf_core_dump,
79 .min_coredump = ELF_EXEC_PAGESIZE,
80 .hasvdso = 1
81 };
82
83 #define BAD_ADDR(x) IS_ERR_VALUE(x)
84
85 static int set_brk(unsigned long start, unsigned long end)
86 {
87 start = ELF_PAGEALIGN(start);
88 end = ELF_PAGEALIGN(end);
89 if (end > start) {
90 unsigned long addr;
91 down_write(&current->mm->mmap_sem);
92 addr = do_brk(start, end - start);
93 up_write(&current->mm->mmap_sem);
94 if (BAD_ADDR(addr))
95 return addr;
96 }
97 current->mm->start_brk = current->mm->brk = end;
98 return 0;
99 }
100
101 /* We need to explicitly zero any fractional pages
102 after the data section (i.e. bss). This would
103 contain the junk from the file that should not
104 be in memory
105 */
106 static int padzero(unsigned long elf_bss)
107 {
108 unsigned long nbyte;
109
110 nbyte = ELF_PAGEOFFSET(elf_bss);
111 if (nbyte) {
112 nbyte = ELF_MIN_ALIGN - nbyte;
113 if (clear_user((void __user *) elf_bss, nbyte))
114 return -EFAULT;
115 }
116 return 0;
117 }
118
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 * STACK_ADD(sp, items):   address 'items' elf_addr_t slots further along
 *                         the stack (in its growth direction) from sp.
 * STACK_ROUND(sp, items): address 'items' slots along from sp, rounded to
 *                         a 16-byte boundary, as an unsigned long.
 * STACK_ALLOC(sp, len):   move sp by 'len' in the growth direction
 *                         (modifying sp) and yield the address of the
 *                         newly reserved space.
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as "a + b" expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
133
134 static int
135 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 int interp_aout, unsigned long load_addr,
137 unsigned long interp_load_addr)
138 {
139 unsigned long p = bprm->p;
140 int argc = bprm->argc;
141 int envc = bprm->envc;
142 elf_addr_t __user *argv;
143 elf_addr_t __user *envp;
144 elf_addr_t __user *sp;
145 elf_addr_t __user *u_platform;
146 const char *k_platform = ELF_PLATFORM;
147 int items;
148 elf_addr_t *elf_info;
149 int ei_index = 0;
150 struct task_struct *tsk = current;
151 struct vm_area_struct *vma;
152
153 /*
154 * If this architecture has a platform capability string, copy it
155 * to userspace. In some cases (Sparc), this info is impossible
156 * for userspace to get any other way, in others (i386) it is
157 * merely difficult.
158 */
159 u_platform = NULL;
160 if (k_platform) {
161 size_t len = strlen(k_platform) + 1;
162
163 /*
164 * In some cases (e.g. Hyper-Threading), we want to avoid L1
165 * evictions by the processes running on the same package. One
166 * thing we can do is to shuffle the initial stack for them.
167 */
168
169 p = arch_align_stack(p);
170
171 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
172 if (__copy_to_user(u_platform, k_platform, len))
173 return -EFAULT;
174 }
175
176 /* Create the ELF interpreter info */
177 elf_info = (elf_addr_t *)current->mm->saved_auxv;
178 #define NEW_AUX_ENT(id, val) \
179 do { \
180 elf_info[ei_index++] = id; \
181 elf_info[ei_index++] = val; \
182 } while (0)
183
184 #ifdef ARCH_DLINFO
185 /*
186 * ARCH_DLINFO must come first so PPC can do its special alignment of
187 * AUXV.
188 */
189 ARCH_DLINFO;
190 #endif
191 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
192 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
193 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
194 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
195 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
196 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
197 NEW_AUX_ENT(AT_BASE, interp_load_addr);
198 NEW_AUX_ENT(AT_FLAGS, 0);
199 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
200 NEW_AUX_ENT(AT_UID, tsk->uid);
201 NEW_AUX_ENT(AT_EUID, tsk->euid);
202 NEW_AUX_ENT(AT_GID, tsk->gid);
203 NEW_AUX_ENT(AT_EGID, tsk->egid);
204 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
205 if (k_platform) {
206 NEW_AUX_ENT(AT_PLATFORM,
207 (elf_addr_t)(unsigned long)u_platform);
208 }
209 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
210 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
211 }
212 #undef NEW_AUX_ENT
213 /* AT_NULL is zero; clear the rest too */
214 memset(&elf_info[ei_index], 0,
215 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
216
217 /* And advance past the AT_NULL entry. */
218 ei_index += 2;
219
220 sp = STACK_ADD(p, ei_index);
221
222 items = (argc + 1) + (envc + 1);
223 if (interp_aout) {
224 items += 3; /* a.out interpreters require argv & envp too */
225 } else {
226 items += 1; /* ELF interpreters only put argc on the stack */
227 }
228 bprm->p = STACK_ROUND(sp, items);
229
230 /* Point sp at the lowest address on the stack */
231 #ifdef CONFIG_STACK_GROWSUP
232 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
233 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
234 #else
235 sp = (elf_addr_t __user *)bprm->p;
236 #endif
237
238
239 /*
240 * Grow the stack manually; some architectures have a limit on how
241 * far ahead a user-space access may be in order to grow the stack.
242 */
243 vma = find_extend_vma(current->mm, bprm->p);
244 if (!vma)
245 return -EFAULT;
246
247 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
248 if (__put_user(argc, sp++))
249 return -EFAULT;
250 if (interp_aout) {
251 argv = sp + 2;
252 envp = argv + argc + 1;
253 if (__put_user((elf_addr_t)(unsigned long)argv, sp++) ||
254 __put_user((elf_addr_t)(unsigned long)envp, sp++))
255 return -EFAULT;
256 } else {
257 argv = sp;
258 envp = argv + argc + 1;
259 }
260
261 /* Populate argv and envp */
262 p = current->mm->arg_end = current->mm->arg_start;
263 while (argc-- > 0) {
264 size_t len;
265 if (__put_user((elf_addr_t)p, argv++))
266 return -EFAULT;
267 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
268 if (!len || len > MAX_ARG_STRLEN)
269 return 0;
270 p += len;
271 }
272 if (__put_user(0, argv))
273 return -EFAULT;
274 current->mm->arg_end = current->mm->env_start = p;
275 while (envc-- > 0) {
276 size_t len;
277 if (__put_user((elf_addr_t)p, envp++))
278 return -EFAULT;
279 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
280 if (!len || len > MAX_ARG_STRLEN)
281 return 0;
282 p += len;
283 }
284 if (__put_user(0, envp))
285 return -EFAULT;
286 current->mm->env_end = p;
287
288 /* Put the elf_info on the stack in the right place. */
289 sp = (elf_addr_t __user *)envp + 1;
290 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
291 return -EFAULT;
292 return 0;
293 }
294
295 #ifndef elf_map
296
297 static unsigned long elf_map(struct file *filep, unsigned long addr,
298 struct elf_phdr *eppnt, int prot, int type,
299 unsigned long total_size)
300 {
301 unsigned long map_addr;
302 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
303 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
304 addr = ELF_PAGESTART(addr);
305 size = ELF_PAGEALIGN(size);
306
307 /* mmap() will return -EINVAL if given a zero size, but a
308 * segment with zero filesize is perfectly valid */
309 if (!size)
310 return addr;
311
312 down_write(&current->mm->mmap_sem);
313 /*
314 * total_size is the size of the ELF (interpreter) image.
315 * The _first_ mmap needs to know the full size, otherwise
316 * randomization might put this image into an overlapping
317 * position with the ELF binary image. (since size < total_size)
318 * So we first map the 'big' image - and unmap the remainder at
319 * the end. (which unmap is needed for ELF images with holes.)
320 */
321 if (total_size) {
322 total_size = ELF_PAGEALIGN(total_size);
323 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
324 if (!BAD_ADDR(map_addr))
325 do_munmap(current->mm, map_addr+size, total_size-size);
326 } else
327 map_addr = do_mmap(filep, addr, size, prot, type, off);
328
329 up_write(&current->mm->mmap_sem);
330 return(map_addr);
331 }
332
333 #endif /* !elf_map */
334
335 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
336 {
337 int i, first_idx = -1, last_idx = -1;
338
339 for (i = 0; i < nr; i++) {
340 if (cmds[i].p_type == PT_LOAD) {
341 last_idx = i;
342 if (first_idx == -1)
343 first_idx = i;
344 }
345 }
346 if (first_idx == -1)
347 return 0;
348
349 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
350 ELF_PAGESTART(cmds[first_idx].p_vaddr);
351 }
352
353
354 /* This is much more generalized than the library routine read function,
355 so we keep this separate. Technically the library read function
356 is only provided so that we can read a.out libraries that have
357 an ELF header */
358
359 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
360 struct file *interpreter, unsigned long *interp_map_addr,
361 unsigned long no_base)
362 {
363 struct elf_phdr *elf_phdata;
364 struct elf_phdr *eppnt;
365 unsigned long load_addr = 0;
366 int load_addr_set = 0;
367 unsigned long last_bss = 0, elf_bss = 0;
368 unsigned long error = ~0UL;
369 unsigned long total_size;
370 int retval, i, size;
371
372 /* First of all, some simple consistency checks */
373 if (interp_elf_ex->e_type != ET_EXEC &&
374 interp_elf_ex->e_type != ET_DYN)
375 goto out;
376 if (!elf_check_arch(interp_elf_ex))
377 goto out;
378 if (!interpreter->f_op || !interpreter->f_op->mmap)
379 goto out;
380
381 /*
382 * If the size of this structure has changed, then punt, since
383 * we will be doing the wrong thing.
384 */
385 if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
386 goto out;
387 if (interp_elf_ex->e_phnum < 1 ||
388 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
389 goto out;
390
391 /* Now read in all of the header information */
392 size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
393 if (size > ELF_MIN_ALIGN)
394 goto out;
395 elf_phdata = kmalloc(size, GFP_KERNEL);
396 if (!elf_phdata)
397 goto out;
398
399 retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
400 (char *)elf_phdata,size);
401 error = -EIO;
402 if (retval != size) {
403 if (retval < 0)
404 error = retval;
405 goto out_close;
406 }
407
408 total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
409 if (!total_size) {
410 error = -EINVAL;
411 goto out_close;
412 }
413
414 eppnt = elf_phdata;
415 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
416 if (eppnt->p_type == PT_LOAD) {
417 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
418 int elf_prot = 0;
419 unsigned long vaddr = 0;
420 unsigned long k, map_addr;
421
422 if (eppnt->p_flags & PF_R)
423 elf_prot = PROT_READ;
424 if (eppnt->p_flags & PF_W)
425 elf_prot |= PROT_WRITE;
426 if (eppnt->p_flags & PF_X)
427 elf_prot |= PROT_EXEC;
428 vaddr = eppnt->p_vaddr;
429 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
430 elf_type |= MAP_FIXED;
431 else if (no_base && interp_elf_ex->e_type == ET_DYN)
432 load_addr = -vaddr;
433
434 map_addr = elf_map(interpreter, load_addr + vaddr,
435 eppnt, elf_prot, elf_type, total_size);
436 total_size = 0;
437 if (!*interp_map_addr)
438 *interp_map_addr = map_addr;
439 error = map_addr;
440 if (BAD_ADDR(map_addr))
441 goto out_close;
442
443 if (!load_addr_set &&
444 interp_elf_ex->e_type == ET_DYN) {
445 load_addr = map_addr - ELF_PAGESTART(vaddr);
446 load_addr_set = 1;
447 }
448
449 /*
450 * Check to see if the section's size will overflow the
451 * allowed task size. Note that p_filesz must always be
452 * <= p_memsize so it's only necessary to check p_memsz.
453 */
454 k = load_addr + eppnt->p_vaddr;
455 if (BAD_ADDR(k) ||
456 eppnt->p_filesz > eppnt->p_memsz ||
457 eppnt->p_memsz > TASK_SIZE ||
458 TASK_SIZE - eppnt->p_memsz < k) {
459 error = -ENOMEM;
460 goto out_close;
461 }
462
463 /*
464 * Find the end of the file mapping for this phdr, and
465 * keep track of the largest address we see for this.
466 */
467 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
468 if (k > elf_bss)
469 elf_bss = k;
470
471 /*
472 * Do the same thing for the memory mapping - between
473 * elf_bss and last_bss is the bss section.
474 */
475 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
476 if (k > last_bss)
477 last_bss = k;
478 }
479 }
480
481 /*
482 * Now fill out the bss section. First pad the last page up
483 * to the page boundary, and then perform a mmap to make sure
484 * that there are zero-mapped pages up to and including the
485 * last bss page.
486 */
487 if (padzero(elf_bss)) {
488 error = -EFAULT;
489 goto out_close;
490 }
491
492 /* What we have mapped so far */
493 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
494
495 /* Map the last of the bss segment */
496 if (last_bss > elf_bss) {
497 down_write(&current->mm->mmap_sem);
498 error = do_brk(elf_bss, last_bss - elf_bss);
499 up_write(&current->mm->mmap_sem);
500 if (BAD_ADDR(error))
501 goto out_close;
502 }
503
504 error = load_addr;
505
506 out_close:
507 kfree(elf_phdata);
508 out:
509 return error;
510 }
511
512 static unsigned long load_aout_interp(struct exec *interp_ex,
513 struct file *interpreter)
514 {
515 unsigned long text_data, elf_entry = ~0UL;
516 char __user * addr;
517 loff_t offset;
518
519 current->mm->end_code = interp_ex->a_text;
520 text_data = interp_ex->a_text + interp_ex->a_data;
521 current->mm->end_data = text_data;
522 current->mm->brk = interp_ex->a_bss + text_data;
523
524 switch (N_MAGIC(*interp_ex)) {
525 case OMAGIC:
526 offset = 32;
527 addr = (char __user *)0;
528 break;
529 case ZMAGIC:
530 case QMAGIC:
531 offset = N_TXTOFF(*interp_ex);
532 addr = (char __user *)N_TXTADDR(*interp_ex);
533 break;
534 default:
535 goto out;
536 }
537
538 down_write(&current->mm->mmap_sem);
539 do_brk(0, text_data);
540 up_write(&current->mm->mmap_sem);
541 if (!interpreter->f_op || !interpreter->f_op->read)
542 goto out;
543 if (interpreter->f_op->read(interpreter, addr, text_data, &offset) < 0)
544 goto out;
545 flush_icache_range((unsigned long)addr,
546 (unsigned long)addr + text_data);
547
548 down_write(&current->mm->mmap_sem);
549 do_brk(ELF_PAGESTART(text_data + ELF_MIN_ALIGN - 1),
550 interp_ex->a_bss);
551 up_write(&current->mm->mmap_sem);
552 elf_entry = interp_ex->a_entry;
553
554 out:
555 return elf_entry;
556 }
557
558 /*
559 * These are the functions used to load ELF style executables and shared
560 * libraries. There is no binary dependent code anywhere else.
561 */
562
/*
 * Interpreter format flags.  AOUT and ELF are distinct bits so that both
 * can be set while the interpreter's format is still being determined.
 */
#define INTERPRETER_NONE	0
#define INTERPRETER_AOUT	(1 << 0)
#define INTERPRETER_ELF		(1 << 1)

/*
 * Mask applied to the random page count used for stack randomization,
 * unless the architecture has supplied its own.
 */
#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif
570
571 static unsigned long randomize_stack_top(unsigned long stack_top)
572 {
573 unsigned int random_variable = 0;
574
575 if ((current->flags & PF_RANDOMIZE) &&
576 !(current->personality & ADDR_NO_RANDOMIZE)) {
577 random_variable = get_random_int() & STACK_RND_MASK;
578 random_variable <<= PAGE_SHIFT;
579 }
580 #ifdef CONFIG_STACK_GROWSUP
581 return PAGE_ALIGN(stack_top) + random_variable;
582 #else
583 return PAGE_ALIGN(stack_top) - random_variable;
584 #endif
585 }
586
587 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
588 {
589 struct file *interpreter = NULL; /* to shut gcc up */
590 unsigned long load_addr = 0, load_bias = 0;
591 int load_addr_set = 0;
592 char * elf_interpreter = NULL;
593 unsigned int interpreter_type = INTERPRETER_NONE;
594 unsigned char ibcs2_interpreter = 0;
595 unsigned long error;
596 struct elf_phdr *elf_ppnt, *elf_phdata;
597 unsigned long elf_bss, elf_brk;
598 int elf_exec_fileno;
599 int retval, i;
600 unsigned int size;
601 unsigned long elf_entry;
602 unsigned long interp_load_addr = 0;
603 unsigned long start_code, end_code, start_data, end_data;
604 unsigned long reloc_func_desc = 0;
605 char passed_fileno[6];
606 struct files_struct *files;
607 int executable_stack = EXSTACK_DEFAULT;
608 unsigned long def_flags = 0;
609 struct {
610 struct elfhdr elf_ex;
611 struct elfhdr interp_elf_ex;
612 struct exec interp_ex;
613 } *loc;
614
615 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
616 if (!loc) {
617 retval = -ENOMEM;
618 goto out_ret;
619 }
620
621 /* Get the exec-header */
622 loc->elf_ex = *((struct elfhdr *)bprm->buf);
623
624 retval = -ENOEXEC;
625 /* First of all, some simple consistency checks */
626 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
627 goto out;
628
629 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
630 goto out;
631 if (!elf_check_arch(&loc->elf_ex))
632 goto out;
633 if (!bprm->file->f_op||!bprm->file->f_op->mmap)
634 goto out;
635
636 /* Now read in all of the header information */
637 if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
638 goto out;
639 if (loc->elf_ex.e_phnum < 1 ||
640 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
641 goto out;
642 size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
643 retval = -ENOMEM;
644 elf_phdata = kmalloc(size, GFP_KERNEL);
645 if (!elf_phdata)
646 goto out;
647
648 retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
649 (char *)elf_phdata, size);
650 if (retval != size) {
651 if (retval >= 0)
652 retval = -EIO;
653 goto out_free_ph;
654 }
655
656 files = current->files; /* Refcounted so ok */
657 retval = unshare_files();
658 if (retval < 0)
659 goto out_free_ph;
660 if (files == current->files) {
661 put_files_struct(files);
662 files = NULL;
663 }
664
665 /* exec will make our files private anyway, but for the a.out
666 loader stuff we need to do it earlier */
667 retval = get_unused_fd();
668 if (retval < 0)
669 goto out_free_fh;
670 get_file(bprm->file);
671 fd_install(elf_exec_fileno = retval, bprm->file);
672
673 elf_ppnt = elf_phdata;
674 elf_bss = 0;
675 elf_brk = 0;
676
677 start_code = ~0UL;
678 end_code = 0;
679 start_data = 0;
680 end_data = 0;
681
682 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
683 if (elf_ppnt->p_type == PT_INTERP) {
684 /* This is the program interpreter used for
685 * shared libraries - for now assume that this
686 * is an a.out format binary
687 */
688 retval = -ENOEXEC;
689 if (elf_ppnt->p_filesz > PATH_MAX ||
690 elf_ppnt->p_filesz < 2)
691 goto out_free_file;
692
693 retval = -ENOMEM;
694 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
695 GFP_KERNEL);
696 if (!elf_interpreter)
697 goto out_free_file;
698
699 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
700 elf_interpreter,
701 elf_ppnt->p_filesz);
702 if (retval != elf_ppnt->p_filesz) {
703 if (retval >= 0)
704 retval = -EIO;
705 goto out_free_interp;
706 }
707 /* make sure path is NULL terminated */
708 retval = -ENOEXEC;
709 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
710 goto out_free_interp;
711
712 /* If the program interpreter is one of these two,
713 * then assume an iBCS2 image. Otherwise assume
714 * a native linux image.
715 */
716 if (strcmp(elf_interpreter,"/usr/lib/libc.so.1") == 0 ||
717 strcmp(elf_interpreter,"/usr/lib/ld.so.1") == 0)
718 ibcs2_interpreter = 1;
719
720 /*
721 * The early SET_PERSONALITY here is so that the lookup
722 * for the interpreter happens in the namespace of the
723 * to-be-execed image. SET_PERSONALITY can select an
724 * alternate root.
725 *
726 * However, SET_PERSONALITY is NOT allowed to switch
727 * this task into the new images's memory mapping
728 * policy - that is, TASK_SIZE must still evaluate to
729 * that which is appropriate to the execing application.
730 * This is because exit_mmap() needs to have TASK_SIZE
731 * evaluate to the size of the old image.
732 *
733 * So if (say) a 64-bit application is execing a 32-bit
734 * application it is the architecture's responsibility
735 * to defer changing the value of TASK_SIZE until the
736 * switch really is going to happen - do this in
737 * flush_thread(). - akpm
738 */
739 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
740
741 interpreter = open_exec(elf_interpreter);
742 retval = PTR_ERR(interpreter);
743 if (IS_ERR(interpreter))
744 goto out_free_interp;
745
746 /*
747 * If the binary is not readable then enforce
748 * mm->dumpable = 0 regardless of the interpreter's
749 * permissions.
750 */
751 if (file_permission(interpreter, MAY_READ) < 0)
752 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
753
754 retval = kernel_read(interpreter, 0, bprm->buf,
755 BINPRM_BUF_SIZE);
756 if (retval != BINPRM_BUF_SIZE) {
757 if (retval >= 0)
758 retval = -EIO;
759 goto out_free_dentry;
760 }
761
762 /* Get the exec headers */
763 loc->interp_ex = *((struct exec *)bprm->buf);
764 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
765 break;
766 }
767 elf_ppnt++;
768 }
769
770 elf_ppnt = elf_phdata;
771 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
772 if (elf_ppnt->p_type == PT_GNU_STACK) {
773 if (elf_ppnt->p_flags & PF_X)
774 executable_stack = EXSTACK_ENABLE_X;
775 else
776 executable_stack = EXSTACK_DISABLE_X;
777 break;
778 }
779
780 /* Some simple consistency checks for the interpreter */
781 if (elf_interpreter) {
782 interpreter_type = INTERPRETER_ELF | INTERPRETER_AOUT;
783
784 /* Now figure out which format our binary is */
785 if ((N_MAGIC(loc->interp_ex) != OMAGIC) &&
786 (N_MAGIC(loc->interp_ex) != ZMAGIC) &&
787 (N_MAGIC(loc->interp_ex) != QMAGIC))
788 interpreter_type = INTERPRETER_ELF;
789
790 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
791 interpreter_type &= ~INTERPRETER_ELF;
792
793 retval = -ELIBBAD;
794 if (!interpreter_type)
795 goto out_free_dentry;
796
797 /* Make sure only one type was selected */
798 if ((interpreter_type & INTERPRETER_ELF) &&
799 interpreter_type != INTERPRETER_ELF) {
800 // FIXME - ratelimit this before re-enabling
801 // printk(KERN_WARNING "ELF: Ambiguous type, using ELF\n");
802 interpreter_type = INTERPRETER_ELF;
803 }
804 /* Verify the interpreter has a valid arch */
805 if ((interpreter_type == INTERPRETER_ELF) &&
806 !elf_check_arch(&loc->interp_elf_ex))
807 goto out_free_dentry;
808 } else {
809 /* Executables without an interpreter also need a personality */
810 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
811 }
812
813 /* OK, we are done with that, now set up the arg stuff,
814 and then start this sucker up */
815 if ((!bprm->sh_bang) && (interpreter_type == INTERPRETER_AOUT)) {
816 char *passed_p = passed_fileno;
817 sprintf(passed_fileno, "%d", elf_exec_fileno);
818
819 if (elf_interpreter) {
820 retval = copy_strings_kernel(1, &passed_p, bprm);
821 if (retval)
822 goto out_free_dentry;
823 bprm->argc++;
824 }
825 }
826
827 /* Flush all traces of the currently running executable */
828 retval = flush_old_exec(bprm);
829 if (retval)
830 goto out_free_dentry;
831
832 /* Discard our unneeded old files struct */
833 if (files) {
834 put_files_struct(files);
835 files = NULL;
836 }
837
838 /* OK, This is the point of no return */
839 current->flags &= ~PF_FORKNOEXEC;
840 current->mm->def_flags = def_flags;
841
842 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
843 may depend on the personality. */
844 SET_PERSONALITY(loc->elf_ex, ibcs2_interpreter);
845 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
846 current->personality |= READ_IMPLIES_EXEC;
847
848 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
849 current->flags |= PF_RANDOMIZE;
850 arch_pick_mmap_layout(current->mm);
851
852 /* Do this so that we can load the interpreter, if need be. We will
853 change some of these later */
854 current->mm->free_area_cache = current->mm->mmap_base;
855 current->mm->cached_hole_size = 0;
856 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
857 executable_stack);
858 if (retval < 0) {
859 send_sig(SIGKILL, current, 0);
860 goto out_free_dentry;
861 }
862
863 current->mm->start_stack = bprm->p;
864
865 /* Now we do a little grungy work by mmaping the ELF image into
866 the correct location in memory. */
867 for(i = 0, elf_ppnt = elf_phdata;
868 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
869 int elf_prot = 0, elf_flags;
870 unsigned long k, vaddr;
871
872 if (elf_ppnt->p_type != PT_LOAD)
873 continue;
874
875 if (unlikely (elf_brk > elf_bss)) {
876 unsigned long nbyte;
877
878 /* There was a PT_LOAD segment with p_memsz > p_filesz
879 before this one. Map anonymous pages, if needed,
880 and clear the area. */
881 retval = set_brk (elf_bss + load_bias,
882 elf_brk + load_bias);
883 if (retval) {
884 send_sig(SIGKILL, current, 0);
885 goto out_free_dentry;
886 }
887 nbyte = ELF_PAGEOFFSET(elf_bss);
888 if (nbyte) {
889 nbyte = ELF_MIN_ALIGN - nbyte;
890 if (nbyte > elf_brk - elf_bss)
891 nbyte = elf_brk - elf_bss;
892 if (clear_user((void __user *)elf_bss +
893 load_bias, nbyte)) {
894 /*
895 * This bss-zeroing can fail if the ELF
896 * file specifies odd protections. So
897 * we don't check the return value
898 */
899 }
900 }
901 }
902
903 if (elf_ppnt->p_flags & PF_R)
904 elf_prot |= PROT_READ;
905 if (elf_ppnt->p_flags & PF_W)
906 elf_prot |= PROT_WRITE;
907 if (elf_ppnt->p_flags & PF_X)
908 elf_prot |= PROT_EXEC;
909
910 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
911
912 vaddr = elf_ppnt->p_vaddr;
913 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
914 elf_flags |= MAP_FIXED;
915 } else if (loc->elf_ex.e_type == ET_DYN) {
916 /* Try and get dynamic programs out of the way of the
917 * default mmap base, as well as whatever program they
918 * might try to exec. This is because the brk will
919 * follow the loader, and is not movable. */
920 #ifdef CONFIG_X86
921 load_bias = 0;
922 #else
923 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
924 #endif
925 }
926
927 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
928 elf_prot, elf_flags,0);
929 if (BAD_ADDR(error)) {
930 send_sig(SIGKILL, current, 0);
931 retval = IS_ERR((void *)error) ?
932 PTR_ERR((void*)error) : -EINVAL;
933 goto out_free_dentry;
934 }
935
936 if (!load_addr_set) {
937 load_addr_set = 1;
938 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
939 if (loc->elf_ex.e_type == ET_DYN) {
940 load_bias += error -
941 ELF_PAGESTART(load_bias + vaddr);
942 load_addr += load_bias;
943 reloc_func_desc = load_bias;
944 }
945 }
946 k = elf_ppnt->p_vaddr;
947 if (k < start_code)
948 start_code = k;
949 if (start_data < k)
950 start_data = k;
951
952 /*
953 * Check to see if the section's size will overflow the
954 * allowed task size. Note that p_filesz must always be
955 * <= p_memsz so it is only necessary to check p_memsz.
956 */
957 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
958 elf_ppnt->p_memsz > TASK_SIZE ||
959 TASK_SIZE - elf_ppnt->p_memsz < k) {
960 /* set_brk can never work. Avoid overflows. */
961 send_sig(SIGKILL, current, 0);
962 retval = -EINVAL;
963 goto out_free_dentry;
964 }
965
966 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
967
968 if (k > elf_bss)
969 elf_bss = k;
970 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
971 end_code = k;
972 if (end_data < k)
973 end_data = k;
974 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
975 if (k > elf_brk)
976 elf_brk = k;
977 }
978
979 loc->elf_ex.e_entry += load_bias;
980 elf_bss += load_bias;
981 elf_brk += load_bias;
982 start_code += load_bias;
983 end_code += load_bias;
984 start_data += load_bias;
985 end_data += load_bias;
986
987 /* Calling set_brk effectively mmaps the pages that we need
988 * for the bss and break sections. We must do this before
989 * mapping in the interpreter, to make sure it doesn't wind
990 * up getting placed where the bss needs to go.
991 */
992 retval = set_brk(elf_bss, elf_brk);
993 if (retval) {
994 send_sig(SIGKILL, current, 0);
995 goto out_free_dentry;
996 }
997 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
998 send_sig(SIGSEGV, current, 0);
999 retval = -EFAULT; /* Nobody gets to see this, but.. */
1000 goto out_free_dentry;
1001 }
1002
1003 if (elf_interpreter) {
1004 if (interpreter_type == INTERPRETER_AOUT) {
1005 elf_entry = load_aout_interp(&loc->interp_ex,
1006 interpreter);
1007 } else {
1008 unsigned long uninitialized_var(interp_map_addr);
1009
1010 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1011 interpreter,
1012 &interp_map_addr,
1013 load_bias);
1014 if (!BAD_ADDR(elf_entry)) {
1015 /*
1016 * load_elf_interp() returns relocation
1017 * adjustment
1018 */
1019 interp_load_addr = elf_entry;
1020 elf_entry += loc->interp_elf_ex.e_entry;
1021 }
1022 }
1023 if (BAD_ADDR(elf_entry)) {
1024 force_sig(SIGSEGV, current);
1025 retval = IS_ERR((void *)elf_entry) ?
1026 (int)elf_entry : -EINVAL;
1027 goto out_free_dentry;
1028 }
1029 reloc_func_desc = interp_load_addr;
1030
1031 allow_write_access(interpreter);
1032 fput(interpreter);
1033 kfree(elf_interpreter);
1034 } else {
1035 elf_entry = loc->elf_ex.e_entry;
1036 if (BAD_ADDR(elf_entry)) {
1037 force_sig(SIGSEGV, current);
1038 retval = -EINVAL;
1039 goto out_free_dentry;
1040 }
1041 }
1042
1043 kfree(elf_phdata);
1044
1045 if (interpreter_type != INTERPRETER_AOUT)
1046 sys_close(elf_exec_fileno);
1047
1048 set_binfmt(&elf_format);
1049
1050 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1051 retval = arch_setup_additional_pages(bprm, executable_stack);
1052 if (retval < 0) {
1053 send_sig(SIGKILL, current, 0);
1054 goto out;
1055 }
1056 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1057
1058 compute_creds(bprm);
1059 current->flags &= ~PF_FORKNOEXEC;
1060 retval = create_elf_tables(bprm, &loc->elf_ex,
1061 (interpreter_type == INTERPRETER_AOUT),
1062 load_addr, interp_load_addr);
1063 if (retval < 0) {
1064 send_sig(SIGKILL, current, 0);
1065 goto out;
1066 }
1067 /* N.B. passed_fileno might not be initialized? */
1068 if (interpreter_type == INTERPRETER_AOUT)
1069 current->mm->arg_start += strlen(passed_fileno) + 1;
1070 current->mm->end_code = end_code;
1071 current->mm->start_code = start_code;
1072 current->mm->start_data = start_data;
1073 current->mm->end_data = end_data;
1074 current->mm->start_stack = bprm->p;
1075
1076 if (current->personality & MMAP_PAGE_ZERO) {
1077 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1078 and some applications "depend" upon this behavior.
1079 Since we do not have the power to recompile these, we
1080 emulate the SVr4 behavior. Sigh. */
1081 down_write(&current->mm->mmap_sem);
1082 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1083 MAP_FIXED | MAP_PRIVATE, 0);
1084 up_write(&current->mm->mmap_sem);
1085 }
1086
1087 #ifdef ELF_PLAT_INIT
1088 /*
1089 * The ABI may specify that certain registers be set up in special
 * ways (on i386 %edx is the address of a DT_FINI function, for
 * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
 * that the e_entry field is the address of the function descriptor
 * for the startup routine, rather than the address of the startup
 * routine itself.  This macro performs whatever initialization to
 * the regs structure is required as well as any relocations to the
 * function descriptor entries when executing dynamically linked apps.
1097 */
1098 ELF_PLAT_INIT(regs, reloc_func_desc);
1099 #endif
1100
1101 start_thread(regs, elf_entry, bprm->p);
1102 if (unlikely(current->ptrace & PT_PTRACED)) {
1103 if (current->ptrace & PT_TRACE_EXEC)
1104 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
1105 else
1106 send_sig(SIGTRAP, current, 0);
1107 }
1108 retval = 0;
1109 out:
1110 kfree(loc);
1111 out_ret:
1112 return retval;
1113
1114 /* error cleanup */
1115 out_free_dentry:
1116 allow_write_access(interpreter);
1117 if (interpreter)
1118 fput(interpreter);
1119 out_free_interp:
1120 kfree(elf_interpreter);
1121 out_free_file:
1122 sys_close(elf_exec_fileno);
1123 out_free_fh:
1124 if (files)
1125 reset_files_struct(current, files);
1126 out_free_ph:
1127 kfree(elf_phdata);
1128 goto out;
1129 }
1130
1131 /* This is really simpleminded and specialized - we are loading an
1132 a.out library that is given an ELF header. */
1133 static int load_elf_library(struct file *file)
1134 {
1135 struct elf_phdr *elf_phdata;
1136 struct elf_phdr *eppnt;
1137 unsigned long elf_bss, bss, len;
1138 int retval, error, i, j;
1139 struct elfhdr elf_ex;
1140
1141 error = -ENOEXEC;
1142 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1143 if (retval != sizeof(elf_ex))
1144 goto out;
1145
1146 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1147 goto out;
1148
1149 /* First of all, some simple consistency checks */
1150 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1151 !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1152 goto out;
1153
1154 /* Now read in all of the header information */
1155
1156 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1157 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1158
1159 error = -ENOMEM;
1160 elf_phdata = kmalloc(j, GFP_KERNEL);
1161 if (!elf_phdata)
1162 goto out;
1163
1164 eppnt = elf_phdata;
1165 error = -ENOEXEC;
1166 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1167 if (retval != j)
1168 goto out_free_ph;
1169
1170 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1171 if ((eppnt + i)->p_type == PT_LOAD)
1172 j++;
1173 if (j != 1)
1174 goto out_free_ph;
1175
1176 while (eppnt->p_type != PT_LOAD)
1177 eppnt++;
1178
1179 /* Now use mmap to map the library into memory. */
1180 down_write(&current->mm->mmap_sem);
1181 error = do_mmap(file,
1182 ELF_PAGESTART(eppnt->p_vaddr),
1183 (eppnt->p_filesz +
1184 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1185 PROT_READ | PROT_WRITE | PROT_EXEC,
1186 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1187 (eppnt->p_offset -
1188 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1189 up_write(&current->mm->mmap_sem);
1190 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1191 goto out_free_ph;
1192
1193 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1194 if (padzero(elf_bss)) {
1195 error = -EFAULT;
1196 goto out_free_ph;
1197 }
1198
1199 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1200 ELF_MIN_ALIGN - 1);
1201 bss = eppnt->p_memsz + eppnt->p_vaddr;
1202 if (bss > len) {
1203 down_write(&current->mm->mmap_sem);
1204 do_brk(len, bss - len);
1205 up_write(&current->mm->mmap_sem);
1206 }
1207 error = 0;
1208
1209 out_free_ph:
1210 kfree(elf_phdata);
1211 out:
1212 return error;
1213 }
1214
1215 /*
1216 * Note that some platforms still use traditional core dumps and not
1217 * the ELF core dump. Each platform can select it as appropriate.
1218 */
1219 #if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
1220
1221 /*
1222 * ELF core dumper
1223 *
1224 * Modelled on fs/exec.c:aout_core_dump()
1225 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1226 */
1227 /*
1228 * These are the only things you should do on a core-file: use only these
1229 * functions to write out all the necessary info.
1230 */
1231 static int dump_write(struct file *file, const void *addr, int nr)
1232 {
1233 return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
1234 }
1235
1236 static int dump_seek(struct file *file, loff_t off)
1237 {
1238 if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
1239 if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
1240 return 0;
1241 } else {
1242 char *buf = (char *)get_zeroed_page(GFP_KERNEL);
1243 if (!buf)
1244 return 0;
1245 while (off > 0) {
1246 unsigned long n = off;
1247 if (n > PAGE_SIZE)
1248 n = PAGE_SIZE;
1249 if (!dump_write(file, buf, n))
1250 return 0;
1251 off -= n;
1252 }
1253 free_page((unsigned long)buf);
1254 }
1255 return 1;
1256 }
1257
1258 /*
1259 * Decide whether a segment is worth dumping; default is yes to be
1260 * sure (missing info is worse than too much; etc).
1261 * Personally I'd include everything, and use the coredump limit...
1262 *
1263 * I think we should skip something. But I am not sure how. H.J.
1264 */
1265 static int maydump(struct vm_area_struct *vma)
1266 {
1267 /* The vma can be set up to tell us the answer directly. */
1268 if (vma->vm_flags & VM_ALWAYSDUMP)
1269 return 1;
1270
1271 /* Do not dump I/O mapped devices or special mappings */
1272 if (vma->vm_flags & (VM_IO | VM_RESERVED))
1273 return 0;
1274
1275 /* Dump shared memory only if mapped from an anonymous file. */
1276 if (vma->vm_flags & VM_SHARED)
1277 return vma->vm_file->f_path.dentry->d_inode->i_nlink == 0;
1278
1279 /* If it hasn't been written to, don't write it out */
1280 if (!vma->anon_vma)
1281 return 0;
1282
1283 return 1;
1284 }
1285
/* In-memory description of an ELF note that still has to be written out. */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* value stored in the note's n_type */
	unsigned int datasz;	/* size of the data in bytes */
	void *data;		/* the data itself */
};
1294
1295 static int notesize(struct memelfnote *en)
1296 {
1297 int sz;
1298
1299 sz = sizeof(struct elf_note);
1300 sz += roundup(strlen(en->name) + 1, 4);
1301 sz += roundup(en->datasz, 4);
1302
1303 return sz;
1304 }
1305
/*
 * Note-writing variant: write @nr bytes from @addr to `file' (taken from
 * the caller's scope) and add @nr to *@foffset.  Makes the enclosing
 * function return 0 when the write fails.
 */
#define DUMP_WRITE(addr, nr, foffset)				\
	do {							\
		if (!dump_write(file, (addr), (nr)))		\
			return 0;				\
		*(foffset) += (nr);				\
	} while (0)
1308
1309 static int alignfile(struct file *file, loff_t *foffset)
1310 {
1311 static const char buf[4] = { 0, };
1312 DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1313 return 1;
1314 }
1315
1316 static int writenote(struct memelfnote *men, struct file *file,
1317 loff_t *foffset)
1318 {
1319 struct elf_note en;
1320 en.n_namesz = strlen(men->name) + 1;
1321 en.n_descsz = men->datasz;
1322 en.n_type = men->type;
1323
1324 DUMP_WRITE(&en, sizeof(en), foffset);
1325 DUMP_WRITE(men->name, en.n_namesz, foffset);
1326 if (!alignfile(file, foffset))
1327 return 0;
1328 DUMP_WRITE(men->data, men->datasz, foffset);
1329 if (!alignfile(file, foffset))
1330 return 0;
1331
1332 return 1;
1333 }
1334 #undef DUMP_WRITE
1335
/*
 * Core-dump variants of the write/seek helpers.
 *
 * Both expand to a bare `if' statement (terminating semicolon included)
 * that jumps to an `end_coredump' label in the calling function on
 * failure, so they can only be used where that label exists.  They pick up
 * `file' from the caller's scope; DUMP_WRITE additionally adds @nr to the
 * caller's `size' and fails once that running total exceeds `limit'.
 */
1336 #define DUMP_WRITE(addr, nr) \
1337 if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
1338 goto end_coredump;
1339 #define DUMP_SEEK(off) \
1340 if (!dump_seek(file, (off))) \
1341 goto end_coredump;
1342
1343 static void fill_elf_header(struct elfhdr *elf, int segs)
1344 {
1345 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1346 elf->e_ident[EI_CLASS] = ELF_CLASS;
1347 elf->e_ident[EI_DATA] = ELF_DATA;
1348 elf->e_ident[EI_VERSION] = EV_CURRENT;
1349 elf->e_ident[EI_OSABI] = ELF_OSABI;
1350 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
1351
1352 elf->e_type = ET_CORE;
1353 elf->e_machine = ELF_ARCH;
1354 elf->e_version = EV_CURRENT;
1355 elf->e_entry = 0;
1356 elf->e_phoff = sizeof(struct elfhdr);
1357 elf->e_shoff = 0;
1358 elf->e_flags = ELF_CORE_EFLAGS;
1359 elf->e_ehsize = sizeof(struct elfhdr);
1360 elf->e_phentsize = sizeof(struct elf_phdr);
1361 elf->e_phnum = segs;
1362 elf->e_shentsize = 0;
1363 elf->e_shnum = 0;
1364 elf->e_shstrndx = 0;
1365 return;
1366 }
1367
1368 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1369 {
1370 phdr->p_type = PT_NOTE;
1371 phdr->p_offset = offset;
1372 phdr->p_vaddr = 0;
1373 phdr->p_paddr = 0;
1374 phdr->p_filesz = sz;
1375 phdr->p_memsz = 0;
1376 phdr->p_flags = 0;
1377 phdr->p_align = 0;
1378 return;
1379 }
1380
1381 static void fill_note(struct memelfnote *note, const char *name, int type,
1382 unsigned int sz, void *data)
1383 {
1384 note->name = name;
1385 note->type = type;
1386 note->datasz = sz;
1387 note->data = data;
1388 return;
1389 }
1390
1391 /*
1392 * fill up all the fields in prstatus from the given task struct, except
1393 * registers which need to be filled up separately.
1394 */
1395 static void fill_prstatus(struct elf_prstatus *prstatus,
1396 struct task_struct *p, long signr)
1397 {
1398 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1399 prstatus->pr_sigpend = p->pending.signal.sig[0];
1400 prstatus->pr_sighold = p->blocked.sig[0];
1401 prstatus->pr_pid = p->pid;
1402 prstatus->pr_ppid = p->parent->pid;
1403 prstatus->pr_pgrp = process_group(p);
1404 prstatus->pr_sid = process_session(p);
1405 if (thread_group_leader(p)) {
1406 /*
1407 * This is the record for the group leader. Add in the
1408 * cumulative times of previous dead threads. This total
1409 * won't include the time of each live thread whose state
1410 * is included in the core dump. The final total reported
1411 * to our parent process when it calls wait4 will include
1412 * those sums as well as the little bit more time it takes
1413 * this and each other thread to finish dying after the
1414 * core dump synchronization phase.
1415 */
1416 cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
1417 &prstatus->pr_utime);
1418 cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
1419 &prstatus->pr_stime);
1420 } else {
1421 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1422 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1423 }
1424 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1425 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1426 }
1427
1428 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1429 struct mm_struct *mm)
1430 {
1431 unsigned int i, len;
1432
1433 /* first copy the parameters from user space */
1434 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1435
1436 len = mm->arg_end - mm->arg_start;
1437 if (len >= ELF_PRARGSZ)
1438 len = ELF_PRARGSZ-1;
1439 if (copy_from_user(&psinfo->pr_psargs,
1440 (const char __user *)mm->arg_start, len))
1441 return -EFAULT;
1442 for(i = 0; i < len; i++)
1443 if (psinfo->pr_psargs[i] == 0)
1444 psinfo->pr_psargs[i] = ' ';
1445 psinfo->pr_psargs[len] = 0;
1446
1447 psinfo->pr_pid = p->pid;
1448 psinfo->pr_ppid = p->parent->pid;
1449 psinfo->pr_pgrp = process_group(p);
1450 psinfo->pr_sid = process_session(p);
1451
1452 i = p->state ? ffz(~p->state) + 1 : 0;
1453 psinfo->pr_state = i;
1454 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1455 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1456 psinfo->pr_nice = task_nice(p);
1457 psinfo->pr_flag = p->flags;
1458 SET_UID(psinfo->pr_uid, p->uid);
1459 SET_GID(psinfo->pr_gid, p->gid);
1460 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1461
1462 return 0;
1463 }
1464
1465 /* Here is the structure in which status of each thread is captured. */
1466 struct elf_thread_status
1467 {
1468 struct list_head list;
1469 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1470 elf_fpregset_t fpu; /* NT_PRFPREG */
1471 struct task_struct *thread;
1472 #ifdef ELF_CORE_COPY_XFPREGS
1473 elf_fpxregset_t xfpu; /* NT_PRXFPREG */
1474 #endif
1475 struct memelfnote notes[3];
1476 int num_notes;
1477 };
1478
1479 /*
1480 * In order to add the specific thread information for the elf file format,
1481 * we need to keep a linked list of every threads pr_status and then create
1482 * a single section for them in the final core file.
1483 */
1484 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1485 {
1486 int sz = 0;
1487 struct task_struct *p = t->thread;
1488 t->num_notes = 0;
1489
1490 fill_prstatus(&t->prstatus, p, signr);
1491 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1492
1493 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1494 &(t->prstatus));
1495 t->num_notes++;
1496 sz += notesize(&t->notes[0]);
1497
1498 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1499 &t->fpu))) {
1500 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1501 &(t->fpu));
1502 t->num_notes++;
1503 sz += notesize(&t->notes[1]);
1504 }
1505
1506 #ifdef ELF_CORE_COPY_XFPREGS
1507 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1508 fill_note(&t->notes[2], "LINUX", NT_PRXFPREG, sizeof(t->xfpu),
1509 &t->xfpu);
1510 t->num_notes++;
1511 sz += notesize(&t->notes[2]);
1512 }
1513 #endif
1514 return sz;
1515 }
1516
1517 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1518 struct vm_area_struct *gate_vma)
1519 {
1520 struct vm_area_struct *ret = tsk->mm->mmap;
1521
1522 if (ret)
1523 return ret;
1524 return gate_vma;
1525 }
1526 /*
1527 * Helper function for iterating across a vma list. It ensures that the caller
1528 * will visit `gate_vma' prior to terminating the search.
1529 */
1530 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1531 struct vm_area_struct *gate_vma)
1532 {
1533 struct vm_area_struct *ret;
1534
1535 ret = this_vma->vm_next;
1536 if (ret)
1537 return ret;
1538 if (this_vma == gate_vma)
1539 return NULL;
1540 return gate_vma;
1541 }
1542
1543 /*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out. If we run out of core limit
 * we just truncate.
 *
 * @signr: signal number recorded in the NT_PRSTATUS notes; the other
 *         threads sharing current->mm are only collected when it is
 *         non-zero.
 * @regs:  register state of the dumping task, copied into its
 *         NT_PRSTATUS note.
 * @file:  file the core image is written to.
 *
 * File layout produced below: ELF header, program headers (the PT_NOTE
 * header first, then one PT_LOAD header per vma including the gate vma),
 * the notes, padding up to a multiple of ELF_EXEC_PAGESIZE, and finally
 * the page contents of every vma accepted by maydump().
 *
 * Returns 0 if one of the buffer allocations failed before any output was
 * attempted, and 1 once the headers have been prepared - even if writing
 * is then cut short by the size limit or by a failed write.
 */
1550 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file)
1551 {
/* Capacity of the notes[] array allocated below. */
1552 #define NUM_NOTES 6
1553 int has_dumped = 0;
1554 mm_segment_t fs;
1555 int segs;
1556 size_t size = 0;
1557 int i;
1558 struct vm_area_struct *vma, *gate_vma;
1559 struct elfhdr *elf = NULL;
1560 loff_t offset = 0, dataoff, foffset;
1561 unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
1562 int numnote;
1563 struct memelfnote *notes = NULL;
1564 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1565 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1566 struct task_struct *g, *p;
1567 LIST_HEAD(thread_list);
1568 struct list_head *t;
1569 elf_fpregset_t *fpu = NULL;
1570 #ifdef ELF_CORE_COPY_XFPREGS
1571 elf_fpxregset_t *xfpu = NULL;
1572 #endif
1573 int thread_status_size = 0;
1574 elf_addr_t *auxv;
1575 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1576 int extra_notes_size;
1577 #endif
1578
1579 /*
 * We no longer stop all VM operations.
 *
 * This is because those processes that could possibly change map_count
 * or the mmap / vma pages are now blocked in do_exit on current
 * finishing this core dump.
 *
 * Only ptrace can touch these memory addresses, but it doesn't change
 * the map_count or the pages allocated. So no possibility of crashing
 * exists while dumping the mm->vm_next areas to the core file.
 */
1590
1591 /* alloc memory for large data structures: too large to be on stack */
1592 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1593 if (!elf)
1594 goto cleanup;
1595 prstatus = kmalloc(sizeof(*prstatus), GFP_KERNEL);
1596 if (!prstatus)
1597 goto cleanup;
1598 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1599 if (!psinfo)
1600 goto cleanup;
1601 notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote), GFP_KERNEL);
1602 if (!notes)
1603 goto cleanup;
1604 fpu = kmalloc(sizeof(*fpu), GFP_KERNEL);
1605 if (!fpu)
1606 goto cleanup;
1607 #ifdef ELF_CORE_COPY_XFPREGS
1608 xfpu = kmalloc(sizeof(*xfpu), GFP_KERNEL);
1609 if (!xfpu)
1610 goto cleanup;
1611 #endif
1612
/*
 * Queue one elf_thread_status for every other thread that shares
 * current->mm, then capture each queued thread's state and add up the
 * size of the notes this produces.
 */
1613 if (signr) {
1614 struct elf_thread_status *tmp;
1615 rcu_read_lock();
1616 do_each_thread(g,p)
1617 if (current->mm == p->mm && current != p) {
1618 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC);
1619 if (!tmp) {
1620 rcu_read_unlock();
1621 goto cleanup;
1622 }
1623 tmp->thread = p;
1624 list_add(&tmp->list, &thread_list);
1625 }
1626 while_each_thread(g,p);
1627 rcu_read_unlock();
1628 list_for_each(t, &thread_list) {
/* NOTE: this `tmp' shadows the one declared at the top of the if block. */
1629 struct elf_thread_status *tmp;
1630 int sz;
1631
1632 tmp = list_entry(t, struct elf_thread_status, list);
1633 sz = elf_dump_thread_status(signr, tmp);
1634 thread_status_size += sz;
1635 }
1636 }
1637 /* now collect the dump for the current */
1638 memset(prstatus, 0, sizeof(*prstatus));
1639 fill_prstatus(prstatus, current, signr);
1640 elf_core_copy_regs(&prstatus->pr_reg, regs);
1641
/* One program header per vma, plus arch extras and the gate vma if any. */
1642 segs = current->mm->map_count;
1643 #ifdef ELF_CORE_EXTRA_PHDRS
1644 segs += ELF_CORE_EXTRA_PHDRS;
1645 #endif
1646
1647 gate_vma = get_gate_vma(current);
1648 if (gate_vma != NULL)
1649 segs++;
1650
1651 /* Set up header */
1652 fill_elf_header(elf, segs + 1); /* including notes section */
1653
/* From here on the function reports that a dump was attempted. */
1654 has_dumped = 1;
1655 current->flags |= PF_DUMPCORE;
1656
1657 /*
1658 * Set up the notes in similar form to SVR4 core dumps made
1659 * with info from their /proc.
1660 */
1661
1662 fill_note(notes + 0, "CORE", NT_PRSTATUS, sizeof(*prstatus), prstatus);
/* The return value of fill_psinfo() is not checked here. */
1663 fill_psinfo(psinfo, current->group_leader, current->mm);
1664 fill_note(notes + 1, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1665
1666 numnote = 2;
1667
1668 auxv = (elf_addr_t *)current->mm->saved_auxv;
1669
/*
 * Count the auxv words two at a time, up to and including the pair
 * that starts with AT_NULL.
 */
1670 i = 0;
1671 do
1672 i += 2;
1673 while (auxv[i - 2] != AT_NULL);
1674 fill_note(&notes[numnote++], "CORE", NT_AUXV,
1675 i * sizeof(elf_addr_t), auxv);
1676
1677 /* Try to dump the FPU. */
1678 if ((prstatus->pr_fpvalid =
1679 elf_core_copy_task_fpregs(current, regs, fpu)))
1680 fill_note(notes + numnote++,
1681 "CORE", NT_PRFPREG, sizeof(*fpu), fpu);
1682 #ifdef ELF_CORE_COPY_XFPREGS
1683 if (elf_core_copy_task_xfpregs(current, xfpu))
1684 fill_note(notes + numnote++,
1685 "LINUX", NT_PRXFPREG, sizeof(*xfpu), xfpu);
1686 #endif
1687
/*
 * Switch to KERNEL_DS for the writes; the previous value is restored
 * at end_coredump.
 */
1688 fs = get_fs();
1689 set_fs(KERNEL_DS);
1690
1691 DUMP_WRITE(elf, sizeof(*elf));
1692 offset += sizeof(*elf); /* Elf header */
1693 offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
/* foffset follows the file position from the start of the notes onwards. */
1694 foffset = offset;
1695
1696 /* Write notes phdr entry */
1697 {
1698 struct elf_phdr phdr;
1699 int sz = 0;
1700
1701 for (i = 0; i < numnote; i++)
1702 sz += notesize(notes + i);
1703
1704 sz += thread_status_size;
1705
1706 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1707 extra_notes_size = ELF_CORE_EXTRA_NOTES_SIZE;
1708 sz += extra_notes_size;
1709 #endif
1710
1711 fill_elf_note_phdr(&phdr, sz, offset);
1712 offset += sz;
1713 DUMP_WRITE(&phdr, sizeof(phdr));
1714 }
1715
/* The segment contents start at the next ELF_EXEC_PAGESIZE boundary. */
1716 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1717
1718 /* Write program headers for segments dump */
1719 for (vma = first_vma(current, gate_vma); vma != NULL;
1720 vma = next_vma(vma, gate_vma)) {
1721 struct elf_phdr phdr;
1722 size_t sz;
1723
1724 sz = vma->vm_end - vma->vm_start;
1725
1726 phdr.p_type = PT_LOAD;
1727 phdr.p_offset = offset;
1728 phdr.p_vaddr = vma->vm_start;
1729 phdr.p_paddr = 0;
/* Areas rejected by maydump() get a header but no file contents. */
1730 phdr.p_filesz = maydump(vma) ? sz : 0;
1731 phdr.p_memsz = sz;
1732 offset += phdr.p_filesz;
1733 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1734 if (vma->vm_flags & VM_WRITE)
1735 phdr.p_flags |= PF_W;
1736 if (vma->vm_flags & VM_EXEC)
1737 phdr.p_flags |= PF_X;
1738 phdr.p_align = ELF_EXEC_PAGESIZE;
1739
1740 DUMP_WRITE(&phdr, sizeof(phdr));
1741 }
1742
1743 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1744 ELF_CORE_WRITE_EXTRA_PHDRS;
1745 #endif
1746
1747 /* write out the notes section */
1748 for (i = 0; i < numnote; i++)
1749 if (!writenote(notes + i, file, &foffset))
1750 goto end_coredump;
1751
1752 #ifdef ELF_CORE_WRITE_EXTRA_NOTES
1753 ELF_CORE_WRITE_EXTRA_NOTES;
1754 foffset += extra_notes_size;
1755 #endif
1756
1757 /* write out the thread status notes section */
1758 list_for_each(t, &thread_list) {
1759 struct elf_thread_status *tmp =
1760 list_entry(t, struct elf_thread_status, list);
1761
1762 for (i = 0; i < tmp->num_notes; i++)
1763 if (!writenote(&tmp->notes[i], file, &foffset))
1764 goto end_coredump;
1765 }
1766
1767 /* Align to page */
1768 DUMP_SEEK(dataoff - foffset);
1769
/* Second pass: write the contents of every dumpable vma, page by page. */
1770 for (vma = first_vma(current, gate_vma); vma != NULL;
1771 vma = next_vma(vma, gate_vma)) {
1772 unsigned long addr;
1773
1774 if (!maydump(vma))
1775 continue;
1776
1777 for (addr = vma->vm_start;
1778 addr < vma->vm_end;
1779 addr += PAGE_SIZE) {
1780 struct page *page;
/*
 * NOTE: this inner `vma' shadows the loop variable inside the loop
 * body; it is the vma filled in by get_user_pages() and is what
 * flush_cache_page() receives.
 */
1781 struct vm_area_struct *vma;
1782
1783 if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1784 &page, &vma) <= 0) {
/* No page could be obtained: leave a page-sized gap instead. */
1785 DUMP_SEEK(PAGE_SIZE);
1786 } else {
/* The zero page is skipped over rather than written out. */
1787 if (page == ZERO_PAGE(addr)) {
1788 if (!dump_seek(file, PAGE_SIZE)) {
1789 page_cache_release(page);
1790 goto end_coredump;
1791 }
1792 } else {
1793 void *kaddr;
1794 flush_cache_page(vma, addr,
1795 page_to_pfn(page));
1796 kaddr = kmap(page);
/* Open-coded DUMP_WRITE so that the page is unmapped and released on failure. */
1797 if ((size += PAGE_SIZE) > limit ||
1798 !dump_write(file, kaddr,
1799 PAGE_SIZE)) {
1800 kunmap(page);
1801 page_cache_release(page);
1802 goto end_coredump;
1803 }
1804 kunmap(page);
1805 }
1806 page_cache_release(page);
1807 }
1808 }
1809 }
1810
1811 #ifdef ELF_CORE_WRITE_EXTRA_DATA
1812 ELF_CORE_WRITE_EXTRA_DATA;
1813 #endif
1814
1815 end_coredump:
1816 set_fs(fs);
1817
/* Free the per-thread records and every buffer allocated above. */
1818 cleanup:
1819 while (!list_empty(&thread_list)) {
1820 struct list_head *tmp = thread_list.next;
1821 list_del(tmp);
1822 kfree(list_entry(tmp, struct elf_thread_status, list));
1823 }
1824
1825 kfree(elf);
1826 kfree(prstatus);
1827 kfree(psinfo);
1828 kfree(notes);
1829 kfree(fpu);
1830 #ifdef ELF_CORE_COPY_XFPREGS
1831 kfree(xfpu);
1832 #endif
1833 return has_dumped;
1834 #undef NUM_NOTES
1835 }
1836
1837 #endif /* USE_ELF_CORE_DUMP */
1838
1839 static int __init init_elf_binfmt(void)
1840 {
1841 return register_binfmt(&elf_format);
1842 }
1843
1844 static void __exit exit_elf_binfmt(void)
1845 {
1846 /* Remove the COFF and ELF loaders. */
1847 unregister_binfmt(&elf_format);
1848 }
1849
/*
 * Hook init_elf_binfmt() and exit_elf_binfmt() up through core_initcall()
 * and module_exit(), and declare the licence of this code.
 */
1850 core_initcall(init_elf_binfmt);
1851 module_exit(exit_elf_binfmt);
1852 MODULE_LICENSE("GPL");
This page took 0.071039 seconds and 5 git commands to generate.