ia64/xen-unstable

view linux-2.4.27-xen-sparse/fs/exec.c @ 2694:82d4a403106d

bitkeeper revision 1.1159.122.2 (4177d249OfMUUU4w3P-8H14U7koYXQ)

Remove netbsd domain builder.
author cl349@freefall.cl.cam.ac.uk
date Thu Oct 21 15:14:17 2004 +0000 (2004-10-21)
parents 869c20f2977b
children
line source
1 /*
2 * linux/fs/exec.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * #!-checking implemented by tytso.
9 */
10 /*
11 * Demand-loading implemented 01.12.91 - no need to read anything but
12 * the header into memory. The inode of the executable is put into
13 * "current->executable", and page faults do the actual loading. Clean.
14 *
15 * Once more I can proudly say that linux stood up to being changed: it
16 * was less than 2 hours work to get demand-loading completely implemented.
17 *
18 * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
19 * current->executable is only used by the procfs. This allows a dispatch
20 * table to check for several different types of binary formats. We keep
21 * trying until we recognize the file or we run out of supported binary
22 * formats.
23 */
25 #include <linux/config.h>
26 #include <linux/slab.h>
27 #include <linux/file.h>
28 #include <linux/mman.h>
29 #include <linux/a.out.h>
30 #include <linux/stat.h>
31 #include <linux/fcntl.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
34 #include <linux/pagemap.h>
35 #include <linux/highmem.h>
36 #include <linux/spinlock.h>
37 #include <linux/personality.h>
38 #include <linux/swap.h>
39 #include <linux/utsname.h>
40 #define __NO_VERSION__
41 #include <linux/module.h>
43 #include <asm/uaccess.h>
44 #include <asm/pgalloc.h>
45 #include <asm/mmu_context.h>
47 #ifdef CONFIG_KMOD
48 #include <linux/kmod.h>
49 #endif
/* Non-zero: append ".pid" to core file names (see format_corename()). */
int core_uses_pid;
/* Template for core file names; %-escapes expanded by format_corename(). */
char core_pattern[65] = "core";
/* Non-zero: allow set-id (non task_dumpable) processes to dump core. */
int core_setuid_ok = 0;
/* The maximal length of core_pattern is also specified in sysctl.c */

/* Singly-linked list of registered binary format handlers. */
static struct linux_binfmt *formats;
/* Protects 'formats' and the ->next links of every registered handler. */
static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
59 int register_binfmt(struct linux_binfmt * fmt)
60 {
61 struct linux_binfmt ** tmp = &formats;
63 if (!fmt)
64 return -EINVAL;
65 if (fmt->next)
66 return -EBUSY;
67 write_lock(&binfmt_lock);
68 while (*tmp) {
69 if (fmt == *tmp) {
70 write_unlock(&binfmt_lock);
71 return -EBUSY;
72 }
73 tmp = &(*tmp)->next;
74 }
75 fmt->next = formats;
76 formats = fmt;
77 write_unlock(&binfmt_lock);
78 return 0;
79 }
81 int unregister_binfmt(struct linux_binfmt * fmt)
82 {
83 struct linux_binfmt ** tmp = &formats;
85 write_lock(&binfmt_lock);
86 while (*tmp) {
87 if (fmt == *tmp) {
88 *tmp = fmt->next;
89 write_unlock(&binfmt_lock);
90 return 0;
91 }
92 tmp = &(*tmp)->next;
93 }
94 write_unlock(&binfmt_lock);
95 return -EINVAL;
96 }
/* Drop the module reference taken by try_inc_mod_count() on fmt's module. */
static inline void put_binfmt(struct linux_binfmt * fmt)
{
	if (fmt->module)
		__MOD_DEC_USE_COUNT(fmt->module);
}
104 /*
105 * Note that a shared library must be both readable and executable due to
106 * security reasons.
107 *
108 * Also note that we take the address to load from the file itself.
109 */
/* Load a shared library by offering it to each binfmt's load_shlib().
 * Returns 0 on success or a negative errno. */
asmlinkage long sys_uselib(const char * library)
{
	struct file * file;
	struct nameidata nd;
	int error;

	error = user_path_walk(library, &nd);
	if (error)
		goto out;

	/* Only regular files can be used as libraries. */
	error = -EINVAL;
	if (!S_ISREG(nd.dentry->d_inode->i_mode))
		goto exit;

	/* Must be both readable and executable (see comment above). */
	error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
	if (error)
		goto exit;

	/* dentry_open() consumes the path references, even on error,
	 * so from here on we must not call path_release(). */
	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto out;

	error = -ENOEXEC;
	if(file->f_op && file->f_op->read) {
		struct linux_binfmt * fmt;

		read_lock(&binfmt_lock);
		for (fmt = formats ; fmt ; fmt = fmt->next) {
			if (!fmt->load_shlib)
				continue;
			/* Pin the handler's module so binfmt_lock can be
			 * dropped across the load_shlib() call. */
			if (!try_inc_mod_count(fmt->module))
				continue;
			read_unlock(&binfmt_lock);
			error = fmt->load_shlib(file);
			read_lock(&binfmt_lock);
			put_binfmt(fmt);
			/* -ENOEXEC means "not mine"; try the next handler. */
			if (error != -ENOEXEC)
				break;
		}
		read_unlock(&binfmt_lock);
	}
	fput(file);
out:
	return error;
exit:
	path_release(&nd);
	goto out;
}
160 /*
161 * count() counts the number of argument/environment string pointers
162 */
163 static int count(char ** argv, int max)
164 {
165 int i = 0;
167 if (argv != NULL) {
168 for (;;) {
169 char * p;
171 if (get_user(p, argv))
172 return -EFAULT;
173 if (!p)
174 break;
175 argv++;
176 if(++i > max)
177 return -E2BIG;
178 }
179 }
180 return i;
181 }
183 /*
184 * 'copy_strings()' copies argument/envelope strings from user
185 * memory to free pages in kernel mem. These are in a format ready
186 * to be put directly into the top of new user memory.
187 */
/* Copy argc strings from the user-space vector argv into bprm's argument
 * pages, growing down from bprm->p.  Returns 0 or a negative errno. */
int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
{
	struct page *kmapped_page = NULL;
	char *kaddr = NULL;
	int ret;

	/* Strings are placed top-down: argv[argc-1] ends up highest. */
	while (argc-- > 0) {
		char *str;
		int len;
		unsigned long pos;

		/* len includes the trailing NUL; 0 means the fetch faulted. */
		if (get_user(str, argv+argc) ||
			!(len = strnlen_user(str, bprm->p))) {
			ret = -EFAULT;
			goto out;
		}

		/* Out of room in the argument area. */
		if (bprm->p < len) {
			ret = -E2BIG;
			goto out;
		}

		bprm->p -= len;
		/* XXX: add architecture specific overflow check here. */
		pos = bprm->p;

		/* Copy the string into bprm->page[], page by page. */
		while (len > 0) {
			int i, new, err;
			int offset, bytes_to_copy;
			struct page *page;

			offset = pos % PAGE_SIZE;
			i = pos/PAGE_SIZE;
			page = bprm->page[i];
			new = 0;
			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				bprm->page[i] = page;
				if (!page) {
					ret = -ENOMEM;
					goto out;
				}
				new = 1;
			}

			/* Keep the current page kmapped across iterations
			 * to avoid redundant map/unmap cycles. */
			if (page != kmapped_page) {
				if (kmapped_page)
					kunmap(kmapped_page);
				kmapped_page = page;
				kaddr = kmap(kmapped_page);
			}
			/* Zero the parts of a fresh page we won't write. */
			if (new && offset)
				memset(kaddr, 0, offset);
			bytes_to_copy = PAGE_SIZE - offset;
			if (bytes_to_copy > len) {
				bytes_to_copy = len;
				if (new)
					memset(kaddr+offset+len, 0,
						PAGE_SIZE-offset-len);
			}
			err = copy_from_user(kaddr+offset, str, bytes_to_copy);
			if (err) {
				ret = -EFAULT;
				goto out;
			}

			pos += bytes_to_copy;
			str += bytes_to_copy;
			len -= bytes_to_copy;
		}
	}
	ret = 0;
out:
	if (kmapped_page)
		kunmap(kmapped_page);
	return ret;
}
266 /*
267 * Like copy_strings, but get argv and its values from kernel memory.
268 */
269 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
270 {
271 int r;
272 mm_segment_t oldfs = get_fs();
273 set_fs(KERNEL_DS);
274 r = copy_strings(argc, argv, bprm);
275 set_fs(oldfs);
276 return r;
277 }
279 /*
280 * This routine is used to map in a page into an address space: needed by
281 * execve() for the initial stack and environment pages.
282 *
283 * tsk->mmap_sem is held for writing.
284 */
/* Map one argument page into tsk's address space at 'address', marked
 * dirty and writable.  On failure the page is freed and tsk is killed. */
void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
{
	pgd_t * pgd;
	pmd_t * pmd;
	pte_t * pte;
	struct vm_area_struct *vma;
	pgprot_t prot = PAGE_COPY;

	/* We expect to own the only reference to this fresh page. */
	if (page_count(page) != 1)
		printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
	pgd = pgd_offset(tsk->mm, address);

	spin_lock(&tsk->mm->page_table_lock);
	pmd = pmd_alloc(tsk->mm, pgd, address);
	if (!pmd)
		goto out;
	pte = pte_alloc(tsk->mm, pmd, address);
	if (!pte)
		goto out;
	/* Slot already populated: bail out and drop our page. */
	if (!pte_none(*pte))
		goto out;
	lru_cache_add(page);
	flush_dcache_page(page);
	flush_page_to_ram(page);
	/* lookup is cheap because there is only a single entry in the list */
	vma = find_vma(tsk->mm, address);
	if (vma)
		prot = vma->vm_page_prot;
	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
	/* Xen: push any queued page-table updates to the hypervisor. */
	XEN_flush_page_update_queue();
	tsk->mm->rss++;
	spin_unlock(&tsk->mm->page_table_lock);

	/* no need for flush_tlb */
	return;
out:
	/* Allocation failed or the pte was taken: free the page and
	 * kill the task — we cannot build its stack. */
	spin_unlock(&tsk->mm->page_table_lock);
	__free_page(page);
	force_sig(SIGKILL, tsk);
	return;
}
/* Create the stack VMA just below STACK_TOP and move the collected
 * argument pages into it.  Returns 0 or -ENOMEM. */
int setup_arg_pages(struct linux_binprm *bprm)
{
	unsigned long stack_base;
	struct vm_area_struct *mpnt;
	int i;

	/* Final home of the MAX_ARG_PAGES argument window. */
	stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;

	/* Relocate the in-window offsets to absolute user addresses. */
	bprm->p += stack_base;
	if (bprm->loader)
		bprm->loader += stack_base;
	bprm->exec += stack_base;

	mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
	if (!mpnt)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	{
		/* Build the stack VMA covering [bprm->p rounded down, STACK_TOP). */
		mpnt->vm_mm = current->mm;
		mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
		mpnt->vm_end = STACK_TOP;
		mpnt->vm_flags = VM_STACK_FLAGS;
		mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7];
		mpnt->vm_ops = NULL;
		mpnt->vm_pgoff = 0;
		mpnt->vm_file = NULL;
		mpnt->vm_private_data = (void *) 0;
		insert_vm_struct(current->mm, mpnt);
		current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
	}

	/* Hand each populated argument page over to the new stack VMA;
	 * ownership transfers to the mm (bprm->page[i] is cleared). */
	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page *page = bprm->page[i];
		if (page) {
			bprm->page[i] = NULL;
			put_dirty_page(current,page,stack_base);
		}
		stack_base += PAGE_SIZE;
	}
	up_write(&current->mm->mmap_sem);

	return 0;
}
372 struct file *open_exec(const char *name)
373 {
374 struct nameidata nd;
375 struct inode *inode;
376 struct file *file;
377 int err = 0;
379 err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
380 file = ERR_PTR(err);
381 if (!err) {
382 inode = nd.dentry->d_inode;
383 file = ERR_PTR(-EACCES);
384 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
385 S_ISREG(inode->i_mode)) {
386 int err = permission(inode, MAY_EXEC);
387 if (!err && !(inode->i_mode & 0111))
388 err = -EACCES;
389 file = ERR_PTR(err);
390 if (!err) {
391 file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
392 if (!IS_ERR(file)) {
393 err = deny_write_access(file);
394 if (err) {
395 fput(file);
396 file = ERR_PTR(err);
397 }
398 }
399 out:
400 return file;
401 }
402 }
403 path_release(&nd);
404 }
405 goto out;
406 }
408 int kernel_read(struct file *file, unsigned long offset,
409 char * addr, unsigned long count)
410 {
411 mm_segment_t old_fs;
412 loff_t pos = offset;
413 int result = -ENOSYS;
415 if (!file->f_op->read)
416 goto fail;
417 old_fs = get_fs();
418 set_fs(get_ds());
419 result = file->f_op->read(file, addr, count, &pos);
420 set_fs(old_fs);
421 fail:
422 return result;
423 }
/* Give the exec'ing task a private, empty mm.  Reuses the old mm when we
 * are its only user, otherwise allocates a fresh one and switches to it.
 * Returns 0 or -ENOMEM. */
static int exec_mmap(void)
{
	struct mm_struct * mm, * old_mm;

	old_mm = current->mm;

	/* Sole user of the old mm: just empty it in place and keep it. */
	if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
		mm_release();
		down_write(&old_mm->mmap_sem);
		exit_mmap(old_mm);
		up_write(&old_mm->mmap_sem);
		return 0;
	}

	/* The mm is shared (CLONE_VM) or absent: switch to a fresh one. */
	mm = mm_alloc();
	if (mm) {
		struct mm_struct *active_mm;

		if (init_new_context(current, mm)) {
			mmdrop(mm);
			return -ENOMEM;
		}

		/* Add it to the list of mm's */
		spin_lock(&mmlist_lock);
		list_add(&mm->mmlist, &init_mm.mmlist);
		mmlist_nr++;
		spin_unlock(&mmlist_lock);

		/* Install the new mm as both mm and active_mm, then make
		 * the hardware context follow. */
		task_lock(current);
		active_mm = current->active_mm;
		current->mm = mm;
		current->active_mm = mm;
		task_unlock(current);
		activate_mm(active_mm, mm);
		mm_release();
		if (old_mm) {
			/* A task with a real mm must have been running on it. */
			if (active_mm != old_mm) BUG();
			mmput(old_mm);
			return 0;
		}
		/* We had no mm (lazy/kernel thread): drop the borrowed one. */
		mmdrop(active_mm);
		return 0;
	}
	return -ENOMEM;
}
473 /*
474 * This function makes sure the current process has its own signal table,
475 * so that flush_signal_handlers can later reset the handlers without
476 * disturbing other processes. (Other processes might share the signal
477 * table via the CLONE_SIGNAL option to clone().)
478 */
/* Give the task a private copy of its signal table if it is currently
 * shared.  Returns 0 or -ENOMEM.  The old table's reference is NOT
 * dropped here — see release_old_signals() and the comment below. */
static inline int make_private_signals(void)
{
	struct signal_struct * newsig;

	/* Already unshared: nothing to do. */
	if (atomic_read(&current->sig->count) <= 1)
		return 0;
	newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
	if (newsig == NULL)
		return -ENOMEM;
	spin_lock_init(&newsig->siglock);
	atomic_set(&newsig->count, 1);
	memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
	/* Swap in the private copy under sigmask_lock. */
	spin_lock_irq(&current->sigmask_lock);
	current->sig = newsig;
	spin_unlock_irq(&current->sigmask_lock);
	return 0;
}
498 /*
499 * If make_private_signals() made a copy of the signal table, decrement the
500 * refcount of the original table, and free it if necessary.
501 * We don't do that in make_private_signals() so that we can back off
502 * in flush_old_exec() if an error occurs after calling make_private_signals().
503 */
505 static inline void release_old_signals(struct signal_struct * oldsig)
506 {
507 if (current->sig == oldsig)
508 return;
509 if (atomic_dec_and_test(&oldsig->count))
510 kmem_cache_free(sigact_cachep, oldsig);
511 }
513 /*
514 * These functions flushes out all traces of the currently running executable
515 * so that a new one can be started
516 */
/* Close every file descriptor marked close-on-exec, one fdset word at a
 * time. */
static inline void flush_old_files(struct files_struct * files)
{
	long j = -1;

	write_lock(&files->file_lock);
	for (;;) {
		unsigned long set, i;

		j++;
		i = j * __NFDBITS;
		if (i >= files->max_fds || i >= files->max_fdset)
			break;
		set = files->close_on_exec->fds_bits[j];
		if (!set)
			continue;
		/* Clear the word BEFORE dropping the lock: sys_close() may
		 * sleep, and we must not process the same bits twice. */
		files->close_on_exec->fds_bits[j] = 0;
		write_unlock(&files->file_lock);
		for ( ; set ; i++,set >>= 1) {
			if (set & 1) {
				sys_close(i);
			}
		}
		write_lock(&files->file_lock);

	}
	write_unlock(&files->file_lock);
}
546 /*
547 * An execve() will automatically "de-thread" the process.
548 * Note: we don't have to hold the tasklist_lock to test
549 * whether we might need to do this. If we're not part of
550 * a thread group, there is no way we can become one
551 * dynamically. And if we are, we only need to protect the
552 * unlink - even if we race with the last other thread exit,
553 * at worst the list_del_init() might end up being a no-op.
554 */
/* Detach tsk from its thread group and make it its own group leader.
 * See the comment above for why the unlocked list_empty() test is safe. */
static inline void de_thread(struct task_struct *tsk)
{
	if (!list_empty(&tsk->thread_group)) {
		write_lock_irq(&tasklist_lock);
		list_del_init(&tsk->thread_group);
		write_unlock_irq(&tasklist_lock);
	}

	/* Minor oddity: this might stay the same. */
	tsk->tgid = tsk->pid;
}
/* Tear down the old program state (signals, files, mm, comm, thread
 * group) in preparation for the new image.  Before exec_mmap() succeeds
 * every step can be undone; afterwards we are committed.
 * Returns 0 or a negative errno. */
int flush_old_exec(struct linux_binprm * bprm)
{
	char * name;
	int i, ch, retval;
	struct signal_struct * oldsig;
	struct files_struct * files;

	/*
	 * Make sure we have a private signal table
	 */
	oldsig = current->sig;
	retval = make_private_signals();
	if (retval) goto flush_failed;

	/*
	 * Make sure we have private file handles. Ask the
	 * fork helper to do the work for us and the exit
	 * helper to do the cleanup of the old one.
	 */
	files = current->files;		/* refcounted so safe to hold */
	retval = unshare_files();
	if(retval)
		goto flush_failed;

	/*
	 * Release all of the old mmap stuff
	 */
	retval = exec_mmap();
	if (retval) goto mmap_failed;

	/* This is the point of no return */
	steal_locks(files);
	put_files_struct(files);
	release_old_signals(oldsig);

	current->sas_ss_sp = current->sas_ss_size = 0;

	/* No identity change: the new image may dump core. */
	if (current->euid == current->uid && current->egid == current->gid) {
		current->mm->dumpable = 1;
		current->task_dumpable = 1;
	}
	/* Set comm[] to the basename of the new image (15 chars max). */
	name = bprm->filename;
	for (i=0; (ch = *(name++)) != '\0';) {
		if (ch == '/')
			i = 0;
		else
			if (i < 15)
				current->comm[i++] = ch;
	}
	current->comm[i] = '\0';

	flush_thread();

	de_thread(current);

	/* Set-id or unreadable images must not be dumpable or traceable. */
	if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
	    permission(bprm->file->f_dentry->d_inode,MAY_READ))
		current->mm->dumpable = 0;

	/* An exec changes our domain. We are no longer part of the thread
	   group */

	current->self_exec_id++;

	flush_signal_handlers(current);
	flush_old_files(current->files);

	return 0;

mmap_failed:
	/* unshare_files() succeeded: put the new table, restore the old. */
	put_files_struct(current->files);
	current->files = files;
flush_failed:
	/* Undo make_private_signals() if it replaced the table. */
	spin_lock_irq(&current->sigmask_lock);
	if (current->sig != oldsig) {
		kmem_cache_free(sigact_cachep, current->sig);
		current->sig = oldsig;
	}
	spin_unlock_irq(&current->sigmask_lock);
	return retval;
}
650 /*
651 * We mustn't allow tracing of suid binaries, unless
652 * the tracer has the capability to trace anything..
653 */
654 static inline int must_not_trace_exec(struct task_struct * p)
655 {
656 return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP);
657 }
659 /*
660 * Fill the binprm structure from the inode.
661 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
662 */
/* Fill bprm's credentials and capability sets from the file's inode,
 * then read the first BINPRM_BUF_SIZE bytes into bprm->buf.
 * Returns the kernel_read() result or a negative errno. */
int prepare_binprm(struct linux_binprm *bprm)
{
	int mode;
	struct inode * inode = bprm->file->f_dentry->d_inode;

	mode = inode->i_mode;
	/*
	 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
	 * vfs_permission lets a non-executable through
	 */
	if (!(mode & 0111))	/* with at least _one_ execute bit set */
		return -EACCES;
	if (bprm->file->f_op == NULL)
		return -EACCES;

	/* Default: no identity change. */
	bprm->e_uid = current->euid;
	bprm->e_gid = current->egid;

	/* Honour set-id bits only on mounts that allow them. */
	if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
		/* Set-uid? */
		if (mode & S_ISUID)
			bprm->e_uid = inode->i_uid;

		/* Set-gid? */
		/*
		 * If setgid is set but no group execute bit then this
		 * is a candidate for mandatory locking, not a setgid
		 * executable.
		 */
		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
			bprm->e_gid = inode->i_gid;
	}

	/* We don't have VFS support for capabilities yet */
	cap_clear(bprm->cap_inheritable);
	cap_clear(bprm->cap_permitted);
	cap_clear(bprm->cap_effective);

	/* To support inheritance of root-permissions and suid-root
	 * executables under compatibility mode, we raise all three
	 * capability sets for the file.
	 *
	 * If only the real uid is 0, we only raise the inheritable
	 * and permitted sets of the executable file.
	 */
	if (!issecure(SECURE_NOROOT)) {
		if (bprm->e_uid == 0 || current->uid == 0) {
			cap_set_full(bprm->cap_inheritable);
			cap_set_full(bprm->cap_permitted);
		}
		if (bprm->e_uid == 0)
			cap_set_full(bprm->cap_effective);
	}

	memset(bprm->buf,0,BINPRM_BUF_SIZE);
	return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
}
722 /*
723 * This function is used to produce the new IDs and capabilities
724 * from the old ones and the file's capabilities.
725 *
726 * The formula used for evolving capabilities is:
727 *
728 * pI' = pI
729 * (***) pP' = (fP & X) | (fI & pI)
730 * pE' = pP' & fE [NB. fE is 0 or ~0]
731 *
732 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
733 * ' indicates post-exec(), and X is the global 'cap_bset'.
734 *
735 */
/* Apply the new uids/gids and capability sets computed by
 * prepare_binprm() to the current task (see formula above). */
void compute_creds(struct linux_binprm *bprm)
{
	kernel_cap_t new_permitted, working;
	int do_unlock = 0;

	/* pP' = (fP & cap_bset) | (fI & pI) */
	new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
	working = cap_intersect(bprm->cap_inheritable,
				current->cap_inheritable);
	new_permitted = cap_combine(new_permitted, working);

	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
	    !cap_issubset(new_permitted, current->cap_permitted)) {
		/* Privilege is being gained: result must not dump core. */
		current->mm->dumpable = 0;

		lock_kernel();
		/* If we are traced, or still share fs/files/signal state
		 * with another task, gaining privilege would leak it to
		 * them — degrade unless suitably capable. */
		if (must_not_trace_exec(current)
		    || atomic_read(&current->fs->count) > 1
		    || atomic_read(&current->files->count) > 1
		    || atomic_read(&current->sig->count) > 1) {
			if(!capable(CAP_SETUID)) {
				bprm->e_uid = current->uid;
				bprm->e_gid = current->gid;
			}
			if(!capable(CAP_SETPCAP)) {
				new_permitted = cap_intersect(new_permitted,
							current->cap_permitted);
			}
		}
		do_unlock = 1;
	}


	/* For init, we want to retain the capabilities set
	 * in the init_task struct. Thus we skip the usual
	 * capability rules */
	if (current->pid != 1) {
		current->cap_permitted = new_permitted;
		current->cap_effective =
			cap_intersect(new_permitted, bprm->cap_effective);
	}

	/* AUD: Audit candidate if current->cap_effective is set */

	current->suid = current->euid = current->fsuid = bprm->e_uid;
	current->sgid = current->egid = current->fsgid = bprm->e_gid;

	if(do_unlock)
		unlock_kernel();
	current->keep_capabilities = 0;
}
/* Strip argv[0] from the argument pages: advance bprm->p past its
 * terminating NUL and decrement argc.  Used by #! handlers that supply
 * their own argv[0]. */
void remove_arg_zero(struct linux_binprm *bprm)
{
	if (bprm->argc) {
		unsigned long offset;
		char * kaddr;
		struct page *page;

		offset = bprm->p % PAGE_SIZE;
		/* Jump into the loop to map the page bprm->p points at
		 * before the first dereference of kaddr. */
		goto inside;

		while (bprm->p++, *(kaddr+offset++)) {
			/* Crossed a page boundary: remap to the next page. */
			if (offset != PAGE_SIZE)
				continue;
			offset = 0;
			kunmap(page);
inside:
			page = bprm->page[bprm->p/PAGE_SIZE];
			kaddr = kmap(page);
		}
		kunmap(page);
		bprm->argc--;
	}
}
813 /*
814 * cycle the list of binary formats handler, until one recognizes the image
815 */
/* Offer the image in bprm to each registered binfmt handler until one
 * accepts it (load_binary returns >= 0).  With CONFIG_KMOD a second pass
 * is made after trying to load a binfmt-XXXX module. */
int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
{
	int try,retval=0;
	struct linux_binfmt *fmt;
#ifdef __alpha__
	/* handle /sbin/loader.. */
	{
	    struct exec * eh = (struct exec *) bprm->buf;

	    if (!bprm->loader && eh->fh.f_magic == 0x183 &&
		(eh->fh.f_flags & 0x3000) == 0x3000)
	    {
		struct file * file;
		unsigned long loader;

		/* Swap the original image for /sbin/loader. */
		allow_write_access(bprm->file);
		fput(bprm->file);
		bprm->file = NULL;

		loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);

		file = open_exec("/sbin/loader");
		retval = PTR_ERR(file);
		if (IS_ERR(file))
			return retval;

		/* Remember if the application is TASO.  */
		bprm->sh_bang = eh->ah.entry < 0x100000000;

		bprm->file = file;
		bprm->loader = loader;
		retval = prepare_binprm(bprm);
		if (retval<0)
			return retval;
		/* should call search_binary_handler recursively here,
		   but it does not matter */
	    }
	}
#endif
	/* kernel module loader fixup */
	/* so we don't try to load run modprobe in kernel space. */
	set_fs(USER_DS);
	for (try=0; try<2; try++) {
		read_lock(&binfmt_lock);
		for (fmt = formats ; fmt ; fmt = fmt->next) {
			int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
			if (!fn)
				continue;
			/* Pin the module so binfmt_lock can be dropped
			 * across the load_binary() call. */
			if (!try_inc_mod_count(fmt->module))
				continue;
			read_unlock(&binfmt_lock);
			retval = fn(bprm, regs);
			if (retval >= 0) {
				/* Handler accepted the image: release the
				 * executable and report success. */
				put_binfmt(fmt);
				allow_write_access(bprm->file);
				if (bprm->file)
					fput(bprm->file);
				bprm->file = NULL;
				current->did_exec = 1;
				return retval;
			}
			read_lock(&binfmt_lock);
			put_binfmt(fmt);
			/* -ENOEXEC means "not mine"; any other error is final. */
			if (retval != -ENOEXEC)
				break;
			/* The handler consumed the file (e.g. #!): stop. */
			if (!bprm->file) {
				read_unlock(&binfmt_lock);
				return retval;
			}
		}
		read_unlock(&binfmt_lock);
		if (retval != -ENOEXEC) {
			break;
#ifdef CONFIG_KMOD
		}else{
#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
			char modname[20];
			/* Looks like text: no point probing binfmt modules. */
			if (printable(bprm->buf[0]) &&
			    printable(bprm->buf[1]) &&
			    printable(bprm->buf[2]) &&
			    printable(bprm->buf[3]))
				break; /* -ENOEXEC */
			/* Request binfmt-XXXX for the magic at buf[2]. */
			sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
			request_module(modname);
#endif
		}
	}
	return retval;
}
907 /*
908 * sys_execve() executes a new program.
909 */
/* Core of execve(): open the image, collect argv/envp into argument
 * pages, and hand off to search_binary_handler().  Returns the handler's
 * result (>= 0 on success) or a negative errno. */
int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
{
	struct linux_binprm bprm;
	struct file *file;
	int retval;
	int i;

	file = open_exec(filename);

	retval = PTR_ERR(file);
	if (IS_ERR(file))
		return retval;

	/* Strings grow down from the top of the MAX_ARG_PAGES window. */
	bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
	memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));

	bprm.file = file;
	bprm.filename = filename;
	bprm.sh_bang = 0;
	bprm.loader = 0;
	bprm.exec = 0;
	if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
		allow_write_access(file);
		fput(file);
		return bprm.argc;
	}

	if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
		allow_write_access(file);
		fput(file);
		return bprm.envc;
	}

	retval = prepare_binprm(&bprm);
	if (retval < 0)
		goto out;

	/* Copy filename, then environment, then arguments (top-down). */
	retval = copy_strings_kernel(1, &bprm.filename, &bprm);
	if (retval < 0)
		goto out;

	bprm.exec = bprm.p;
	retval = copy_strings(bprm.envc, envp, &bprm);
	if (retval < 0)
		goto out;

	retval = copy_strings(bprm.argc, argv, &bprm);
	if (retval < 0)
		goto out;

	retval = search_binary_handler(&bprm,regs);
	if (retval >= 0)
		/* execve success */
		return retval;

out:
	/* Something went wrong, return the inode and free the argument pages*/
	allow_write_access(bprm.file);
	if (bprm.file)
		fput(bprm.file);

	for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
		struct page * page = bprm.page[i];
		if (page)
			__free_page(page);
	}

	return retval;
}
980 void set_binfmt(struct linux_binfmt *new)
981 {
982 struct linux_binfmt *old = current->binfmt;
983 if (new && new->module)
984 __MOD_INC_USE_COUNT(new->module);
985 current->binfmt = new;
986 if (old && old->module)
987 __MOD_DEC_USE_COUNT(old->module);
988 }
990 #define CORENAME_MAX_SIZE 64
992 /* format_corename will inspect the pattern parameter, and output a
993 * name into corename, which must have space for at least
994 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
995 */
996 void format_corename(char *corename, const char *pattern, long signr)
997 {
998 const char *pat_ptr = pattern;
999 char *out_ptr = corename;
1000 char *const out_end = corename + CORENAME_MAX_SIZE;
1001 int rc;
1002 int pid_in_pattern = 0;
1004 /* Repeat as long as we have more pattern to process and more output
1005 space */
1006 while (*pat_ptr) {
1007 if (*pat_ptr != '%') {
1008 if (out_ptr == out_end)
1009 goto out;
1010 *out_ptr++ = *pat_ptr++;
1011 } else {
1012 switch (*++pat_ptr) {
1013 case 0:
1014 goto out;
1015 /* Double percent, output one percent */
1016 case '%':
1017 if (out_ptr == out_end)
1018 goto out;
1019 *out_ptr++ = '%';
1020 break;
1021 /* pid */
1022 case 'p':
1023 pid_in_pattern = 1;
1024 rc = snprintf(out_ptr, out_end - out_ptr,
1025 "%d", current->pid);
1026 if (rc > out_end - out_ptr)
1027 goto out;
1028 out_ptr += rc;
1029 break;
1030 /* uid */
1031 case 'u':
1032 rc = snprintf(out_ptr, out_end - out_ptr,
1033 "%d", current->uid);
1034 if (rc > out_end - out_ptr)
1035 goto out;
1036 out_ptr += rc;
1037 break;
1038 /* gid */
1039 case 'g':
1040 rc = snprintf(out_ptr, out_end - out_ptr,
1041 "%d", current->gid);
1042 if (rc > out_end - out_ptr)
1043 goto out;
1044 out_ptr += rc;
1045 break;
1046 /* signal that caused the coredump */
1047 case 's':
1048 rc = snprintf(out_ptr, out_end - out_ptr,
1049 "%ld", signr);
1050 if (rc > out_end - out_ptr)
1051 goto out;
1052 out_ptr += rc;
1053 break;
1054 /* UNIX time of coredump */
1055 case 't': {
1056 struct timeval tv;
1057 do_gettimeofday(&tv);
1058 rc = snprintf(out_ptr, out_end - out_ptr,
1059 "%ld", tv.tv_sec);
1060 if (rc > out_end - out_ptr)
1061 goto out;
1062 out_ptr += rc;
1063 break;
1065 /* hostname */
1066 case 'h':
1067 down_read(&uts_sem);
1068 rc = snprintf(out_ptr, out_end - out_ptr,
1069 "%s", system_utsname.nodename);
1070 up_read(&uts_sem);
1071 if (rc > out_end - out_ptr)
1072 goto out;
1073 out_ptr += rc;
1074 break;
1075 /* executable */
1076 case 'e':
1077 rc = snprintf(out_ptr, out_end - out_ptr,
1078 "%s", current->comm);
1079 if (rc > out_end - out_ptr)
1080 goto out;
1081 out_ptr += rc;
1082 break;
1083 default:
1084 break;
1086 ++pat_ptr;
1089 /* Backward compatibility with core_uses_pid:
1091 * If core_pattern does not include a %p (as is the default)
1092 * and core_uses_pid is set, then .%pid will be appended to
1093 * the filename */
1094 if (!pid_in_pattern
1095 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
1096 rc = snprintf(out_ptr, out_end - out_ptr,
1097 ".%d", current->pid);
1098 if (rc > out_end - out_ptr)
1099 goto out;
1100 out_ptr += rc;
1102 out:
1103 *out_ptr = 0;
1106 int do_coredump(long signr, struct pt_regs * regs)
1108 struct linux_binfmt * binfmt;
1109 char corename[CORENAME_MAX_SIZE + 1];
1110 struct file * file;
1111 struct inode * inode;
1112 int retval = 0;
1113 int fsuid = current->fsuid;
1115 lock_kernel();
1116 binfmt = current->binfmt;
1117 if (!binfmt || !binfmt->core_dump)
1118 goto fail;
1119 if (!is_dumpable(current))
1121 if(!core_setuid_ok || !current->task_dumpable)
1122 goto fail;
1123 current->fsuid = 0;
1125 current->mm->dumpable = 0;
1126 if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
1127 goto fail;
1129 format_corename(corename, core_pattern, signr);
1130 file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
1131 if (IS_ERR(file))
1132 goto fail;
1133 inode = file->f_dentry->d_inode;
1134 if (inode->i_nlink > 1)
1135 goto close_fail; /* multiple links - don't dump */
1136 if (d_unhashed(file->f_dentry))
1137 goto close_fail;
1139 if (!S_ISREG(inode->i_mode))
1140 goto close_fail;
1141 if (!file->f_op)
1142 goto close_fail;
1143 if (!file->f_op->write)
1144 goto close_fail;
1145 if (do_truncate(file->f_dentry, 0) != 0)
1146 goto close_fail;
1148 retval = binfmt->core_dump(signr, regs, file);
1150 close_fail:
1151 filp_close(file, NULL);
1152 fail:
1153 if (fsuid != current->fsuid)
1154 current->fsuid = fsuid;
1155 unlock_kernel();
1156 return retval;