ia64/xen-unstable

view linux-2.4.29-xen-sparse/fs/exec.c @ 3660:f9b3b23cf5ff

bitkeeper revision 1.1159.223.66 (42048ef5frMFQFFm-sKDC6PrU8yraQ)

Name: pic-lib.patch
Description: support PIC code generation
Libraries have 2 methods of being compiled; a .so must contain PIC
objects, while the .a can contain standard (non-PIC) objects.
Signed-off-by: Adam Heath <doogie@brainfood.com>
Signed-off-by: ian.pratt@cl.cam.ac.uk
author iap10@labyrinth.cl.cam.ac.uk
date Sat Feb 05 09:16:37 2005 +0000 (2005-02-05)
parents ed0d4ce83995
children d126cac32f08
line source
1 /*
2 * linux/fs/exec.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * #!-checking implemented by tytso.
9 */
10 /*
11 * Demand-loading implemented 01.12.91 - no need to read anything but
12 * the header into memory. The inode of the executable is put into
13 * "current->executable", and page faults do the actual loading. Clean.
14 *
15 * Once more I can proudly say that linux stood up to being changed: it
16 * was less than 2 hours work to get demand-loading completely implemented.
17 *
18 * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead,
19 * current->executable is only used by the procfs. This allows a dispatch
20 * table to check for several different types of binary formats. We keep
21 * trying until we recognize the file or we run out of supported binary
22 * formats.
23 */
25 #include <linux/config.h>
26 #include <linux/slab.h>
27 #include <linux/file.h>
28 #include <linux/mman.h>
29 #include <linux/a.out.h>
30 #include <linux/stat.h>
31 #include <linux/fcntl.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
34 #include <linux/pagemap.h>
35 #include <linux/highmem.h>
36 #include <linux/spinlock.h>
37 #include <linux/personality.h>
38 #include <linux/swap.h>
39 #include <linux/utsname.h>
40 #define __NO_VERSION__
41 #include <linux/module.h>
43 #include <asm/uaccess.h>
44 #include <asm/pgalloc.h>
45 #include <asm/mmu_context.h>
47 #ifdef CONFIG_KMOD
48 #include <linux/kmod.h>
49 #endif
/* Core-dump sysctl knobs (exported via sysctl.c): */
51 int core_uses_pid; /* nonzero: append ".<pid>" to the core file name */
52 char core_pattern[65] = "core"; /* printf-like template for core file names */
53 int core_setuid_ok = 0; /* nonzero: allow dumps from set-id processes */
54 /* The maximal length of core_pattern is also specified in sysctl.c */
/* Singly-linked list of registered binary-format handlers, guarded by binfmt_lock. */
56 static struct linux_binfmt *formats;
57 static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
/*
 * Link a binary-format handler onto the head of the global 'formats'
 * list.  Returns 0 on success, -EINVAL for a NULL handler, or -EBUSY
 * if the handler already appears to be registered.
 */
59 int register_binfmt(struct linux_binfmt * fmt)
60 {
61 struct linux_binfmt ** tmp = &formats;
63 if (!fmt)
64 return -EINVAL;
65 if (fmt->next)
66 return -EBUSY; /* a non-NULL next means it is already on some list */
67 write_lock(&binfmt_lock);
68 while (*tmp) { /* walk the list to reject duplicate registration */
69 if (fmt == *tmp) {
70 write_unlock(&binfmt_lock);
71 return -EBUSY;
72 }
73 tmp = &(*tmp)->next;
74 }
75 fmt->next = formats; /* push onto the list head */
76 formats = fmt;
77 write_unlock(&binfmt_lock);
78 return 0;
79 }
/*
 * Unlink a handler from the 'formats' list.  Returns 0 if it was found
 * and removed, -EINVAL if it was not on the list.
 */
81 int unregister_binfmt(struct linux_binfmt * fmt)
82 {
83 struct linux_binfmt ** tmp = &formats;
85 write_lock(&binfmt_lock);
86 while (*tmp) {
87 if (fmt == *tmp) {
88 *tmp = fmt->next; /* splice the entry out of the list */
89 write_unlock(&binfmt_lock);
90 return 0;
91 }
92 tmp = &(*tmp)->next;
93 }
94 write_unlock(&binfmt_lock);
95 return -EINVAL;
96 }
/* Drop the module reference taken (via try_inc_mod_count) when a
 * handler was selected for use. */
98 static inline void put_binfmt(struct linux_binfmt * fmt)
99 {
100 if (fmt->module)
101 __MOD_DEC_USE_COUNT(fmt->module);
102 }
104 /*
105 * Note that a shared library must be both readable and executable due to
106 * security reasons.
107 *
108 * Also note that we take the address to load from from the file itself.
109 */
/*
 * uselib(2): load a shared library by offering the file to each
 * registered binary format's load_shlib hook in turn.  Returns 0 on
 * success or a negative errno (-ENOEXEC if no format claims it).
 */
110 asmlinkage long sys_uselib(const char * library)
111 {
112 struct file * file;
113 struct nameidata nd;
114 int error;
116 error = user_path_walk(library, &nd);
117 if (error)
118 goto out;
120 error = -EINVAL;
121 if (!S_ISREG(nd.dentry->d_inode->i_mode))
122 goto exit; /* must be a regular file */
124 error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
125 if (error)
126 goto exit;
128 file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
129 error = PTR_ERR(file);
130 if (IS_ERR(file))
131 goto out; /* dentry_open consumed the nd refs even on failure, so no path_release */
133 error = -ENOEXEC;
134 if(file->f_op && file->f_op->read) {
135 struct linux_binfmt * fmt;
137 read_lock(&binfmt_lock);
138 for (fmt = formats ; fmt ; fmt = fmt->next) {
139 if (!fmt->load_shlib)
140 continue;
141 if (!try_inc_mod_count(fmt->module))
142 continue; /* owning module is going away - skip this format */
143 read_unlock(&binfmt_lock);
144 error = fmt->load_shlib(file); /* may sleep; lock dropped above */
145 read_lock(&binfmt_lock);
146 put_binfmt(fmt);
147 if (error != -ENOEXEC)
148 break; /* claimed (or hard error) - stop trying formats */
149 }
150 read_unlock(&binfmt_lock);
151 }
152 fput(file);
153 out:
154 return error;
155 exit:
156 path_release(&nd); /* only for failures before dentry_open */
157 goto out;
158 }
160 /*
161 * count() counts the number of arguments/envelopes
162 */
163 static int count(char ** argv, int max)
164 {
165 int i = 0;
167 if (argv != NULL) {
168 for (;;) {
169 char * p;
171 if (get_user(p, argv))
172 return -EFAULT;
173 if (!p)
174 break;
175 argv++;
176 if(++i > max)
177 return -E2BIG;
178 }
179 }
180 return i;
181 }
183 /*
184 * 'copy_strings()' copies argument/envelope strings from user
185 * memory to free pages in kernel mem. These are in a format ready
186 * to be put directly into the top of new user memory.
187 *
188 * Strings are copied last-to-first, growing downwards from bprm->p.
189 * Returns 0 on success or a negative errno; on error, pages already
190 * allocated into bprm->page[] are left for the caller to free.
191 */
188 int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
189 {
190 struct page *kmapped_page = NULL; /* cache of the currently kmapped page */
191 char *kaddr = NULL;
192 int ret;
194 while (argc-- > 0) {
195 char *str;
196 int len;
197 unsigned long pos;
199 if (get_user(str, argv+argc) ||
200 !(len = strnlen_user(str, bprm->p))) { /* len includes the trailing NUL */
201 ret = -EFAULT;
202 goto out;
203 }
205 if (bprm->p < len) { /* string would not fit below the cursor */
206 ret = -E2BIG;
207 goto out;
208 }
210 bprm->p -= len;
211 /* XXX: add architecture specific overflow check here. */
212 pos = bprm->p;
214 while (len > 0) { /* copy the string, possibly spanning page boundaries */
215 int i, new, err;
216 int offset, bytes_to_copy;
217 struct page *page;
219 offset = pos % PAGE_SIZE;
220 i = pos/PAGE_SIZE;
221 page = bprm->page[i];
222 new = 0;
223 if (!page) { /* allocate argument pages lazily */
224 page = alloc_page(GFP_HIGHUSER);
225 bprm->page[i] = page;
226 if (!page) {
227 ret = -ENOMEM;
228 goto out;
229 }
230 new = 1;
231 }
233 if (page != kmapped_page) { /* remap only when we move to a new page */
234 if (kmapped_page)
235 kunmap(kmapped_page);
236 kmapped_page = page;
237 kaddr = kmap(kmapped_page);
238 }
239 if (new && offset) /* zero the unused head of a fresh page */
240 memset(kaddr, 0, offset);
241 bytes_to_copy = PAGE_SIZE - offset;
242 if (bytes_to_copy > len) {
243 bytes_to_copy = len;
244 if (new) /* zero the unused tail of a fresh page */
245 memset(kaddr+offset+len, 0,
246 PAGE_SIZE-offset-len);
247 }
248 err = copy_from_user(kaddr+offset, str, bytes_to_copy);
249 if (err) {
250 ret = -EFAULT;
251 goto out;
252 }
254 pos += bytes_to_copy;
255 str += bytes_to_copy;
256 len -= bytes_to_copy;
257 }
258 }
259 ret = 0;
260 out:
261 if (kmapped_page)
262 kunmap(kmapped_page);
263 return ret;
264 }
266 /*
267 * Like copy_strings, but get argv and its values from kernel memory.
268 * Temporarily widens the address limit so that copy_from_user() in
269 * copy_strings() accepts kernel pointers, then restores it.
270 */
269 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
270 {
271 int r;
272 mm_segment_t oldfs = get_fs();
273 set_fs(KERNEL_DS);
274 r = copy_strings(argc, argv, bprm);
275 set_fs(oldfs); /* always restore the previous limit */
276 return r;
277 }
279 /*
280 * This routine is used to map in a page into an address space: needed by
281 * execve() for the initial stack and environment pages.
282 *
283 * tsk->mmap_sem is held for writing.
284 *
285 * On pmd/pte allocation failure or an already-populated pte, the page
286 * is freed and SIGKILL is forced on the task.
287 */
285 void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
286 {
287 pgd_t * pgd;
288 pmd_t * pmd;
289 pte_t * pte;
290 struct vm_area_struct *vma;
291 pgprot_t prot = PAGE_COPY; /* fallback protection if no VMA covers 'address' */
293 if (page_count(page) != 1)
294 printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
295 pgd = pgd_offset(tsk->mm, address);
297 spin_lock(&tsk->mm->page_table_lock);
298 pmd = pmd_alloc(tsk->mm, pgd, address);
299 if (!pmd)
300 goto out;
301 pte = pte_alloc(tsk->mm, pmd, address);
302 if (!pte)
303 goto out;
304 if (!pte_none(*pte)) /* someone already mapped something here */
305 goto out;
306 lru_cache_add(page);
307 flush_dcache_page(page);
308 flush_page_to_ram(page);
309 /* lookup is cheap because there is only a single entry in the list */
310 vma = find_vma(tsk->mm, address);
311 if (vma)
312 prot = vma->vm_page_prot;
313 set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
314 XEN_flush_page_update_queue(); /* Xen: presumably flushes queued pte updates - see Xen patch */
315 tsk->mm->rss++;
316 spin_unlock(&tsk->mm->page_table_lock);
318 /* no need for flush_tlb */
319 return;
320 out:
321 spin_unlock(&tsk->mm->page_table_lock);
322 __free_page(page);
323 force_sig(SIGKILL, tsk); /* cannot report failure to the caller; kill the task */
324 return;
325 }
/*
 * Build the new process's stack VMA just below STACK_TOP and move the
 * argument/environment pages collected in bprm->page[] into it.
 * Relocates bprm->p, bprm->loader and bprm->exec from offsets within
 * the MAX_ARG_PAGES window to real user addresses.
 * Returns 0 or a negative errno.
 */
327 int setup_arg_pages(struct linux_binprm *bprm)
328 {
329 unsigned long stack_base;
330 struct vm_area_struct *mpnt;
331 int i, ret;
333 stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
335 bprm->p += stack_base; /* turn offsets into absolute user addresses */
336 if (bprm->loader)
337 bprm->loader += stack_base;
338 bprm->exec += stack_base;
340 mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
341 if (!mpnt)
342 return -ENOMEM;
344 down_write(&current->mm->mmap_sem);
345 {
346 mpnt->vm_mm = current->mm;
347 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p; /* round down to page start */
348 mpnt->vm_end = STACK_TOP;
349 mpnt->vm_flags = VM_STACK_FLAGS;
350 mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7];
351 mpnt->vm_ops = NULL;
352 mpnt->vm_pgoff = 0;
353 mpnt->vm_file = NULL;
354 mpnt->vm_private_data = (void *) 0;
355 if ((ret = insert_vm_struct(current->mm, mpnt))) {
356 up_write(&current->mm->mmap_sem);
357 kmem_cache_free(vm_area_cachep, mpnt);
358 return ret;
359 }
360 current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
361 }
363 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
364 struct page *page = bprm->page[i];
365 if (page) {
366 bprm->page[i] = NULL; /* ownership passes to the page tables */
367 put_dirty_page(current,page,stack_base);
368 }
369 stack_base += PAGE_SIZE;
370 }
371 up_write(&current->mm->mmap_sem);
373 return 0;
374 }
376 struct file *open_exec(const char *name)
377 {
378 struct nameidata nd;
379 struct inode *inode;
380 struct file *file;
381 int err = 0;
383 err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
384 file = ERR_PTR(err);
385 if (!err) {
386 inode = nd.dentry->d_inode;
387 file = ERR_PTR(-EACCES);
388 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
389 S_ISREG(inode->i_mode)) {
390 int err = permission(inode, MAY_EXEC);
391 if (!err && !(inode->i_mode & 0111))
392 err = -EACCES;
393 file = ERR_PTR(err);
394 if (!err) {
395 file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
396 if (!IS_ERR(file)) {
397 err = deny_write_access(file);
398 if (err) {
399 fput(file);
400 file = ERR_PTR(err);
401 }
402 }
403 out:
404 return file;
405 }
406 }
407 path_release(&nd);
408 }
409 goto out;
410 }
412 int kernel_read(struct file *file, unsigned long offset,
413 char * addr, unsigned long count)
414 {
415 mm_segment_t old_fs;
416 loff_t pos = offset;
417 int result = -ENOSYS;
419 if (!file->f_op->read)
420 goto fail;
421 old_fs = get_fs();
422 set_fs(get_ds());
423 result = file->f_op->read(file, addr, count, &pos);
424 set_fs(old_fs);
425 fail:
426 return result;
427 }
/*
 * Give the exec'ing process a clean address space.  If we are the sole
 * user of the old mm, simply empty and reuse it; otherwise allocate a
 * fresh mm, switch to it, and drop our reference on the old one.
 * Returns 0 or -ENOMEM.
 */
429 static int exec_mmap(void)
430 {
431 struct mm_struct * mm, * old_mm;
433 old_mm = current->mm;
435 if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
436 mm_release(); /* notify parent via clear_child_tid etc. before teardown */
437 down_write(&old_mm->mmap_sem);
438 exit_mmap(old_mm); /* unmap everything but keep the mm itself */
439 up_write(&old_mm->mmap_sem);
440 return 0;
441 }
444 mm = mm_alloc();
445 if (mm) {
446 struct mm_struct *active_mm;
448 if (init_new_context(current, mm)) {
449 mmdrop(mm);
450 return -ENOMEM;
451 }
453 /* Add it to the list of mm's */
454 spin_lock(&mmlist_lock);
455 list_add(&mm->mmlist, &init_mm.mmlist);
456 mmlist_nr++;
457 spin_unlock(&mmlist_lock);
459 task_lock(current);
460 active_mm = current->active_mm;
461 current->mm = mm;
462 current->active_mm = mm;
463 task_unlock(current);
464 activate_mm(active_mm, mm); /* switch the hardware context to the new mm */
465 mm_release();
466 if (old_mm) {
467 if (active_mm != old_mm) BUG();
468 mmput(old_mm); /* drop our user reference on the old mm */
469 return 0;
470 }
471 mmdrop(active_mm); /* was a kernel thread borrowing active_mm */
472 return 0;
473 }
474 return -ENOMEM;
475 }
477 /*
478 * This function makes sure the current process has its own signal table,
479 * so that flush_signal_handlers can later reset the handlers without
480 * disturbing other processes. (Other processes might share the signal
481 * table via the CLONE_SIGNAL option to clone().)
482 *
483 * Returns 0 on success (including when the table was already private)
484 * or -ENOMEM.  The old table's refcount is NOT dropped here; see
485 * release_old_signals().
486 */
484 static inline int make_private_signals(void)
485 {
486 struct signal_struct * newsig;
488 if (atomic_read(&current->sig->count) <= 1)
489 return 0; /* already unshared */
490 newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
491 if (newsig == NULL)
492 return -ENOMEM;
493 spin_lock_init(&newsig->siglock);
494 atomic_set(&newsig->count, 1);
495 memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
496 spin_lock_irq(&current->sigmask_lock);
497 current->sig = newsig; /* swap in the private copy under the lock */
498 spin_unlock_irq(&current->sigmask_lock);
499 return 0;
500 }
502 /*
503 * If make_private_signals() made a copy of the signal table, decrement the
504 * refcount of the original table, and free it if necessary.
505 * We don't do that in make_private_signals() so that we can back off
506 * in flush_old_exec() if an error occurs after calling make_private_signals().
507 */
509 static inline void release_old_signals(struct signal_struct * oldsig)
510 {
511 if (current->sig == oldsig)
512 return; /* no private copy was made - nothing to drop */
513 if (atomic_dec_and_test(&oldsig->count))
514 kmem_cache_free(sigact_cachep, oldsig);
515 }
517 /*
518 * These functions flush out all traces of the currently running executable
519 * so that a new one can be started.
520 *
521 * flush_old_files(): close every fd marked close-on-exec, clearing the
522 * close_on_exec bits as it goes.  The file_lock is dropped around
523 * sys_close() (which may sleep), so each bitmap word is snapshotted
524 * and zeroed before the lock is released.
525 */
522 static inline void flush_old_files(struct files_struct * files)
523 {
524 long j = -1;
526 write_lock(&files->file_lock);
527 for (;;) {
528 unsigned long set, i;
530 j++;
531 i = j * __NFDBITS; /* first fd number covered by bitmap word j */
532 if (i >= files->max_fds || i >= files->max_fdset)
533 break;
534 set = files->close_on_exec->fds_bits[j];
535 if (!set)
536 continue;
537 files->close_on_exec->fds_bits[j] = 0; /* claim the word before unlocking */
538 write_unlock(&files->file_lock);
539 for ( ; set ; i++,set >>= 1) {
540 if (set & 1) {
541 sys_close(i);
542 }
543 }
544 write_lock(&files->file_lock);
546 }
547 write_unlock(&files->file_lock);
548 }
550 /*
551 * An execve() will automatically "de-thread" the process.
552 * Note: we don't have to hold the tasklist_lock to test
553 * whether we might need to do this. If we're not part of
554 * a thread group, there is no way we can become one
555 * dynamically. And if we are, we only need to protect the
556 * unlink - even if we race with the last other thread exit,
557 * at worst the list_del_init() might end up being a no-op.
558 */
559 static inline void de_thread(struct task_struct *tsk)
560 {
561 if (!list_empty(&tsk->thread_group)) {
562 write_lock_irq(&tasklist_lock);
563 list_del_init(&tsk->thread_group); /* leave the thread group */
564 write_unlock_irq(&tasklist_lock);
565 }
567 /* Minor oddity: this might stay the same. */
568 tsk->tgid = tsk->pid; /* we are now our own thread-group leader */
569 }
/* Copy tsk->comm into 'buf' under the task lock so a concurrent
 * set_task_comm() cannot be observed half-written. */
571 void get_task_comm(char *buf, struct task_struct *tsk)
572 {
573 /* buf must be at least sizeof(tsk->comm) in size */
574 task_lock(tsk);
575 memcpy(buf, tsk->comm, sizeof(tsk->comm));
576 task_unlock(tsk);
577 }
/* Set tsk->comm from 'buf' under the task lock, truncating to
 * sizeof(tsk->comm)-1 and guaranteeing NUL termination. */
579 void set_task_comm(struct task_struct *tsk, char *buf)
580 {
581 task_lock(tsk);
582 strncpy(tsk->comm, buf, sizeof(tsk->comm));
583 tsk->comm[sizeof(tsk->comm)-1]='\0'; /* strncpy may not terminate */
584 task_unlock(tsk);
585 }
/*
 * Tear down the old program's state so the new image can be installed:
 * privatize the signal table and file table, replace the mm, close
 * close-on-exec fds, reset signal handlers, and set the new comm name.
 * After exec_mmap() succeeds we are past the point of no return; before
 * that, errors roll back the file table and signal table.
 * Returns 0 or a negative errno.
 */
587 int flush_old_exec(struct linux_binprm * bprm)
588 {
589 char * name;
590 int i, ch, retval;
591 struct signal_struct * oldsig;
592 struct files_struct * files;
593 char tcomm[sizeof(current->comm)];
595 /*
596 * Make sure we have a private signal table
597 */
598 oldsig = current->sig;
599 retval = make_private_signals();
600 if (retval) goto flush_failed;
602 /*
603 * Make sure we have private file handles. Ask the
604 * fork helper to do the work for us and the exit
605 * helper to do the cleanup of the old one.
606 */
608 files = current->files; /* refcounted so safe to hold */
609 retval = unshare_files();
610 if(retval)
611 goto flush_failed;
613 /*
614 * Release all of the old mmap stuff
615 */
616 retval = exec_mmap();
617 if (retval) goto mmap_failed;
619 /* This is the point of no return */
620 steal_locks(files);
621 put_files_struct(files); /* drop the reference taken at line 608 */
622 release_old_signals(oldsig);
624 current->sas_ss_sp = current->sas_ss_size = 0; /* clear the alt signal stack */
626 if (current->euid == current->uid && current->egid == current->gid) {
627 current->mm->dumpable = 1;
628 current->task_dumpable = 1;
629 }
630 name = bprm->filename;
/* Extract the basename of the executable into tcomm, truncated to fit. */
631 for (i=0; (ch = *(name++)) != '\0';) {
632 if (ch == '/')
633 i = 0; /* restart after each path separator */
634 else
635 if (i < (sizeof(tcomm) - 1))
636 tcomm[i++] = ch;
637 }
638 tcomm[i] = '\0';
639 set_task_comm(current, tcomm);
641 flush_thread();
643 de_thread(current);
/* Set-id or unreadable binaries must not be dumpable or ptrace-readable. */
645 if (bprm->e_uid != current->euid || bprm->e_gid != current->egid ||
646 permission(bprm->file->f_dentry->d_inode,MAY_READ))
647 current->mm->dumpable = 0;
649 /* An exec changes our domain. We are no longer part of the thread
650 group */
652 current->self_exec_id++;
654 flush_signal_handlers(current);
655 flush_old_files(current->files);
657 return 0;
/* Error paths: undo the unshare_files() / make_private_signals() work. */
659 mmap_failed:
660 put_files_struct(current->files);
661 current->files = files; /* restore the original file table */
662 flush_failed:
663 spin_lock_irq(&current->sigmask_lock);
664 if (current->sig != oldsig) {
665 kmem_cache_free(sigact_cachep, current->sig);
666 current->sig = oldsig; /* restore the shared signal table */
667 }
668 spin_unlock_irq(&current->sigmask_lock);
669 return retval;
670 }
672 /*
673 * We mustn't allow tracing of suid binaries, unless
674 * the tracer has the capability to trace anything..
675 */
676 static inline int must_not_trace_exec(struct task_struct * p)
677 {
678 /* traced, and the tracer lacked CAP_SYS_PTRACE when it attached */
679 return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP);
680 }
681 /*
682 * Fill the binprm structure from the inode.
683 * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes.
684 *
685 * Computes the effective uid/gid the new image will run with (honouring
686 * setuid/setgid bits unless the mount is nosuid) and the initial
687 * capability sets.  Returns the byte count read into bprm->buf, or a
688 * negative errno.
689 */
685 int prepare_binprm(struct linux_binprm *bprm)
686 {
687 int mode;
688 struct inode * inode = bprm->file->f_dentry->d_inode;
690 mode = inode->i_mode;
691 /*
692 * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
693 * vfs_permission lets a non-executable through
694 */
695 if (!(mode & 0111)) /* with at least _one_ execute bit set */
696 return -EACCES;
697 if (bprm->file->f_op == NULL)
698 return -EACCES;
700 bprm->e_uid = current->euid;
701 bprm->e_gid = current->egid;
703 if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
704 /* Set-uid? */
705 if (mode & S_ISUID)
706 bprm->e_uid = inode->i_uid;
708 /* Set-gid? */
709 /*
710 * If setgid is set but no group execute bit then this
711 * is a candidate for mandatory locking, not a setgid
712 * executable.
713 */
714 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
715 bprm->e_gid = inode->i_gid;
716 }
718 /* We don't have VFS support for capabilities yet */
719 cap_clear(bprm->cap_inheritable);
720 cap_clear(bprm->cap_permitted);
721 cap_clear(bprm->cap_effective);
723 /* To support inheritance of root-permissions and suid-root
724 * executables under compatibility mode, we raise all three
725 * capability sets for the file.
726 *
727 * If only the real uid is 0, we only raise the inheritable
728 * and permitted sets of the executable file.
729 */
731 if (!issecure(SECURE_NOROOT)) {
732 if (bprm->e_uid == 0 || current->uid == 0) {
733 cap_set_full(bprm->cap_inheritable);
734 cap_set_full(bprm->cap_permitted);
735 }
736 if (bprm->e_uid == 0)
737 cap_set_full(bprm->cap_effective);
738 }
740 memset(bprm->buf,0,BINPRM_BUF_SIZE); /* zero so short reads leave a clean tail */
741 return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
742 }
744 /*
745 * This function is used to produce the new IDs and capabilities
746 * from the old ones and the file's capabilities.
747 *
748 * The formula used for evolving capabilities is:
749 *
750 * pI' = pI
751 * (***) pP' = (fP & X) | (fI & pI)
752 * pE' = pP' & fE [NB. fE is 0 or ~0]
753 *
754 * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
755 * ' indicates post-exec(), and X is the global 'cap_bset'.
756 *
757 * Called after the point of no return; on a privilege-raising exec of
758 * a traced or state-sharing task, the raise is quietly dropped instead
759 * of failing the exec.
760 */
759 void compute_creds(struct linux_binprm *bprm)
760 {
761 kernel_cap_t new_permitted, working;
762 int do_unlock = 0;
764 new_permitted = cap_intersect(bprm->cap_permitted, cap_bset); /* fP & X */
765 working = cap_intersect(bprm->cap_inheritable,
766 current->cap_inheritable); /* fI & pI */
767 new_permitted = cap_combine(new_permitted, working);
/* Only take the slow path if exec would raise privilege. */
769 if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
770 !cap_issubset(new_permitted, current->cap_permitted)) {
771 current->mm->dumpable = 0;
773 lock_kernel();
774 if (must_not_trace_exec(current)
775 || atomic_read(&current->fs->count) > 1
776 || atomic_read(&current->files->count) > 1
777 || atomic_read(&current->sig->count) > 1) {
/* Traced, or sharing fs/files/signals: refuse to raise ids/caps
 * unless the task itself holds the matching capability. */
778 if(!capable(CAP_SETUID)) {
779 bprm->e_uid = current->uid;
780 bprm->e_gid = current->gid;
781 }
782 if(!capable(CAP_SETPCAP)) {
783 new_permitted = cap_intersect(new_permitted,
784 current->cap_permitted);
785 }
786 }
787 do_unlock = 1;
788 }
791 /* For init, we want to retain the capabilities set
792 * in the init_task struct. Thus we skip the usual
793 * capability rules */
794 if (current->pid != 1) {
795 current->cap_permitted = new_permitted;
796 current->cap_effective =
797 cap_intersect(new_permitted, bprm->cap_effective);
798 }
800 /* AUD: Audit candidate if current->cap_effective is set */
802 current->suid = current->euid = current->fsuid = bprm->e_uid;
803 current->sgid = current->egid = current->fsgid = bprm->e_gid;
805 if(do_unlock)
806 unlock_kernel();
807 current->keep_capabilities = 0;
808 }
/*
 * Drop argv[0] from the argument pages: advance bprm->p past the first
 * NUL-terminated string (used by binfmt_script and friends before
 * substituting their own argv[0]).  The goto-into-loop construct kmaps
 * the page containing bprm->p before the loop test dereferences it,
 * remapping whenever the scan crosses a page boundary.
 */
811 void remove_arg_zero(struct linux_binprm *bprm)
812 {
813 if (bprm->argc) {
814 unsigned long offset;
815 char * kaddr;
816 struct page *page;
818 offset = bprm->p % PAGE_SIZE;
819 goto inside; /* map the current page before the first dereference */
821 while (bprm->p++, *(kaddr+offset++)) { /* advance until the NUL */
822 if (offset != PAGE_SIZE)
823 continue;
824 offset = 0;
825 kunmap(page); /* crossed into the next page - remap */
826 inside:
827 page = bprm->page[bprm->p/PAGE_SIZE];
828 kaddr = kmap(page);
829 }
830 kunmap(page);
831 bprm->argc--; /* one argument consumed */
832 }
833 }
835 /*
836 * cycle the list of binary formats handler, until one recognizes the image
837 *
838 * Returns the handler's result (>= 0 on success) or a negative errno.
839 * On success the handler has consumed bprm->file.  With CONFIG_KMOD a
840 * second pass is made after trying to load a "binfmt-XXXX" module for
841 * unrecognized (non-printable) magic numbers.
842 */
838 int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
839 {
840 int try,retval=0;
841 struct linux_binfmt *fmt;
842 #ifdef __alpha__
843 /* handle /sbin/loader.. */
844 {
845 struct exec * eh = (struct exec *) bprm->buf;
847 if (!bprm->loader && eh->fh.f_magic == 0x183 &&
848 (eh->fh.f_flags & 0x3000) == 0x3000)
849 {
850 struct file * file;
851 unsigned long loader;
853 allow_write_access(bprm->file);
854 fput(bprm->file); /* replace the ECOFF binary with /sbin/loader */
855 bprm->file = NULL;
857 loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
859 file = open_exec("/sbin/loader");
860 retval = PTR_ERR(file);
861 if (IS_ERR(file))
862 return retval;
864 /* Remember if the application is TASO. */
865 bprm->sh_bang = eh->ah.entry < 0x100000000;
867 bprm->file = file;
868 bprm->loader = loader;
869 retval = prepare_binprm(bprm);
870 if (retval<0)
871 return retval;
872 /* should call search_binary_handler recursively here,
873 but it does not matter */
874 }
875 }
876 #endif
877 /* kernel module loader fixup */
878 /* so we don't try to load run modprobe in kernel space. */
879 set_fs(USER_DS);
880 for (try=0; try<2; try++) { /* second pass only useful after request_module */
881 read_lock(&binfmt_lock);
882 for (fmt = formats ; fmt ; fmt = fmt->next) {
883 int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
884 if (!fn)
885 continue;
886 if (!try_inc_mod_count(fmt->module))
887 continue; /* module going away - skip */
888 read_unlock(&binfmt_lock);
889 retval = fn(bprm, regs); /* may sleep; lock dropped above */
890 if (retval >= 0) {
891 put_binfmt(fmt);
892 allow_write_access(bprm->file);
893 if (bprm->file)
894 fput(bprm->file);
895 bprm->file = NULL; /* handler accepted the image */
896 current->did_exec = 1;
897 return retval;
898 }
899 read_lock(&binfmt_lock);
900 put_binfmt(fmt);
901 if (retval != -ENOEXEC)
902 break; /* hard error - stop trying other formats */
903 if (!bprm->file) {
904 read_unlock(&binfmt_lock);
905 return retval; /* handler dropped the file (e.g. binfmt_script) */
906 }
907 }
908 read_unlock(&binfmt_lock);
909 if (retval != -ENOEXEC) {
910 break;
911 #ifdef CONFIG_KMOD
912 }else{
913 #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
914 char modname[20];
915 if (printable(bprm->buf[0]) &&
916 printable(bprm->buf[1]) &&
917 printable(bprm->buf[2]) &&
918 printable(bprm->buf[3]))
919 break; /* -ENOEXEC */
920 sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
921 request_module(modname); /* then retry the whole format list */
922 #endif
923 }
924 }
925 return retval;
926 }
929 /*
930 * sys_execve() executes a new program.
931 *
932 * Opens the file, counts and copies argv/envp into the argument pages,
933 * and hands off to search_binary_handler().  On failure the file and
934 * any allocated argument pages are released here; on success the
935 * chosen handler has taken ownership of them.
936 */
932 int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
933 {
934 struct linux_binprm bprm;
935 struct file *file;
936 int retval;
937 int i;
939 file = open_exec(filename);
941 retval = PTR_ERR(file);
942 if (IS_ERR(file))
943 return retval;
945 bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); /* cursor starts at the top */
946 memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0]));
948 bprm.file = file;
949 bprm.filename = filename;
950 bprm.sh_bang = 0;
951 bprm.loader = 0;
952 bprm.exec = 0;
953 if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
954 allow_write_access(file); /* undo open_exec's deny_write_access */
955 fput(file);
956 return bprm.argc;
957 }
959 if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
960 allow_write_access(file);
961 fput(file);
962 return bprm.envc;
963 }
965 retval = prepare_binprm(&bprm);
966 if (retval < 0)
967 goto out;
/* Copy filename, then environment, then arguments, top-down. */
969 retval = copy_strings_kernel(1, &bprm.filename, &bprm);
970 if (retval < 0)
971 goto out;
973 bprm.exec = bprm.p;
974 retval = copy_strings(bprm.envc, envp, &bprm);
975 if (retval < 0)
976 goto out;
978 retval = copy_strings(bprm.argc, argv, &bprm);
979 if (retval < 0)
980 goto out;
982 retval = search_binary_handler(&bprm,regs);
983 if (retval >= 0)
984 /* execve success */
985 return retval;
987 out:
988 /* Something went wrong, return the inode and free the argument pages*/
989 allow_write_access(bprm.file);
990 if (bprm.file)
991 fput(bprm.file);
993 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
994 struct page * page = bprm.page[i];
995 if (page)
996 __free_page(page);
997 }
999 return retval;
1002 void set_binfmt(struct linux_binfmt *new)
1004 struct linux_binfmt *old = current->binfmt;
1005 if (new && new->module)
1006 __MOD_INC_USE_COUNT(new->module);
1007 current->binfmt = new;
1008 if (old && old->module)
1009 __MOD_DEC_USE_COUNT(old->module);
1012 #define CORENAME_MAX_SIZE 64
1014 /* format_corename will inspect the pattern parameter, and output a
1015 * name into corename, which must have space for at least
1016 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1017 */
1018 void format_corename(char *corename, const char *pattern, long signr)
1020 const char *pat_ptr = pattern;
1021 char *out_ptr = corename;
1022 char *const out_end = corename + CORENAME_MAX_SIZE;
1023 int rc;
1024 int pid_in_pattern = 0;
1026 /* Repeat as long as we have more pattern to process and more output
1027 space */
1028 while (*pat_ptr) {
1029 if (*pat_ptr != '%') {
1030 if (out_ptr == out_end)
1031 goto out;
1032 *out_ptr++ = *pat_ptr++;
1033 } else {
1034 switch (*++pat_ptr) {
1035 case 0:
1036 goto out;
1037 /* Double percent, output one percent */
1038 case '%':
1039 if (out_ptr == out_end)
1040 goto out;
1041 *out_ptr++ = '%';
1042 break;
1043 /* pid */
1044 case 'p':
1045 pid_in_pattern = 1;
1046 rc = snprintf(out_ptr, out_end - out_ptr,
1047 "%d", current->pid);
1048 if (rc > out_end - out_ptr)
1049 goto out;
1050 out_ptr += rc;
1051 break;
1052 /* uid */
1053 case 'u':
1054 rc = snprintf(out_ptr, out_end - out_ptr,
1055 "%d", current->uid);
1056 if (rc > out_end - out_ptr)
1057 goto out;
1058 out_ptr += rc;
1059 break;
1060 /* gid */
1061 case 'g':
1062 rc = snprintf(out_ptr, out_end - out_ptr,
1063 "%d", current->gid);
1064 if (rc > out_end - out_ptr)
1065 goto out;
1066 out_ptr += rc;
1067 break;
1068 /* signal that caused the coredump */
1069 case 's':
1070 rc = snprintf(out_ptr, out_end - out_ptr,
1071 "%ld", signr);
1072 if (rc > out_end - out_ptr)
1073 goto out;
1074 out_ptr += rc;
1075 break;
1076 /* UNIX time of coredump */
1077 case 't': {
1078 struct timeval tv;
1079 do_gettimeofday(&tv);
1080 rc = snprintf(out_ptr, out_end - out_ptr,
1081 "%ld", tv.tv_sec);
1082 if (rc > out_end - out_ptr)
1083 goto out;
1084 out_ptr += rc;
1085 break;
1087 /* hostname */
1088 case 'h':
1089 down_read(&uts_sem);
1090 rc = snprintf(out_ptr, out_end - out_ptr,
1091 "%s", system_utsname.nodename);
1092 up_read(&uts_sem);
1093 if (rc > out_end - out_ptr)
1094 goto out;
1095 out_ptr += rc;
1096 break;
1097 /* executable */
1098 case 'e':
1099 rc = snprintf(out_ptr, out_end - out_ptr,
1100 "%s", current->comm);
1101 if (rc > out_end - out_ptr)
1102 goto out;
1103 out_ptr += rc;
1104 break;
1105 default:
1106 break;
1108 ++pat_ptr;
1111 /* Backward compatibility with core_uses_pid:
1113 * If core_pattern does not include a %p (as is the default)
1114 * and core_uses_pid is set, then .%pid will be appended to
1115 * the filename */
1116 if (!pid_in_pattern
1117 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
1118 rc = snprintf(out_ptr, out_end - out_ptr,
1119 ".%d", current->pid);
1120 if (rc > out_end - out_ptr)
1121 goto out;
1122 out_ptr += rc;
1124 out:
1125 *out_ptr = 0;
1128 int do_coredump(long signr, struct pt_regs * regs)
1130 struct linux_binfmt * binfmt;
1131 char corename[CORENAME_MAX_SIZE + 1];
1132 struct file * file;
1133 struct inode * inode;
1134 int retval = 0;
1135 int fsuid = current->fsuid;
1137 lock_kernel();
1138 binfmt = current->binfmt;
1139 if (!binfmt || !binfmt->core_dump)
1140 goto fail;
1141 if (!is_dumpable(current))
1143 if(!core_setuid_ok || !current->task_dumpable)
1144 goto fail;
1145 current->fsuid = 0;
1147 current->mm->dumpable = 0;
1148 if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
1149 goto fail;
1151 format_corename(corename, core_pattern, signr);
1152 file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
1153 if (IS_ERR(file))
1154 goto fail;
1155 inode = file->f_dentry->d_inode;
1156 if (inode->i_nlink > 1)
1157 goto close_fail; /* multiple links - don't dump */
1158 if (d_unhashed(file->f_dentry))
1159 goto close_fail;
1161 if (!S_ISREG(inode->i_mode))
1162 goto close_fail;
1163 if (!file->f_op)
1164 goto close_fail;
1165 if (!file->f_op->write)
1166 goto close_fail;
1167 if (do_truncate(file->f_dentry, 0) != 0)
1168 goto close_fail;
1170 retval = binfmt->core_dump(signr, regs, file);
1172 close_fail:
1173 filp_close(file, NULL);
1174 fail:
1175 if (fsuid != current->fsuid)
1176 current->fsuid = fsuid;
1177 unlock_kernel();
1178 return retval;