ia64/xen-unstable

view linux-2.6-xen-sparse/arch/x86_64/kernel/process-xen.c @ 13647:057c4c2991fa

32-on-64: Fix an off-by-one error in compat_addr_ok().
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kaf24@localhost.localdomain
date Fri Jan 26 13:36:52 2007 +0000 (2007-01-26)
parents 3adf00179a63
children 477a3bde3b61
line source
/*
 * linux/arch/x86-64/kernel/process.c
 *
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 *
 * Jun Nakajima <jun.nakajima@intel.com>
 * Modified for Xen
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <xen/interface/dom0_ops.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/hardirq.h>
#include <asm/ia32.h>
#include <asm/idle.h>

#include <xen/cpu_hotplug.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power-management idle function, if any.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
        atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
        atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);

enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;

void enter_idle(void)
{
        __get_cpu_var(idle_state) = CPU_IDLE;
        atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
        __get_cpu_var(idle_state) = CPU_NOT_IDLE;
        atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
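        /*
         * Bitwise '|' is intentional: a single test covers both conditions.
         * Only the idle task (pid 0), and only from a first-level interrupt
         * (pda irqcount == 0), may leave the idle state here.
         */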
        if (current->pid | read_pda(irqcount))
                return;
        __exit_idle();
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
        local_irq_enable();

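        /* Spin with PAUSE ("rep; nop") until TIF_NEED_RESCHED is set. */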
        asm volatile(
                "2:"
                "testl %0,%1;"
                "rep; nop;"
                "je 2b;"
                : :
                "i" (_TIF_NEED_RESCHED),
                "m" (current_thread_info()->flags));
}

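/*
 * Default idle routine for a Xen guest: clear TS_POLLING so the scheduler
 * sends a real wakeup, then safe_halt() blocks this VCPU in the hypervisor
 * until the next event arrives.
 */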
static void xen_idle(void)
{
        local_irq_disable();

        if (need_resched())
                local_irq_enable();
        else {
                current_thread_info()->status &= ~TS_POLLING;
                smp_mb__after_clear_bit();
                safe_halt();
                current_thread_info()->status |= TS_POLLING;
        }
}

#ifdef CONFIG_HOTPLUG_CPU
static inline void play_dead(void)
{
        idle_task_exit();
        local_irq_disable();
        cpu_clear(smp_processor_id(), cpu_initialized);
        preempt_enable_no_resched();
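        /*
         * Take this VCPU down in the hypervisor. If the CPU is later
         * brought back up, execution resumes here and cpu_bringup()
         * reinitializes it.
         */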
        HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
        cpu_bringup();
}
#else
static inline void play_dead(void)
{
        BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
        current_thread_info()->status |= TS_POLLING;
        /* endless idle loop with no priority at all */
        while (1) {
                while (!need_resched()) {
                        void (*idle)(void);

                        if (__get_cpu_var(cpu_idle_state))
                                __get_cpu_var(cpu_idle_state) = 0;
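                        /*
                         * Reload pm_idle only after cpu_idle_state has been
                         * checked and cleared; cpu_idle_wait() relies on this
                         * ordering.
                         */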
                        rmb();
                        idle = pm_idle;
                        if (!idle)
                                idle = xen_idle;
                        if (cpu_is_offline(smp_processor_id()))
                                play_dead();
                        enter_idle();
                        idle();
                        __exit_idle();
                }

                preempt_enable_no_resched();
                schedule();
                preempt_disable();
        }
}

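/*
 * cpu_idle_wait() makes every online CPU pass through its idle loop once, so
 * that a newly installed pm_idle routine is guaranteed to be in use
 * everywhere. Each idle CPU clears its cpu_idle_state flag in the loop above;
 * this function polls until all of the flags have been cleared.
 */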
void cpu_idle_wait(void)
{
        unsigned int cpu, this_cpu = get_cpu();
        cpumask_t map;

        set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
        put_cpu();

        cpus_clear(map);
        for_each_online_cpu(cpu) {
                per_cpu(cpu_idle_state, cpu) = 1;
                cpu_set(cpu, map);
        }

        __get_cpu_var(cpu_idle_state) = 0;

        wmb();
        do {
                ssleep(1);
                for_each_online_cpu(cpu) {
                        if (cpu_isset(cpu, map) &&
                            !per_cpu(cpu_idle_state, cpu))
                                cpu_clear(cpu, map);
                }
                cpus_and(map, map, cpu_online_map);
        } while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
}

static int __init idle_setup (char *str)
{
        if (!strncmp(str, "poll", 4)) {
                printk("using polling idle threads.\n");
                pm_idle = poll_idle;
        }

        boot_option_idle_override = 1;
        return 1;
}

__setup("idle=", idle_setup);

/* Also prints some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
        unsigned long fs, gs, shadowgs;
        unsigned int fsindex, gsindex;
        unsigned int ds, cs, es;

        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
                system_utsname.release,
                (int)strcspn(system_utsname.version, " "),
                system_utsname.version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip);
        printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
                regs->eflags);
        printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
                regs->rax, regs->rbx, regs->rcx);
        printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
                regs->rdx, regs->rsi, regs->rdi);
        printk("RBP: %016lx R08: %016lx R09: %016lx\n",
                regs->rbp, regs->r8, regs->r9);
        printk("R10: %016lx R11: %016lx R12: %016lx\n",
                regs->r10, regs->r11, regs->r12);
        printk("R13: %016lx R14: %016lx R15: %016lx\n",
                regs->r13, regs->r14, regs->r15);

        asm("mov %%ds,%0" : "=r" (ds));
        asm("mov %%cs,%0" : "=r" (cs));
        asm("mov %%es,%0" : "=r" (es));
        asm("mov %%fs,%0" : "=r" (fsindex));
        asm("mov %%gs,%0" : "=r" (gsindex));

        rdmsrl(MSR_FS_BASE, fs);
        rdmsrl(MSR_GS_BASE, gs);
        rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

        printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
                fs, fsindex, gs, gsindex, shadowgs);
        printk("CS: %04x DS: %04x ES: %04x\n", cs, ds, es);
}

void show_regs(struct pt_regs *regs)
{
        printk("CPU %d:", smp_processor_id());
        __show_regs(regs);
        show_trace(NULL, regs, (void *)(regs + 1));
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
        struct task_struct *me = current;
        struct thread_struct *t = &me->thread;

        if (me->thread.io_bitmap_ptr) {
#ifndef CONFIG_X86_NO_TSS
                struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
#endif
#ifdef CONFIG_XEN
                struct physdev_set_iobitmap iobmp_op = { 0 };
#endif

                kfree(t->io_bitmap_ptr);
                t->io_bitmap_ptr = NULL;
                /*
                 * Careful, clear this in the TSS too:
                 */
#ifndef CONFIG_X86_NO_TSS
                memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
                put_cpu();
#endif
#ifdef CONFIG_XEN
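                /* A zeroed iobitmap op tells Xen to drop this VCPU's I/O bitmap. */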
                HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobmp_op);
#endif
                t->io_bitmap_max = 0;
        }
}

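/*
 * Loading the user %gs selector goes through the hypervisor, which maintains
 * the user/kernel segment state for paravirtualized guests.
 */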
void load_gs_index(unsigned gs)
{
        HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs);
}

void flush_thread(void)
{
        struct task_struct *tsk = current;
        struct thread_info *t = current_thread_info();

        if (t->flags & _TIF_ABI_PENDING) {
                t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
                if (t->flags & _TIF_IA32)
                        current_thread_info()->status |= TS_COMPAT;
        }

        tsk->thread.debugreg0 = 0;
        tsk->thread.debugreg1 = 0;
        tsk->thread.debugreg2 = 0;
        tsk->thread.debugreg3 = 0;
        tsk->thread.debugreg6 = 0;
        tsk->thread.debugreg7 = 0;
        memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
        /*
         * Forget coprocessor state.
         */
        clear_fpu(tsk);
        clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
        if (dead_task->mm) {
                if (dead_task->mm->context.size) {
                        printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
                                dead_task->comm,
                                dead_task->mm->context.ldt,
                                dead_task->mm->context.size);
                        BUG();
                }
        }
}

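/*
 * Helpers for 32-bit TLS entries in the GDT: set_32bit_tls() builds a 32-bit,
 * 4GB-limit descriptor for the given base address, and read_32bit_tls()
 * reassembles the base from the descriptor's split base fields.
 */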
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
        struct user_desc ud = {
                .base_addr = addr,
                .limit = 0xfffff,
                .seg_32bit = 1,
                .limit_in_pages = 1,
                .useable = 1,
        };
        struct n_desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        desc->a = LDT_entry_a(&ud);
        desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
        struct desc_struct *desc = (void *)t->thread.tls_array;
        desc += tls;
        return desc->base0 |
                (((u32)desc->base1) << 16) |
                (((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
        unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
                unsigned long unused,
                struct task_struct * p, struct pt_regs * regs)
{
        int err;
        struct pt_regs * childregs;
        struct task_struct *me = current;

        childregs = ((struct pt_regs *)
                        (THREAD_SIZE + task_stack_page(p))) - 1;
        *childregs = *regs;

        childregs->rax = 0;
        childregs->rsp = rsp;
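        /* Kernel threads are created with rsp == ~0UL: point their stack at the copied register frame. */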
        if (rsp == ~0UL)
                childregs->rsp = (unsigned long)childregs;

        p->thread.rsp = (unsigned long) childregs;
        p->thread.rsp0 = (unsigned long) (childregs+1);
        p->thread.userrsp = me->thread.userrsp;

        set_tsk_thread_flag(p, TIF_FORK);

        p->thread.fs = me->thread.fs;
        p->thread.gs = me->thread.gs;

        asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
        asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
        asm("mov %%es,%0" : "=m" (p->thread.es));
        asm("mov %%ds,%0" : "=m" (p->thread.ds));

        if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
                p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
                if (!p->thread.io_bitmap_ptr) {
                        p->thread.io_bitmap_max = 0;
                        return -ENOMEM;
                }
                memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
                        IO_BITMAP_BYTES);
        }

        /*
         * Set a new TLS for the child thread?
         */
        if (clone_flags & CLONE_SETTLS) {
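                /* For a 64-bit clone() the new TLS base arrives in %r8, the fifth syscall argument. */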
#ifdef CONFIG_IA32_EMULATION
                if (test_thread_flag(TIF_IA32))
                        err = ia32_child_tls(p, childregs);
                else
#endif
                        err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
                if (err)
                        goto out;
        }
        p->thread.iopl = current->thread.iopl;

        err = 0;
out:
        if (err && p->thread.io_bitmap_ptr) {
                kfree(p->thread.io_bitmap_ptr);
                p->thread.io_bitmap_max = 0;
        }
        return err;
}

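/*
 * Save the FPU state with a 64-bit ("rex64") fxsave, clear any pending x87
 * exceptions with fnclex, and mark the FPU as no longer in use by this task.
 */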
static inline void __save_init_fpu( struct task_struct *tsk )
{
        asm volatile( "rex64 ; fxsave %0 ; fnclex"
                      : "=m" (tsk->thread.i387.fxsave));
        tsk->thread_info->status &= ~TS_USEDFPU;
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
__kprobes struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
        struct thread_struct *prev = &prev_p->thread,
                             *next = &next_p->thread;
        int cpu = smp_processor_id();
#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
        struct physdev_set_iopl iopl_op;
        struct physdev_set_iobitmap iobmp_op;
        multicall_entry_t _mcl[8], *mcl = _mcl;

        /*
         * This is basically '__unlazy_fpu', except that we queue a
         * multicall to indicate FPU task switch, rather than
         * synchronously trapping to Xen.
         * This must be here to ensure both math_state_restore() and
         * kernel_fpu_begin() work consistently.
         * The AMD workaround requires it to be after DS reload, or
         * after DS has been cleared, which we do in __prepare_arch_switch.
         */
        if (prev_p->thread_info->status & TS_USEDFPU) {
                __save_init_fpu(prev_p); /* _not_ save_init_fpu() */
                mcl->op = __HYPERVISOR_fpu_taskswitch;
                mcl->args[0] = 1;
                mcl++;
        }

        /*
         * Reload esp0, LDT and the page table pointer:
         */
        mcl->op = __HYPERVISOR_stack_switch;
        mcl->args[0] = __KERNEL_DS;
        mcl->args[1] = next->rsp0;
        mcl++;

        /*
         * Load the per-thread Thread-Local Storage descriptor.
         * This is load_TLS(next, cpu) with multicalls.
         */
#define C(i) do { \
        if (unlikely(next->tls_array[i] != prev->tls_array[i])) { \
                mcl->op = __HYPERVISOR_update_descriptor; \
                mcl->args[0] = virt_to_machine( \
                        &cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]); \
                mcl->args[1] = next->tls_array[i]; \
                mcl++; \
        } \
} while (0)
        C(0); C(1); C(2);
#undef C

        if (unlikely(prev->iopl != next->iopl)) {
                iopl_op.iopl = (next->iopl == 0) ? 1 : next->iopl;
                mcl->op = __HYPERVISOR_physdev_op;
                mcl->args[0] = PHYSDEVOP_set_iopl;
                mcl->args[1] = (unsigned long)&iopl_op;
                mcl++;
        }

        if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
                iobmp_op.bitmap = (char *)next->io_bitmap_ptr;
                iobmp_op.nr_ports = next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
                mcl->op = __HYPERVISOR_physdev_op;
                mcl->args[0] = PHYSDEVOP_set_iobitmap;
                mcl->args[1] = (unsigned long)&iobmp_op;
                mcl++;
        }
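        /*
         * Issue everything queued above (FPU switch, stack switch, TLS
         * updates, iopl and I/O-bitmap changes) as a single batched
         * hypercall.
         */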
        (void)HYPERVISOR_multicall(_mcl, mcl - _mcl);
        /*
         * Switch DS and ES.
         * This won't pick up thread selector changes, but I guess that is ok.
         */
        if (unlikely(next->es))
                loadsegment(es, next->es);

        if (unlikely(next->ds))
                loadsegment(ds, next->ds);

        /*
         * Switch FS and GS.
         */
        if (unlikely(next->fsindex))
                loadsegment(fs, next->fsindex);

        if (next->fs)
                HYPERVISOR_set_segment_base(SEGBASE_FS, next->fs);

        if (unlikely(next->gsindex))
                load_gs_index(next->gsindex);

        if (next->gs)
                HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs);

        /*
         * Switch the PDA and FPU contexts.
         */
        prev->userrsp = read_pda(oldrsp);
        write_pda(oldrsp, next->userrsp);
        write_pda(pcurrent, next_p);
        write_pda(kernelstack,
                task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);

        /*
         * Now maybe reload the debug registers
         */
        if (unlikely(next->debugreg7)) {
                set_debugreg(next->debugreg0, 0);
                set_debugreg(next->debugreg1, 1);
                set_debugreg(next->debugreg2, 2);
                set_debugreg(next->debugreg3, 3);
                /* no 4 and 5 */
                set_debugreg(next->debugreg6, 6);
                set_debugreg(next->debugreg7, 7);
        }

        return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
                char __user * __user *envp, struct pt_regs regs)
{
        long error;
        char * filename;

        filename = getname(name);
        error = PTR_ERR(filename);
        if (IS_ERR(filename))
                return error;
        error = do_execve(filename, argv, envp, &regs);
        if (error == 0) {
                task_lock(current);
                current->ptrace &= ~PT_DTRACE;
                task_unlock(current);
        }
        putname(filename);
        return error;
}

void set_personality_64bit(void)
{
        /* inherit personality from parent */

        /* Make sure to be in 64-bit mode */
        clear_thread_flag(TIF_IA32);

        /* TBD: overwrites user setup. Should have two bits.
           But 64-bit processes have always behaved this way,
           so it's not too bad. The main problem is just that
           32-bit children are affected again. */
        current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
        return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
          void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
        if (!newsp)
                newsp = regs->rsp;
        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
                       NULL, NULL);
}

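/*
 * get_wchan(): walk a sleeping task's frame-pointer chain (bounded to 16
 * frames) and return the first return address outside the scheduler, i.e.
 * the place the task is blocked in.
 */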
unsigned long get_wchan(struct task_struct *p)
{
        unsigned long stack;
        u64 fp, rip;
        int count = 0;

        if (!p || p == current || p->state == TASK_RUNNING)
                return 0;
        stack = (unsigned long)task_stack_page(p);
        if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
                return 0;
        fp = *(u64 *)(p->thread.rsp);
        do {
                if (fp < (unsigned long)stack ||
                    fp > (unsigned long)stack+THREAD_SIZE)
                        return 0;
                rip = *(u64 *)(fp+8);
                if (!in_sched_functions(rip))
                        return rip;
                fp = *(u64 *)fp;
        } while (count++ < 16);
        return 0;
}

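/*
 * do_arch_prctl(): get or set the FS/GS base of a task. Bases that fit in
 * 32 bits go through a GDT TLS slot (cheaper to reload on context switch);
 * larger bases are set directly through the hypervisor.
 */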
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
        int ret = 0;
        int doit = task == current;
        int cpu;

        switch (code) {
        case ARCH_SET_GS:
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, GS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                load_gs_index(GS_TLS_SEL);
                        }
                        task->thread.gsindex = GS_TLS_SEL;
                        task->thread.gs = 0;
                } else {
                        task->thread.gsindex = 0;
                        task->thread.gs = addr;
                        if (doit) {
                                load_gs_index(0);
                                ret = HYPERVISOR_set_segment_base(
                                        SEGBASE_GS_USER, addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_SET_FS:
                /* Not strictly needed for fs, but do it for symmetry
                   with gs */
                if (addr >= TASK_SIZE_OF(task))
                        return -EPERM;
                cpu = get_cpu();
                /* handle small bases via the GDT because that's faster to
                   switch. */
                if (addr <= 0xffffffff) {
                        set_32bit_tls(task, FS_TLS, addr);
                        if (doit) {
                                load_TLS(&task->thread, cpu);
                                asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
                        }
                        task->thread.fsindex = FS_TLS_SEL;
                        task->thread.fs = 0;
                } else {
                        task->thread.fsindex = 0;
                        task->thread.fs = addr;
                        if (doit) {
                                /* set the selector to 0 to not confuse
                                   __switch_to */
                                asm volatile("movl %0,%%fs" :: "r" (0));
                                ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
                                        addr);
                        }
                }
                put_cpu();
                break;
        case ARCH_GET_FS: {
                unsigned long base;
                if (task->thread.fsindex == FS_TLS_SEL)
                        base = read_32bit_tls(task, FS_TLS);
                else if (doit)
                        rdmsrl(MSR_FS_BASE, base);
                else
                        base = task->thread.fs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }
        case ARCH_GET_GS: {
                unsigned long base;
                unsigned gsindex;
                if (task->thread.gsindex == GS_TLS_SEL)
                        base = read_32bit_tls(task, GS_TLS);
                else if (doit) {
                        asm("movl %%gs,%0" : "=r" (gsindex));
                        if (gsindex)
                                rdmsrl(MSR_KERNEL_GS_BASE, base);
                        else
                                base = task->thread.gs;
                }
                else
                        base = task->thread.gs;
                ret = put_user(base, (unsigned long __user *)addr);
                break;
        }

        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
        return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
        struct pt_regs *pp, ptregs;

        pp = task_pt_regs(tsk);

        ptregs = *pp;
        ptregs.cs &= 0xffff;
        ptregs.ss &= 0xffff;

        elf_core_copy_regs(regs, &ptregs);

        boot_option_idle_override = 1;
        return 1;
}

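/*
 * Randomize the initial stack pointer by up to 8KB when VA-space
 * randomization is enabled, keeping 16-byte alignment.
 */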
unsigned long arch_align_stack(unsigned long sp)
{
        if (randomize_va_space)
                sp -= get_random_int() % 8192;
        return sp & ~0xf;
}

#ifndef CONFIG_SMP
void _restore_vcpu(void)
{
}
#endif