ia64/xen-unstable

changeset 6291:87dec3b9c546

merge?
author cl349@firebug.cl.cam.ac.uk
date Fri Aug 19 15:22:05 2005 +0000 (2005-08-19)
parents 6c8c3df37bfe 3c1cd2486b7f
children 47d49e8b8042
files	linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32
	linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64
	linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64
	linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
	linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
	linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
	linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c
	linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c
	linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c
	linux-2.6-xen-sparse/arch/xen/i386/kernel/swiotlb.c
	linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c
	linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c
	linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
	linux-2.6-xen-sparse/arch/xen/i386/mm/init.c
	linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
	linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c
	linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
	linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c
	linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c
	linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
	linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
	linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
	linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c
	linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c
	linux-2.6-xen-sparse/drivers/xen/blkback/Makefile
	linux-2.6-xen-sparse/drivers/xen/blkback/blkback.c
	linux-2.6-xen-sparse/drivers/xen/blkback/common.h
	linux-2.6-xen-sparse/drivers/xen/blkback/interface.c
	linux-2.6-xen-sparse/drivers/xen/blkback/vbd.c
	linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c
	linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c
	linux-2.6-xen-sparse/drivers/xen/blkfront/block.h
	linux-2.6-xen-sparse/drivers/xen/blkfront/vbd.c
	linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
	linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
	linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h
	linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h
	linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
	linux-2.6-xen-sparse/include/asm-xen/xenbus.h
	tools/examples/network-bridge
	tools/python/xen/xend/XendDomainInfo.py
	tools/python/xen/xend/server/event.py
	tools/python/xen/xend/server/relocate.py
	tools/python/xen/xm/create.py
	tools/python/xen/xm/main.py
	xen/arch/x86/domain.c
	xen/arch/x86/mm.c
	xen/arch/x86/setup.c
	xen/arch/x86/time.c
	xen/arch/x86/traps.c
	xen/arch/x86/x86_32/traps.c
	xen/arch/x86/x86_64/traps.c
	xen/common/domain.c
	xen/common/event_channel.c
	xen/common/schedule.c
	xen/drivers/char/console.c
	xen/include/asm-x86/e820.h
	xen/include/asm-x86/uaccess.h
	xen/include/public/io/blkif.h
	xen/include/public/xen.h
line diff
     7.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c	Fri Aug 19 15:21:12 2005 +0000
     7.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c	Fri Aug 19 15:22:05 2005 +0000
     7.3 @@ -115,20 +115,12 @@ void xen_idle(void)
     7.4  /* We don't actually take CPU down, just spin without interrupts. */
     7.5  static inline void play_dead(void)
     7.6  {
     7.7 -	/* Ack it */
     7.8 -	__get_cpu_var(cpu_state) = CPU_DEAD;
     7.9 -
    7.10 -	/* We shouldn't have to disable interrupts while dead, but
    7.11 -	 * some interrupts just don't seem to go away, and this makes
    7.12 -	 * it "work" for testing purposes. */
    7.13  	/* Death loop */
    7.14  	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
    7.15  		HYPERVISOR_yield();
    7.16  
    7.17 -	local_irq_disable();
    7.18  	__flush_tlb_all();
    7.19  	cpu_set(smp_processor_id(), cpu_online_map);
    7.20 -	local_irq_enable();
    7.21  }
    7.22  #else
    7.23  static inline void play_dead(void)
    7.24 @@ -156,12 +148,19 @@ void cpu_idle (void)
    7.25  			rmb();
    7.26  
    7.27  			if (cpu_is_offline(cpu)) {
    7.28 +				local_irq_disable();
    7.29 +				/* Ack it.  From this point on until
    7.30 +				   we get woken up, we're not allowed
    7.31 +				   to take any locks.  In particular,
    7.32 +				   don't printk. */
    7.33 +				__get_cpu_var(cpu_state) = CPU_DEAD;
    7.34  #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
    7.35  				/* Tell hypervisor to take vcpu down. */
    7.36  				HYPERVISOR_vcpu_down(cpu);
    7.37  #endif
    7.38  				play_dead();
    7.39 -         }
    7.40 +				local_irq_enable();
    7.41 +			}
    7.42  
    7.43  			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
    7.44  			xen_idle();
    7.45 @@ -791,3 +790,10 @@ unsigned long arch_align_stack(unsigned 
    7.46  		sp -= get_random_int() % 8192;
    7.47  	return sp & ~0xf;
    7.48  }
    7.49 +
    7.50 +
    7.51 +#ifndef CONFIG_X86_SMP
    7.52 +void _restore_vcpu(void)
    7.53 +{
    7.54 +}
    7.55 +#endif
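
The net effect of these process.c hunks: the CPU_DEAD acknowledgement moves out of play_dead() and into the idle loop, where it now happens only after interrupts are disabled, and the dying vcpu stays lock-free (no printk) until it is brought back up. A condensed sketch of the resulting offline path, paraphrased from the patched code rather than offered as a drop-in replacement:

    /* Idle-loop offline path after this patch (i386).  cpu_state,
     * play_dead() and HYPERVISOR_vcpu_down() are as in the hunks above. */
    if (cpu_is_offline(cpu)) {
            local_irq_disable();
            /* Ack the offline request.  From here until we are woken,
             * taking any lock (including printk's) is forbidden. */
            __get_cpu_var(cpu_state) = CPU_DEAD;
    #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
            HYPERVISOR_vcpu_down(cpu);      /* ask Xen to park this vcpu */
    #endif
            play_dead();                    /* yield until CPU_UP_PREPARE */
            local_irq_enable();
    }
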
     9.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c	Fri Aug 19 15:21:12 2005 +0000
     9.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c	Fri Aug 19 15:22:05 2005 +0000
     9.3 @@ -1616,3 +1616,21 @@ void smp_resume(void)
     9.4  	smp_intr_init();
     9.5  	local_setup_timer_irq();
     9.6  }
     9.7 +
     9.8 +DECLARE_PER_CPU(int, timer_irq);
     9.9 +
    9.10 +void _restore_vcpu(void)
    9.11 +{
    9.12 +	int cpu = smp_processor_id();
    9.13 +	extern atomic_t vcpus_rebooting;
    9.14 +
    9.15 +	/* We are the first thing the vcpu runs when it comes back,
    9.16 +	   and we are supposed to restore the IPIs and timer
    9.17 +	   interrupts etc.  When we return, the vcpu's idle loop will
    9.18 +	   start up again. */
    9.19 +	_bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
    9.20 +	_bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
    9.21 +	_bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
    9.22 +	_bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
    9.23 +	atomic_dec(&vcpus_rebooting);
    9.24 +}
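
_restore_vcpu() is the first code a secondary vcpu runs after being rebooted from a pickled context, and it pairs with restore_vcpu_context() in reboot.c (further down in this changeset) through the vcpus_rebooting counter. A condensed sketch of the handshake, assembled from the hunks in this changeset:

    /* cpu 0, in restore_vcpu_context(), per secondary vcpu: */
    atomic_set(&vcpus_rebooting, 1);
    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);   /* new vcpu enters _restore_vcpu() */
    while (atomic_read(&vcpus_rebooting))   /* wait for it to rebind its irqs */
            barrier();

    /* the secondary vcpu, at the end of _restore_vcpu(): */
    atomic_dec(&vcpus_rebooting);           /* release cpu 0; idle loop resumes */
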
    11.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Fri Aug 19 15:21:12 2005 +0000
    11.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Fri Aug 19 15:22:05 2005 +0000
    11.3 @@ -745,7 +745,7 @@ static void __init hpet_time_init(void)
    11.4  #endif
    11.5  
    11.6  /* Dynamically-mapped IRQ. */
    11.7 -static DEFINE_PER_CPU(int, timer_irq);
    11.8 +DEFINE_PER_CPU(int, timer_irq);
    11.9  
   11.10  static struct irqaction irq_timer = {
   11.11  	timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
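
Dropping static from the timer_irq definition is what lets the DECLARE_PER_CPU(int, timer_irq) added to smpboot.c (above) link against it. For reference, the standard per-CPU split this relies on:

    /* In exactly one .c file: allocates the per-CPU storage. */
    DEFINE_PER_CPU(int, timer_irq);

    /* In any other file (or a shared header): declares it without allocating. */
    DECLARE_PER_CPU(int, timer_irq);

    /* Access: */
    per_cpu(timer_irq, cpu) = irq;      /* a named cpu's copy */
    irq = __get_cpu_var(timer_irq);     /* the current cpu's copy */
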
    16.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c	Fri Aug 19 15:21:12 2005 +0000
    16.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c	Fri Aug 19 15:22:05 2005 +0000
    16.3 @@ -144,7 +144,7 @@ asmlinkage void evtchn_do_upcall(struct 
    16.4      vcpu_info_t   *vcpu_info = &s->vcpu_data[cpu];
    16.5  
    16.6      vcpu_info->evtchn_upcall_pending = 0;
    16.7 -    
    16.8 +
    16.9      /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
   16.10      l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
   16.11      while ( l1 != 0 )
   16.12 @@ -158,9 +158,9 @@ asmlinkage void evtchn_do_upcall(struct 
   16.13              l2 &= ~(1 << l2i);
   16.14              
   16.15              port = (l1i << 5) + l2i;
   16.16 -            if ( (irq = evtchn_to_irq[port]) != -1 )
   16.17 +            if ( (irq = evtchn_to_irq[port]) != -1 ) {
   16.18                  do_IRQ(irq, regs);
   16.19 -            else
   16.20 +	    } else
   16.21                  evtchn_device_upcall(port);
   16.22          }
   16.23      }
   16.24 @@ -245,6 +245,74 @@ void unbind_virq_from_irq(int virq)
   16.25      spin_unlock(&irq_mapping_update_lock);
   16.26  }
   16.27  
    16.28 +/* This is only used when a vcpu is brought back up after an xm save.
    16.29 +   The ipi is expected to have been bound before we suspended, and so
    16.30 +   all of the xenolinux state is set up; we only need to restore the
    16.31 +   Xen side of things.  The irq number has to be the same, but the
    16.32 +   evtchn number can change. */
   16.33 +void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
   16.34 +{
   16.35 +    evtchn_op_t op;
   16.36 +    int evtchn;
   16.37 +
   16.38 +    spin_lock(&irq_mapping_update_lock);
   16.39 +
   16.40 +    op.cmd = EVTCHNOP_bind_ipi;
   16.41 +    if ( HYPERVISOR_event_channel_op(&op) != 0 )
   16.42 +	panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
   16.43 +    evtchn = op.u.bind_ipi.port;
   16.44 +
   16.45 +    printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
   16.46 +	   ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
   16.47 +	   evtchn);
   16.48 +
   16.49 +    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
   16.50 +    irq_to_evtchn[irq] = -1;
   16.51 +
   16.52 +    evtchn_to_irq[evtchn] = irq;
   16.53 +    irq_to_evtchn[irq]    = evtchn;
   16.54 +
   16.55 +    printk("<0>evtchn_to_irq[%d] = %d.\n", evtchn,
   16.56 +	   evtchn_to_irq[evtchn]);
   16.57 +    per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
   16.58 +
   16.59 +    bind_evtchn_to_cpu(evtchn, vcpu);
   16.60 +
   16.61 +    spin_unlock(&irq_mapping_update_lock);
   16.62 +
   16.63 +    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
   16.64 +    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
   16.65 +}
   16.66 +
   16.67 +void _bind_virq_to_irq(int virq, int cpu, int irq)
   16.68 +{
   16.69 +    evtchn_op_t op;
   16.70 +    int evtchn;
   16.71 +
   16.72 +    spin_lock(&irq_mapping_update_lock);
   16.73 +
   16.74 +    op.cmd              = EVTCHNOP_bind_virq;
   16.75 +    op.u.bind_virq.virq = virq;
   16.76 +    if ( HYPERVISOR_event_channel_op(&op) != 0 )
   16.77 +            panic("Failed to bind virtual IRQ %d\n", virq);
   16.78 +    evtchn = op.u.bind_virq.port;
   16.79 +
   16.80 +    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
   16.81 +    irq_to_evtchn[irq] = -1;
   16.82 +
   16.83 +    evtchn_to_irq[evtchn] = irq;
   16.84 +    irq_to_evtchn[irq]    = evtchn;
   16.85 +
   16.86 +    per_cpu(virq_to_irq, cpu)[virq] = irq;
   16.87 +
   16.88 +    bind_evtchn_to_cpu(evtchn, cpu);
   16.89 +
   16.90 +    spin_unlock(&irq_mapping_update_lock);
   16.91 +
   16.92 +    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
   16.93 +    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
   16.94 +}
   16.95 +
   16.96  int bind_ipi_to_irq(int ipi)
   16.97  {
   16.98      evtchn_op_t op;
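
Both _bind_virq_to_irq() and _bind_ipi_to_irq() preserve the same invariant: across a save/restore the guest-visible irq number is pinned while the event-channel number may change, so evtchn_to_irq[] and irq_to_evtchn[] must be re-pointed as a pair under irq_mapping_update_lock. The core remap, isolated from the functions above:

    /* Retire the stale binding for this irq, then install the new
     * event channel, keeping the two maps mutual inverses. */
    evtchn_to_irq[irq_to_evtchn[irq]] = -1;   /* old evtchn no longer maps here */
    irq_to_evtchn[irq] = -1;

    evtchn_to_irq[evtchn] = irq;              /* new evtchn -> same irq */
    irq_to_evtchn[irq]    = evtchn;

    /* Finally unmask and clear any stale pending bit in the shared page. */
    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_mask);
    clear_bit(evtchn, (unsigned long *)HYPERVISOR_shared_info->evtchn_pending);
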
    17.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Fri Aug 19 15:21:12 2005 +0000
    17.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Fri Aug 19 15:22:05 2005 +0000
    17.3 @@ -16,6 +16,8 @@
    17.4  #include <asm-xen/queues.h>
    17.5  #include <asm-xen/xenbus.h>
    17.6  #include <asm-xen/ctrl_if.h>
    17.7 +#include <linux/cpu.h>
    17.8 +#include <linux/kthread.h>
    17.9  
   17.10  #define SHUTDOWN_INVALID  -1
   17.11  #define SHUTDOWN_POWEROFF  0
   17.12 @@ -58,10 +60,71 @@ EXPORT_SYMBOL(machine_power_off);
   17.13  /* Ignore multiple shutdown requests. */
   17.14  static int shutting_down = SHUTDOWN_INVALID;
   17.15  
   17.16 -static void __do_suspend(void)
   17.17 +#ifndef CONFIG_HOTPLUG_CPU
   17.18 +#define cpu_down(x) (-EOPNOTSUPP)
   17.19 +#define cpu_up(x) (-EOPNOTSUPP)
   17.20 +#endif
   17.21 +
   17.22 +static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
   17.23 +{
   17.24 +    int r;
   17.25 +    int gdt_pages;
   17.26 +    r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
   17.27 +    if (r != 0)
   17.28 +	panic("pickling vcpu %d -> %d!\n", vcpu, r);
   17.29 +
   17.30 +    /* Translate from machine to physical addresses where necessary,
   17.31 +       so that they can be translated to our new machine address space
   17.32 +       after resume.  libxc is responsible for doing this to vcpu0,
   17.33 +       but we do it to the others. */
   17.34 +    gdt_pages = (ctxt->gdt_ents + 511) / 512;
   17.35 +    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
   17.36 +    for (r = 0; r < gdt_pages; r++)
   17.37 +	ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
   17.38 +}
   17.39 +
   17.40 +void _restore_vcpu(int cpu);
   17.41 +
   17.42 +atomic_t vcpus_rebooting;
   17.43 +
   17.44 +static int restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
   17.45 +{
   17.46 +    int r;
   17.47 +    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
   17.48 +
   17.49 +    /* This is kind of a hack, and implicitly relies on the fact that
   17.50 +       the vcpu stops in a place where all of the call-clobbered
   17.51 +       registers are already dead. */
   17.52 +    ctxt->user_regs.esp -= 4;
   17.53 +    ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
   17.54 +    ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
   17.55 +
   17.56 +    /* De-canonicalise.  libxc handles this for vcpu 0, but we need
   17.57 +       to do it for the other vcpus. */
   17.58 +    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
   17.59 +    for (r = 0; r < gdt_pages; r++)
   17.60 +	ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
   17.61 +
   17.62 +    atomic_set(&vcpus_rebooting, 1);
   17.63 +    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
   17.64 +    if (r != 0) {
   17.65 +	printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
   17.66 +	return -1;
   17.67 +    }
   17.68 +
   17.69 +    /* Make sure we wait for the new vcpu to come up before trying to do
   17.70 +       anything with it or starting the next one. */
   17.71 +    while (atomic_read(&vcpus_rebooting))
   17.72 +	barrier();
   17.73 +
   17.74 +    return 0;
   17.75 +}
   17.76 +
   17.77 +static int __do_suspend(void *ignore)
   17.78  {
   17.79      int i, j;
   17.80      suspend_record_t *suspend_record;
   17.81 +    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
   17.82  
   17.83      /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
   17.84  	/* XXX SMH: yes it would :-( */	
   17.85 @@ -97,14 +160,64 @@ static void __do_suspend(void)
   17.86      extern unsigned long max_pfn;
   17.87      extern unsigned int *pfn_to_mfn_frame_list;
   17.88  
   17.89 +    cpumask_t prev_online_cpus, prev_present_cpus;
   17.90 +    int err = 0;
   17.91 +
   17.92 +    BUG_ON(smp_processor_id() != 0);
   17.93 +    BUG_ON(in_interrupt());
   17.94 +
   17.95 +#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
   17.96 +    if (num_online_cpus() > 1) {
   17.97 +	printk(KERN_WARNING "Can't suspend SMP guests without CONFIG_HOTPLUG_CPU\n");
   17.98 +	return -EOPNOTSUPP;
   17.99 +    }
  17.100 +#endif
  17.101 +
  17.102      suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL);
  17.103      if ( suspend_record == NULL )
  17.104          goto out;
  17.105  
  17.106 +    /* Take all of the other cpus offline.  We need to be careful not
  17.107 +       to get preempted between the final test for num_online_cpus()
  17.108 +       == 1 and disabling interrupts, since otherwise userspace could
  17.109 +       bring another cpu online, and then we'd be stuffed.  At the
  17.110 +       same time, cpu_down can reschedule, so we need to enable
  17.111 +       preemption while doing that.  This kind of sucks, but should be
  17.112 +       correct. */
  17.113 +    /* (We don't need to worry about other cpus bringing stuff up,
  17.114 +       since by the time num_online_cpus() == 1, there aren't any
  17.115 +       other cpus) */
  17.116 +    cpus_clear(prev_online_cpus);
  17.117 +    preempt_disable();
  17.118 +    while (num_online_cpus() > 1) {
  17.119 +	preempt_enable();
  17.120 +	for_each_online_cpu(i) {
  17.121 +	    if (i == 0)
  17.122 +		continue;
  17.123 +	    err = cpu_down(i);
  17.124 +	    if (err != 0) {
  17.125 +		printk(KERN_CRIT "Failed to take all CPUs down: %d.\n", err);
  17.126 +		goto out_reenable_cpus;
  17.127 +	    }
  17.128 +	    cpu_set(i, prev_online_cpus);
  17.129 +	}
  17.130 +	preempt_disable();
  17.131 +    }
  17.132 +
  17.133      suspend_record->nr_pfns = max_pfn; /* final number of pfns */
  17.134  
  17.135      __cli();
  17.136  
  17.137 +    preempt_enable();
  17.138 +
  17.139 +    cpus_clear(prev_present_cpus);
  17.140 +    for_each_present_cpu(i) {
  17.141 +	if (i == 0)
  17.142 +	    continue;
  17.143 +	save_vcpu_context(i, &suspended_cpu_records[i]);
  17.144 +	cpu_set(i, prev_present_cpus);
  17.145 +    }
  17.146 +
  17.147  #ifdef __i386__
  17.148      mm_pin_all();
  17.149      kmem_cache_shrink(pgd_cache);
  17.150 @@ -132,6 +245,8 @@ static void __do_suspend(void)
  17.151      memcpy(&suspend_record->resume_info, &xen_start_info,
  17.152             sizeof(xen_start_info));
  17.153  
  17.154 +    /* We'll stop somewhere inside this hypercall.  When it returns,
  17.155 +       we'll start resuming after the restore. */
  17.156      HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
  17.157  
  17.158      shutting_down = SHUTDOWN_INVALID; 
  17.159 @@ -171,11 +286,26 @@ static void __do_suspend(void)
  17.160  
  17.161      usbif_resume();
  17.162  
  17.163 +    for_each_cpu_mask(i, prev_present_cpus) {
  17.164 +	restore_vcpu_context(i, &suspended_cpu_records[i]);
  17.165 +    }
  17.166 +
  17.167      __sti();
  17.168  
  17.169 + out_reenable_cpus:
  17.170 +    for_each_cpu_mask(i, prev_online_cpus) {
  17.171 +	j = cpu_up(i);
  17.172 +	if (j != 0) {
  17.173 +	    printk(KERN_CRIT "Failed to bring cpu %d back up (%d).\n",
  17.174 +		   i, j);
  17.175 +	    err = j;
  17.176 +	}
  17.177 +    }
  17.178 +
  17.179   out:
  17.180      if ( suspend_record != NULL )
  17.181          free_page((unsigned long)suspend_record);
  17.182 +    return err;
  17.183  }
  17.184  
  17.185  static int shutdown_process(void *__unused)
  17.186 @@ -222,6 +352,18 @@ static int shutdown_process(void *__unus
  17.187      return 0;
  17.188  }
  17.189  
  17.190 +static struct task_struct *kthread_create_on_cpu(int (*f)(void *arg),
  17.191 +						 void *arg,
  17.192 +						 const char *name,
  17.193 +						 int cpu)
  17.194 +{
  17.195 +    struct task_struct *p;
  17.196 +    p = kthread_create(f, arg, name);
  17.197 +    kthread_bind(p, cpu);
  17.198 +    wake_up_process(p);
  17.199 +    return p;
  17.200 +}
  17.201 +
  17.202  static void __shutdown_handler(void *unused)
  17.203  {
  17.204      int err;
  17.205 @@ -234,7 +376,7 @@ static void __shutdown_handler(void *unu
  17.206      }
  17.207      else
  17.208      {
  17.209 -        __do_suspend();
  17.210 +	kthread_create_on_cpu(__do_suspend, NULL, "suspender", 0);
  17.211      }
  17.212  }
  17.213  
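
save_vcpu_context() and restore_vcpu_context() are inverses around the HYPERVISOR_suspend call: machine frame numbers in each pickled context are canonicalised to pseudo-physical frames before the save, then mapped into the new machine address space after resume (libxc does the same job for vcpu 0 only). The translation pair, isolated from the hunks above:

    /* Before suspend: machine -> pseudo-physical ("canonicalise").
     * gdt_pages = (ctxt->gdt_ents + 511) / 512, since 512 descriptors
     * fit in one page. */
    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);     /* cr3 */
    for (i = 0; i < gdt_pages; i++)
            ctxt->gdt_frames[i] = mfn_to_pfn(ctxt->gdt_frames[i]);

    /* After resume: pseudo-physical -> new machine frames. */
    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
    for (i = 0; i < gdt_pages; i++)
            ctxt->gdt_frames[i] = pfn_to_mfn(ctxt->gdt_frames[i]);
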
    19.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c	Fri Aug 19 15:21:12 2005 +0000
    19.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c	Fri Aug 19 15:22:05 2005 +0000
    19.3 @@ -743,3 +743,9 @@ unsigned long arch_align_stack(unsigned 
    19.4  		sp -= get_random_int() % 8192;
    19.5  	return sp & ~0xf;
    19.6  }
    19.7 +
    19.8 +#ifndef CONFIG_SMP
    19.9 +void _restore_vcpu(void)
   19.10 +{
   19.11 +}
   19.12 +#endif
    21.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c	Fri Aug 19 15:21:12 2005 +0000
    21.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c	Fri Aug 19 15:22:05 2005 +0000
    21.3 @@ -1286,4 +1286,10 @@ void smp_resume(void)
    21.4  	smp_intr_init();
    21.5  	local_setup_timer_irq();
    21.6  }
    21.7 +
    21.8 +void _restore_vcpu(void)
    21.9 +{
   21.10 +	/* XXX need to write this */
   21.11 +}
   21.12 +
   21.13  #endif
    37.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Fri Aug 19 15:21:12 2005 +0000
    37.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Fri Aug 19 15:22:05 2005 +0000
    37.3 @@ -163,7 +163,7 @@ HYPERVISOR_yield(
    37.4          TRAP_INSTR
    37.5          : "=a" (ret), "=b" (ign)
    37.6  	: "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
    37.7 -	: "memory" );
    37.8 +	: "memory", "ecx" );
    37.9  
   37.10      return ret;
   37.11  }
   37.12 @@ -178,7 +178,7 @@ HYPERVISOR_block(
   37.13          TRAP_INSTR
   37.14          : "=a" (ret), "=b" (ign1)
   37.15  	: "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
   37.16 -	: "memory" );
   37.17 +	: "memory", "ecx" );
   37.18  
   37.19      return ret;
   37.20  }
   37.21 @@ -194,7 +194,7 @@ HYPERVISOR_shutdown(
   37.22          : "=a" (ret), "=b" (ign1)
   37.23  	: "0" (__HYPERVISOR_sched_op),
   37.24  	  "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
   37.25 -        : "memory" );
   37.26 +        : "memory", "ecx" );
   37.27  
   37.28      return ret;
   37.29  }
   37.30 @@ -210,7 +210,7 @@ HYPERVISOR_reboot(
   37.31          : "=a" (ret), "=b" (ign1)
   37.32  	: "0" (__HYPERVISOR_sched_op),
   37.33  	  "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
   37.34 -        : "memory" );
   37.35 +        : "memory", "ecx" );
   37.36  
   37.37      return ret;
   37.38  }
   37.39 @@ -228,7 +228,7 @@ HYPERVISOR_suspend(
   37.40          : "=a" (ret), "=b" (ign1), "=S" (ign2)
   37.41  	: "0" (__HYPERVISOR_sched_op),
   37.42          "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), 
   37.43 -        "S" (srec) : "memory");
   37.44 +        "S" (srec) : "memory", "ecx");
   37.45  
   37.46      return ret;
   37.47  }
   37.48 @@ -244,7 +244,7 @@ HYPERVISOR_crash(
   37.49          : "=a" (ret), "=b" (ign1)
   37.50  	: "0" (__HYPERVISOR_sched_op),
   37.51  	  "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
   37.52 -        : "memory" );
   37.53 +        : "memory", "ecx" );
   37.54  
   37.55      return ret;
   37.56  }
   37.57 @@ -529,12 +529,15 @@ HYPERVISOR_vcpu_down(
   37.58  {
   37.59      int ret;
   37.60      unsigned long ign1;
    37.61 +    /* Yes, I really do want to clobber edx here: when we resume a
    37.62 +       vcpu after unpickling a multi-processor domain, it returns
    37.63 +       here, but clobbers all of the call-clobbered registers. */
   37.64      __asm__ __volatile__ (
   37.65          TRAP_INSTR
   37.66          : "=a" (ret), "=b" (ign1)
   37.67  	: "0" (__HYPERVISOR_sched_op),
   37.68  	  "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
   37.69 -        : "memory" );
   37.70 +        : "memory", "ecx", "edx" );
   37.71  
   37.72      return ret;
   37.73  }
   37.74 @@ -550,8 +553,26 @@ HYPERVISOR_vcpu_up(
   37.75          : "=a" (ret), "=b" (ign1)
   37.76  	: "0" (__HYPERVISOR_sched_op),
   37.77  	  "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
   37.78 +        : "memory", "ecx" );
   37.79 +
   37.80 +    return ret;
   37.81 +}
   37.82 +
   37.83 +static inline int
   37.84 +HYPERVISOR_vcpu_pickle(
   37.85 +    int vcpu, vcpu_guest_context_t *ctxt)
   37.86 +{
   37.87 +    int ret;
   37.88 +    unsigned long ign1, ign2;
   37.89 +    __asm__ __volatile__ (
   37.90 +        TRAP_INSTR
   37.91 +        : "=a" (ret), "=b" (ign1), "=c" (ign2)
   37.92 +	: "0" (__HYPERVISOR_sched_op),
   37.93 +	  "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
   37.94 +	  "2" (ctxt)
   37.95          : "memory" );
   37.96  
   37.97      return ret;
   37.98  }
   37.99 +
  37.100  #endif /* __HYPERCALL_H__ */
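
The recurring change in this header is the addition of "ecx" (and, for vcpu_down, "edx") to the clobber lists. A pickled vcpu resumes at the instruction after TRAP_INSTR with its call-clobbered registers trashed, so gcc must not be allowed to keep a live value in %ecx across the trap. The shape of one corrected wrapper, copied from the hunks above for reference:

    static inline int
    HYPERVISOR_yield(void)
    {
        int ret;
        unsigned long ign;
        __asm__ __volatile__ (
            TRAP_INSTR                      /* int $0x82 in this tree */
            : "=a" (ret), "=b" (ign)
            : "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
            : "memory", "ecx" );            /* %ecx may not survive the trap */
        return ret;
    }
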
    46.1 --- a/xen/arch/x86/domain.c	Fri Aug 19 15:21:12 2005 +0000
    46.2 +++ b/xen/arch/x86/domain.c	Fri Aug 19 15:22:05 2005 +0000
    46.3 @@ -217,8 +217,16 @@ struct vcpu *arch_alloc_vcpu_struct(void
    46.4      return xmalloc(struct vcpu);
    46.5  }
    46.6  
     46.7 +/* We assume that vcpu 0 is always the last one to be freed in a
     46.8 +   domain, i.e. by the time the vcpu with vcpu_id == 0 is freed, it
     46.9 +   should be the only vcpu left in the domain. */
   46.10  void arch_free_vcpu_struct(struct vcpu *v)
   46.11  {
   46.12 +    struct vcpu *p;
   46.13 +    for_each_vcpu(v->domain, p) {
   46.14 +        if (p->next_in_list == v)
   46.15 +            p->next_in_list = v->next_in_list;
   46.16 +    }
   46.17      xfree(v);
   46.18  }
   46.19  
   46.20 @@ -403,7 +411,7 @@ int arch_set_info_guest(
   46.21      {
   46.22          if ( ((c->user_regs.cs & 3) == 0) ||
   46.23               ((c->user_regs.ss & 3) == 0) )
   46.24 -                return -EINVAL;
   46.25 +            return -EINVAL;
   46.26      }
   46.27  
   46.28      clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
   46.29 @@ -457,7 +465,7 @@ int arch_set_info_guest(
   46.30          if ( !(c->flags & VGCF_VMX_GUEST) )
   46.31  #endif
   46.32              if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, 
   46.33 -                  PGT_base_page_table) )
   46.34 +                                    PGT_base_page_table) )
   46.35                  return -EINVAL;
   46.36      }
   46.37  
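
Because next_in_list forms a singly linked list headed at vcpu 0, arch_free_vcpu_struct() has to walk the domain's vcpus to splice out the predecessor's link; the matching schedule.c hunk (below) frees vcpus in descending order so the head is detached last. The unlink, annotated:

    /* Splice v out of the domain's singly-linked vcpu list before
     * freeing it; for_each_vcpu(d, p) walks the next_in_list chain
     * starting at d->vcpu[0]. */
    struct vcpu *p;
    for_each_vcpu(v->domain, p) {
            if (p->next_in_list == v)
                    p->next_in_list = v->next_in_list;
    }
    xfree(v);
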
    47.1 --- a/xen/arch/x86/mm.c	Fri Aug 19 15:21:12 2005 +0000
    47.2 +++ b/xen/arch/x86/mm.c	Fri Aug 19 15:22:05 2005 +0000
    47.3 @@ -2633,14 +2633,16 @@ long set_gdt(struct vcpu *v,
    47.4  
    47.5      if ( entries > FIRST_RESERVED_GDT_ENTRY )
    47.6          return -EINVAL;
    47.7 -    
    47.8 +
    47.9      shadow_sync_all(d);
   47.10  
   47.11      /* Check the pages in the new GDT. */
   47.12 -    for ( i = 0; i < nr_pages; i++ )
   47.13 -        if ( ((pfn = frames[i]) >= max_page) ||
   47.14 -             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
   47.15 +    for ( i = 0; i < nr_pages; i++ ) {
   47.16 +        pfn = frames[i];
   47.17 +        if ((pfn >= max_page) ||
   47.18 +            !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
   47.19              goto fail;
   47.20 +    }
   47.21  
   47.22      /* Tear down the old GDT. */
   47.23      destroy_gdt(v);
    54.1 --- a/xen/common/event_channel.c	Fri Aug 19 15:21:12 2005 +0000
    54.2 +++ b/xen/common/event_channel.c	Fri Aug 19 15:22:05 2005 +0000
    54.3 @@ -588,7 +588,6 @@ static long evtchn_bind_vcpu(evtchn_bind
    54.4      long           rc = 0;
    54.5  
    54.6      if ( (vcpu >= MAX_VIRT_CPUS) || (d->vcpu[vcpu] == NULL) ) {
    54.7 -        printf("vcpu %d bad.\n", vcpu);
    54.8          return -EINVAL;
    54.9      }
   54.10  
   54.11 @@ -596,7 +595,6 @@ static long evtchn_bind_vcpu(evtchn_bind
   54.12  
   54.13      if ( !port_is_valid(d, port) )
   54.14      {
   54.15 -        printf("port %d bad.\n", port);
   54.16          rc = -EINVAL;
   54.17          goto out;
   54.18      }
   54.19 @@ -610,7 +608,6 @@ static long evtchn_bind_vcpu(evtchn_bind
   54.20          chn->notify_vcpu_id = vcpu;
   54.21          break;
   54.22      default:
   54.23 -        printf("evtchn type %d can't be rebound.\n", chn->state);
   54.24          rc = -EINVAL;
   54.25          break;
   54.26      }
    55.1 --- a/xen/common/schedule.c	Fri Aug 19 15:21:12 2005 +0000
    55.2 +++ b/xen/common/schedule.c	Fri Aug 19 15:22:05 2005 +0000
    55.3 @@ -38,6 +38,8 @@
    55.4  #include <xen/mm.h>
    55.5  #include <public/sched_ctl.h>
    55.6  
    55.7 +extern void arch_getdomaininfo_ctxt(struct vcpu *,
    55.8 +                                    struct vcpu_guest_context *);
    55.9  /* opt_sched: scheduler - default to SEDF */
   55.10  static char opt_sched[10] = "sedf";
   55.11  string_param("sched", opt_sched);
   55.12 @@ -82,7 +84,8 @@ void free_domain_struct(struct domain *d
   55.13      int i;
   55.14  
   55.15      SCHED_OP(free_task, d);
   55.16 -    for (i = 0; i < MAX_VIRT_CPUS; i++)
   55.17 +    /* vcpu 0 has to be the last one destroyed. */
   55.18 +    for (i = MAX_VIRT_CPUS-1; i >= 0; i--)
   55.19          if ( d->vcpu[i] )
   55.20              arch_free_vcpu_struct(d->vcpu[i]);
   55.21  
   55.22 @@ -295,10 +298,36 @@ static long do_vcpu_up(int vcpu)
   55.23      return 0;
   55.24  }
   55.25  
   55.26 +static long do_vcpu_pickle(int vcpu, unsigned long arg)
   55.27 +{
   55.28 +    struct vcpu *v;
   55.29 +    vcpu_guest_context_t *c;
   55.30 +    int ret = 0;
   55.31 +
   55.32 +    if (vcpu >= MAX_VIRT_CPUS)
   55.33 +        return -EINVAL;
   55.34 +    v = current->domain->vcpu[vcpu];
   55.35 +    if (!v)
   55.36 +        return -ESRCH;
   55.37 +    /* Don't pickle vcpus which are currently running */
   55.38 +    if (!test_bit(_VCPUF_down, &v->vcpu_flags)) {
   55.39 +        return -EBUSY;
   55.40 +    }
   55.41 +    c = xmalloc(vcpu_guest_context_t);
   55.42 +    if (!c)
   55.43 +        return -ENOMEM;
   55.44 +    arch_getdomaininfo_ctxt(v, c);
   55.45 +    if (copy_to_user((vcpu_guest_context_t *)arg,
   55.46 +                     (const vcpu_guest_context_t *)c, sizeof(*c)))
   55.47 +        ret = -EFAULT;
   55.48 +    xfree(c);
   55.49 +    return ret;
   55.50 +}
   55.51 +
   55.52  /*
   55.53   * Demultiplex scheduler-related hypercalls.
   55.54   */
   55.55 -long do_sched_op(unsigned long op)
   55.56 +long do_sched_op(unsigned long op, unsigned long arg)
   55.57  {
   55.58      long ret = 0;
   55.59  
   55.60 @@ -334,6 +363,11 @@ long do_sched_op(unsigned long op)
   55.61          ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift));
   55.62          break;
   55.63      }
   55.64 +    case SCHEDOP_vcpu_pickle:
   55.65 +    {
   55.66 +        ret = do_vcpu_pickle((int)(op >> SCHEDOP_vcpushift), arg);
   55.67 +        break;
   55.68 +    }
   55.69  
   55.70      default:
   55.71          ret = -ENOSYS;
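
From the guest this is reached through the HYPERVISOR_vcpu_pickle() wrapper added to hypercall.h (above); note that, unlike the other SCHEDOP_ subcommands, it also consumes do_sched_op()'s new second argument as the destination buffer. A usage sketch matching reboot.c's save_vcpu_context():

    /* cpu 0 pickling an already-down secondary vcpu.  Xen returns
     * -EBUSY if the target has not yet marked itself down. */
    static vcpu_guest_context_t ctxt;

    int r = HYPERVISOR_vcpu_pickle(cpu, &ctxt);
    if (r != 0)
            panic("pickling vcpu %d -> %d!\n", cpu, r);
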
    60.1 --- a/xen/include/public/xen.h	Fri Aug 19 15:21:12 2005 +0000
    60.2 +++ b/xen/include/public/xen.h	Fri Aug 19 15:22:05 2005 +0000
    60.3 @@ -203,6 +203,7 @@ struct mmuext_op {
    60.4  #define SCHEDOP_shutdown        2   /* Stop executing this domain.        */
    60.5  #define SCHEDOP_vcpu_down       3   /* make target VCPU not-runnable.     */
    60.6  #define SCHEDOP_vcpu_up         4   /* make target VCPU runnable.         */
    60.7 +#define SCHEDOP_vcpu_pickle     5   /* save a vcpu's context to memory.   */
    60.8  #define SCHEDOP_cmdmask       255   /* 8-bit command. */
    60.9  #define SCHEDOP_reasonshift     8   /* 8-bit reason code. (SCHEDOP_shutdown) */
   60.10  #define SCHEDOP_vcpushift       8   /* 8-bit VCPU target. (SCHEDOP_up|down) */
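
All SCHEDOP_* subcommands are multiplexed through the single op word of do_sched_op(): the low 8 bits select the command and the next 8 bits carry either a shutdown reason or a target vcpu. How the new subcommand is packed and unpacked, following the masks above:

    /* Guest side: pack the command and target vcpu into one word. */
    unsigned long op = SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift);

    /* Xen side (do_sched_op): unpack them again. */
    int cmd    = op & SCHEDOP_cmdmask;            /* SCHEDOP_vcpu_pickle */
    int target = (int)(op >> SCHEDOP_vcpushift);  /* which vcpu to pickle */
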