ia64/xen-unstable

changeset 6283:7f9b024a509e

Actually make suspending SMP domUs work: the previous commit didn't
bring the other vcpus up correctly.

Signed-off-by: Steven Smith <sos22@cam.ac.uk>
author sos22@douglas.cl.cam.ac.uk
date Thu Aug 18 15:27:55 2005 +0000 (2005-08-18)
parents 6e6cedc1763d
children 5a7efe0cf5fb
files linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c linux-2.6-xen-sparse/arch/xen/kernel/reboot.c linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h xen/arch/x86/domain.c xen/arch/x86/mm.c xen/arch/x86/time.c xen/common/domain.c xen/common/event_channel.c xen/common/schedule.c xen/include/public/xen.h
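In outline: before the suspend hypercall, each secondary vcpu's register state is pickled into memory with the new SCHEDOP_vcpu_pickle operation; after resume, each saved context is patched up (a return through _restore_vcpu is planted on the stack, and cr3/GDT frames are translated between machine and pseudo-physical addresses) and the vcpu is rebooted. A minimal sketch of the two sides, using the real helpers from reboot.c below; the wrapper names are hypothetical:

    /* Hypothetical wrappers around the per-vcpu helpers added in
       reboot.c; error handling elided. */
    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];

    static void pickle_secondary_vcpus(void)
    {
        int i;
        for (i = 0; i < NR_CPUS; i++)
            if (cpu_isset(i, feasible_cpus))   /* cpus taken down for suspend */
                save_vcpu_context(i, &suspended_cpu_records[i]);
    }

    static void reboot_secondary_vcpus(void)
    {
        int i;
        for (i = 0; i < NR_CPUS; i++)
            if (cpu_isset(i, feasible_cpus))
                restore_vcpu_context(i, &suspended_cpu_records[i]);
    }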
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c	Wed Aug 17 14:37:22 2005 +0000
     1.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c	Thu Aug 18 15:27:55 2005 +0000
     1.3 @@ -115,20 +115,12 @@ void xen_idle(void)
     1.4  /* We don't actually take CPU down, just spin without interrupts. */
     1.5  static inline void play_dead(void)
     1.6  {
     1.7 -	/* Ack it */
     1.8 -	__get_cpu_var(cpu_state) = CPU_DEAD;
     1.9 -
    1.10 -	/* We shouldn't have to disable interrupts while dead, but
    1.11 -	 * some interrupts just don't seem to go away, and this makes
    1.12 -	 * it "work" for testing purposes. */
    1.13  	/* Death loop */
    1.14  	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
    1.15  		HYPERVISOR_yield();
    1.16  
    1.17 -	local_irq_disable();
    1.18  	__flush_tlb_all();
    1.19  	cpu_set(smp_processor_id(), cpu_online_map);
    1.20 -	local_irq_enable();
    1.21  }
    1.22  #else
    1.23  static inline void play_dead(void)
    1.24 @@ -156,12 +148,23 @@ void cpu_idle (void)
    1.25  			rmb();
    1.26  
    1.27  			if (cpu_is_offline(cpu)) {
    1.28 +				printk("<0>Cpu %d going offline.\n",
    1.29 +				       cpu);
    1.30 +				local_irq_disable();
    1.31 +				/* Ack it.  From this point on until
    1.32 +				   we get woken up, we're not allowed
    1.33 +				   to take any locks.  In particular,
    1.34 +				   don't printk. */
    1.35 +				__get_cpu_var(cpu_state) = CPU_DEAD;
    1.36  #if defined(CONFIG_XEN) && defined(CONFIG_HOTPLUG_CPU)
    1.37  				/* Tell hypervisor to take vcpu down. */
    1.38  				HYPERVISOR_vcpu_down(cpu);
    1.39  #endif
    1.40  				play_dead();
    1.41 -         }
    1.42 +				local_irq_enable();
    1.43 +				printk("<0>Cpu %d back online.\n",
    1.44 +				       cpu);
    1.45 +			}
    1.46  
    1.47  			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
    1.48  			xen_idle();
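The reshuffle above encodes an ordering contract: the CPU_DEAD ack now happens with interrupts disabled, and from that store until play_dead() sees CPU_UP_PREPARE again the vcpu can be torn down at any instant, so it must not take any lock (printk included). A condensed sketch of the handshake, assuming cpu_state is set to CPU_UP_PREPARE by whichever cpu brings this one back:

    local_irq_disable();
    __get_cpu_var(cpu_state) = CPU_DEAD;    /* ack: lock-free from here on */
    HYPERVISOR_vcpu_down(cpu);              /* ask Xen to take the vcpu down */
    while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
        HYPERVISOR_yield();                 /* the "death loop" in play_dead() */
    __flush_tlb_all();
    cpu_set(smp_processor_id(), cpu_online_map);
    local_irq_enable();                     /* locks (and printk) are safe again */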
     2.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c	Wed Aug 17 14:37:22 2005 +0000
     2.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c	Thu Aug 18 15:27:55 2005 +0000
     2.3 @@ -129,9 +129,12 @@ static inline int __prepare_ICR2 (unsign
     2.4  
     2.5  DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
     2.6  
     2.7 +unsigned uber_debug;
     2.8 +
     2.9  static inline void __send_IPI_one(unsigned int cpu, int vector)
    2.10  {
    2.11  	unsigned int evtchn;
    2.12 +	int r;
    2.13  
    2.14  	evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
    2.15  	// printk("send_IPI_mask_bitmask cpu %d vector %d evtchn %d\n", cpu, vector, evtchn);
    2.16 @@ -142,7 +145,11 @@ static inline void __send_IPI_one(unsign
    2.17  		       synch_test_bit(evtchn, &s->evtchn_mask[0]))
    2.18  			;
    2.19  #endif
    2.20 -		notify_via_evtchn(evtchn);
    2.21 +		if (uber_debug)
    2.22 +			printk("<0>Notifying on evtchn %d.\n", evtchn);
    2.23 +		if ((r = notify_via_evtchn(evtchn)) != 0)
    2.24 +			printk("<0>Hypervisor stopped us sending an IPI: %d.\n",
    2.25 +			       r);
    2.26  	} else
    2.27  		printk("send_IPI to unbound port %d/%d",
    2.28  		       cpu, vector);
    2.29 @@ -161,6 +168,8 @@ void __send_IPI_shortcut(unsigned int sh
    2.30  			if (cpu == smp_processor_id())
    2.31  				continue;
    2.32  			if (cpu_isset(cpu, cpu_online_map)) {
    2.33 +				if (uber_debug)
    2.34 +					printk("<0>Sending ipi to %d.\n", cpu);
    2.35  				__send_IPI_one(cpu, vector);
    2.36  			}
    2.37  		}
     3.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c	Wed Aug 17 14:37:22 2005 +0000
     3.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c	Thu Aug 18 15:27:55 2005 +0000
     3.3 @@ -1616,3 +1616,19 @@ void smp_resume(void)
     3.4  	smp_intr_init();
     3.5  	local_setup_timer_irq();
     3.6  }
     3.7 +
     3.8 +DECLARE_PER_CPU(int, timer_irq);
     3.9 +
    3.10 +void _restore_vcpu(void)
    3.11 +{
    3.12 +	int cpu = smp_processor_id();
    3.13 +	/* We are the first thing the vcpu runs when it comes back,
    3.14 +	   and we are supposed to restore the IPIs and timer
    3.15 +	   interrupts etc.  When we return, the vcpu's idle loop will
    3.16 +	   start up again. */
    3.17 +	printk("<0>_restore_vcpu %d.\n", cpu);
    3.18 +	_bind_virq_to_irq(VIRQ_TIMER, cpu, per_cpu(timer_irq, cpu));
    3.19 +	_bind_virq_to_irq(VIRQ_DEBUG, cpu, per_cpu(ldebug_irq, cpu));
    3.20 +	_bind_ipi_to_irq(RESCHEDULE_VECTOR, cpu, per_cpu(resched_irq, cpu) );
    3.21 +	_bind_ipi_to_irq(CALL_FUNCTION_VECTOR, cpu, per_cpu(callfunc_irq, cpu) );
    3.22 +}
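_restore_vcpu() is entered via a synthesized return address (see restore_vcpu_context() in reboot.c below), so "when we return" means returning to the instruction after the HYPERVISOR_vcpu_down() call that the vcpu was pickled inside. The per-CPU irq numbers used here were allocated at boot and survive the save; only the event channels behind them have to be re-created. The effective control flow on a rebooted vcpu, as a sketch:

    /* Sketch, assuming the stack frame planted by restore_vcpu_context():
       1. eip = _restore_vcpu: rebind VIRQ_TIMER, VIRQ_DEBUG and the
          RESCHEDULE/CALL_FUNCTION IPIs to their boot-time irq numbers;
       2. 'ret' pops the pickled eip, landing just after the
          HYPERVISOR_vcpu_down() call in cpu_idle();
       3. play_dead() spins until cpu_state == CPU_UP_PREPARE, then
          re-onlines the cpu and re-enables interrupts. */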
     4.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Wed Aug 17 14:37:22 2005 +0000
     4.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Thu Aug 18 15:27:55 2005 +0000
     4.3 @@ -745,7 +745,7 @@ static void __init hpet_time_init(void)
     4.4  #endif
     4.5  
     4.6  /* Dynamically-mapped IRQ. */
     4.7 -static DEFINE_PER_CPU(int, timer_irq);
     4.8 +DEFINE_PER_CPU(int, timer_irq);
     4.9  
    4.10  static struct irqaction irq_timer = {
    4.11  	timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer0",
     5.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c	Wed Aug 17 14:37:22 2005 +0000
     5.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/fault.c	Thu Aug 18 15:27:55 2005 +0000
     5.3 @@ -512,6 +512,7 @@ no_context:
     5.4  	printk("%08lx\n", regs->eip);
     5.5  	dump_fault_path(address);
     5.6  	die("Oops", regs, error_code);
     5.7 +	while(1);
     5.8  	bust_spinlocks(0);
     5.9  	do_exit(SIGKILL);
    5.10  
     6.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c	Wed Aug 17 14:37:22 2005 +0000
     6.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/evtchn.c	Thu Aug 18 15:27:55 2005 +0000
     6.3 @@ -134,6 +134,8 @@ void force_evtchn_callback(void)
     6.4      (void)HYPERVISOR_xen_version(0);
     6.5  }
     6.6  
     6.7 +extern unsigned uber_debug;
     6.8 +
     6.9  /* NB. Interrupts are disabled on entry. */
    6.10  asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
    6.11  {
    6.12 @@ -145,6 +147,8 @@ asmlinkage void evtchn_do_upcall(struct 
    6.13  
    6.14      vcpu_info->evtchn_upcall_pending = 0;
    6.15      
    6.16 +    if (uber_debug && cpu != 0)
    6.17 +	printk("<0>evtchn_do_upcall on %d.\n", cpu);
    6.18      /* NB. No need for a barrier here -- XCHG is a barrier on x86. */
    6.19      l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
    6.20      while ( l1 != 0 )
    6.21 @@ -158,9 +162,13 @@ asmlinkage void evtchn_do_upcall(struct 
    6.22              l2 &= ~(1 << l2i);
    6.23              
    6.24              port = (l1i << 5) + l2i;
    6.25 -            if ( (irq = evtchn_to_irq[port]) != -1 )
    6.26 +	    if (uber_debug && cpu != 0)
    6.27 +		printk("<0>Port %d.\n", port);
    6.28 +            if ( (irq = evtchn_to_irq[port]) != -1 ) {
    6.29 +		if (uber_debug && cpu != 0)
    6.30 +		    printk("<0>irq %d.\n", irq);
    6.31                  do_IRQ(irq, regs);
    6.32 -            else
    6.33 +	    } else
    6.34                  evtchn_device_upcall(port);
    6.35          }
    6.36      }
    6.37 @@ -245,6 +253,71 @@ void unbind_virq_from_irq(int virq)
    6.38      spin_unlock(&irq_mapping_update_lock);
    6.39  }
    6.40  
     6.41 +/* This is only used when restoring a vcpu from an xm save.  The ipi is
     6.42 +   expected to have been bound before we suspended, and so all of the
     6.43 +   xenolinux state is set up; we only need to restore the Xen side of
     6.44 +   things.  The irq number has to be the same, but the evtchn number
     6.45 +   can change. */
    6.46 +void _bind_ipi_to_irq(int ipi, int vcpu, int irq)
    6.47 +{
    6.48 +    evtchn_op_t op;
    6.49 +    int evtchn;
    6.50 +
    6.51 +    spin_lock(&irq_mapping_update_lock);
    6.52 +
    6.53 +    op.cmd = EVTCHNOP_bind_ipi;
    6.54 +    if ( HYPERVISOR_event_channel_op(&op) != 0 )
    6.55 +	panic("Failed to bind virtual IPI %d on cpu %d\n", ipi, vcpu);
    6.56 +    evtchn = op.u.bind_ipi.port;
    6.57 +
    6.58 +    printk("<0>IPI %d, old evtchn %d, evtchn %d.\n",
    6.59 +	   ipi, per_cpu(ipi_to_evtchn, vcpu)[ipi],
    6.60 +	   evtchn);
    6.61 +
    6.62 +    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
    6.63 +    irq_to_evtchn[irq] = -1;
    6.64 +
    6.65 +    evtchn_to_irq[evtchn] = irq;
    6.66 +    irq_to_evtchn[irq]    = evtchn;
    6.67 +
    6.68 +    per_cpu(ipi_to_evtchn, vcpu)[ipi] = evtchn;
    6.69 +
    6.70 +    bind_evtchn_to_cpu(evtchn, vcpu);
    6.71 +
    6.72 +    spin_unlock(&irq_mapping_update_lock);
    6.73 +
    6.74 +    clear_bit(evtchn, HYPERVISOR_shared_info->evtchn_mask);
    6.75 +}
    6.76 +
    6.77 +void _bind_virq_to_irq(int virq, int cpu, int irq)
    6.78 +{
    6.79 +    evtchn_op_t op;
    6.80 +    int evtchn;
    6.81 +
    6.82 +    spin_lock(&irq_mapping_update_lock);
    6.83 +
    6.84 +    op.cmd              = EVTCHNOP_bind_virq;
    6.85 +    op.u.bind_virq.virq = virq;
    6.86 +    if ( HYPERVISOR_event_channel_op(&op) != 0 )
    6.87 +            panic("Failed to bind virtual IRQ %d\n", virq);
    6.88 +    evtchn = op.u.bind_virq.port;
    6.89 +
    6.91 +    evtchn_to_irq[irq_to_evtchn[irq]] = -1;
    6.92 +    irq_to_evtchn[irq] = -1;
    6.93 +
    6.94 +    evtchn_to_irq[evtchn] = irq;
    6.95 +    irq_to_evtchn[irq]    = evtchn;
    6.96 +
    6.97 +    per_cpu(virq_to_irq, cpu)[virq] = irq;
    6.98 +
    6.99 +    bind_evtchn_to_cpu(evtchn, cpu);
   6.100 +
   6.101 +    spin_unlock(&irq_mapping_update_lock);
   6.102 +
   6.104 +}
   6.105 +
   6.106  int bind_ipi_to_irq(int ipi)
   6.107  {
   6.108      evtchn_op_t op;
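The invariant both new functions preserve is that the Linux-side irq number survives the save/restore while the Xen-side event channel does not, so only the irq<->evtchn tables need fixing up once the new channel is bound. The fixup reduces to this (sketch, names as above):

    int old = irq_to_evtchn[irq];    /* pre-save channel, now stale */
    evtchn_to_irq[old] = -1;         /* retire the stale mapping    */
    irq_to_evtchn[irq] = -1;
    evtchn_to_irq[evtchn] = irq;     /* install the fresh channel   */
    irq_to_evtchn[irq]    = evtchn;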
     7.1 --- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Wed Aug 17 14:37:22 2005 +0000
     7.2 +++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c	Thu Aug 18 15:27:55 2005 +0000
     7.3 @@ -65,10 +65,56 @@ static int shutting_down = SHUTDOWN_INVA
     7.4  #define cpu_up(x) (-EOPNOTSUPP)
     7.5  #endif
     7.6  
     7.7 +static void save_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
     7.8 +{
     7.9 +    int r;
    7.10 +    int gdt_pages;
    7.11 +    r = HYPERVISOR_vcpu_pickle(vcpu, ctxt);
    7.12 +    BUG_ON(r != 0);
    7.13 +    gdt_pages = (ctxt->gdt_ents + 511) / 512;
    7.14 +    ctxt->ctrlreg[3] = machine_to_phys(ctxt->ctrlreg[3]);
    7.15 +    for (r = 0; r < gdt_pages; r++) {
    7.16 +	ctxt->gdt_frames[r] = mfn_to_pfn(ctxt->gdt_frames[r]);
    7.17 +    }
    7.18 +}
    7.19 +
    7.20 +void _restore_vcpu(int cpu);
    7.21 +
    7.22 +static void restore_vcpu_context(int vcpu, vcpu_guest_context_t *ctxt)
    7.23 +{
    7.24 +    int r;
    7.25 +    int gdt_pages = (ctxt->gdt_ents + 511) / 512;
    7.26 +
    7.27 +    /* This is kind of a hack, and implicitly relies on the fact that
    7.28 +       the vcpu stops in a place where all of the call clobbered
    7.29 +       registers are already dead. */
    7.30 +    printk("<0>regs.esp %x.\n", ctxt->user_regs.esp);
    7.31 +    ctxt->user_regs.esp -= 4;
    7.32 +    ((unsigned long *)ctxt->user_regs.esp)[0] = ctxt->user_regs.eip;
    7.33 +    ctxt->user_regs.eip = (unsigned long)_restore_vcpu;
    7.34 +
    7.35 +    ctxt->ctrlreg[3] = phys_to_machine(ctxt->ctrlreg[3]);
    7.36 +    for (r = 0; r < gdt_pages; r++) {
    7.37 +	ctxt->gdt_frames[r] = pfn_to_mfn(ctxt->gdt_frames[r]);
    7.38 +    }
    7.39 +    r = HYPERVISOR_boot_vcpu(vcpu, ctxt);
    7.40 +    if (r != 0) {
    7.41 +	printk(KERN_EMERG "Failed to reboot vcpu %d (%d)\n", vcpu, r);
    7.42 +	return;
    7.43 +    }
    7.44 +}
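The esp/eip surgery is the heart of the trick: the pickled vcpu stopped inside its HYPERVISOR_vcpu_down() hypercall, a point where (per the comment) the call-clobbered registers are dead, so it is safe to synthesize one extra stack slot. Spelled out with comments:

    ctxt->user_regs.esp -= 4;                           /* grow the stack one slot */
    *(unsigned long *)ctxt->user_regs.esp =
        ctxt->user_regs.eip;                            /* plant the old eip as a
                                                           return address */
    ctxt->user_regs.eip = (unsigned long)_restore_vcpu; /* boot into the fixup */
    /* _restore_vcpu() (smpboot.c above) rebinds the timer VIRQ and the
       IPIs; its 'ret' then pops the planted eip, and the vcpu resumes
       as if vcpu_down had just returned. */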
    7.45 +
    7.46 +/* Whoever decided that printk should call into the scheduler needs to
    7.47 +   be taken out and shot */
    7.48 +#define msg(x) HYPERVISOR_console_io(CONSOLEIO_write, sizeof(x), x)
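msg() bypasses printk because printk can call into the scheduler (e.g. to wake the log daemon) at points in this path where the other vcpus are frozen, which would deadlock. Note it only works for string literals: sizeof(x) is the size of the literal, NUL included, so passing a char * would send sizeof(char *) bytes. For example:

    msg("Suspending...\n");
    /* expands to (the trailing NUL is included in the count): */
    HYPERVISOR_console_io(CONSOLEIO_write, sizeof("Suspending...\n"),
                          "Suspending...\n");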
    7.49 +
    7.50 +extern unsigned uber_debug;
    7.51 +
    7.52  static int __do_suspend(void *ignore)
    7.53  {
    7.54      int i, j;
    7.55      suspend_record_t *suspend_record;
    7.56 +    static vcpu_guest_context_t suspended_cpu_records[NR_CPUS];
    7.57  
    7.58      /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
    7.59  	/* XXX SMH: yes it would :-( */	
    7.60 @@ -158,6 +204,10 @@ static int __do_suspend(void *ignore)
    7.61  
    7.62      __cli();
    7.63  
    7.64 +    for (i = 0; i < NR_CPUS; i++)
    7.65 +	if (cpu_isset(i, feasible_cpus))
    7.66 +	    save_vcpu_context(i, &suspended_cpu_records[i]);
    7.67 +
    7.68  #ifdef __i386__
    7.69      mm_pin_all();
    7.70      kmem_cache_shrink(pgd_cache);
    7.71 @@ -173,7 +223,9 @@ static int __do_suspend(void *ignore)
    7.72      smp_suspend();
    7.73  #endif
    7.74  
    7.75 +    msg("xenbus going down.\n");
    7.76      xenbus_suspend();
    7.77 +    msg("xenbus gone down.\n");
    7.78  
    7.79      ctrl_if_suspend();
    7.80  
    7.81 @@ -187,10 +239,11 @@ static int __do_suspend(void *ignore)
    7.82      memcpy(&suspend_record->resume_info, &xen_start_info,
    7.83             sizeof(xen_start_info));
    7.84  
    7.85 +    msg("Suspending...\n");
    7.86      /* We'll stop somewhere inside this hypercall.  When it returns,
    7.87         we'll start resuming after the restore. */
    7.88      HYPERVISOR_suspend(virt_to_machine(suspend_record) >> PAGE_SHIFT);
    7.89 -
    7.90 +    msg("Back from suspension\n");
    7.91  
    7.92      shutting_down = SHUTDOWN_INVALID; 
    7.93  
    7.94 @@ -217,7 +270,9 @@ static int __do_suspend(void *ignore)
    7.95  
    7.96      ctrl_if_resume();
    7.97  
    7.98 +    msg("Here comes the xenbus...\n");
    7.99      xenbus_resume();
   7.100 +    msg("xenbus resumed.\n");
   7.101  
   7.102  #ifdef CONFIG_SMP
   7.103      smp_resume();
   7.104 @@ -231,21 +286,32 @@ static int __do_suspend(void *ignore)
   7.105  
   7.106      usbif_resume();
   7.107  
   7.108 -    preempt_enable();
   7.109 +    msg("Restoring cpu contexts...\n");
   7.110 +    for (i = 0; i < NR_CPUS; i++)
   7.111 +	if (cpu_isset(i, feasible_cpus))
   7.112 +	    restore_vcpu_context(i, &suspended_cpu_records[i]);
   7.113 +    msg("All vcpus rebooted.\n");
   7.114  
   7.115      __sti();
   7.116  
   7.117   out_reenable_cpus:
   7.118 +    msg("Reenabling cpus.\n");
   7.119      while (!cpus_empty(feasible_cpus)) {
   7.120  	i = first_cpu(feasible_cpus);
   7.121 +	printk("<0>Bring up %d/%d.\n", i, num_online_cpus());
   7.122 +	printk("<0>17 preempt_count %x.\n", preempt_count());
   7.123  	j = cpu_up(i);
   7.124 +	printk("<0>18 preempt_count %x.\n", preempt_count());
   7.125  	if (j != 0) {
   7.126  	    printk(KERN_CRIT "Failed to bring cpu %d back up (%d).\n",
   7.127  		   i, j);
   7.128  	    err = j;
   7.129  	}
   7.130 +	printk("<0>%d up.\n", i);
   7.131  	cpu_clear(i, feasible_cpus);
   7.132      }
   7.133 +    msg("Reenabled cpus.\n");
   7.134 +    uber_debug = 0;
   7.135  
   7.136   out:
   7.137      if ( suspend_record != NULL )
     8.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Wed Aug 17 14:37:22 2005 +0000
     8.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Thu Aug 18 15:27:55 2005 +0000
     8.3 @@ -163,7 +163,7 @@ HYPERVISOR_yield(
     8.4          TRAP_INSTR
     8.5          : "=a" (ret), "=b" (ign)
     8.6  	: "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_yield)
     8.7 -	: "memory" );
     8.8 +	: "memory", "ecx" );
     8.9  
    8.10      return ret;
    8.11  }
    8.12 @@ -178,7 +178,7 @@ HYPERVISOR_block(
    8.13          TRAP_INSTR
    8.14          : "=a" (ret), "=b" (ign1)
    8.15  	: "0" (__HYPERVISOR_sched_op), "1" (SCHEDOP_block)
    8.16 -	: "memory" );
    8.17 +	: "memory", "ecx" );
    8.18  
    8.19      return ret;
    8.20  }
    8.21 @@ -194,7 +194,7 @@ HYPERVISOR_shutdown(
    8.22          : "=a" (ret), "=b" (ign1)
    8.23  	: "0" (__HYPERVISOR_sched_op),
    8.24  	  "1" (SCHEDOP_shutdown | (SHUTDOWN_poweroff << SCHEDOP_reasonshift))
    8.25 -        : "memory" );
    8.26 +        : "memory", "ecx" );
    8.27  
    8.28      return ret;
    8.29  }
    8.30 @@ -210,7 +210,7 @@ HYPERVISOR_reboot(
    8.31          : "=a" (ret), "=b" (ign1)
    8.32  	: "0" (__HYPERVISOR_sched_op),
    8.33  	  "1" (SCHEDOP_shutdown | (SHUTDOWN_reboot << SCHEDOP_reasonshift))
    8.34 -        : "memory" );
    8.35 +        : "memory", "ecx" );
    8.36  
    8.37      return ret;
    8.38  }
    8.39 @@ -228,7 +228,7 @@ HYPERVISOR_suspend(
    8.40          : "=a" (ret), "=b" (ign1), "=S" (ign2)
    8.41  	: "0" (__HYPERVISOR_sched_op),
    8.42          "b" (SCHEDOP_shutdown | (SHUTDOWN_suspend << SCHEDOP_reasonshift)), 
    8.43 -        "S" (srec) : "memory");
    8.44 +        "S" (srec) : "memory", "ecx");
    8.45  
    8.46      return ret;
    8.47  }
    8.48 @@ -244,7 +244,7 @@ HYPERVISOR_crash(
    8.49          : "=a" (ret), "=b" (ign1)
    8.50  	: "0" (__HYPERVISOR_sched_op),
    8.51  	  "1" (SCHEDOP_shutdown | (SHUTDOWN_crash << SCHEDOP_reasonshift))
    8.52 -        : "memory" );
    8.53 +        : "memory", "ecx" );
    8.54  
    8.55      return ret;
    8.56  }
    8.57 @@ -534,7 +534,7 @@ HYPERVISOR_vcpu_down(
    8.58          : "=a" (ret), "=b" (ign1)
    8.59  	: "0" (__HYPERVISOR_sched_op),
    8.60  	  "1" (SCHEDOP_vcpu_down | (vcpu << SCHEDOP_vcpushift))
    8.61 -        : "memory" );
    8.62 +        : "memory", "ecx", "edx" );
    8.63  
    8.64      return ret;
    8.65  }
    8.66 @@ -550,8 +550,26 @@ HYPERVISOR_vcpu_up(
    8.67          : "=a" (ret), "=b" (ign1)
    8.68  	: "0" (__HYPERVISOR_sched_op),
    8.69  	  "1" (SCHEDOP_vcpu_up | (vcpu << SCHEDOP_vcpushift))
    8.70 +        : "memory", "ecx" );
    8.71 +
    8.72 +    return ret;
    8.73 +}
    8.74 +
    8.75 +static inline int
    8.76 +HYPERVISOR_vcpu_pickle(
    8.77 +    int vcpu, vcpu_guest_context_t *ctxt)
    8.78 +{
    8.79 +    int ret;
    8.80 +    unsigned long ign1, ign2;
    8.81 +    __asm__ __volatile__ (
    8.82 +        TRAP_INSTR
    8.83 +        : "=a" (ret), "=b" (ign1), "=c" (ign2)
    8.84 +	: "0" (__HYPERVISOR_sched_op),
    8.85 +	  "1" (SCHEDOP_vcpu_pickle | (vcpu << SCHEDOP_vcpushift)),
    8.86 +	  "2" (ctxt)
    8.87          : "memory" );
    8.88  
    8.89      return ret;
    8.90  }
    8.91 +
    8.92  #endif /* __HYPERCALL_H__ */
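Two things happen in this header. First, do_sched_op() now takes a second argument (schedule.c below), so the hypervisor may clobber the register carrying it; the added "ecx" (and, for vcpu_down, "edx") clobbers tell gcc those registers do not survive the trap. Second, HYPERVISOR_vcpu_pickle passes the destination buffer in ecx. A hypothetical caller, mirroring save_vcpu_context() in reboot.c:

    vcpu_guest_context_t ctxt;
    int rc;

    rc = HYPERVISOR_vcpu_pickle(cpu, &ctxt);
    if (rc != 0)    /* e.g. -EBUSY if the vcpu is still runnable,
                       -ESRCH if it does not exist */
        BUG();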
     9.1 --- a/xen/arch/x86/domain.c	Wed Aug 17 14:37:22 2005 +0000
     9.2 +++ b/xen/arch/x86/domain.c	Thu Aug 18 15:27:55 2005 +0000
     9.3 @@ -217,8 +217,16 @@ struct vcpu *arch_alloc_vcpu_struct(void
     9.4      return xmalloc(struct vcpu);
     9.5  }
     9.6  
     9.7 +/* We assume that vcpu 0 is always the last one to be freed in a
     9.8 +   domain, i.e. if v->vcpu_id == 0, the domain should be
     9.9 +   single-processor. */
    9.10  void arch_free_vcpu_struct(struct vcpu *v)
    9.11  {
    9.12 +    struct vcpu *p;
    9.13 +    for_each_vcpu(v->domain, p) {
    9.14 +        if (p->next_in_list == v)
    9.15 +            p->next_in_list = v->next_in_list;
    9.16 +    }
    9.17      xfree(v);
    9.18  }
    9.19  
    9.20 @@ -402,8 +410,10 @@ int arch_set_info_guest(
    9.21      if ( !(c->flags & VGCF_VMX_GUEST) )
    9.22      {
    9.23          if ( ((c->user_regs.cs & 3) == 0) ||
    9.24 -             ((c->user_regs.ss & 3) == 0) )
    9.25 -                return -EINVAL;
    9.26 +             ((c->user_regs.ss & 3) == 0) ) {
    9.27 +            printf("User regs.cs %x, ss %x.\n", c->user_regs.cs, c->user_regs.ss);
    9.28 +            return -EINVAL;
    9.29 +        }
    9.30      }
    9.31  
    9.32      clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
    9.33 @@ -448,8 +458,10 @@ int arch_set_info_guest(
    9.34  
    9.35      if ( shadow_mode_refcounts(d) )
    9.36      {
    9.37 -        if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
    9.38 +        if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) ) {
    9.39 +            printf("Bad phys_basetab %lx.\n", phys_basetab);
    9.40              return -EINVAL;
    9.41 +        }
    9.42      }
    9.43      else
    9.44      {
    9.45 @@ -457,13 +469,16 @@ int arch_set_info_guest(
    9.46          if ( !(c->flags & VGCF_VMX_GUEST) )
    9.47  #endif
    9.48              if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, 
    9.49 -                  PGT_base_page_table) )
    9.50 +                                    PGT_base_page_table) ) {
    9.51 +                printf("Bad phys_basetab2 %lx.\n", phys_basetab);
    9.52                  return -EINVAL;
    9.53 +            }
    9.54      }
    9.55  
    9.56      if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
    9.57      {
    9.58          put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
    9.59 +        printf("Failed to set gdt, %d.\n", rc);
    9.60          return rc;
    9.61      }
    9.62  
    9.63 @@ -485,6 +500,8 @@ int arch_set_info_guest(
    9.64      /* Don't redo final setup */
    9.65      set_bit(_VCPUF_initialised, &v->vcpu_flags);
    9.66  
    9.67 +    printf("Arch set_info_guest succeeded.\n");
    9.68 +
    9.69      return 0;
    9.70  }
    9.71  
    10.1 --- a/xen/arch/x86/mm.c	Wed Aug 17 14:37:22 2005 +0000
    10.2 +++ b/xen/arch/x86/mm.c	Thu Aug 18 15:27:55 2005 +0000
    10.3 @@ -2631,16 +2631,25 @@ long set_gdt(struct vcpu *v,
    10.4      int i, nr_pages = (entries + 511) / 512;
    10.5      unsigned long pfn;
    10.6  
    10.7 -    if ( entries > FIRST_RESERVED_GDT_ENTRY )
    10.8 +    if ( entries > FIRST_RESERVED_GDT_ENTRY ) {
    10.9 +        printf("Too many entries in gdt (%d).\n", entries);
   10.10          return -EINVAL;
   10.11 +    }
   10.12      
   10.13      shadow_sync_all(d);
   10.14  
   10.15      /* Check the pages in the new GDT. */
   10.16 -    for ( i = 0; i < nr_pages; i++ )
   10.17 -        if ( ((pfn = frames[i]) >= max_page) ||
   10.18 -             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
   10.19 +    for ( i = 0; i < nr_pages; i++ ) {
   10.20 +        pfn = frames[i];
   10.21 +        if (pfn >= max_page) {
   10.22 +            printf("GDT bad as %ld >= %ld.\n", pfn, max_page);
   10.23              goto fail;
   10.24 +        }
   10.25 +        if (!get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) ) {
   10.26 +            printf("Frame %ld looks bad.\n", pfn);
   10.27 +            goto fail;
   10.28 +        }
   10.29 +    }
   10.30  
   10.31      /* Tear down the old GDT. */
   10.32      destroy_gdt(v);
    11.1 --- a/xen/arch/x86/time.c	Wed Aug 17 14:37:22 2005 +0000
    11.2 +++ b/xen/arch/x86/time.c	Thu Aug 18 15:27:55 2005 +0000
    11.3 @@ -831,7 +831,7 @@ static void local_time_calibration(void 
    11.4      tsc_elapsed32 = (u32)tsc_elapsed64;
    11.5  
    11.6      /* tsc_elapsed > stime_elapsed */
    11.7 -    ASSERT(tsc_elapsed32 != 0);
    11.8 +    //    ASSERT(tsc_elapsed32 != 0);
    11.9      while ( tsc_elapsed32 <= stime_elapsed32 )
   11.10      {
   11.11          tsc_elapsed32 <<= 1;
    12.1 --- a/xen/common/domain.c	Wed Aug 17 14:37:22 2005 +0000
    12.2 +++ b/xen/common/domain.c	Thu Aug 18 15:27:55 2005 +0000
    12.3 @@ -178,6 +178,7 @@ void domain_shutdown(u8 reason)
    12.4  {
    12.5      struct domain *d = current->domain;
    12.6      struct vcpu *v;
    12.7 +    int cntr;
    12.8  
    12.9      if ( d->domain_id == 0 )
   12.10      {
   12.11 @@ -208,8 +209,17 @@ void domain_shutdown(u8 reason)
   12.12      }
   12.13  
   12.14      /* Put every vcpu to sleep, but don't wait (avoids inter-vcpu deadlock). */
   12.15 -    for_each_vcpu ( d, v )
   12.16 +    cntr = 0;
   12.17 +    printf("Putting %d to sleep.\n", d->domain_id);
   12.18 +    for_each_vcpu ( d, v ) {
   12.19 +        if (test_bit(_VCPUF_down, &v->vcpu_flags)) {
   12.20 +            printf("vcpu %d is down.\n", v->vcpu_id);
   12.21 +        } else {
   12.22 +            printf("vcpu %d is up.\n", v->vcpu_id);
   12.23 +        }
    12.24 +        cntr++;
    12.25          domain_sleep_nosync(v);
    12.26 +    }
    12.27 +    printf("Put %d vcpus to sleep for domain shutdown.\n", cntr);
    12.28  }
   12.28  
   12.29  
    13.1 --- a/xen/common/event_channel.c	Wed Aug 17 14:37:22 2005 +0000
    13.2 +++ b/xen/common/event_channel.c	Thu Aug 18 15:27:55 2005 +0000
    13.3 @@ -292,6 +292,8 @@ static long evtchn_bind_ipi(evtchn_bind_
    13.4          chn = evtchn_from_port(d, port);
    13.5          chn->state          = ECS_IPI;
    13.6          chn->notify_vcpu_id = current->vcpu_id;
    13.7 +        printf("Bound ipi on vcpu %d to port %d.\n", current->vcpu_id,
    13.8 +               port);
    13.9      }
   13.10  
   13.11      spin_unlock(&d->evtchn_lock);
   13.12 @@ -497,9 +499,24 @@ long evtchn_send(int lport)
   13.13          evtchn_set_pending(rd->vcpu[rchn->notify_vcpu_id], rport);
   13.14          break;
   13.15      case ECS_IPI:
   13.16 +        if (current->domain->domain_id != 0) {
   13.17 +            printf("Set %d pending on %d.\n", lport,
   13.18 +                   lchn->notify_vcpu_id);
   13.19 +            if (lport == 7) {
   13.20 +                struct vcpu *v = ld->vcpu[lchn->notify_vcpu_id];
   13.21 +                struct domain *d = v->domain;
   13.22 +                shared_info_t *s = d->shared_info;
   13.23 +                printf("pending %x, mask %x, pending_sel %x, upcall_pending %x.\n",
   13.24 +                       s->evtchn_pending[0],
   13.25 +                       s->evtchn_mask[0],
   13.26 +                       v->vcpu_info->evtchn_pending_sel,
   13.27 +                       v->vcpu_info->evtchn_upcall_pending);
   13.28 +            }
   13.29 +        }
   13.30          evtchn_set_pending(ld->vcpu[lchn->notify_vcpu_id], lport);
   13.31          break;
   13.32      default:
   13.33 +        printf("Failed to set %d pending: state %d.\n", lport, lchn->state);
   13.34          ret = -EINVAL;
   13.35      }
   13.36  
    14.1 --- a/xen/common/schedule.c	Wed Aug 17 14:37:22 2005 +0000
    14.2 +++ b/xen/common/schedule.c	Thu Aug 18 15:27:55 2005 +0000
    14.3 @@ -38,6 +38,8 @@
    14.4  #include <xen/mm.h>
    14.5  #include <public/sched_ctl.h>
    14.6  
    14.7 +extern void arch_getdomaininfo_ctxt(struct vcpu *,
    14.8 +                                    struct vcpu_guest_context *);
    14.9  /* opt_sched: scheduler - default to SEDF */
   14.10  static char opt_sched[10] = "sedf";
   14.11  string_param("sched", opt_sched);
   14.12 @@ -82,7 +84,8 @@ void free_domain_struct(struct domain *d
   14.13      int i;
   14.14  
   14.15      SCHED_OP(free_task, d);
   14.16 -    for (i = 0; i < MAX_VIRT_CPUS; i++)
   14.17 +    /* vcpu 0 has to be the last one destructed. */
   14.18 +    for (i = MAX_VIRT_CPUS-1; i >= 0; i--)
   14.19          if ( d->vcpu[i] )
   14.20              arch_free_vcpu_struct(d->vcpu[i]);
   14.21  
   14.22 @@ -295,10 +298,37 @@ static long do_vcpu_up(int vcpu)
   14.23      return 0;
   14.24  }
   14.25  
   14.26 +static long do_vcpu_pickle(int vcpu, unsigned long arg)
   14.27 +{
   14.28 +    struct vcpu *v;
   14.29 +    vcpu_guest_context_t *c;
   14.30 +    int ret = 0;
   14.31 +
   14.32 +    if (vcpu >= MAX_VIRT_CPUS)
   14.33 +        return -EINVAL;
   14.34 +    v = current->domain->vcpu[vcpu];
   14.35 +    if (!v)
   14.36 +        return -ESRCH;
   14.37 +    /* Don't pickle vcpus which are currently running */
   14.38 +    if (!test_bit(_VCPUF_down, &v->vcpu_flags)) {
   14.39 +        printf("Pickling a live vcpu?\n");
   14.40 +        return -EBUSY;
   14.41 +    }
   14.42 +    c = xmalloc(vcpu_guest_context_t);
   14.43 +    if (!c)
   14.44 +        return -ENOMEM;
   14.45 +    arch_getdomaininfo_ctxt(v, c);
   14.46 +    if (copy_to_user((vcpu_guest_context_t *)arg,
   14.47 +                     (const vcpu_guest_context_t *)c, sizeof(*c)))
   14.48 +        ret = -EFAULT;
   14.49 +    xfree(c);
   14.50 +    return ret;
   14.51 +}
   14.52 +
   14.53  /*
   14.54   * Demultiplex scheduler-related hypercalls.
   14.55   */
   14.56 -long do_sched_op(unsigned long op)
   14.57 +long do_sched_op(unsigned long op, unsigned long arg)
   14.58  {
   14.59      long ret = 0;
   14.60  
   14.61 @@ -334,6 +364,12 @@ long do_sched_op(unsigned long op)
   14.62          ret = do_vcpu_up((int)(op >> SCHEDOP_vcpushift));
   14.63          break;
   14.64      }
   14.65 +    case SCHEDOP_vcpu_pickle:
   14.66 +    {
   14.67 +        ret = do_vcpu_pickle((int)(op >> SCHEDOP_vcpushift), arg);
   14.68 +        printf("Pickle result %ld.\n", ret);
   14.69 +        break;
   14.70 +    }
   14.71  
   14.72      default:
   14.73          ret = -ENOSYS;
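do_sched_op() keeps the old single-register encoding -- an 8-bit command in the low byte, with the target vcpu (or shutdown reason) shifted up by SCHEDOP_vcpushift -- while the new second argument carries an optional guest pointer, here the buffer that receives the pickled context. Decoding, roughly as the demux above does it:

    unsigned long cmd = op & SCHEDOP_cmdmask;      /* low 8 bits: command  */
    int vcpu = (int)(op >> SCHEDOP_vcpushift);     /* bits 8+: target vcpu */
    if (cmd == SCHEDOP_vcpu_pickle)
        ret = do_vcpu_pickle(vcpu, arg);           /* arg: guest-side
                                                      vcpu_guest_context_t * */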
    15.1 --- a/xen/include/public/xen.h	Wed Aug 17 14:37:22 2005 +0000
    15.2 +++ b/xen/include/public/xen.h	Thu Aug 18 15:27:55 2005 +0000
    15.3 @@ -203,6 +203,7 @@ struct mmuext_op {
    15.4  #define SCHEDOP_shutdown        2   /* Stop executing this domain.        */
    15.5  #define SCHEDOP_vcpu_down       3   /* make target VCPU not-runnable.     */
    15.6  #define SCHEDOP_vcpu_up         4   /* make target VCPU runnable.         */
    15.7 +#define SCHEDOP_vcpu_pickle     5   /* save a vcpu's context to memory.   */
    15.8  #define SCHEDOP_cmdmask       255   /* 8-bit command. */
    15.9  #define SCHEDOP_reasonshift     8   /* 8-bit reason code. (SCHEDOP_shutdown) */
   15.10  #define SCHEDOP_vcpushift       8   /* 8-bit VCPU target. (SCHEDOP_up|down) */