ia64/xen-unstable

changeset 18477:4ffc70556000

x86: Support CPU hotplug offline.

Signed-off-by: Shan Haitao <haitao.shan@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Sep 11 15:06:22 2008 +0100 (2008-09-11)
parents fba8dca321c2
children 01c8ccb551b0
files xen/arch/x86/irq.c xen/arch/x86/smpboot.c xen/common/sched_credit.c xen/common/schedule.c xen/include/xen/sched.h
line diff
     1.1 --- a/xen/arch/x86/irq.c	Thu Sep 11 11:58:08 2008 +0100
     1.2 +++ b/xen/arch/x86/irq.c	Thu Sep 11 15:06:22 2008 +0100
     1.3 @@ -737,9 +737,12 @@ static int __init setup_dump_irqs(void)
     1.4  
     1.5  void fixup_irqs(cpumask_t map)
     1.6  {
     1.7 -    unsigned int irq;
     1.8 +    unsigned int irq, sp;
     1.9      static int warned;
    1.10 +    irq_guest_action_t *action;
    1.11 +    struct pending_eoi *peoi;
    1.12  
    1.13 +    /* Direct all future interrupts away from this CPU. */
    1.14      for ( irq = 0; irq < NR_IRQS; irq++ )
    1.15      {
    1.16          cpumask_t mask;
    1.17 @@ -758,8 +761,24 @@ void fixup_irqs(cpumask_t map)
    1.18              printk("Cannot set affinity for irq %i\n", irq);
    1.19      }
    1.20  
    1.21 +    /* Service any interrupts that beat us in the re-direction race. */
    1.22      local_irq_enable();
    1.23      mdelay(1);
    1.24      local_irq_disable();
    1.25 +
    1.26 +    /* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
    1.27 +    for ( irq = 0; irq < NR_IRQS; irq++ )
    1.28 +    {
    1.29 +        if ( !(irq_desc[irq].status & IRQ_GUEST) )
    1.30 +            continue;
    1.31 +        action = (irq_guest_action_t *)irq_desc[irq].action;
    1.32 +        cpu_clear(smp_processor_id(), action->cpu_eoi_map);
    1.33 +    }
    1.34 +
    1.35 +    /* Flush the interrupt EOI stack. */
    1.36 +    peoi = this_cpu(pending_eoi);
    1.37 +    for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
    1.38 +        peoi[sp].ready = 1;
    1.39 +    flush_ready_eoi(NULL);
    1.40  }
    1.41  #endif
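
The new tail of fixup_irqs() deals with interrupts this CPU acknowledged but whose guests have not yet EOIed: every slot on the per-CPU pending-EOI stack is forced to the ready state, then the stack is drained. A minimal stand-alone model of that drain pattern (simplified, hypothetical types; the real stack and flush_ready_eoi() live in xen/arch/x86/irq.c and issue real APIC EOIs):

    #include <stdio.h>

    #define PEOI_DEPTH 16

    /* Simplified stand-in for Xen's pending-EOI stack entry. */
    struct pending_eoi {
        unsigned int vector;   /* vector still awaiting a guest EOI */
        int ready;             /* 1 once the EOI may be issued */
    };

    static struct pending_eoi peoi[PEOI_DEPTH];
    static unsigned int peoi_sp;   /* stack pointer; entries below are live */

    /* Pop and "EOI" trailing ready entries, mimicking flush_ready_eoi(). */
    static void flush_ready_eoi(void)
    {
        while ( peoi_sp != 0 && peoi[peoi_sp - 1].ready )
        {
            peoi_sp--;
            printf("EOI vector 0x%x\n", peoi[peoi_sp].vector);
        }
    }

    int main(void)
    {
        unsigned int sp;

        /* Two interrupts were acked but not yet EOIed on this CPU. */
        peoi[peoi_sp++] = (struct pending_eoi){ .vector = 0x31 };
        peoi[peoi_sp++] = (struct pending_eoi){ .vector = 0x42 };

        /* As in the patched fixup_irqs(): mark all ready, then drain. */
        for ( sp = 0; sp < peoi_sp; sp++ )
            peoi[sp].ready = 1;
        flush_ready_eoi();

        return 0;
    }

Because entries are popped only while the top of the stack is ready, forcing every slot ready first guarantees the loop empties the stack, so the dying CPU leaves no EOIs pending behind it.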
     2.1 --- a/xen/arch/x86/smpboot.c	Thu Sep 11 11:58:08 2008 +0100
     2.2 +++ b/xen/arch/x86/smpboot.c	Thu Sep 11 15:06:22 2008 +0100
     2.3 @@ -1225,15 +1225,6 @@ int __cpu_disable(void)
     2.4  	if (cpu == 0)
     2.5  		return -EBUSY;
     2.6  
     2.7 -	/*
     2.8 -	 * Only S3 is using this path, and thus idle vcpus are running on all
     2.9 -	 * APs when we are called. To support full cpu hotplug, other 
    2.10 -	 * notification mechanisms should be introduced (e.g., migrate vcpus
    2.11 -	 * off this physical cpu before rendezvous point).
    2.12 -	 */
    2.13 -	if (!is_idle_vcpu(current))
    2.14 -		return -EINVAL;
    2.15 -
    2.16  	local_irq_disable();
    2.17  	clear_local_APIC();
    2.18  	/* Allow any queued timer interrupts to get serviced */
    2.19 @@ -1249,6 +1240,9 @@ int __cpu_disable(void)
    2.20  	fixup_irqs(map);
    2.21  	/* It's now safe to remove this processor from the online map */
    2.22  	cpu_clear(cpu, cpu_online_map);
    2.23 +
    2.24 +	cpu_disable_scheduler();
    2.25 +
    2.26  	return 0;
    2.27  }
    2.28  
    2.29 @@ -1275,28 +1269,6 @@ static int take_cpu_down(void *unused)
    2.30      return __cpu_disable();
    2.31  }
    2.32  
    2.33 -/* 
    2.34 - * XXX: One important thing missed here is to migrate vcpus
    2.35 - * from dead cpu to other online ones and then put whole
    2.36 - * system into a stop state. It assures a safe environment
    2.37 - * for a cpu hotplug/remove at normal running state.
    2.38 - *
    2.39 - * However for xen PM case, at this point:
    2.40 - * 	-> All other domains should be notified with PM event,
    2.41 - *	   and then in following states:
    2.42 - *		* Suspend state, or
    2.43 - *		* Paused state, which is a force step to all
    2.44 - *		  domains if they do nothing to suspend
    2.45 - *	-> All vcpus of dom0 (except vcpu0) have already beem
    2.46 - *	   hot removed
    2.47 - * with the net effect that all other cpus only have idle vcpu
    2.48 - * running. In this special case, we can avoid vcpu migration
    2.49 - * then and system can be considered in a stop state.
    2.50 - *
    2.51 - * So current cpu hotplug is a special version for PM specific
    2.52 - * usage, and need more effort later for full cpu hotplug.
    2.53 - * (ktian1)
    2.54 - */
    2.55  int cpu_down(unsigned int cpu)
    2.56  {
    2.57  	int err = 0;
    2.58 @@ -1307,6 +1279,12 @@ int cpu_down(unsigned int cpu)
    2.59  		goto out;
    2.60  	}
    2.61  
     2.62 +	/* Cannot offline the BSP. */
    2.63 +	if (cpu == 0) {
    2.64 +		err = -EINVAL;
    2.65 +		goto out;
    2.66 +	}
    2.67 +
    2.68  	if (!cpu_online(cpu)) {
    2.69  		err = -EINVAL;
    2.70  		goto out;
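
With the idle-vcpu restriction removed, __cpu_disable() may now run while guest vcpus are still assigned to the dying CPU: it masks the local APIC, redirects interrupts with fixup_irqs(), drops the CPU from cpu_online_map, and finally calls the new cpu_disable_scheduler() to push remaining vcpus elsewhere. A stand-alone sketch of the resulting cpu_down() checks (stub helpers only; the real code rendezvouses all CPUs via a stop_machine call, and also takes the hotplug lock, runs notifiers, and unwinds errors):

    #include <stdio.h>

    /* Toy online map: CPUs 0-3 online. All helpers are stand-ins. */
    static unsigned int online_map = 0x0f;

    static int cpu_online(unsigned int cpu)
    {
        return (online_map >> cpu) & 1;
    }

    /* Models __cpu_disable() running on the dying CPU. */
    static int take_cpu_down(unsigned int cpu)
    {
        online_map &= ~(1u << cpu);
        /* Real code: fixup_irqs(map); cpu_disable_scheduler(); */
        return 0;
    }

    static int cpu_down(unsigned int cpu)
    {
        if ( cpu == 0 )              /* the BSP can never be offlined */
            return -1;
        if ( !cpu_online(cpu) )      /* already offline */
            return -1;
        return take_cpu_down(cpu);   /* real code: a stop_machine rendezvous */
    }

    int main(void)
    {
        printf("cpu_down(0) -> %d\n", cpu_down(0));  /* rejected: BSP */
        printf("cpu_down(2) -> %d\n", cpu_down(2));  /* succeeds */
        printf("cpu_down(2) -> %d\n", cpu_down(2));  /* rejected: offline */
        return 0;
    }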
     3.1 --- a/xen/common/sched_credit.c	Thu Sep 11 11:58:08 2008 +0100
     3.2 +++ b/xen/common/sched_credit.c	Thu Sep 11 15:06:22 2008 +0100
     3.3 @@ -1107,6 +1107,10 @@ csched_load_balance(int cpu, struct csch
     3.4  
     3.5      BUG_ON( cpu != snext->vcpu->processor );
     3.6  
     3.7 +    /* If this CPU is going offline we shouldn't steal work. */
     3.8 +    if ( unlikely(!cpu_online(cpu)) )
     3.9 +        goto out;
    3.10 +
    3.11      if ( snext->pri == CSCHED_PRI_IDLE )
    3.12          CSCHED_STAT_CRANK(load_balance_idle);
    3.13      else if ( snext->pri == CSCHED_PRI_TS_OVER )
    3.14 @@ -1149,6 +1153,7 @@ csched_load_balance(int cpu, struct csch
    3.15              return speer;
    3.16      }
    3.17  
    3.18 + out:
    3.19      /* Failed to find more important work elsewhere... */
    3.20      __runq_remove(snext);
    3.21      return snext;
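
The guard matters because csched_load_balance() still runs on the dying CPU after it has been cleared from cpu_online_map but before its final pass through the scheduler; without it the CPU could steal a vcpu it is about to abandon. A tiny runnable model of just that guard (invented stub types and online test, purely illustrative):

    #include <stdio.h>

    struct candidate { const char *name; };  /* stand-in for csched_vcpu */

    static int cpu_online(int cpu) { return cpu != 2; }  /* pretend CPU2 is dying */

    /* Mirrors the new check: an offlining CPU keeps its local candidate
     * and never pulls work from peer run queues. */
    static struct candidate *load_balance(int cpu, struct candidate *snext)
    {
        if ( !cpu_online(cpu) )
            return snext;   /* dying CPU: steal nothing */
        /* ... normal work-stealing across peers would run here ... */
        return snext;
    }

    int main(void)
    {
        struct candidate idle = { "idle" };
        printf("CPU2 runs: %s\n", load_balance(2, &idle)->name);
        return 0;
    }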
     4.1 --- a/xen/common/schedule.c	Thu Sep 11 11:58:08 2008 +0100
     4.2 +++ b/xen/common/schedule.c	Thu Sep 11 15:06:22 2008 +0100
     4.3 @@ -288,6 +288,48 @@ void vcpu_force_reschedule(struct vcpu *
     4.4      }
     4.5  }
     4.6  
     4.7 +/*
     4.8 + * This function is used by cpu_hotplug code from stop_machine context.
     4.9 + * Hence we can avoid needing to take the usual scheduler locks.
    4.10 + */
    4.11 +void cpu_disable_scheduler(void)
    4.12 +{
    4.13 +    struct domain *d;
    4.14 +    struct vcpu *v;
    4.15 +    unsigned int cpu = smp_processor_id();
    4.16 +
    4.17 +    for_each_domain ( d )
    4.18 +    {
    4.19 +        for_each_vcpu ( d, v )
    4.20 +        {
    4.21 +            if ( is_idle_vcpu(v) )
    4.22 +                continue;
    4.23 +
    4.24 +            if ( (cpus_weight(v->cpu_affinity) == 1) &&
    4.25 +                 cpu_isset(cpu, v->cpu_affinity) )
    4.26 +            {
    4.27 +                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
    4.28 +                        v->domain->domain_id, v->vcpu_id);
    4.29 +                cpus_setall(v->cpu_affinity);
    4.30 +            }
    4.31 +
    4.32 +            /*
    4.33 +             * Migrate single-shot timers to CPU0. A new cpu will automatically
    4.34 +             * be chosen when the timer is next re-set.
    4.35 +             */
    4.36 +            if ( v->singleshot_timer.cpu == cpu )
    4.37 +                migrate_timer(&v->singleshot_timer, 0);
    4.38 +
    4.39 +            if ( v->processor == cpu )
    4.40 +            {
    4.41 +                set_bit(_VPF_migrating, &v->pause_flags);
    4.42 +                vcpu_sleep_nosync(v);
    4.43 +                vcpu_migrate(v);
    4.44 +            }
    4.45 +        }
    4.46 +    }
    4.47 +}
    4.48 +
    4.49  static int __vcpu_set_affinity(
    4.50      struct vcpu *v, cpumask_t *affinity,
    4.51      bool_t old_lock_status, bool_t new_lock_status)
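
cpu_disable_scheduler() breaks a vcpu's affinity only when the dying CPU is the sole bit set in its mask; any wider mask already offers somewhere else to run, and vcpus currently on the dying CPU are then flagged _VPF_migrating, put to sleep, and migrated. A stand-alone model of the affinity test (a plain unsigned long stands in for cpumask_t, with helpers mimicking cpus_weight(), cpu_isset(), and cpus_setall()):

    #include <stdio.h>

    typedef unsigned long cpumask_t;   /* toy stand-in for Xen's cpumask_t */

    static int cpus_weight(cpumask_t m) { return __builtin_popcountl(m); }
    static int cpu_isset(unsigned int cpu, cpumask_t m) { return (m >> cpu) & 1; }

    int main(void)
    {
        unsigned int dying_cpu = 2;
        cpumask_t affinity = 1UL << 2;   /* vcpu pinned to CPU2 only */

        /* Same test as cpu_disable_scheduler(): pinned solely to us? */
        if ( (cpus_weight(affinity) == 1) && cpu_isset(dying_cpu, affinity) )
        {
            printf("Breaking vcpu affinity\n");
            affinity = ~0UL;             /* cpus_setall() equivalent */
        }

        printf("new affinity mask: %#lx\n", affinity);
        return 0;
    }

Single-shot timers resident on the dying CPU are simply parked on CPU0 rather than migrated to a chosen peer, since the timer subsystem picks a suitable CPU the next time each timer is re-armed.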
     5.1 --- a/xen/include/xen/sched.h	Thu Sep 11 11:58:08 2008 +0100
     5.2 +++ b/xen/include/xen/sched.h	Thu Sep 11 15:06:22 2008 +0100
     5.3 @@ -524,6 +524,7 @@ void domain_unpause_by_systemcontroller(
     5.4  void cpu_init(void);
     5.5  
     5.6  void vcpu_force_reschedule(struct vcpu *v);
     5.7 +void cpu_disable_scheduler(void);
     5.8  int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
     5.9  int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
    5.10  void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);