ia64/xen-unstable

changeset 18949:aa0fee8a6ef5

Cleanup Intel CMCI support.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Dec 22 12:07:20 2008 +0000 (2008-12-22)
parents 4d5203f95498
children b59db1f95d19
files xen/arch/x86/cpu/mcheck/mce.c xen/arch/x86/cpu/mcheck/mce_intel.c xen/arch/x86/smpboot.c xen/common/stop_machine.c xen/include/asm-x86/processor.h xen/include/asm-x86/smp.h xen/include/xen/stop_machine.h
line diff
     1.1 --- a/xen/arch/x86/cpu/mcheck/mce.c	Mon Dec 22 08:12:33 2008 +0000
     1.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c	Mon Dec 22 12:07:20 2008 +0000
     1.3 @@ -116,16 +116,6 @@ int mce_available(struct cpuinfo_x86 *c)
     1.4  	return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
     1.5  }
     1.6  
     1.7 -/*Make sure there are no machine check on offlined or suspended CPUs*/
     1.8 -void mce_disable_cpu(void)
     1.9 -{
    1.10 -    if (!mce_available(&current_cpu_data) || mce_disabled == 1)
    1.11 -         return;
    1.12 -    printk(KERN_DEBUG "MCE: disable mce on CPU%d\n", smp_processor_id());
    1.13 -    clear_in_cr4(X86_CR4_MCE);
    1.14 -}
    1.15 -
    1.16 -
    1.17  /* This has to be run for each processor */
    1.18  void mcheck_init(struct cpuinfo_x86 *c)
    1.19  {
     2.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Mon Dec 22 08:12:33 2008 +0000
     2.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Mon Dec 22 12:07:20 2008 +0000
     2.3 @@ -18,7 +18,7 @@ extern int firstbank;
     2.4  
     2.5  #ifdef CONFIG_X86_MCE_THERMAL
     2.6  static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
     2.7 -{	
     2.8 +{
     2.9      printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
    2.10                  smp_processor_id());
    2.11      add_taint(TAINT_MACHINE_CHECK);
    2.12 @@ -67,11 +67,11 @@ static void intel_init_thermal(struct cp
    2.13  
    2.14      /* Thermal monitoring */
    2.15      if (!cpu_has(c, X86_FEATURE_ACPI))
    2.16 -        return;	/* -ENODEV */
    2.17 +        return; /* -ENODEV */
    2.18  
    2.19      /* Clock modulation */
    2.20      if (!cpu_has(c, X86_FEATURE_ACC))
    2.21 -        return;	/* -ENODEV */
    2.22 +        return; /* -ENODEV */
    2.23  
    2.24      /* first check if its enabled already, in which case there might
    2.25       * be some SMM goo which handles it, so we can't even put a handler
    2.26 @@ -87,7 +87,7 @@ static void intel_init_thermal(struct cp
    2.27      if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
    2.28          tm2 = 1;
    2.29  
    2.30 -	/* check whether a vector already exists, temporarily masked? */
    2.31 +    /* check whether a vector already exists, temporarily masked? */
    2.32      if (h & APIC_VECTOR_MASK) {
    2.33          printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
    2.34                   cpu, (h & APIC_VECTOR_MASK));
    2.35 @@ -95,8 +95,8 @@ static void intel_init_thermal(struct cp
    2.36      }
    2.37  
    2.38      /* The temperature transition interrupt handler setup */
    2.39 -    h = THERMAL_APIC_VECTOR;		/* our delivery vector */
    2.40 -    h |= (APIC_DM_FIXED | APIC_LVT_MASKED);	/* we'll mask till we're ready */
    2.41 +    h = THERMAL_APIC_VECTOR;    /* our delivery vector */
    2.42 +    h |= (APIC_DM_FIXED | APIC_LVT_MASKED);  /* we'll mask till we're ready */
    2.43      apic_write_around(APIC_LVTTHMR, h);
    2.44  
    2.45      rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
    2.46 @@ -121,7 +121,7 @@ static inline void intel_get_extended_ms
    2.47      if (nr_intel_ext_msrs == 0)
    2.48          return;
    2.49  
    2.50 -	/*this function will called when CAP(9).MCG_EXT_P = 1*/
    2.51 +    /*this function will called when CAP(9).MCG_EXT_P = 1*/
    2.52      memset(mc_ext, 0, sizeof(struct mcinfo_extended));
    2.53      mc_ext->common.type = MC_TYPE_EXTENDED;
    2.54      mc_ext->common.size = sizeof(mc_ext);
    2.55 @@ -198,7 +198,7 @@ static int machine_check_poll(struct mc_
    2.56          struct mcinfo_bank mcb;
    2.57          /*For CMCI, only owners checks the owned MSRs*/
    2.58          if ( !test_bit(i, __get_cpu_var(mce_banks_owned)) &&
    2.59 -			(calltype & MC_FLAG_CMCI) )
    2.60 +             (calltype & MC_FLAG_CMCI) )
    2.61              continue;
    2.62          rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
    2.63  
    2.64 @@ -277,38 +277,38 @@ static fastcall void intel_machine_check
    2.65      u32 mcgstl, mcgsth;
    2.66      int i;
    2.67     
    2.68 -    rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
    2.69 -    if (mcgstl & (1<<0))	/* Recoverable ? */
    2.70 -    	recover=0;
    2.71 +    rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
    2.72 +    if (mcgstl & (1<<0))       /* Recoverable ? */
    2.73 +        recover=0;
    2.74      
    2.75 -    printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
    2.76 -    	smp_processor_id(), mcgsth, mcgstl);
    2.77 +    printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
    2.78 +           smp_processor_id(), mcgsth, mcgstl);
    2.79      
    2.80      for (i=0; i<nr_mce_banks; i++) {
    2.81 -    	rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
    2.82 -    	if (high & (1<<31)) {
    2.83 -    		if (high & (1<<29))
    2.84 -    			recover |= 1;
    2.85 -    		if (high & (1<<25))
    2.86 -    			recover |= 2;
    2.87 -    		printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
    2.88 -    		high &= ~(1<<31);
    2.89 -    		if (high & (1<<27)) {
    2.90 -    			rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
    2.91 -    			printk ("[%08x%08x]", ahigh, alow);
    2.92 -    		}
    2.93 -    		if (high & (1<<26)) {
    2.94 -    			rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
    2.95 -    			printk (" at %08x%08x", ahigh, alow);
    2.96 -    		}
    2.97 -    		printk ("\n");
    2.98 -    	}
    2.99 +        rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
   2.100 +        if (high & (1<<31)) {
   2.101 +            if (high & (1<<29))
   2.102 +                recover |= 1;
   2.103 +            if (high & (1<<25))
   2.104 +                recover |= 2;
   2.105 +            printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
   2.106 +            high &= ~(1<<31);
   2.107 +            if (high & (1<<27)) {
   2.108 +                rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
   2.109 +                printk ("[%08x%08x]", ahigh, alow);
   2.110 +            }
   2.111 +            if (high & (1<<26)) {
   2.112 +                rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
   2.113 +                printk (" at %08x%08x", ahigh, alow);
   2.114 +            }
   2.115 +            printk ("\n");
   2.116 +        }
   2.117      }
   2.118      
   2.119      if (recover & 2)
   2.120 -    	mc_panic ("CPU context corrupt");
   2.121 +        mc_panic ("CPU context corrupt");
   2.122      if (recover & 1)
   2.123 -    	mc_panic ("Unable to continue");
   2.124 +        mc_panic ("Unable to continue");
   2.125      
   2.126      printk(KERN_EMERG "Attempting to continue.\n");
   2.127      /* 
   2.128 @@ -317,25 +317,21 @@ static fastcall void intel_machine_check
   2.129       * for errors if the OS could not log the error.
   2.130       */
   2.131      for (i=0; i<nr_mce_banks; i++) {
   2.132 -    	u32 msr;
   2.133 -    	msr = MSR_IA32_MC0_STATUS+i*4;
   2.134 -    	rdmsr (msr, low, high);
   2.135 -    	if (high&(1<<31)) {
   2.136 -    		/* Clear it */
   2.137 -    		wrmsr(msr, 0UL, 0UL);
   2.138 -    		/* Serialize */
   2.139 -    		wmb();
   2.140 -    		add_taint(TAINT_MACHINE_CHECK);
   2.141 -    	}
   2.142 +        u32 msr;
   2.143 +        msr = MSR_IA32_MC0_STATUS+i*4;
   2.144 +        rdmsr (msr, low, high);
   2.145 +        if (high&(1<<31)) {
   2.146 +            /* Clear it */
   2.147 +            wrmsr(msr, 0UL, 0UL);
   2.148 +            /* Serialize */
   2.149 +            wmb();
   2.150 +            add_taint(TAINT_MACHINE_CHECK);
   2.151 +        }
   2.152      }
   2.153      mcgstl &= ~(1<<2);
   2.154      wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
   2.155  }
   2.156  
   2.157 -extern void (*cpu_down_handler)(int down_cpu);
   2.158 -extern void (*cpu_down_rollback_handler)(int down_cpu);
   2.159 -extern void mce_disable_cpu(void);
   2.160 -static bool_t cmci_clear_lock = 0;
   2.161  static DEFINE_SPINLOCK(cmci_discover_lock);
   2.162  static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
   2.163  
   2.164 @@ -350,19 +346,16 @@ static int do_cmci_discover(int i)
   2.165      rdmsrl(msr, val);
   2.166      /* Some other CPU already owns this bank. */
   2.167      if (val & CMCI_EN) {
   2.168 -    	clear_bit(i, __get_cpu_var(mce_banks_owned));
   2.169 -    	goto out;
   2.170 +        clear_bit(i, __get_cpu_var(mce_banks_owned));
   2.171 +        goto out;
   2.172      }
   2.173      wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
   2.174      rdmsrl(msr, val);
   2.175  
   2.176      if (!(val & CMCI_EN)) {
   2.177 -     /*
   2.178 -      * This bank does not support CMCI. The polling
   2.179 -      * timer has to handle it. 
   2.180 -      */
   2.181 -    	set_bit(i, __get_cpu_var(no_cmci_banks));
   2.182 -    	return 0;
   2.183 +        /* This bank does not support CMCI. Polling timer has to handle it. */
   2.184 +        set_bit(i, __get_cpu_var(no_cmci_banks));
   2.185 +        return 0;
   2.186      }
   2.187      set_bit(i, __get_cpu_var(mce_banks_owned));
   2.188  out:
   2.189 @@ -370,23 +363,25 @@ out:
   2.190      return 1;
   2.191  }
   2.192  
   2.193 -void cmci_discover(void)
   2.194 +static void cmci_discover(void)
   2.195  {
   2.196 +    unsigned long flags;
   2.197      int i;
   2.198  
   2.199      printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id());
   2.200 -    spin_lock(&cmci_discover_lock);
   2.201 -    for (i = 0; i < nr_mce_banks; i++) {
   2.202 -        /*If the cpu is the bank owner, need not re-discover*/
   2.203 -        if (test_bit(i, __get_cpu_var(mce_banks_owned)))
   2.204 -            continue;
   2.205 -        do_cmci_discover(i);
   2.206 -    }
   2.207 -    spin_unlock(&cmci_discover_lock);
   2.208 +
   2.209 +    spin_lock_irqsave(&cmci_discover_lock, flags);
   2.210 +
   2.211 +    for (i = 0; i < nr_mce_banks; i++)
   2.212 +        if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
   2.213 +            do_cmci_discover(i);
   2.214 +
   2.215 +    spin_unlock_irqrestore(&cmci_discover_lock, flags);
   2.216 +
   2.217      printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n", 
   2.218 -            smp_processor_id(), 
   2.219 -            *((unsigned long *)__get_cpu_var(mce_banks_owned)), 
   2.220 -            *((unsigned long *)__get_cpu_var(no_cmci_banks)));
   2.221 +           smp_processor_id(), 
   2.222 +           *((unsigned long *)__get_cpu_var(mce_banks_owned)), 
   2.223 +           *((unsigned long *)__get_cpu_var(no_cmci_banks)));
   2.224  }
   2.225  
   2.226  /*
   2.227 @@ -402,13 +397,23 @@ void cmci_discover(void)
   2.228  
   2.229  static void mce_set_owner(void)
   2.230  {
   2.231 -
   2.232      if (!cmci_support || mce_disabled == 1)
   2.233          return;
   2.234  
   2.235      cmci_discover();
   2.236  }
   2.237  
   2.238 +static void __cpu_mcheck_distribute_cmci(void *unused)
   2.239 +{
   2.240 +    cmci_discover();
   2.241 +}
   2.242 +
   2.243 +void cpu_mcheck_distribute_cmci(void)
   2.244 +{
   2.245 +    if (cmci_support && !mce_disabled)
   2.246 +        on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0, 0);
   2.247 +}
   2.248 +
   2.249  static void clear_cmci(void)
   2.250  {
   2.251      int i;
   2.252 @@ -431,62 +436,12 @@ static void clear_cmci(void)
   2.253      }
   2.254  }
   2.255  
   2.256 -/*we need to re-set cmci owners when cpu_down fail or cpu_up*/
   2.257 -static void cmci_reenable_cpu(void *h)
   2.258 +void cpu_mcheck_disable(void)
   2.259  {
   2.260 -    if (!mce_available(&current_cpu_data) || mce_disabled == 1)
   2.261 -         return;
   2.262 -    printk(KERN_DEBUG "CMCI: reenable mce on CPU%d\n", smp_processor_id());
   2.263 -    mce_set_owner();
   2.264 -    set_in_cr4(X86_CR4_MCE);
   2.265 -}
   2.266 -
   2.267 -/* When take cpu_down, we need to execute the impacted cmci_owner judge algorithm 
   2.268 - * First, we need to clear the ownership on the dead CPU
   2.269 - * Then,  other CPUs will check whether to take the bank's ownership from down_cpu
   2.270 - * CPU0 need not and "never" execute this path
   2.271 -*/
   2.272 -void  __cpu_clear_cmci( int down_cpu)
   2.273 -{
   2.274 -    int cpu = smp_processor_id();
   2.275 -
   2.276 -    if (!cmci_support && mce_disabled == 1)
   2.277 -        return;
   2.278 -
   2.279 -    if (cpu == 0) {
   2.280 -        printk(KERN_DEBUG "CMCI: CPU0 need not be cleared\n");
   2.281 -        return;
   2.282 -    }
   2.283 +    clear_in_cr4(X86_CR4_MCE);
   2.284  
   2.285 -    local_irq_disable();
   2.286 -    if (cpu == down_cpu){
   2.287 -        mce_disable_cpu();
   2.288 +    if (cmci_support && !mce_disabled)
   2.289          clear_cmci();
   2.290 -        wmb();
   2.291 -        test_and_set_bool(cmci_clear_lock);
   2.292 -        return;
   2.293 -    }
   2.294 -    while (!cmci_clear_lock)
   2.295 -        cpu_relax();
   2.296 -    if (cpu != down_cpu)
   2.297 -        mce_set_owner();
   2.298 -
   2.299 -    test_and_clear_bool(cmci_clear_lock);
   2.300 -    local_irq_enable();
   2.301 -
   2.302 -}
   2.303 -
   2.304 -void  __cpu_clear_cmci_rollback( int down_cpu)
   2.305 -{
   2.306 -    cpumask_t down_map;
   2.307 -    if (!cmci_support || mce_disabled == 1) 
   2.308 -        return;
   2.309 -
   2.310 -    cpus_clear(down_map);
   2.311 -    cpu_set(down_cpu, down_map);
   2.312 -    printk(KERN_ERR "CMCI: cpu_down fail. "
   2.313 -        "Reenable cmci on CPU%d\n", down_cpu);
   2.314 -    on_selected_cpus(down_map, cmci_reenable_cpu, NULL, 1, 1);
   2.315  }
   2.316  
   2.317  static void intel_init_cmci(struct cpuinfo_x86 *c)
   2.318 @@ -511,11 +466,8 @@ static void intel_init_cmci(struct cpuin
   2.319      apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
   2.320      apic_write_around(APIC_CMCI, apic);
   2.321  
   2.322 -	/*now clear mask flag*/
   2.323      l = apic_read(APIC_CMCI);
   2.324      apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED);
   2.325 -    cpu_down_handler =  __cpu_clear_cmci;
   2.326 -    cpu_down_rollback_handler = __cpu_clear_cmci_rollback; 
   2.327  }
   2.328  
   2.329  fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
   2.330 @@ -588,7 +540,7 @@ static void mce_init(void)
   2.331  
   2.332      set_in_cr4(X86_CR4_MCE);
   2.333      rdmsr (MSR_IA32_MCG_CAP, l, h);
   2.334 -    if (l & MCG_CTL_P)	/* Control register present ? */
   2.335 +    if (l & MCG_CTL_P) /* Control register present ? */
   2.336          wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
   2.337  
   2.338      for (i = firstbank; i < nr_mce_banks; i++)
   2.339 @@ -611,15 +563,14 @@ static void mce_init(void)
   2.340  /*p4/p6 faimily has similar MCA initialization process*/
   2.341  void intel_mcheck_init(struct cpuinfo_x86 *c)
   2.342  {
   2.343 -	
   2.344 -	mce_cap_init(c);
   2.345 -	printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
   2.346 -		smp_processor_id());
   2.347 -	/* machine check is available */
   2.348 -	machine_check_vector = intel_machine_check;
   2.349 -	mce_init();
   2.350 -	mce_intel_feature_init(c);
   2.351 -	mce_set_owner();
   2.352 +    mce_cap_init(c);
   2.353 +    printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
   2.354 +            smp_processor_id());
   2.355 +    /* machine check is available */
   2.356 +    machine_check_vector = intel_machine_check;
   2.357 +    mce_init();
   2.358 +    mce_intel_feature_init(c);
   2.359 +    mce_set_owner();
   2.360  }
   2.361  
   2.362  /*
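The cmci_discover() path above claims CMCI banks per CPU: a bank whose control MSR already has CMCI_EN set belongs to another CPU; otherwise the CPU tries to set CMCI_EN itself and keeps the bank only if the bit sticks, and banks where it never sticks are left to the polling timer. An illustrative condensation of that claim step (not part of this patch), reusing identifiers from the file above and assuming msr names the bank's CMCI control MSR and val is a u64, as in the surrounding code:

    /* Sketch of the per-bank ownership claim made by do_cmci_discover(i);
     * runs with cmci_discover_lock held and interrupts off. */
    static int claim_cmci_bank(int i, unsigned int msr)
    {
        u64 val;

        rdmsrl(msr, val);
        if (val & CMCI_EN) {
            /* Some other CPU already owns this bank: drop any stale claim. */
            clear_bit(i, __get_cpu_var(mce_banks_owned));
            return 1;
        }

        /* Try to claim the bank; if CMCI_EN does not stick, the bank has no
         * CMCI support and the polling timer has to handle it. */
        wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
        rdmsrl(msr, val);
        if (!(val & CMCI_EN)) {
            set_bit(i, __get_cpu_var(no_cmci_banks));
            return 0;
        }

        set_bit(i, __get_cpu_var(mce_banks_owned));
        return 1;
    }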
     3.1 --- a/xen/arch/x86/smpboot.c	Mon Dec 22 08:12:33 2008 +0000
     3.2 +++ b/xen/arch/x86/smpboot.c	Mon Dec 22 12:07:20 2008 +0000
     3.3 @@ -1237,25 +1237,11 @@ remove_siblinginfo(int cpu)
     3.4  }
     3.5  
     3.6  extern void fixup_irqs(cpumask_t map);
     3.7 -
     3.8 -/*
     3.9 - * Functions called when offline cpu. 
    3.10 - * We need to process some new feature such as 
    3.11 - * CMCI owner change when do cpu hotplug in latest 
    3.12 - * Intel CPU families
    3.13 -*/
    3.14 -void (*cpu_down_handler)(int down_cpu) = NULL;
    3.15 -void (*cpu_down_rollback_handler)(int down_cpu) = NULL;
    3.16 -
    3.17 -
    3.18 -int __cpu_disable(int down_cpu)
    3.19 +int __cpu_disable(void)
    3.20  {
    3.21  	cpumask_t map = cpu_online_map;
    3.22  	int cpu = smp_processor_id();
    3.23  
    3.24 -	/*Only down_cpu need to execute this function*/
    3.25 -	if (cpu != down_cpu)
    3.26 -		return 0;
    3.27  	/*
    3.28  	 * Perhaps use cpufreq to drop frequency, but that could go
    3.29  	 * into generic code.
    3.30 @@ -1278,6 +1264,8 @@ int __cpu_disable(int down_cpu)
    3.31  
    3.32  	time_suspend();
    3.33  
    3.34 +	cpu_mcheck_disable();
    3.35 +
    3.36  	remove_siblinginfo(cpu);
    3.37  
    3.38  	cpu_clear(cpu, map);
    3.39 @@ -1293,28 +1281,25 @@ int __cpu_disable(int down_cpu)
    3.40  void __cpu_die(unsigned int cpu)
    3.41  {
    3.42  	/* We don't do anything here: idle task is faking death itself. */
    3.43 -	unsigned int i;
    3.44 +	unsigned int i = 0;
    3.45  
    3.46 -	for (i = 0; i < 10; i++) {
    3.47 +	for (;;) {
    3.48  		/* They ack this in play_dead by setting CPU_DEAD */
    3.49  		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
    3.50 -			printk ("CPU %d is now offline\n", cpu);
    3.51 +			printk ("CPU %u is now offline\n", cpu);
    3.52  			return;
    3.53  		}
    3.54  		mdelay(100);
    3.55  		mb();
    3.56  		process_pending_timers();
    3.57 +		if ((++i % 10) == 0)
    3.58 +			printk(KERN_ERR "CPU %u still not dead...\n", cpu);
    3.59  	}
    3.60 - 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
    3.61  }
    3.62 -static int take_cpu_down(void *down_cpu)
    3.63 -{
    3.64  
    3.65 -    if (cpu_down_handler)
    3.66 -        cpu_down_handler(*(int *)down_cpu);
    3.67 -    wmb();
    3.68 -
    3.69 -    return __cpu_disable(*(int *)down_cpu);
    3.70 +static int take_cpu_down(void *unused)
    3.71 +{
    3.72 +    return __cpu_disable();
    3.73  }
    3.74  
    3.75  int cpu_down(unsigned int cpu)
    3.76 @@ -1340,21 +1325,17 @@ int cpu_down(unsigned int cpu)
    3.77  
    3.78  	printk("Prepare to bring CPU%d down...\n", cpu);
    3.79  
    3.80 -	err = stop_machine_run(take_cpu_down, &cpu, cpu_online_map);
    3.81 -	if ( err < 0 )
    3.82 +	err = stop_machine_run(take_cpu_down, NULL, cpu);
    3.83 +	if (err < 0)
    3.84  		goto out;
    3.85  
    3.86  	__cpu_die(cpu);
    3.87  
    3.88 -	if (cpu_online(cpu)) {
    3.89 -		printk("Bad state (DEAD, but in online map) on CPU%d\n", cpu);
    3.90 -		err = -EBUSY;
    3.91 -	}
    3.92 +	BUG_ON(cpu_online(cpu));
    3.93 +
    3.94 +	cpu_mcheck_distribute_cmci();
    3.95 +
    3.96  out:
    3.97 -	/*if cpu_offline failed, re-check cmci_owner*/
    3.98 -
    3.99 -	if ( err < 0 && cpu_down_rollback_handler) 
   3.100 -		cpu_down_rollback_handler(cpu); 
   3.101  	spin_unlock(&cpu_add_remove_lock);
   3.102  	return err;
   3.103  }
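With the cpu_down_handler/cpu_down_rollback_handler pointers gone, the offline path above becomes a fixed sequence. A simplified sketch of the new flow (error handling and locking omitted; every function named here appears in this changeset):

    /* How cpu_down() now drives the machine-check hooks (sketch). */
    static int cpu_down_flow(unsigned int cpu)
    {
        int err;

        /* Rendezvous all CPUs; take_cpu_down() -> __cpu_disable() executes on
         * the target CPU and calls cpu_mcheck_disable(), which clears CR4.MCE
         * and, when CMCI is in use, releases the banks that CPU owned. */
        err = stop_machine_run(take_cpu_down, NULL, cpu);
        if (err < 0)
            return err;

        /* Wait for the dying CPU to mark itself CPU_DEAD. */
        __cpu_die(cpu);

        /* Have every surviving CPU re-run cmci_discover() so the banks the
         * dead CPU gave up are claimed again. */
        cpu_mcheck_distribute_cmci();

        return 0;
    }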
     4.1 --- a/xen/common/stop_machine.c	Mon Dec 22 08:12:33 2008 +0000
     4.2 +++ b/xen/common/stop_machine.c	Mon Dec 22 12:07:20 2008 +0000
     4.3 @@ -45,7 +45,7 @@ struct stopmachine_data {
     4.4      enum stopmachine_state state;
     4.5      atomic_t done;
     4.6  
     4.7 -    cpumask_t fn_cpus;
     4.8 +    unsigned int fn_cpu;
     4.9      int fn_result;
    4.10      int (*fn)(void *);
    4.11      void *fn_data;
    4.12 @@ -63,22 +63,21 @@ static void stopmachine_set_state(enum s
    4.13          cpu_relax();
    4.14  }
    4.15  
    4.16 -int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpus)
    4.17 +int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu)
    4.18  {
    4.19      cpumask_t allbutself;
    4.20      unsigned int i, nr_cpus;
    4.21 -    int cur_cpu, ret;
    4.22 +    int ret;
    4.23  
    4.24      BUG_ON(!local_irq_is_enabled());
    4.25  
    4.26      allbutself = cpu_online_map;
    4.27 -    cur_cpu = smp_processor_id();
    4.28 -    cpu_clear(cur_cpu, allbutself);
    4.29 +    cpu_clear(smp_processor_id(), allbutself);
    4.30      nr_cpus = cpus_weight(allbutself);
    4.31  
    4.32      if ( nr_cpus == 0 )
    4.33      {
    4.34 -        BUG_ON(!cpu_isset(cur_cpu, cpus));
    4.35 +        BUG_ON(cpu != smp_processor_id());
    4.36          return (*fn)(data);
    4.37      }
    4.38  
    4.39 @@ -92,8 +91,7 @@ int stop_machine_run(int (*fn)(void *), 
    4.40      stopmachine_data.fn = fn;
    4.41      stopmachine_data.fn_data = data;
    4.42      stopmachine_data.nr_cpus = nr_cpus;
    4.43 -    stopmachine_data.fn_cpus = cpus;
    4.44 -    stopmachine_data.fn_result = 0;
    4.45 +    stopmachine_data.fn_cpu = cpu;
    4.46      atomic_set(&stopmachine_data.done, 0);
    4.47      stopmachine_data.state = STOPMACHINE_START;
    4.48  
    4.49 @@ -107,13 +105,8 @@ int stop_machine_run(int (*fn)(void *), 
    4.50      local_irq_disable();
    4.51      stopmachine_set_state(STOPMACHINE_DISABLE_IRQ);
    4.52  
    4.53 -    /* callback will run on each cpu of the input map.
    4.54 -     * If callback fails on any CPU, the stop_machine_run
    4.55 -     * will return the  *ORed* the failure
    4.56 -     */
    4.57 -    if ( cpu_isset(cur_cpu, cpus) ){
    4.58 -        stopmachine_data.fn_result |= (*fn)(data);
    4.59 -    }
    4.60 +    if ( cpu == smp_processor_id() )
    4.61 +        stopmachine_data.fn_result = (*fn)(data);
    4.62      stopmachine_set_state(STOPMACHINE_INVOKE);
    4.63      ret = stopmachine_data.fn_result;
    4.64  
    4.65 @@ -128,6 +121,7 @@ int stop_machine_run(int (*fn)(void *), 
    4.66  static void stopmachine_softirq(void)
    4.67  {
    4.68      enum stopmachine_state state = STOPMACHINE_START;
    4.69 +
    4.70      smp_mb();
    4.71  
    4.72      while ( state != STOPMACHINE_EXIT )
    4.73 @@ -142,11 +136,10 @@ static void stopmachine_softirq(void)
    4.74              local_irq_disable();
    4.75              break;
    4.76          case STOPMACHINE_INVOKE:
    4.77 -            if ( cpu_isset(smp_processor_id(), stopmachine_data.fn_cpus )) {
    4.78 -                stopmachine_data.fn_result |= 
    4.79 +            if ( stopmachine_data.fn_cpu == smp_processor_id() )
    4.80 +                stopmachine_data.fn_result =
    4.81                      stopmachine_data.fn(stopmachine_data.fn_data);
    4.82 -            }
    4.83 -           break;
    4.84 +            break;
    4.85          default:
    4.86              break;
    4.87          }
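Taken together, the hunks above reduce stop_machine to a lock-step handshake keyed on a single fn_cpu rather than a cpumask. A sketch of the protocol as far as the visible context shows it (the STOPMACHINE_EXIT transition and the tail of stop_machine_run() fall outside these hunks):

    /*
     * Calling CPU, in stop_machine_run(fn, data, cpu):
     *   - record fn, data, nr_cpus and fn_cpu; state = STOPMACHINE_START
     *   - other CPUs enter stopmachine_softirq() and follow the state
     *   - local_irq_disable(); set_state(STOPMACHINE_DISABLE_IRQ)
     *   - if cpu == smp_processor_id(), run fn(data) locally
     *   - set_state(STOPMACHINE_INVOKE): the CPU whose id equals fn_cpu
     *     runs fn(data) from its softirq handler
     *   - return stopmachine_data.fn_result
     *
     * stopmachine_set_state() advances the state only after every other CPU
     * has acknowledged the previous one (the cpu_relax() wait loop above).
     */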
     5.1 --- a/xen/include/asm-x86/processor.h	Mon Dec 22 08:12:33 2008 +0000
     5.2 +++ b/xen/include/asm-x86/processor.h	Mon Dec 22 12:07:20 2008 +0000
     5.3 @@ -540,6 +540,8 @@ extern void mtrr_bp_init(void);
     5.4  
     5.5  void mcheck_init(struct cpuinfo_x86 *c);
     5.6  asmlinkage void do_machine_check(struct cpu_user_regs *regs);
     5.7 +void cpu_mcheck_distribute_cmci(void);
     5.8 +void cpu_mcheck_disable(void);
     5.9  
    5.10  int cpuid_hypervisor_leaves(
    5.11      uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
     6.1 --- a/xen/include/asm-x86/smp.h	Mon Dec 22 08:12:33 2008 +0000
     6.2 +++ b/xen/include/asm-x86/smp.h	Mon Dec 22 12:07:20 2008 +0000
     6.3 @@ -101,7 +101,7 @@ static __inline int logical_smp_processo
     6.4  
     6.5  #endif
     6.6  
     6.7 -extern int __cpu_disable(int down_cpu);
     6.8 +extern int __cpu_disable(void);
     6.9  extern void __cpu_die(unsigned int cpu);
    6.10  #endif /* !__ASSEMBLY__ */
    6.11  
     7.1 --- a/xen/include/xen/stop_machine.h	Mon Dec 22 08:12:33 2008 +0000
     7.2 +++ b/xen/include/xen/stop_machine.h	Mon Dec 22 12:07:20 2008 +0000
     7.3 @@ -5,7 +5,7 @@
     7.4   * stop_machine_run: freeze the machine on all CPUs and run this function
     7.5   * @fn: the function to run
     7.6   * @data: the data ptr for the @fn()
     7.7 - * @cpus: cpus to run @fn() on.
     7.8 + * @cpu: the cpu to run @fn() on (or any, if @cpu == NR_CPUS).
     7.9   *
    7.10   * Description: This causes every other cpu to enter a safe point, with
    7.11   * each of which disables interrupts, and finally interrupts are disabled
    7.12 @@ -14,6 +14,6 @@
    7.13   *
    7.14   * This can be thought of as a very heavy write lock, equivalent to
    7.15   * grabbing every spinlock in the kernel. */
    7.16 -int stop_machine_run(int (*fn)(void *), void *data, cpumask_t cpu);
    7.17 +int stop_machine_run(int (*fn)(void *), void *data, unsigned int cpu);
    7.18  
    7.19  #endif /* __XEN_STOP_MACHINE_H__ */
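The updated docblock takes a single CPU rather than a cpumask, and stop_machine_run() must be entered with interrupts enabled (it BUG_ON()s otherwise, per the hunk in xen/common/stop_machine.c). A minimal, hypothetical caller under the new interface; only stop_machine_run() itself comes from this header, the callback and CPU argument are made up for illustration:

    /* Hypothetical callback: runs on the chosen CPU while every other CPU is
     * parked with interrupts disabled. */
    static int reprogram_something(void *data)
    {
        return 0;
    }

    static int example_caller(unsigned int target_cpu)
    {
        /* Must be called with local interrupts enabled. */
        return stop_machine_run(reprogram_something, NULL, target_cpu);
    }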