ia64/xen-unstable
changeset 17548:520519f5e346
Add ACPI C3 support for x86.
C3 and deeper C-states require either ARB_DIS or a cache flush. ARB_DIS
must be set by the last CPU to become ready for C3. Bus-master activity
is checked and recorded to drive C-state promotion/demotion decisions.

C3 is disabled by default until the TSC/APIC-stop issues are resolved;
adding the command-line option "max_cstate=3" enables C3.
Signed-off-by: Wei Gang <gang.wei@intel.com>
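
For reference, the "last CPU does ARB_DIS" handshake described above boils down to the following (a condensed sketch of the C3 entry path from the diff below, using the patch's own names; timing and interrupt handling omitted):

    static atomic_t c3_cpu_count;

    /* C3 entry: only the last CPU to arrive may disable bus-master
     * arbitration (ARB_DIS); doing it earlier would stall DMA while
     * other CPUs are still running. */
    if ( power->flags.bm_check && power->flags.bm_control )
    {
        atomic_inc(&c3_cpu_count);
        if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
            acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
    }
    else if ( !power->flags.bm_check )
        ACPI_FLUSH_CPU_CACHE();  /* no BM tracking: flush caches instead */

    acpi_idle_do_entry(cx);      /* enter C3 */

    if ( power->flags.bm_check && power->flags.bm_control )
    {
        atomic_dec(&c3_cpu_count);
        acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);  /* re-enable arbitration */
    }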
author      Keir Fraser <keir.fraser@citrix.com>
date        Thu May 01 10:46:59 2008 +0100 (2008-05-01)
parents     93bc6d9b5f31
children    4aec1797720f
files       xen/arch/x86/acpi/cpu_idle.c
line diff
--- a/xen/arch/x86/acpi/cpu_idle.c	Thu May 01 10:41:51 2008 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c	Thu May 01 10:46:59 2008 +0100
@@ -60,6 +60,16 @@ extern void (*pm_idle) (void);
 static void (*pm_idle_save) (void) __read_mostly;
 unsigned int max_cstate __read_mostly = 2;
 integer_param("max_cstate", max_cstate);
+/*
+ * bm_history -- bit-mask with a bit per jiffy of bus-master activity
+ * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
+ *  800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
+ *  100 HZ: 0x0000000F:  4 jiffies = 40ms
+ * reduce history for more aggressive entry into C3
+ */
+unsigned int bm_history __read_mostly =
+    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
+integer_param("bm_history", bm_history);
 
 struct acpi_processor_cx;
 
@@ -91,10 +101,20 @@ struct acpi_processor_cx
     struct acpi_processor_cx_policy demotion;
 };
 
+struct acpi_processor_flags
+{
+    u8 bm_control:1;
+    u8 bm_check:1;
+    u8 has_cst:1;
+    u8 power_setup_done:1;
+    u8 bm_rld_set:1;
+};
+
 struct acpi_processor_power
 {
+    struct acpi_processor_flags flags;
     struct acpi_processor_cx *state;
-    u64 bm_check_timestamp;
+    s_time_t bm_check_timestamp;
     u32 default_state;
     u32 bm_activity;
     u32 count;
@@ -185,6 +205,29 @@ static void acpi_processor_power_activat
     old->promotion.count = 0;
     new->demotion.count = 0;
 
+    /* Cleanup from old state. */
+    if ( old )
+    {
+        switch ( old->type )
+        {
+        case ACPI_STATE_C3:
+            /* Disable bus master reload */
+            if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
+                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+            break;
+        }
+    }
+
+    /* Prepare to use new state. */
+    switch ( new->type )
+    {
+    case ACPI_STATE_C3:
+        /* Enable bus master reload */
+        if ( old->type != ACPI_STATE_C3 && power->flags.bm_check )
+            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+        break;
+    }
+
     power->state = new;
 
     return;
@@ -196,7 +239,7 @@ static void acpi_safe_halt(void)
     safe_halt();
 }
 
-#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
+#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
 
 static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
@@ -229,6 +272,8 @@ static void acpi_idle_do_entry(struct ac
     }
 }
 
+static atomic_t c3_cpu_count;
+
 static void acpi_processor_idle(void)
 {
     struct acpi_processor_power *power = NULL;
@@ -261,6 +306,62 @@ static void acpi_processor_idle(void)
     }
 
     /*
+     * Check BM Activity
+     * -----------------
+     * Check for bus mastering activity (if required), record, and check
+     * for demotion.
+     */
+    if ( power->flags.bm_check )
+    {
+        u32 bm_status = 0;
+        unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;
+
+        if ( diff > 31 )
+            diff = 31;
+
+        power->bm_activity <<= diff;
+
+        acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
+        if ( bm_status )
+        {
+            power->bm_activity |= 0x1;
+            acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
+        }
+        /*
+         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
+         * the true state of bus mastering activity; forcing us to
+         * manually check the BMIDEA bit of each IDE channel.
+         */
+        /*else if ( errata.piix4.bmisx )
+        {
+            if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01)
+                || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
+                pr->power.bm_activity |= 0x1;
+        }*/
+
+        power->bm_check_timestamp = NOW();
+
+        /*
+         * If bus mastering is or was active this jiffy, demote
+         * to avoid a faulty transition. Note that the processor
+         * won't enter a low-power state during this call (to this
+         * function) but should upon the next.
+         *
+         * TBD: A better policy might be to fallback to the demotion
+         *      state (use it for this quantum only) istead of
+         *      demoting -- and rely on duration as our sole demotion
+         *      qualification. This may, however, introduce DMA
+         *      issues (e.g. floppy DMA transfer overrun/underrun).
+         */
+        if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
+        {
+            local_irq_enable();
+            next_state = cx->demotion.state;
+            goto end;
+        }
+    }
+
+    /*
      * Sleep:
      * ------
      * Invoke the current Cx state to put the processor to sleep.
@@ -303,6 +404,73 @@ static void acpi_processor_idle(void)
         sleep_ticks =
             ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
         break;
+
+    case ACPI_STATE_C3:
+        /*
+         * disable bus master
+         * bm_check implies we need ARB_DIS
+         * !bm_check implies we need cache flush
+         * bm_control implies whether we can do ARB_DIS
+         *
+         * That leaves a case where bm_check is set and bm_control is
+         * not set. In that case we cannot do much, we enter C3
+         * without doing anything.
+         */
+        if ( power->flags.bm_check && power->flags.bm_control )
+        {
+            atomic_inc(&c3_cpu_count);
+            if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
+            {
+                /*
+                 * All CPUs are trying to go to C3
+                 * Disable bus master arbitration
+                 */
+                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
+            }
+        }
+        else if ( !power->flags.bm_check )
+        {
+            /* SMP with no shared cache... Invalidate cache */
+            ACPI_FLUSH_CPU_CACHE();
+        }
+
+        /* Get start time (ticks) */
+        t1 = inl(pmtmr_ioport);
+
+        /*
+         * FIXME: Before invoking C3, be aware that TSC/APIC timer may be
+         * stopped by H/W. Without carefully handling of TSC/APIC stop issues,
+         * deep C state can't work correctly.
+         */
+        /* placeholder for preparing TSC stop */
+
+        /* placeholder for preparing APIC stop */
+
+        /* Invoke C3 */
+        acpi_idle_do_entry(cx);
+
+        /* placeholder for recovering APIC */
+
+        /* placeholder for recovering TSC */
+
+        /* Get end time (ticks) */
+        t2 = inl(pmtmr_ioport);
+        if ( power->flags.bm_check && power->flags.bm_control )
+        {
+            /* Enable bus master arbitration */
+            atomic_dec(&c3_cpu_count);
+            acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
+        }
+
+        /* Compute time (ticks) that we were actually asleep */
+        sleep_ticks = ticks_elapsed(t1, t2);
+        /* Re-enable interrupts */
+        local_irq_enable();
+        /* Do not account our idle-switching overhead: */
+        sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
+
+        break;
+
     default:
         local_irq_enable();
         return;
@@ -331,8 +499,19 @@ static void acpi_processor_idle(void)
                 cx->demotion.count = 0;
                 if ( cx->promotion.count >= cx->promotion.threshold.count )
                 {
-                    next_state = cx->promotion.state;
-                    goto end;
+                    if ( power->flags.bm_check )
+                    {
+                        if ( !(power->bm_activity & cx->promotion.threshold.bm) )
+                        {
+                            next_state = cx->promotion.state;
+                            goto end;
+                        }
+                    }
+                    else
+                    {
+                        next_state = cx->promotion.state;
+                        goto end;
+                    }
                 }
             }
         }
@@ -425,6 +604,8 @@ static int acpi_processor_set_power_poli
             cx->demotion.state = lower;
             cx->demotion.threshold.ticks = cx->latency_ticks;
             cx->demotion.threshold.count = 1;
+            if ( cx->type == ACPI_STATE_C3 )
+                cx->demotion.threshold.bm = bm_history;
         }
 
         lower = cx;
@@ -445,6 +626,8 @@ static int acpi_processor_set_power_poli
                 cx->promotion.threshold.count = 4;
             else
                 cx->promotion.threshold.count = 10;
+            if ( higher->type == ACPI_STATE_C3 )
+                cx->promotion.threshold.bm = bm_history;
         }
 
         higher = cx;
@@ -511,11 +694,40 @@ static int acpi_processor_ffh_cstate_pro
     return 0;
 }
 
+/*
+ * Initialize bm_flags based on the CPU cache properties
+ * On SMP it depends on cache configuration
+ * - When cache is not shared among all CPUs, we flush cache
+ *   before entering C3.
+ * - When cache is shared among all CPUs, we use bm_check
+ *   mechanism as in UP case
+ *
+ * This routine is called only after all the CPUs are online
+ */
+static void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags)
+{
+    struct cpuinfo_x86 *c = &current_cpu_data;
+
+    flags->bm_check = 0;
+    if ( num_online_cpus() == 1 )
+        flags->bm_check = 1;
+    else if ( c->x86_vendor == X86_VENDOR_INTEL )
+    {
+        /*
+         * Today all CPUs that support C3 share cache.
+         * TBD: This needs to look at cache shared map, once
+         * multi-core detection patch makes to the base.
+         */
+        flags->bm_check = 1;
+    }
+}
+
 #define VENDOR_INTEL (1)
 #define NATIVE_CSTATE_BEYOND_HALT (2)
 
-static int check_cx(xen_processor_cx_t *cx)
+static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx)
 {
+    static int bm_check_flag;
     if ( cx == NULL )
         return -EINVAL;
 
@@ -543,6 +755,56 @@ static int check_cx(xen_processor_cx_t *
         return -ENODEV;
     }
 
+    if ( cx->type == ACPI_STATE_C3 )
+    {
+        /* All the logic here assumes flags.bm_check is same across all CPUs */
+        if ( !bm_check_flag )
+        {
+            /* Determine whether bm_check is needed based on CPU */
+            acpi_processor_power_init_bm_check(&(power->flags));
+            bm_check_flag = power->flags.bm_check;
+        }
+        else
+        {
+            power->flags.bm_check = bm_check_flag;
+        }
+
+        if ( power->flags.bm_check )
+        {
+            if ( !power->flags.bm_control )
+            {
+                if ( power->flags.has_cst != 1 )
+                {
+                    /* bus mastering control is necessary */
+                    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+                        "C3 support requires BM control\n"));
+                    return -1;
+                }
+                else
+                {
+                    /* Here we enter C3 without bus mastering */
+                    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+                        "C3 support without BM control\n"));
+                }
+            }
+        }
+        else
+        {
+            /*
+             * WBINVD should be set in fadt, for C3 state to be
+             * supported on when bm_check is not required.
+             */
+            if ( !(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD) )
+            {
+                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+                    "Cache invalidation should work properly"
+                    " for C3 to be enabled on SMP systems\n"));
+                return -1;
+            }
+            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+        }
+    }
+
     return 0;
 }
 
@@ -552,7 +814,7 @@ static int set_cx(struct acpi_processor_
     struct acpi_processor_cx *cx;
 
     /* skip unsupported acpi cstate */
-    if ( check_cx(xen_cx) )
+    if ( check_cx(acpi_power, xen_cx) )
         return -EFAULT;
 
     cx = &acpi_power->states[xen_cx->type];
@@ -663,6 +925,10 @@ long set_cx_pminfo(uint32_t cpu, struct
 
     init_cx_pminfo(acpi_power);
 
+    acpi_power->flags.bm_check = power->flags.bm_check;
+    acpi_power->flags.bm_control = power->flags.bm_control;
+    acpi_power->flags.has_cst = power->flags.has_cst;
+
    states = power->states;
 
     for ( i = 0; i < power->count; i++ )
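
The promotion/demotion bookkeeping in acpi_processor_idle() amounts to a decaying shift register of bus-master activity (a condensed sketch of the hunk above; NOW() returns nanoseconds, so the >> 23 shift buckets elapsed time into ~8.4 ms windows, 2^23 ns each):

    u32 bm_status = 0;
    unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;

    if ( diff > 31 )
        diff = 31;                  /* history older than 32 windows is forgotten */
    power->bm_activity <<= diff;    /* age the recorded history */

    acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
    if ( bm_status )
    {
        power->bm_activity |= 0x1;  /* mark activity in the current window */
        /* BM_STS is a sticky status bit; writing 1 clears it */
        acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
    }
    power->bm_check_timestamp = NOW();

Demotion then triggers whenever the current-window bit is set, and promotion into C3 is allowed only when (bm_activity & bm_history) is clear, i.e. no bus-master traffic for the whole configured history window.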