xen/arch/x86/acpi/cpu_idle.c @ 18472:cfbe4df8d47c

CPUIDLE: Adjust Cx residency to contain Cx exit latency & overhead.

Signed-off-by: Wei Gang <gang.wei@intel.com>
Author: Keir Fraser <keir.fraser@citrix.com>
Date: Wed Sep 10 11:17:13 2008 +0100 (2008-09-10)

/*
 * cpu_idle - xen idle state module derived from Linux
 *            drivers/acpi/processor_idle.c &
 *            arch/x86/kernel/acpi/cstate.c
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *                    - Added processor hotplug support
 * Copyright (C) 2005 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
 *                    - Added support for C3 on SMP
 * Copyright (C) 2007, 2008 Intel Corporation
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <xen/config.h>
#include <xen/errno.h>
#include <xen/lib.h>
#include <xen/types.h>
#include <xen/acpi.h>
#include <xen/smp.h>
#include <xen/guest_access.h>
#include <xen/keyhandler.h>
#include <asm/cache.h>
#include <asm/io.h>
#include <asm/hpet.h>
#include <asm/processor.h>
#include <public/platform.h>
#include <public/sysctl.h>

#define DEBUG_PM_CX

#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
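
/*
 * Illustrative numbers: the ACPI PM timer runs at PM_TIMER_FREQUENCY
 * (3.579545 MHz), so US_TO_PM_TIMER_TICKS(t) ~= t * 3.579.  For example,
 * a 100us latency converts to 357 ticks, and the 4-tick C2/C3 overhead
 * constants above correspond to roughly 1us of entry/exit cost.
 */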

#define ACPI_PROCESSOR_MAX_POWER 8
#define ACPI_PROCESSOR_MAX_C2_LATENCY 100
#define ACPI_PROCESSOR_MAX_C3_LATENCY 1000

static void (*lapic_timer_off)(void);
static void (*lapic_timer_on)(void);

extern u32 pmtmr_ioport;
extern void (*pm_idle) (void);

static void (*pm_idle_save) (void) __read_mostly;
unsigned int max_cstate __read_mostly = 2;
integer_param("max_cstate", max_cstate);
/*
 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
 *  800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
 *  100 HZ: 0x0000000F:  4 jiffies = 40ms
 * reduce history for more aggressive entry into C3
 */
unsigned int bm_history __read_mostly =
    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
integer_param("bm_history", bm_history);
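
/*
 * Worked example of the default above: with HZ == 100 the initialiser
 * evaluates to (1U << (100 / 25)) - 1 == 0xF, i.e. only the last four
 * jiffies (~40ms) of bus-master activity are remembered, while any
 * HZ >= 800 keeps a full 32-bit history.
 */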

struct acpi_processor_cx;

struct acpi_processor_cx_policy
{
    u32 count;
    struct acpi_processor_cx *state;
    struct
    {
        u32 time;
        u32 ticks;
        u32 count;
        u32 bm;
    } threshold;
};

struct acpi_processor_cx
{
    u8 valid;
    u8 type;
    u32 address;
    u8 space_id;
    u32 latency;
    u32 latency_ticks;
    u32 power;
    u32 usage;
    u64 time;
    struct acpi_processor_cx_policy promotion;
    struct acpi_processor_cx_policy demotion;
};

struct acpi_processor_flags
{
    u8 bm_control:1;
    u8 bm_check:1;
    u8 has_cst:1;
    u8 power_setup_done:1;
    u8 bm_rld_set:1;
};

struct acpi_processor_power
{
    struct acpi_processor_flags flags;
    struct acpi_processor_cx *state;
    s_time_t bm_check_timestamp;
    u32 default_state;
    u32 bm_activity;
    u32 count;
    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
};

static struct acpi_processor_power processor_powers[NR_CPUS];

static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
{
    uint32_t i;

    printk("==cpu%d==\n", cpu);
    printk("active state:\t\tC%d\n", (power->state)?power->state->type:-1);
    printk("max_cstate:\t\tC%d\n", max_cstate);
    printk("bus master activity:\t%08x\n", power->bm_activity);
    printk("states:\n");

    for ( i = 1; i < power->count; i++ )
    {
        printk((power->states[i].type == power->state->type) ? " *" : " ");
        printk("C%d:\t\t", i);
        printk("type[C%d] ", power->states[i].type);
        if ( power->states[i].promotion.state )
            printk("promotion[C%d] ", power->states[i].promotion.state->type);
        else
            printk("promotion[--] ");
        if ( power->states[i].demotion.state )
            printk("demotion[C%d] ", power->states[i].demotion.state->type);
        else
            printk("demotion[--] ");
        printk("latency[%03d]\n ", power->states[i].latency);
        printk("\t\t\t");
        printk("usage[%08d] ", power->states[i].usage);
        printk("duration[%"PRId64"]\n", power->states[i].time);
    }
}

static void dump_cx(unsigned char key)
{
    unsigned int i;

    for ( i = 0; i < num_online_cpus(); i++ )
        print_acpi_power(i, &processor_powers[i]);
}

static int __init cpu_idle_key_init(void)
{
    register_keyhandler(
        'c', dump_cx, "dump cx structures");
    return 0;
}
__initcall(cpu_idle_key_init);
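
/*
 * The ACPI PM timer is either 24-bit or 32-bit wide (ACPI_FADT_32BIT_TIMER).
 * ticks_elapsed() handles counter wrap accordingly: e.g. with a 24-bit
 * timer, t1 = 0x00FFFFF0 followed by t2 = 0x00000010 yields 0x1F ticks
 * rather than a huge bogus delta.
 */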
static inline u32 ticks_elapsed(u32 t1, u32 t2)
{
    if ( t2 >= t1 )
        return (t2 - t1);
    else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) )
        return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
    else
        return ((0xFFFFFFFF - t1) + t2);
}

static void acpi_processor_power_activate(struct acpi_processor_power *power,
                                          struct acpi_processor_cx *new)
{
    struct acpi_processor_cx *old;

    if ( !power || !new )
        return;

    old = power->state;

    if ( old )
        old->promotion.count = 0;
    new->demotion.count = 0;

    /* Cleanup from old state. */
    if ( old )
    {
        switch ( old->type )
        {
        case ACPI_STATE_C3:
            /* Disable bus master reload */
            if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
                acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
            break;
        }
    }

    /* Prepare to use new state. */
    switch ( new->type )
    {
    case ACPI_STATE_C3:
        /* Enable bus master reload (guard against old being NULL) */
        if ( (!old || old->type != ACPI_STATE_C3) && power->flags.bm_check )
            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
        break;
    }

    power->state = new;

    return;
}

static void acpi_safe_halt(void)
{
    smp_mb__after_clear_bit();
    safe_halt();
}

#define MWAIT_ECX_INTERRUPT_BREAK (0x1)

static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
    __monitor((void *)current, 0, 0);
    smp_mb();
    __mwait(eax, ecx);
}
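
/*
 * For FFH (MWAIT-based) C-states, cx->address is not an I/O port but the
 * MWAIT hint taken from the _CST register descriptor; it is passed to
 * MWAIT in EAX, while ECX requests break-on-interrupt so the CPU wakes
 * even though interrupts are disabled here.
 */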
static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
{
    mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK);
}

static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
    if ( cx->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE )
    {
        /* Call into architectural FFH based C-state */
        acpi_processor_ffh_cstate_enter(cx);
    }
    else
    {
        int unused;
        /* IO port based C-state */
        inb(cx->address);
        /* Dummy wait op - must do something useless after P_LVL2 read
           because chipsets cannot guarantee that STPCLK# signal
           gets asserted in time to freeze execution properly. */
        unused = inl(pmtmr_ioport);
    }
}

static struct {
    spinlock_t lock;
    unsigned int count;
} c3_cpu_status = { .lock = SPIN_LOCK_UNLOCKED };
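
/*
 * c3_cpu_status.count tracks how many CPUs are currently in the C3 path.
 * Bus-master arbitration is disabled (ARB_DIS) only once the last online
 * CPU joins, and re-enabled as soon as the first one leaves, so DMA is
 * never blocked while any CPU is still running.
 */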

static void acpi_processor_idle(void)
{
    struct acpi_processor_power *power = NULL;
    struct acpi_processor_cx *cx = NULL;
    struct acpi_processor_cx *next_state = NULL;
    int sleep_ticks = 0;
    u32 t1, t2 = 0;

    power = &processor_powers[smp_processor_id()];

    /*
     * Interrupts must be disabled during bus mastering calculations and
     * for C2/C3 transitions.
     */
    local_irq_disable();

    if ( softirq_pending(smp_processor_id()) )
    {
        local_irq_enable();
        return;
    }

    cx = power->state;
    if ( !cx )
    {
        if ( pm_idle_save )
        {
            printk(XENLOG_DEBUG "call pm_idle_save()\n");
            pm_idle_save();
        }
        else
        {
            printk(XENLOG_DEBUG "call acpi_safe_halt()\n");
            acpi_safe_halt();
        }
        return;
    }

    /*
     * Check BM Activity
     * -----------------
     * Check for bus mastering activity (if required), record, and check
     * for demotion.
     */
    if ( power->flags.bm_check )
    {
        u32 bm_status = 0;
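        /*
         * NOW() is in nanoseconds, so the >> 23 below ages the history in
         * roughly 8.4ms steps (2^23 ns), approximating one bit per "jiffy"
         * as described in the bm_history comment; stale activity bits are
         * shifted out accordingly.
         */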
        unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;

        if ( diff > 31 )
            diff = 31;

        power->bm_activity <<= diff;

        acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
        if ( bm_status )
        {
            power->bm_activity |= 0x1;
            acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
        }
        /*
         * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
         * the true state of bus mastering activity; forcing us to
         * manually check the BMIDEA bit of each IDE channel.
         */
        /*else if ( errata.piix4.bmisx )
        {
            if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01)
                || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
                pr->power.bm_activity |= 0x1;
        }*/

        power->bm_check_timestamp = NOW();

        /*
         * If bus mastering is or was active this jiffy, demote
         * to avoid a faulty transition. Note that the processor
         * won't enter a low-power state during this call (to this
         * function) but should upon the next.
         *
         * TBD: A better policy might be to fallback to the demotion
         *      state (use it for this quantum only) instead of
         *      demoting -- and rely on duration as our sole demotion
         *      qualification. This may, however, introduce DMA
         *      issues (e.g. floppy DMA transfer overrun/underrun).
         */
        if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
        {
            local_irq_enable();
            next_state = cx->demotion.state;
            goto end;
        }
    }

    /*
     * Sleep:
     * ------
     * Invoke the current Cx state to put the processor to sleep.
     */
    if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
        smp_mb__after_clear_bit();

    switch ( cx->type )
    {
    case ACPI_STATE_C1:
        /*
         * Invoke C1.
         * Use the appropriate idle routine, the one that would
         * be used without acpi C-states.
         */
        if ( pm_idle_save )
            pm_idle_save();
        else
            acpi_safe_halt();

        /*
         * TBD: Can't get time duration while in C1, as resumes
         *      go to an ISR rather than here. Need to instrument
         *      base interrupt handler.
         */
        sleep_ticks = 0xFFFFFFFF;
        break;

    case ACPI_STATE_C2:
        /* Get start time (ticks) */
        t1 = inl(pmtmr_ioport);
        /* Invoke C2 */
        acpi_idle_do_entry(cx);
        /* Get end time (ticks) */
        t2 = inl(pmtmr_ioport);

        /* Re-enable interrupts */
        local_irq_enable();
        /* Compute time (ticks) that we were actually asleep */
        sleep_ticks = ticks_elapsed(t1, t2);
        break;

    case ACPI_STATE_C3:
        /*
         * disable bus master
         * bm_check implies we need ARB_DIS
         * !bm_check implies we need cache flush
         * bm_control implies whether we can do ARB_DIS
         *
         * That leaves a case where bm_check is set and bm_control is
         * not set. In that case we cannot do much, we enter C3
         * without doing anything.
         */
        if ( power->flags.bm_check && power->flags.bm_control )
        {
            spin_lock(&c3_cpu_status.lock);
            if ( ++c3_cpu_status.count == num_online_cpus() )
            {
                /*
                 * All CPUs are trying to go to C3
                 * Disable bus master arbitration
                 */
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
            }
            spin_unlock(&c3_cpu_status.lock);
        }
        else if ( !power->flags.bm_check )
        {
            /* SMP with no shared cache... Invalidate cache */
            ACPI_FLUSH_CPU_CACHE();
        }

        /*
         * Before invoking C3, be aware that the TSC and the local APIC
         * timer may be stopped by hardware. Without careful handling of
         * these stops, deep C-states cannot work correctly.
         */
        /* preparing TSC stop */
        cstate_save_tsc();
        /* preparing APIC stop */
        lapic_timer_off();

        /* Get start time (ticks) */
        t1 = inl(pmtmr_ioport);
        /* Invoke C3 */
        acpi_idle_do_entry(cx);
        /* Get end time (ticks) */
        t2 = inl(pmtmr_ioport);

        /* recovering TSC */
        cstate_restore_tsc();

        if ( power->flags.bm_check && power->flags.bm_control )
        {
            /* Enable bus master arbitration */
            spin_lock(&c3_cpu_status.lock);
            if ( c3_cpu_status.count-- == num_online_cpus() )
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
            spin_unlock(&c3_cpu_status.lock);
        }

        /* Re-enable interrupts */
        local_irq_enable();
        /* recovering APIC */
        lapic_timer_on();
        /* Compute time (ticks) that we were actually asleep */
        sleep_ticks = ticks_elapsed(t1, t2);

        break;

    default:
        local_irq_enable();
        return;
    }

    cx->usage++;
    if ( sleep_ticks > 0 )
        cx->time += sleep_ticks;

    next_state = power->state;

    /*
     * Promotion?
     * ----------
     * Track the number of longs (time asleep is greater than threshold)
     * and promote when the count threshold is reached. Note that bus
     * mastering activity may prevent promotions.
     * Do not promote above max_cstate.
     */
    if ( cx->promotion.state &&
         ((cx->promotion.state - power->states) <= max_cstate) )
    {
        if ( sleep_ticks > cx->promotion.threshold.ticks )
        {
            cx->promotion.count++;
            cx->demotion.count = 0;
            if ( cx->promotion.count >= cx->promotion.threshold.count )
            {
                if ( power->flags.bm_check )
                {
                    if ( !(power->bm_activity & cx->promotion.threshold.bm) )
                    {
                        next_state = cx->promotion.state;
                        goto end;
                    }
                }
                else
                {
                    next_state = cx->promotion.state;
                    goto end;
                }
            }
        }
    }

    /*
     * Demotion?
     * ---------
     * Track the number of shorts (time asleep is less than time threshold)
     * and demote when the usage threshold is reached.
     */
    if ( cx->demotion.state )
    {
        if ( sleep_ticks < cx->demotion.threshold.ticks )
        {
            cx->demotion.count++;
            cx->promotion.count = 0;
            if ( cx->demotion.count >= cx->demotion.threshold.count )
            {
                next_state = cx->demotion.state;
                goto end;
            }
        }
    }

end:
    /*
     * Demote if current state exceeds max_cstate
     */
    if ( (power->state - power->states) > max_cstate )
    {
        if ( cx->demotion.state )
            next_state = cx->demotion.state;
    }

    /*
     * New Cx State?
     * -------------
     * If we're going to start using a new Cx state we must clean up
     * from the previous and prepare to use the new.
     */
    if ( next_state != power->state )
        acpi_processor_power_activate(power, next_state);
}

static int acpi_processor_set_power_policy(struct acpi_processor_power *power)
{
    unsigned int i;
    unsigned int state_is_set = 0;
    struct acpi_processor_cx *lower = NULL;
    struct acpi_processor_cx *higher = NULL;
    struct acpi_processor_cx *cx;

    if ( !power )
        return -EINVAL;

    /*
     * This function sets the default Cx state policy (OS idle handler).
     * Our scheme is to promote quickly to C2 but more conservatively
     * to C3. We're favoring C2 for its characteristics of low latency
     * (quick response), good power savings, and ability to allow bus
     * mastering activity. Note that the Cx state policy is completely
     * customizable and can be altered dynamically.
     */
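
    /*
     * Concretely, with the thresholds set up below: a state promotes to the
     * next deeper valid state after repeated sleeps longer than its own
     * latency (10 in a row from C1, 4 from C2 or deeper), while a single
     * sleep shorter than its latency is enough to demote one level; C3
     * transitions additionally consult the bus-master activity history.
     */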

    /* startup state */
    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
    {
        cx = &power->states[i];
        if ( !cx->valid )
            continue;

        if ( !state_is_set )
            power->state = cx;
        state_is_set++;
        break;
    }

    if ( !state_is_set )
        return -ENODEV;

    /* demotion */
    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
    {
        cx = &power->states[i];
        if ( !cx->valid )
            continue;

        if ( lower )
        {
            cx->demotion.state = lower;
            cx->demotion.threshold.ticks = cx->latency_ticks;
            cx->demotion.threshold.count = 1;
            if ( cx->type == ACPI_STATE_C3 )
                cx->demotion.threshold.bm = bm_history;
        }

        lower = cx;
    }

    /* promotion */
    for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- )
    {
        cx = &power->states[i];
        if ( !cx->valid )
            continue;

        if ( higher )
        {
            cx->promotion.state = higher;
            cx->promotion.threshold.ticks = cx->latency_ticks;
            if ( cx->type >= ACPI_STATE_C2 )
                cx->promotion.threshold.count = 4;
            else
                cx->promotion.threshold.count = 10;
            if ( higher->type == ACPI_STATE_C3 )
                cx->promotion.threshold.bm = bm_history;
        }

        higher = cx;
    }

    return 0;
}

static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
{
    memset(acpi_power, 0, sizeof(*acpi_power));

    acpi_power->states[ACPI_STATE_C1].type = ACPI_STATE_C1;

    acpi_power->states[ACPI_STATE_C0].valid = 1;
    acpi_power->states[ACPI_STATE_C1].valid = 1;

    acpi_power->count = 2;

    return 0;
}

#define CPUID_MWAIT_LEAF (5)
#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
#define CPUID5_ECX_INTERRUPT_BREAK (0x2)

#define MWAIT_ECX_INTERRUPT_BREAK (0x1)

#define MWAIT_SUBSTATE_MASK (0xf)
#define MWAIT_SUBSTATE_SIZE (4)

static int acpi_processor_ffh_cstate_probe(xen_processor_cx_t *cx)
{
    struct cpuinfo_x86 *c = &current_cpu_data;
    unsigned int eax, ebx, ecx, edx;
    unsigned int edx_part;
    unsigned int cstate_type; /* C-state type and not ACPI C-state type */
    unsigned int num_cstate_subtype;

    if ( c->cpuid_level < CPUID_MWAIT_LEAF )
    {
        printk(XENLOG_INFO "MWAIT leaf not supported by cpuid\n");
        return -EFAULT;
    }

    cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
    printk(XENLOG_DEBUG "cpuid.MWAIT[.eax=%x, .ebx=%x, .ecx=%x, .edx=%x]\n",
           eax, ebx, ecx, edx);

    /* Check whether this particular cx_type (in CST) is supported or not */
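    /*
     * Illustrative decoding: an FFH _CST entry with reg.address 0x10 yields
     * cstate_type 2, so EDX bits [11:8] of CPUID leaf 5 give the number of
     * MWAIT sub-states available for it, and the low nibble of the address
     * (here 0) must not exceed that count.
     */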
    cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1;
    edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
    num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;

    if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) )
        return -EFAULT;

    /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
    if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
         !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
        return -EFAULT;

    printk(XENLOG_INFO "Monitor-Mwait will be used to enter C-%d state\n", cx->type);
    return 0;
}

/*
 * Initialize bm_flags based on the CPU cache properties
 * On SMP it depends on cache configuration
 * - When cache is not shared among all CPUs, we flush cache
 *   before entering C3.
 * - When cache is shared among all CPUs, we use bm_check
 *   mechanism as in UP case
 *
 * This routine is called only after all the CPUs are online
 */
static void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags)
{
    struct cpuinfo_x86 *c = &current_cpu_data;

    flags->bm_check = 0;
    if ( num_online_cpus() == 1 )
        flags->bm_check = 1;
    else if ( c->x86_vendor == X86_VENDOR_INTEL )
    {
        /*
         * Today all CPUs that support C3 share cache.
         * TBD: This needs to look at cache shared map, once
         * multi-core detection patch makes to the base.
         */
        flags->bm_check = 1;
    }
}

#define VENDOR_INTEL (1)
#define NATIVE_CSTATE_BEYOND_HALT (2)

static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx)
{
    static int bm_check_flag;

    switch ( cx->reg.space_id )
    {
    case ACPI_ADR_SPACE_SYSTEM_IO:
        if ( cx->reg.address == 0 )
            return -EINVAL;
        break;

    case ACPI_ADR_SPACE_FIXED_HARDWARE:
        if ( cx->type > ACPI_STATE_C1 )
        {
            if ( cx->reg.bit_width != VENDOR_INTEL ||
                 cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
                return -EINVAL;

            /* assume all logical CPUs have the same support for MWAIT */
            if ( acpi_processor_ffh_cstate_probe(cx) )
                return -EINVAL;
        }
        break;

    default:
        return -ENODEV;
    }

    if ( cx->type == ACPI_STATE_C3 )
    {
        /* We must be able to use HPET in place of LAPIC timers. */
        if ( hpet_broadcast_is_available() )
        {
            lapic_timer_off = hpet_broadcast_enter;
            lapic_timer_on = hpet_broadcast_exit;
        }
        else if ( pit_broadcast_is_available() )
        {
            lapic_timer_off = pit_broadcast_enter;
            lapic_timer_on = pit_broadcast_exit;
        }
        else
        {
            return -EINVAL;
        }

        /* All the logic here assumes flags.bm_check is same across all CPUs */
        if ( !bm_check_flag )
        {
            /* Determine whether bm_check is needed based on CPU */
            acpi_processor_power_init_bm_check(&(power->flags));
            bm_check_flag = power->flags.bm_check;
        }
        else
        {
            power->flags.bm_check = bm_check_flag;
        }

        if ( power->flags.bm_check )
        {
            if ( !power->flags.bm_control )
            {
                if ( power->flags.has_cst != 1 )
                {
                    /* bus mastering control is necessary */
                    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                        "C3 support requires BM control\n"));
                    return -EINVAL;
                }
                else
                {
                    /* Here we enter C3 without bus mastering */
                    ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                        "C3 support without BM control\n"));
                }
            }
        }
        else
        {
            /*
             * WBINVD should be set in fadt, for C3 state to be
             * supported when bm_check is not required.
             */
            if ( !(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD) )
            {
                ACPI_DEBUG_PRINT((ACPI_DB_INFO,
                    "Cache invalidation should work properly"
                    " for C3 to be enabled on SMP systems\n"));
                return -EINVAL;
            }
            acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
        }
    }

    return 0;
}

static void set_cx(
    struct acpi_processor_power *acpi_power,
    xen_processor_cx_t *xen_cx)
{
    struct acpi_processor_cx *cx;

    if ( check_cx(acpi_power, xen_cx) != 0 )
        return;

    cx = &acpi_power->states[xen_cx->type];
    if ( !cx->valid )
        acpi_power->count++;

    cx->valid = 1;
    cx->type = xen_cx->type;
    cx->address = xen_cx->reg.address;
    cx->space_id = xen_cx->reg.space_id;
    cx->latency = xen_cx->latency;
    cx->power = xen_cx->power;
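
    /*
     * Cache the exit latency in PM-timer ticks; the promotion/demotion
     * thresholds compare measured sleep_ticks against this value.
     */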
    cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
}

int get_cpu_id(u8 acpi_id)
{
    int i;
    u8 apic_id;

    apic_id = x86_acpiid_to_apicid[acpi_id];
    if ( apic_id == 0xff )
        return -1;

    for ( i = 0; i < NR_CPUS; i++ )
    {
        if ( apic_id == x86_cpu_to_apicid[i] )
            return i;
    }

    return -1;
}

#ifdef DEBUG_PM_CX
static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
{
    XEN_GUEST_HANDLE(xen_processor_cx_t) states;
    xen_processor_cx_t state;
    XEN_GUEST_HANDLE(xen_processor_csd_t) csd;
    xen_processor_csd_t dp;
    uint32_t i;

    printk("cpu%d cx acpi info:\n", cpu);
    printk("\tcount = %d\n", power->count);
    printk("\tflags: bm_cntl[%d], bm_chk[%d], has_cst[%d],\n"
           "\t pwr_setup_done[%d], bm_rld_set[%d]\n",
           power->flags.bm_control, power->flags.bm_check, power->flags.has_cst,
           power->flags.power_setup_done, power->flags.bm_rld_set);

    states = power->states;

    for ( i = 0; i < power->count; i++ )
    {
        if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) )
            return;

        printk("\tstates[%d]:\n", i);
        printk("\t\treg.space_id = 0x%x\n", state.reg.space_id);
        printk("\t\treg.bit_width = 0x%x\n", state.reg.bit_width);
        printk("\t\treg.bit_offset = 0x%x\n", state.reg.bit_offset);
        printk("\t\treg.access_size = 0x%x\n", state.reg.access_size);
        printk("\t\treg.address = 0x%"PRIx64"\n", state.reg.address);
        printk("\t\ttype = %d\n", state.type);
        printk("\t\tlatency = %d\n", state.latency);
        printk("\t\tpower = %d\n", state.power);

        csd = state.dp;
        printk("\t\tdp(@0x%p)\n", csd.p);

        if ( csd.p != NULL )
        {
            if ( unlikely(copy_from_guest(&dp, csd, 1)) )
                return;
            printk("\t\t\tdomain = %d\n", dp.domain);
            printk("\t\t\tcoord_type = %d\n", dp.coord_type);
            printk("\t\t\tnum = %d\n", dp.num);
        }
    }
}
#else
#define print_cx_pminfo(c, p)
#endif

long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
{
    XEN_GUEST_HANDLE(xen_processor_cx_t) states;
    xen_processor_cx_t xen_cx;
    struct acpi_processor_power *acpi_power;
    int cpu_id, i;

    if ( unlikely(!guest_handle_okay(power->states, power->count)) )
        return -EFAULT;

    print_cx_pminfo(cpu, power);

    /* map from acpi_id to cpu_id */
    cpu_id = get_cpu_id((u8)cpu);
    if ( cpu_id == -1 )
    {
        printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu);
        return -EFAULT;
    }

    acpi_power = &processor_powers[cpu_id];

    init_cx_pminfo(acpi_power);

    acpi_power->flags.bm_check = power->flags.bm_check;
    acpi_power->flags.bm_control = power->flags.bm_control;
    acpi_power->flags.has_cst = power->flags.has_cst;

    states = power->states;

    for ( i = 0; i < power->count; i++ )
    {
        if ( unlikely(copy_from_guest_offset(&xen_cx, states, i, 1)) )
            return -EFAULT;

        set_cx(acpi_power, &xen_cx);
    }

    /* FIXME: C-state dependency is not supported so far */

    /* initialize default policy */
    acpi_processor_set_power_policy(acpi_power);

    print_acpi_power(cpu_id, acpi_power);
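
    /*
     * The ACPI idle handler is installed only once, when CPU0's C-state
     * information arrives; the previous pm_idle handler is saved so it can
     * still serve as the C1 fallback in acpi_processor_idle().
     */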
    if ( cpu_id == 0 && pm_idle_save == NULL )
    {
        pm_idle_save = pm_idle;
        pm_idle = acpi_processor_idle;
    }

    return 0;
}

uint32_t pmstat_get_cx_nr(uint32_t cpuid)
{
    return processor_powers[cpuid].count;
}

int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
{
    struct acpi_processor_power *power = &processor_powers[cpuid];
    struct vcpu *v = idle_vcpu[cpuid];
    uint64_t usage;
    int i;

    stat->last = (power->state) ? power->state->type : 0;
    stat->nr = processor_powers[cpuid].count;
    stat->idle_time = v->runstate.time[RUNSTATE_running];
    if ( v->is_running )
        stat->idle_time += NOW() - v->runstate.state_entry_time;

    for ( i = 0; i < power->count; i++ )
    {
        usage = power->states[i].usage;
        if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) )
            return -EFAULT;
    }
    for ( i = 0; i < power->count; i++ )
        if ( copy_to_guest_offset(stat->residencies, i,
                                  &power->states[i].time, 1) )
            return -EFAULT;

    return 0;
}

int pmstat_reset_cx_stat(uint32_t cpuid)
{
    return 0;
}