ia64/xen-unstable

view xen/arch/x86/acpi/cpu_idle.c @ 18051:79517ed2a108

x86: PIT broadcast to fix local APIC timer stop issue for Deep C state

The local APIC timer may stop on entry to a deep C state (C3/C4...).
The initial HPET broadcast implementation, which worked in legacy
replacement mode, broke the RTC interrupt and so was bypassed. This
patch adds logic that uses the platform timer (PIT) to re-enable the
local APIC timer on C state entry/exit.

Currently the PIT is simply kept enabled at 100Hz. The next step is to
enable/disable the PIT dynamically, only while it is needed, and to run
it at a lower frequency. (See the sketch after the changeset header
below.)

Signed-off-by: Yu Ke <ke.yu@intel.com>
Signed-off-by: Tian Kevin <kevin.tian@intel.com>
Signed-off-by: Wei Gang <gang.wei@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jul 14 10:43:32 2008 +0100 (2008-07-14)
parents 8182f5158dc2
children bed1b98b63cc
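
For orientation, a minimal sketch of the broadcast-hook scheme this patch
introduces, using the hook names that appear in the source below. The
wrapper functions choose_broadcast_source() and enter_deep_cstate() are
illustrative only: the real selection is done in check_cx() and the real
entry path is in acpi_processor_idle().

/* Illustrative sketch -- simplified, not the exact control flow. */
static void (*lapic_timer_off)(void);
static void (*lapic_timer_on)(void);

static void choose_broadcast_source(void)
{
    if ( hpet_broadcast_is_available() )      /* preferred when usable */
    {
        lapic_timer_off = hpet_broadcast_enter;
        lapic_timer_on  = hpet_broadcast_exit;
    }
    else if ( pit_broadcast_is_available() )  /* fall back to the 100Hz PIT */
    {
        lapic_timer_off = pit_broadcast_enter;
        lapic_timer_on  = pit_broadcast_exit;
    }
}

static void enter_deep_cstate(struct acpi_processor_cx *cx)
{
    cstate_save_tsc();       /* the TSC may also stop across C3 */
    lapic_timer_off();       /* route wakeups to the platform timer broadcast */
    acpi_idle_do_entry(cx);  /* enter C3 (I/O port read or MWAIT) */
    cstate_restore_tsc();    /* recover the TSC */
    lapic_timer_on();        /* the local APIC timer is usable again */
}
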
line source
1 /*
2 * cpu_idle - xen idle state module derived from Linux
3 * drivers/acpi/processor_idle.c &
4 * arch/x86/kernel/acpi/cstate.c
5 *
6 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
7 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
8 * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
9 * Copyright (C) 2004 Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
10 * - Added processor hotplug support
11 * Copyright (C) 2005 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
12 * - Added support for C3 on SMP
13 * Copyright (C) 2007, 2008 Intel Corporation
14 *
15 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or (at
20 * your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful, but
23 * WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
25 * General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License along
28 * with this program; if not, write to the Free Software Foundation, Inc.,
29 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
30 *
31 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
32 */
34 #include <xen/config.h>
35 #include <xen/errno.h>
36 #include <xen/lib.h>
37 #include <xen/types.h>
38 #include <xen/acpi.h>
39 #include <xen/smp.h>
40 #include <xen/guest_access.h>
41 #include <xen/keyhandler.h>
42 #include <asm/cache.h>
43 #include <asm/io.h>
44 #include <asm/hpet.h>
45 #include <asm/processor.h>
46 #include <public/platform.h>
47 #include <public/sysctl.h>
49 #define DEBUG_PM_CX
51 #define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
52 #define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
53 #define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
55 #define ACPI_PROCESSOR_MAX_POWER 8
56 #define ACPI_PROCESSOR_MAX_C2_LATENCY 100
57 #define ACPI_PROCESSOR_MAX_C3_LATENCY 1000
59 static void (*lapic_timer_off)(void);
60 static void (*lapic_timer_on)(void);
62 extern u32 pmtmr_ioport;
63 extern void (*pm_idle) (void);
65 static void (*pm_idle_save) (void) __read_mostly;
66 unsigned int max_cstate __read_mostly = 2;
67 integer_param("max_cstate", max_cstate);
68 /*
69 * bm_history -- bit-mask with a bit per jiffy of bus-master activity
70 * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
71 * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
72 * 100 HZ: 0x0000000F: 4 jiffies = 40ms
73 * reduce history for more aggressive entry into C3
74 */
75 unsigned int bm_history __read_mostly =
76 (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
77 integer_param("bm_history", bm_history);
79 struct acpi_processor_cx;
81 struct acpi_processor_cx_policy
82 {
83 u32 count;
84 struct acpi_processor_cx *state;
85 struct
86 {
87 u32 time;
88 u32 ticks;
89 u32 count;
90 u32 bm;
91 } threshold;
92 };
94 struct acpi_processor_cx
95 {
96 u8 valid;
97 u8 type;
98 u32 address;
99 u8 space_id;
100 u32 latency;
101 u32 latency_ticks;
102 u32 power;
103 u32 usage;
104 u64 time;
105 struct acpi_processor_cx_policy promotion;
106 struct acpi_processor_cx_policy demotion;
107 };
109 struct acpi_processor_flags
110 {
111 u8 bm_control:1;
112 u8 bm_check:1;
113 u8 has_cst:1;
114 u8 power_setup_done:1;
115 u8 bm_rld_set:1;
116 };
118 struct acpi_processor_power
119 {
120 struct acpi_processor_flags flags;
121 struct acpi_processor_cx *state;
122 s_time_t bm_check_timestamp;
123 u32 default_state;
124 u32 bm_activity;
125 u32 count;
126 struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
127 };
129 static struct acpi_processor_power processor_powers[NR_CPUS];
131 static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
132 {
133 uint32_t i;
135 printk("==cpu%d==\n", cpu);
136 printk("active state:\t\tC%d\n", (power->state)?power->state->type:-1);
137 printk("max_cstate:\t\tC%d\n", max_cstate);
138 printk("bus master activity:\t%08x\n", power->bm_activity);
139 printk("states:\n");
141 for ( i = 1; i < power->count; i++ )
142 {
143 printk((power->states[i].type == power->state->type) ? " *" : " ");
144 printk("C%d:\t\t", i);
145 printk("type[C%d] ", power->states[i].type);
146 if ( power->states[i].promotion.state )
147 printk("promotion[C%d] ", power->states[i].promotion.state->type);
148 else
149 printk("promotion[--] ");
150 if ( power->states[i].demotion.state )
151 printk("demotion[C%d] ", power->states[i].demotion.state->type);
152 else
153 printk("demotion[--] ");
154 printk("latency[%03d]\n ", power->states[i].latency);
155 printk("\t\t\t");
156 printk("usage[%08d] ", power->states[i].usage);
157 printk("duration[%"PRId64"]\n", power->states[i].time);
158 }
159 }
161 static void dump_cx(unsigned char key)
162 {
163 for( int i = 0; i < num_online_cpus(); i++ )
164 print_acpi_power(i, &processor_powers[i]);
165 }
167 static int __init cpu_idle_key_init(void)
168 {
169 register_keyhandler(
170 'c', dump_cx, "dump cx structures");
171 return 0;
172 }
173 __initcall(cpu_idle_key_init);
175 static inline u32 ticks_elapsed(u32 t1, u32 t2)
176 {
177 if ( t2 >= t1 )
178 return (t2 - t1);
179 else if ( !(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER) )
180 return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
181 else
182 return ((0xFFFFFFFF - t1) + t2);
183 }
185 static void acpi_processor_power_activate(struct acpi_processor_power *power,
186 struct acpi_processor_cx *new)
187 {
188 struct acpi_processor_cx *old;
190 if ( !power || !new )
191 return;
193 old = power->state;
195 if ( old )
196 old->promotion.count = 0;
197 new->demotion.count = 0;
199 /* Cleanup from old state. */
200 if ( old )
201 {
202 switch ( old->type )
203 {
204 case ACPI_STATE_C3:
205 /* Disable bus master reload */
206 if ( new->type != ACPI_STATE_C3 && power->flags.bm_check )
207 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
208 break;
209 }
210 }
212 /* Prepare to use new state. */
213 switch ( new->type )
214 {
215 case ACPI_STATE_C3:
216 /* Enable bus master reload */
217 if ( old->type != ACPI_STATE_C3 && power->flags.bm_check )
218 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
219 break;
220 }
222 power->state = new;
224 return;
225 }
227 static void acpi_safe_halt(void)
228 {
229 smp_mb__after_clear_bit();
230 safe_halt();
231 }
233 #define MWAIT_ECX_INTERRUPT_BREAK (0x1)
235 static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
236 {
237 __monitor((void *)current, 0, 0);
238 smp_mb();
239 __mwait(eax, ecx);
240 }
242 static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
243 {
244 mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK);
245 }
247 static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
248 {
249 if ( cx->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE )
250 {
251 /* Call into architectural FFH based C-state */
252 acpi_processor_ffh_cstate_enter(cx);
253 }
254 else
255 {
256 int unused;
257 /* IO port based C-state */
258 inb(cx->address);
259 /* Dummy wait op - must do something useless after P_LVL2 read
260 because chipsets cannot guarantee that STPCLK# signal
261 gets asserted in time to freeze execution properly. */
262 unused = inl(pmtmr_ioport);
263 }
264 }
266 static atomic_t c3_cpu_count;
268 static void acpi_processor_idle(void)
269 {
270 struct acpi_processor_power *power = NULL;
271 struct acpi_processor_cx *cx = NULL;
272 struct acpi_processor_cx *next_state = NULL;
273 int sleep_ticks = 0;
274 u32 t1, t2 = 0;
276 power = &processor_powers[smp_processor_id()];
278 /*
279 * Interrupts must be disabled during bus mastering calculations and
280 * for C2/C3 transitions.
281 */
282 local_irq_disable();
284 if ( softirq_pending(smp_processor_id()) )
285 {
286 local_irq_enable();
287 return;
288 }
290 cx = power->state;
291 if ( !cx )
292 {
293 if ( pm_idle_save )
294 {
295 printk(XENLOG_DEBUG "call pm_idle_save()\n");
296 pm_idle_save();
297 }
298 else
299 {
300 printk(XENLOG_DEBUG "call acpi_safe_halt()\n");
301 acpi_safe_halt();
302 }
303 return;
304 }
306 /*
307 * Check BM Activity
308 * -----------------
309 * Check for bus mastering activity (if required), record, and check
310 * for demotion.
311 */
312 if ( power->flags.bm_check )
313 {
314 u32 bm_status = 0;
315 unsigned long diff = (NOW() - power->bm_check_timestamp) >> 23;
317 if ( diff > 31 )
318 diff = 31;
320 power->bm_activity <<= diff;
322 acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
323 if ( bm_status )
324 {
325 power->bm_activity |= 0x1;
326 acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
327 }
328 /*
329 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
330 * the true state of bus mastering activity; forcing us to
331 * manually check the BMIDEA bit of each IDE channel.
332 */
333 /*else if ( errata.piix4.bmisx )
334 {
335 if ( (inb_p(errata.piix4.bmisx + 0x02) & 0x01)
336 || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01) )
337 pr->power.bm_activity |= 0x1;
338 }*/
340 power->bm_check_timestamp = NOW();
342 /*
343 * If bus mastering is or was active this jiffy, demote
344 * to avoid a faulty transition. Note that the processor
345 * won't enter a low-power state during this call (to this
346 * function) but should upon the next.
347 *
348 * TBD: A better policy might be to fall back to the demotion
349 * state (use it for this quantum only) instead of
350 * demoting -- and rely on duration as our sole demotion
351 * qualification. This may, however, introduce DMA
352 * issues (e.g. floppy DMA transfer overrun/underrun).
353 */
354 if ( (power->bm_activity & 0x1) && cx->demotion.threshold.bm )
355 {
356 local_irq_enable();
357 next_state = cx->demotion.state;
358 goto end;
359 }
360 }
362 /*
363 * Sleep:
364 * ------
365 * Invoke the current Cx state to put the processor to sleep.
366 */
367 if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
368 smp_mb__after_clear_bit();
370 switch ( cx->type )
371 {
372 case ACPI_STATE_C1:
373 /*
374 * Invoke C1.
375 * Use the appropriate idle routine, the one that would
376 * be used without acpi C-states.
377 */
378 if ( pm_idle_save )
379 pm_idle_save();
380 else
381 acpi_safe_halt();
383 /*
384 * TBD: Can't get time duration while in C1, as resumes
385 * go to an ISR rather than here. Need to instrument
386 * base interrupt handler.
387 */
388 sleep_ticks = 0xFFFFFFFF;
389 break;
391 case ACPI_STATE_C2:
392 /* Get start time (ticks) */
393 t1 = inl(pmtmr_ioport);
394 /* Invoke C2 */
395 acpi_idle_do_entry(cx);
396 /* Get end time (ticks) */
397 t2 = inl(pmtmr_ioport);
399 /* Re-enable interrupts */
400 local_irq_enable();
401 /* Compute time (ticks) that we were actually asleep */
402 sleep_ticks =
403 ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
404 break;
406 case ACPI_STATE_C3:
407 /*
408 * disable bus master
409 * bm_check implies we need ARB_DIS
410 * !bm_check implies we need cache flush
411 * bm_control implies whether we can do ARB_DIS
412 *
413 * That leaves a case where bm_check is set and bm_control is
414 * not set. In that case we cannot do much, we enter C3
415 * without doing anything.
416 */
417 if ( power->flags.bm_check && power->flags.bm_control )
418 {
419 atomic_inc(&c3_cpu_count);
420 if ( atomic_read(&c3_cpu_count) == num_online_cpus() )
421 {
422 /*
423 * All CPUs are trying to go to C3
424 * Disable bus master arbitration
425 */
426 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
427 }
428 }
429 else if ( !power->flags.bm_check )
430 {
431 /* SMP with no shared cache... Invalidate cache */
432 ACPI_FLUSH_CPU_CACHE();
433 }
435 /*
436 * Before invoking C3, be aware that TSC/APIC timer may be
437 * stopped by H/W. Without careful handling of TSC/APIC stop issues,
438 * deep C state can't work correctly.
439 */
440 /* preparing TSC stop */
441 cstate_save_tsc();
442 /* preparing APIC stop */
443 lapic_timer_off();
445 /* Get start time (ticks) */
446 t1 = inl(pmtmr_ioport);
447 /* Invoke C3 */
448 acpi_idle_do_entry(cx);
449 /* Get end time (ticks) */
450 t2 = inl(pmtmr_ioport);
452 /* recovering TSC */
453 cstate_restore_tsc();
455 if ( power->flags.bm_check && power->flags.bm_control )
456 {
457 /* Enable bus master arbitration */
458 atomic_dec(&c3_cpu_count);
459 acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
460 }
462 /* Re-enable interrupts */
463 local_irq_enable();
464 /* recovering APIC */
465 lapic_timer_on();
466 /* Compute time (ticks) that we were actually asleep */
467 sleep_ticks = ticks_elapsed(t1, t2);
468 /* Do not account our idle-switching overhead: */
469 sleep_ticks -= cx->latency_ticks + C3_OVERHEAD;
471 break;
473 default:
474 local_irq_enable();
475 return;
476 }
478 cx->usage++;
479 if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) )
480 cx->time += sleep_ticks;
482 next_state = power->state;
484 /*
485 * Promotion?
486 * ----------
487 * Track the number of longs (time asleep is greater than threshold)
488 * and promote when the count threshold is reached. Note that bus
489 * mastering activity may prevent promotions.
490 * Do not promote above max_cstate.
491 */
492 if ( cx->promotion.state &&
493 ((cx->promotion.state - power->states) <= max_cstate) )
494 {
495 if ( sleep_ticks > cx->promotion.threshold.ticks )
496 {
497 cx->promotion.count++;
498 cx->demotion.count = 0;
499 if ( cx->promotion.count >= cx->promotion.threshold.count )
500 {
501 if ( power->flags.bm_check )
502 {
503 if ( !(power->bm_activity & cx->promotion.threshold.bm) )
504 {
505 next_state = cx->promotion.state;
506 goto end;
507 }
508 }
509 else
510 {
511 next_state = cx->promotion.state;
512 goto end;
513 }
514 }
515 }
516 }
518 /*
519 * Demotion?
520 * ---------
521 * Track the number of shorts (time asleep is less than time threshold)
522 * and demote when the usage threshold is reached.
523 */
524 if ( cx->demotion.state )
525 {
526 if ( sleep_ticks < cx->demotion.threshold.ticks )
527 {
528 cx->demotion.count++;
529 cx->promotion.count = 0;
530 if ( cx->demotion.count >= cx->demotion.threshold.count )
531 {
532 next_state = cx->demotion.state;
533 goto end;
534 }
535 }
536 }
538 end:
539 /*
540 * Demote if current state exceeds max_cstate
541 */
542 if ( (power->state - power->states) > max_cstate )
543 {
544 if ( cx->demotion.state )
545 next_state = cx->demotion.state;
546 }
548 /*
549 * New Cx State?
550 * -------------
551 * If we're going to start using a new Cx state we must clean up
552 * from the previous and prepare to use the new.
553 */
554 if ( next_state != power->state )
555 acpi_processor_power_activate(power, next_state);
556 }
558 static int acpi_processor_set_power_policy(struct acpi_processor_power *power)
559 {
560 unsigned int i;
561 unsigned int state_is_set = 0;
562 struct acpi_processor_cx *lower = NULL;
563 struct acpi_processor_cx *higher = NULL;
564 struct acpi_processor_cx *cx;
566 if ( !power )
567 return -EINVAL;
569 /*
570 * This function sets the default Cx state policy (OS idle handler).
571 * Our scheme is to promote quickly to C2 but more conservatively
572 * to C3. We're favoring C2 for its characteristics of low latency
573 * (quick response), good power savings, and ability to allow bus
574 * mastering activity. Note that the Cx state policy is completely
575 * customizable and can be altered dynamically.
576 */
578 /* startup state */
579 for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
580 {
581 cx = &power->states[i];
582 if ( !cx->valid )
583 continue;
585 if ( !state_is_set )
586 power->state = cx;
587 state_is_set++;
588 break;
589 }
591 if ( !state_is_set )
592 return -ENODEV;
594 /* demotion */
595 for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
596 {
597 cx = &power->states[i];
598 if ( !cx->valid )
599 continue;
601 if ( lower )
602 {
603 cx->demotion.state = lower;
604 cx->demotion.threshold.ticks = cx->latency_ticks;
605 cx->demotion.threshold.count = 1;
606 if ( cx->type == ACPI_STATE_C3 )
607 cx->demotion.threshold.bm = bm_history;
608 }
610 lower = cx;
611 }
613 /* promotion */
614 for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- )
615 {
616 cx = &power->states[i];
617 if ( !cx->valid )
618 continue;
620 if ( higher )
621 {
622 cx->promotion.state = higher;
623 cx->promotion.threshold.ticks = cx->latency_ticks;
624 if ( cx->type >= ACPI_STATE_C2 )
625 cx->promotion.threshold.count = 4;
626 else
627 cx->promotion.threshold.count = 10;
628 if ( higher->type == ACPI_STATE_C3 )
629 cx->promotion.threshold.bm = bm_history;
630 }
632 higher = cx;
633 }
635 return 0;
636 }
638 static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
639 {
640 memset(acpi_power, 0, sizeof(*acpi_power));
642 acpi_power->states[ACPI_STATE_C1].type = ACPI_STATE_C1;
644 acpi_power->states[ACPI_STATE_C0].valid = 1;
645 acpi_power->states[ACPI_STATE_C1].valid = 1;
647 acpi_power->count = 2;
649 return 0;
650 }
652 #define CPUID_MWAIT_LEAF (5)
653 #define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
654 #define CPUID5_ECX_INTERRUPT_BREAK (0x2)
656 #define MWAIT_ECX_INTERRUPT_BREAK (0x1)
658 #define MWAIT_SUBSTATE_MASK (0xf)
659 #define MWAIT_SUBSTATE_SIZE (4)
661 static int acpi_processor_ffh_cstate_probe(xen_processor_cx_t *cx)
662 {
663 struct cpuinfo_x86 *c = &current_cpu_data;
664 unsigned int eax, ebx, ecx, edx;
665 unsigned int edx_part;
666 unsigned int cstate_type; /* C-state type and not ACPI C-state type */
667 unsigned int num_cstate_subtype;
669 if ( c->cpuid_level < CPUID_MWAIT_LEAF )
670 {
671 printk(XENLOG_INFO "MWAIT leaf not supported by cpuid\n");
672 return -EFAULT;
673 }
675 cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
676 printk(XENLOG_DEBUG "cpuid.MWAIT[.eax=%x, .ebx=%x, .ecx=%x, .edx=%x]\n",
677 eax, ebx, ecx, edx);
679 /* Check whether this particular cx_type (in CST) is supported or not */
680 cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1;
681 edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
682 num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
684 if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) )
685 return -EFAULT;
687 /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
688 if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
689 !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
690 return -EFAULT;
692 printk(XENLOG_INFO "Monitor-Mwait will be used to enter C-%d state\n", cx->type);
693 return 0;
694 }
696 /*
697 * Initialize bm_flags based on the CPU cache properties
698 * On SMP it depends on cache configuration
699 * - When cache is not shared among all CPUs, we flush cache
700 * before entering C3.
701 * - When cache is shared among all CPUs, we use bm_check
702 * mechanism as in UP case
703 *
704 * This routine is called only after all the CPUs are online
705 */
706 static void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags)
707 {
708 struct cpuinfo_x86 *c = &current_cpu_data;
710 flags->bm_check = 0;
711 if ( num_online_cpus() == 1 )
712 flags->bm_check = 1;
713 else if ( c->x86_vendor == X86_VENDOR_INTEL )
714 {
715 /*
716 * Today all CPUs that support C3 share cache.
717 * TBD: This needs to look at cache shared map, once
718 * multi-core detection patch makes it into the base.
719 */
720 flags->bm_check = 1;
721 }
722 }
724 #define VENDOR_INTEL (1)
725 #define NATIVE_CSTATE_BEYOND_HALT (2)
727 static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx)
728 {
729 static int bm_check_flag;
731 switch ( cx->reg.space_id )
732 {
733 case ACPI_ADR_SPACE_SYSTEM_IO:
734 if ( cx->reg.address == 0 )
735 return -EINVAL;
736 break;
738 case ACPI_ADR_SPACE_FIXED_HARDWARE:
739 if ( cx->type > ACPI_STATE_C1 )
740 {
741 if ( cx->reg.bit_width != VENDOR_INTEL ||
742 cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
743 return -EINVAL;
745 /* assume all logical cpus have the same support for mwait */
746 if ( acpi_processor_ffh_cstate_probe(cx) )
747 return -EINVAL;
748 }
749 break;
751 default:
752 return -ENODEV;
753 }
755 if ( cx->type == ACPI_STATE_C3 )
756 {
757 /* We must be able to use HPET in place of LAPIC timers. */
758 if ( hpet_broadcast_is_available() )
759 {
760 lapic_timer_off = hpet_broadcast_enter;
761 lapic_timer_on = hpet_broadcast_exit;
762 }
763 else if ( pit_broadcast_is_available() )
764 {
765 lapic_timer_off = pit_broadcast_enter;
766 lapic_timer_on = pit_broadcast_exit;
767 }
768 else
769 {
770 return -EINVAL;
771 }
773 /* All the logic here assumes flags.bm_check is the same across all CPUs */
774 if ( !bm_check_flag )
775 {
776 /* Determine whether bm_check is needed based on CPU */
777 acpi_processor_power_init_bm_check(&(power->flags));
778 bm_check_flag = power->flags.bm_check;
779 }
780 else
781 {
782 power->flags.bm_check = bm_check_flag;
783 }
785 if ( power->flags.bm_check )
786 {
787 if ( !power->flags.bm_control )
788 {
789 if ( power->flags.has_cst != 1 )
790 {
791 /* bus mastering control is necessary */
792 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
793 "C3 support requires BM control\n"));
794 return -EINVAL;
795 }
796 else
797 {
798 /* Here we enter C3 without bus mastering */
799 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
800 "C3 support without BM control\n"));
801 }
802 }
803 }
804 else
805 {
806 /*
807 * WBINVD should be set in the FADT for the C3 state to be
808 * supported when bm_check is not required.
809 */
810 if ( !(acpi_gbl_FADT.flags & ACPI_FADT_WBINVD) )
811 {
812 ACPI_DEBUG_PRINT((ACPI_DB_INFO,
813 "Cache invalidation should work properly"
814 " for C3 to be enabled on SMP systems\n"));
815 return -EINVAL;
816 }
817 acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
818 }
819 }
821 return 0;
822 }
824 static void set_cx(
825 struct acpi_processor_power *acpi_power,
826 xen_processor_cx_t *xen_cx)
827 {
828 struct acpi_processor_cx *cx;
830 if ( check_cx(acpi_power, xen_cx) != 0 )
831 return;
833 cx = &acpi_power->states[xen_cx->type];
834 if ( !cx->valid )
835 acpi_power->count++;
837 cx->valid = 1;
838 cx->type = xen_cx->type;
839 cx->address = xen_cx->reg.address;
840 cx->space_id = xen_cx->reg.space_id;
841 cx->latency = xen_cx->latency;
842 cx->power = xen_cx->power;
844 cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
845 }
847 int get_cpu_id(u8 acpi_id)
848 {
849 int i;
850 u8 apic_id;
852 apic_id = x86_acpiid_to_apicid[acpi_id];
853 if ( apic_id == 0xff )
854 return -1;
856 for ( i = 0; i < NR_CPUS; i++ )
857 {
858 if ( apic_id == x86_cpu_to_apicid[i] )
859 return i;
860 }
862 return -1;
863 }
865 #ifdef DEBUG_PM_CX
866 static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
867 {
868 XEN_GUEST_HANDLE(xen_processor_cx_t) states;
869 xen_processor_cx_t state;
870 XEN_GUEST_HANDLE(xen_processor_csd_t) csd;
871 xen_processor_csd_t dp;
872 uint32_t i;
874 printk("cpu%d cx acpi info:\n", cpu);
875 printk("\tcount = %d\n", power->count);
876 printk("\tflags: bm_cntl[%d], bm_chk[%d], has_cst[%d],\n"
877 "\t pwr_setup_done[%d], bm_rld_set[%d]\n",
878 power->flags.bm_control, power->flags.bm_check, power->flags.has_cst,
879 power->flags.power_setup_done, power->flags.bm_rld_set);
881 states = power->states;
883 for ( i = 0; i < power->count; i++ )
884 {
885 if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) )
886 return;
888 printk("\tstates[%d]:\n", i);
889 printk("\t\treg.space_id = 0x%x\n", state.reg.space_id);
890 printk("\t\treg.bit_width = 0x%x\n", state.reg.bit_width);
891 printk("\t\treg.bit_offset = 0x%x\n", state.reg.bit_offset);
892 printk("\t\treg.access_size = 0x%x\n", state.reg.access_size);
893 printk("\t\treg.address = 0x%"PRIx64"\n", state.reg.address);
894 printk("\t\ttype = %d\n", state.type);
895 printk("\t\tlatency = %d\n", state.latency);
896 printk("\t\tpower = %d\n", state.power);
898 csd = state.dp;
899 printk("\t\tdp(@0x%p)\n", csd.p);
901 if ( csd.p != NULL )
902 {
903 if ( unlikely(copy_from_guest(&dp, csd, 1)) )
904 return;
905 printk("\t\t\tdomain = %d\n", dp.domain);
906 printk("\t\t\tcoord_type = %d\n", dp.coord_type);
907 printk("\t\t\tnum = %d\n", dp.num);
908 }
909 }
910 }
911 #else
912 #define print_cx_pminfo(c, p)
913 #endif
915 long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
916 {
917 XEN_GUEST_HANDLE(xen_processor_cx_t) states;
918 xen_processor_cx_t xen_cx;
919 struct acpi_processor_power *acpi_power;
920 int cpu_id, i;
922 if ( unlikely(!guest_handle_okay(power->states, power->count)) )
923 return -EFAULT;
925 print_cx_pminfo(cpu, power);
927 /* map from acpi_id to cpu_id */
928 cpu_id = get_cpu_id((u8)cpu);
929 if ( cpu_id == -1 )
930 {
931 printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu);
932 return -EFAULT;
933 }
935 acpi_power = &processor_powers[cpu_id];
937 init_cx_pminfo(acpi_power);
939 acpi_power->flags.bm_check = power->flags.bm_check;
940 acpi_power->flags.bm_control = power->flags.bm_control;
941 acpi_power->flags.has_cst = power->flags.has_cst;
943 states = power->states;
945 for ( i = 0; i < power->count; i++ )
946 {
947 if ( unlikely(copy_from_guest_offset(&xen_cx, states, i, 1)) )
948 return -EFAULT;
950 set_cx(acpi_power, &xen_cx);
951 }
953 /* FIXME: C-state dependency is not yet supported */
955 /* initialize default policy */
956 acpi_processor_set_power_policy(acpi_power);
958 print_acpi_power(cpu_id, acpi_power);
960 if ( cpu_id == 0 && pm_idle_save == NULL )
961 {
962 pm_idle_save = pm_idle;
963 pm_idle = acpi_processor_idle;
964 }
966 return 0;
967 }
969 uint32_t pmstat_get_cx_nr(uint32_t cpuid)
970 {
971 return processor_powers[cpuid].count;
972 }
974 int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
975 {
976 struct acpi_processor_power *power = &processor_powers[cpuid];
977 struct vcpu *v = idle_vcpu[cpuid];
978 uint64_t usage;
979 int i;
981 stat->last = (power->state) ? power->state->type : 0;
982 stat->nr = processor_powers[cpuid].count;
983 stat->idle_time = v->runstate.time[RUNSTATE_running];
984 if ( v->is_running )
985 stat->idle_time += NOW() - v->runstate.state_entry_time;
987 for ( i = 0; i < power->count; i++ )
988 {
989 usage = power->states[i].usage;
990 if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) )
991 return -EFAULT;
992 }
993 for ( i = 0; i < power->count; i++ )
994 if ( copy_to_guest_offset(stat->residencies, i,
995 &power->states[i].time, 1) )
996 return -EFAULT;
998 return 0;
999 }
1001 int pmstat_reset_cx_stat(uint32_t cpuid)
1002 {
1003 return 0;
1004 }
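
Usage note (relying on standard Xen behaviour of integer_param() and
register_keyhandler(), not anything specific to this changeset): the
deepest C state used by the idle handler can be capped from the
hypervisor command line via the max_cstate= option registered above
(default 2 in this file), e.g. booting with max_cstate=1 restricts idle
to C1; and pressing 'c' on the Xen console runs dump_cx(), printing the
per-CPU Cx structures via print_acpi_power().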