ia64/xen-unstable

changeset 17435:8d750b7acfa3

x86: Conditionally disable PIT 100HZ timer interrupt

The 100HZ PIT timer interrupt sets a 10ms upper limit on C-state
residency, which makes Xen power-unfriendly. This patch disables the PIT
timer interrupt when both of the following conditions hold:
- the CPU has APIC support, and
- the PIT is not used as the platform time source

Signed-off-by: Yu Ke <ke.yu@intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Apr 10 11:11:25 2008 +0100 (2008-04-10)
parents 5b7a3e040683
children 24e3a0ce63f8
files xen/arch/x86/time.c
line diff
     1.1 --- a/xen/arch/x86/time.c	Thu Apr 10 10:12:04 2008 +0100
     1.2 +++ b/xen/arch/x86/time.c	Thu Apr 10 11:11:25 2008 +0100
     1.3 @@ -67,19 +67,16 @@ struct platform_timesource {
     1.4  static DEFINE_PER_CPU(struct cpu_time, cpu_time);
     1.5  
     1.6  /*
     1.7 - * Protected by platform_timer_lock, which must be acquired with interrupts
     1.8 - * disabled because plt_overflow() is called from PIT ch0 interrupt context.
     1.9 + * We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
    1.10 + * Otherwise overflow happens too quickly (~50ms) for us to guarantee that
    1.11 + * softirq handling will happen in time.
    1.12 + * 
    1.13 + * The pit_lock protects the 16- and 32-bit stamp fields and PIT ch2 reads.
    1.14   */
    1.15 -static s_time_t stime_platform_stamp;
    1.16 -static u64 platform_timer_stamp;
    1.17 -static DEFINE_SPINLOCK(platform_timer_lock);
    1.18 -
    1.19 -/*
    1.20 - * Folding platform timer into 64-bit software counter is a really critical
    1.21 - * operation! We therefore do it directly in PIT ch0 interrupt handler.
    1.22 - */
    1.23 -static u32 plt_overflow_jiffies;
    1.24 -static void plt_overflow(void);
    1.25 +static DEFINE_SPINLOCK(pit_lock);
    1.26 +static u16 pit_stamp16;
    1.27 +static u32 pit_stamp32;
    1.28 +static int using_pit;
    1.29  
    1.30  /*
    1.31   * 32-bit division of integer dividend and integer divisor yielding
    1.32 @@ -146,21 +143,36 @@ static inline u64 scale_delta(u64 delta,
    1.33      return product;
    1.34  }
    1.35  
    1.36 -void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
    1.37 +static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
    1.38  {
    1.39      ASSERT(local_irq_is_enabled());
    1.40  
    1.41 +    /* Only for start-of-day interrupt tests in io_apic.c. */
    1.42      (*(volatile unsigned long *)&pit0_ticks)++;
    1.43  
    1.44      /* Rough hack to allow accurate timers to sort-of-work with no APIC. */
    1.45      if ( !cpu_has_apic )
    1.46          raise_softirq(TIMER_SOFTIRQ);
    1.47  
    1.48 -    if ( --plt_overflow_jiffies == 0 )
    1.49 -        plt_overflow();
    1.50 +    /* Emulate a 32-bit PIT counter. */
    1.51 +    if ( using_pit )
    1.52 +    {
    1.53 +        u16 count;
    1.54 +
    1.55 +        spin_lock_irq(&pit_lock);
    1.56 +
    1.57 +        outb(0x80, PIT_MODE);
    1.58 +        count  = inb(PIT_CH2);
    1.59 +        count |= inb(PIT_CH2) << 8;
    1.60 +
    1.61 +        pit_stamp32 += (u16)(pit_stamp16 - count);
    1.62 +        pit_stamp16 = count;
    1.63 +
    1.64 +        spin_unlock_irq(&pit_lock);
    1.65 +    }
    1.66  }
    1.67  
    1.68 -static struct irqaction irq0 = { timer_interrupt, "timer", NULL};
    1.69 +static struct irqaction irq0 = { timer_interrupt, "timer", NULL };
    1.70  
    1.71  /* ------ Calibrate the TSC ------- 
    1.72   * Return processor ticks per second / CALIBRATE_FRAC.
    1.73 @@ -294,12 +306,21 @@ static char *freq_string(u64 freq)
    1.74  
    1.75  static u32 read_pit_count(void)
    1.76  {
    1.77 -    u16 count;
    1.78 -    ASSERT(spin_is_locked(&platform_timer_lock));
    1.79 +    u16 count16;
    1.80 +    u32 count32;
    1.81 +    unsigned long flags;
    1.82 +
    1.83 +    spin_lock_irqsave(&pit_lock, flags);
    1.84 +
    1.85      outb(0x80, PIT_MODE);
    1.86 -    count  = inb(PIT_CH2);
    1.87 -    count |= inb(PIT_CH2) << 8;
    1.88 -    return ~count;
    1.89 +    count16  = inb(PIT_CH2);
    1.90 +    count16 |= inb(PIT_CH2) << 8;
    1.91 +
    1.92 +    count32 = pit_stamp32 + (u16)(pit_stamp16 - count16);
    1.93 +
    1.94 +    spin_unlock_irqrestore(&pit_lock, flags);
    1.95 +
    1.96 +    return count32;
    1.97  }
    1.98  
    1.99  static void init_pit(struct platform_timesource *pts)
   1.100 @@ -307,7 +328,8 @@ static void init_pit(struct platform_tim
   1.101      pts->name = "PIT";
   1.102      pts->frequency = CLOCK_TICK_RATE;
   1.103      pts->read_counter = read_pit_count;
   1.104 -    pts->counter_bits = 16;
   1.105 +    pts->counter_bits = 32;
   1.106 +    using_pit = 1;
   1.107  }
   1.108  
   1.109  /************************************************************
   1.110 @@ -465,24 +487,28 @@ static int init_pmtimer(struct platform_
   1.111  
   1.112  static struct platform_timesource plt_src; /* details of chosen timesource  */
   1.113  static u32 plt_mask;             /* hardware-width mask                     */
   1.114 -static u32 plt_overflow_period;  /* jiffies between calls to plt_overflow() */
   1.115 +static u64 plt_overflow_period;  /* ns between calls to plt_overflow()      */
   1.116  static struct time_scale plt_scale; /* scale: platform counter -> nanosecs  */
   1.117  
   1.118  /* Protected by platform_timer_lock. */
   1.119 -static u64 plt_count64;          /* 64-bit platform counter stamp           */
   1.120 -static u32 plt_count;            /* hardware-width platform counter stamp   */
   1.121 +static DEFINE_SPINLOCK(platform_timer_lock);
   1.122 +static s_time_t stime_platform_stamp; /* System time at below platform time */
   1.123 +static u64 platform_timer_stamp;      /* Platform time at above system time */
   1.124 +static u64 plt_stamp64;          /* 64-bit platform counter stamp           */
   1.125 +static u32 plt_stamp;            /* hardware-width platform counter stamp   */
   1.126 +static struct timer plt_overflow_timer;
   1.127  
   1.128 -static void plt_overflow(void)
   1.129 +static void plt_overflow(void *unused)
   1.130  {
   1.131      u32 count;
   1.132 -    unsigned long flags;
   1.133  
   1.134 -    spin_lock_irqsave(&platform_timer_lock, flags);
   1.135 +    spin_lock(&platform_timer_lock);
   1.136      count = plt_src.read_counter();
   1.137 -    plt_count64 += (count - plt_count) & plt_mask;
   1.138 -    plt_count = count;
   1.139 -    plt_overflow_jiffies = plt_overflow_period;
   1.140 -    spin_unlock_irqrestore(&platform_timer_lock, flags);
   1.141 +    plt_stamp64 += (count - plt_stamp) & plt_mask;
   1.142 +    plt_stamp = count;
   1.143 +    spin_unlock(&platform_timer_lock);
   1.144 +
   1.145 +    set_timer(&plt_overflow_timer, NOW() + plt_overflow_period);
   1.146  }
   1.147  
   1.148  static s_time_t __read_platform_stime(u64 platform_time)
   1.149 @@ -496,12 +522,11 @@ static s_time_t read_platform_stime(void
   1.150  {
   1.151      u64 count;
   1.152      s_time_t stime;
   1.153 -    unsigned long flags;
   1.154  
   1.155 -    spin_lock_irqsave(&platform_timer_lock, flags);
   1.156 -    count = plt_count64 + ((plt_src.read_counter() - plt_count) & plt_mask);
   1.157 +    spin_lock(&platform_timer_lock);
   1.158 +    count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
   1.159      stime = __read_platform_stime(count);
   1.160 -    spin_unlock_irqrestore(&platform_timer_lock, flags);
   1.161 +    spin_unlock(&platform_timer_lock);
   1.162  
   1.163      return stime;
   1.164  }
   1.165 @@ -510,27 +535,25 @@ static void platform_time_calibration(vo
   1.166  {
   1.167      u64 count;
   1.168      s_time_t stamp;
   1.169 -    unsigned long flags;
   1.170  
   1.171 -    spin_lock_irqsave(&platform_timer_lock, flags);
   1.172 -    count = plt_count64 + ((plt_src.read_counter() - plt_count) & plt_mask);
   1.173 +    spin_lock(&platform_timer_lock);
   1.174 +    count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
   1.175      stamp = __read_platform_stime(count);
   1.176      stime_platform_stamp = stamp;
   1.177      platform_timer_stamp = count;
   1.178 -    spin_unlock_irqrestore(&platform_timer_lock, flags);
   1.179 +    spin_unlock(&platform_timer_lock);
   1.180  }
   1.181  
   1.182  static void resume_platform_timer(void)
   1.183  {
   1.184      /* No change in platform_stime across suspend/resume. */
   1.185 -    platform_timer_stamp = plt_count64;
   1.186 -    plt_count = plt_src.read_counter();
   1.187 +    platform_timer_stamp = plt_stamp64;
   1.188 +    plt_stamp = plt_src.read_counter();
   1.189  }
   1.190  
   1.191  static void init_platform_timer(void)
   1.192  {
   1.193      struct platform_timesource *pts = &plt_src;
   1.194 -    u64 overflow_period;
   1.195      int rc = -1;
   1.196  
   1.197      if ( opt_clocksource[0] != '\0' )
   1.198 @@ -560,13 +583,12 @@ static void init_platform_timer(void)
   1.199  
   1.200      set_time_scale(&plt_scale, pts->frequency);
   1.201  
   1.202 -    overflow_period = scale_delta(1ull << (pts->counter_bits-1), &plt_scale);
   1.203 -    do_div(overflow_period, MILLISECS(1000/HZ));
   1.204 -    plt_overflow_period = overflow_period;
   1.205 -    plt_overflow();
   1.206 -    printk("Platform timer overflows in %d jiffies.\n", plt_overflow_period);
   1.207 +    plt_overflow_period = scale_delta(
   1.208 +        1ull << (pts->counter_bits-1), &plt_scale);
   1.209 +    init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
   1.210 +    plt_overflow(NULL);
   1.211  
   1.212 -    platform_timer_stamp = plt_count64;
   1.213 +    platform_timer_stamp = plt_stamp64;
   1.214  
   1.215      printk("Platform timer is %s %s\n",
   1.216             freq_string(pts->frequency), pts->name);
   1.217 @@ -968,6 +990,19 @@ void __init early_time_init(void)
   1.218      setup_irq(0, &irq0);
   1.219  }
   1.220  
   1.221 +static int __init late_time_init(void)
   1.222 +{
   1.223 +    if ( !using_pit && cpu_has_apic )
   1.224 +    {
   1.225 +        /* Disable PIT CH0 timer interrupt. */
   1.226 +        outb_p(0x30, PIT_MODE);
   1.227 +        outb_p(0, PIT_CH0);
   1.228 +        outb_p(0, PIT_CH0);
   1.229 +    }
   1.230 +    return 0;
   1.231 +}
   1.232 +__initcall(late_time_init);
   1.233 +
   1.234  void send_timer_event(struct vcpu *v)
   1.235  {
   1.236      send_guest_vcpu_virq(v, VIRQ_TIMER);
   1.237 @@ -1018,7 +1053,7 @@ int time_resume(void)
   1.238  int dom0_pit_access(struct ioreq *ioreq)
   1.239  {
   1.240      /* Is Xen using Channel 2? Then disallow direct dom0 access. */
   1.241 -    if ( plt_src.read_counter == read_pit_count )
   1.242 +    if ( using_pit )
   1.243          return 0;
   1.244  
   1.245      switch ( ioreq->addr )