ia64/xen-unstable

changeset 17919:baaea9f0db5e

x86: Add cpufreq logic to S3 suspend/resume

When suspending to S3, stop the cpufreq dbs governor. When resuming
from S3, first sync the CPU state and frequency at the 1st dbs timer
tick; from the 2nd dbs timer tick on, the cpufreq dbs governor controls
CPU Px transitions according to its workload algorithm. Px statistics
are also handled across suspend/resume.

Signed-off-by: Liu Jinsong <jinsong.liu@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 27 16:16:47 2008 +0100 (2008-06-27)
parents 2ac9155a85c1
children 6b0663901174
files xen/arch/x86/acpi/cpufreq/cpufreq.c xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c xen/arch/x86/acpi/cpufreq/utility.c xen/arch/x86/acpi/power.c xen/include/acpi/cpufreq/cpufreq.h xen/include/acpi/cpufreq/processor_perf.h
line diff
     1.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Jun 27 16:12:14 2008 +0100
     1.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Jun 27 16:16:47 2008 +0100
     1.3 @@ -47,6 +47,10 @@
     1.4  struct processor_pminfo processor_pminfo[NR_CPUS];
     1.5  struct cpufreq_policy xen_px_policy[NR_CPUS];
     1.6  
     1.7 +static cpumask_t *cpufreq_dom_pt;
     1.8 +static cpumask_t cpufreq_dom_mask;
     1.9 +static unsigned int cpufreq_dom_max;
    1.10 +
    1.11  enum {
    1.12      UNDEFINED_CAPABLE = 0,
    1.13      SYSTEM_INTEL_MSR_CAPABLE,
    1.14 @@ -60,7 +64,6 @@ struct acpi_cpufreq_data {
    1.15      struct processor_performance *acpi_data;
    1.16      struct cpufreq_frequency_table *freq_table;
    1.17      unsigned int max_freq;
    1.18 -    unsigned int resume;
    1.19      unsigned int cpu_feature;
    1.20  };
    1.21  
    1.22 @@ -328,14 +331,16 @@ static int acpi_cpufreq_target(struct cp
    1.23  
    1.24      next_perf_state = data->freq_table[next_state].index;
    1.25      if (perf->state == next_perf_state) {
    1.26 -        if (unlikely(data->resume)) {
    1.27 -            printk("xen_pminfo: @acpi_cpufreq_target, "
    1.28 -                "Called after resume, resetting to P%d\n", 
    1.29 +        if (unlikely(policy->resume)) {
    1.30 +            printk(KERN_INFO "Called after resume, resetting to P%d\n", 
    1.31                  next_perf_state);
    1.32 -            data->resume = 0;
    1.33 +            policy->resume = 0;
    1.34          }
    1.35 -        else
    1.36 +        else {
    1.37 +            printk(KERN_INFO "Already at target state (P%d)\n", 
    1.38 +                next_perf_state);
    1.39              return 0;
    1.40 +        }
    1.41      }
    1.42  
    1.43      switch (data->cpu_feature) {
    1.44 @@ -531,7 +536,7 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
    1.45       * the first call to ->target() should result in us actually
    1.46       * writing something to the appropriate registers.
    1.47       */
    1.48 -    data->resume = 1;
    1.49 +    policy->resume = 1;
    1.50  
    1.51      return result;
    1.52  
    1.53 @@ -549,61 +554,101 @@ static struct cpufreq_driver acpi_cpufre
    1.54      .init   = acpi_cpufreq_cpu_init,
    1.55  };
    1.56  
    1.57 -int acpi_cpufreq_init(void)
    1.58 +void cpufreq_dom_exit(void)
    1.59  {
    1.60 -    unsigned int i, ret = 0;
    1.61 -    unsigned int dom, max_dom = 0;
    1.62 -    cpumask_t *pt, dom_mask;
    1.63 +    cpufreq_dom_max = 0;
    1.64 +    cpus_clear(cpufreq_dom_mask);
    1.65 +    if (cpufreq_dom_pt)
    1.66 +        xfree(cpufreq_dom_pt);
    1.67 +}
    1.68  
    1.69 -    cpus_clear(dom_mask);
    1.70 +int cpufreq_dom_init(void)
    1.71 +{
    1.72 +    unsigned int i;
    1.73 +
    1.74 +    cpufreq_dom_max = 0;
    1.75 +    cpus_clear(cpufreq_dom_mask);
    1.76  
    1.77      for_each_online_cpu(i) {
    1.78 -        cpu_set(processor_pminfo[i].perf.domain_info.domain, dom_mask);
    1.79 -        if (max_dom < processor_pminfo[i].perf.domain_info.domain)
    1.80 -            max_dom = processor_pminfo[i].perf.domain_info.domain;
    1.81 +        cpu_set(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
    1.82 +        if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain)
    1.83 +            cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain;
    1.84      }
    1.85 -    max_dom++;
    1.86 +    cpufreq_dom_max++;
    1.87  
    1.88 -    pt = xmalloc_array(cpumask_t, max_dom);
    1.89 -    if (!pt)
    1.90 +    cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max);
    1.91 +    if (!cpufreq_dom_pt)
    1.92          return -ENOMEM;
    1.93 -    memset(pt, 0, max_dom * sizeof(cpumask_t));
    1.94 -
    1.95 -    /* get cpumask of each psd domain */
    1.96 -    for_each_online_cpu(i)
    1.97 -        cpu_set(i, pt[processor_pminfo[i].perf.domain_info.domain]);
    1.98 +    memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t));
    1.99  
   1.100      for_each_online_cpu(i)
   1.101 -        processor_pminfo[i].perf.shared_cpu_map = 
   1.102 -            pt[processor_pminfo[i].perf.domain_info.domain];
   1.103 +        cpu_set(i, cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]);
   1.104 +
   1.105 +    for_each_online_cpu(i)
   1.106 +        processor_pminfo[i].perf.shared_cpu_map =
   1.107 +            cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain];
   1.108  
   1.109 -    cpufreq_driver = &acpi_cpufreq_driver;
   1.110 +    return 0;
   1.111 +}
   1.112  
   1.113 -    /* setup cpufreq infrastructure */
   1.114 +static int cpufreq_cpu_init(void)
   1.115 +{
   1.116 +    int i, ret = 0;
   1.117 +
   1.118      for_each_online_cpu(i) {
   1.119          xen_px_policy[i].cpu = i;
   1.120  
   1.121          ret = px_statistic_init(i);
   1.122          if (ret)
   1.123 -            goto out;
   1.124 +            return ret;
   1.125  
   1.126          ret = acpi_cpufreq_cpu_init(&xen_px_policy[i]);
   1.127          if (ret)
   1.128 -            goto out;
   1.129 +            return ret;
   1.130      }
   1.131 +    return ret;
   1.132 +}
   1.133  
   1.134 -    /* setup ondemand cpufreq */
   1.135 -    for (dom=0; dom<max_dom; dom++) {
   1.136 -        if (!cpu_isset(dom, dom_mask))
   1.137 +int cpufreq_dom_dbs(unsigned int event)
   1.138 +{
   1.139 +    int cpu, dom, ret = 0;
   1.140 +
   1.141 +    for (dom=0; dom<cpufreq_dom_max; dom++) {
   1.142 +        if (!cpu_isset(dom, cpufreq_dom_mask))
   1.143              continue;
   1.144 -        i = first_cpu(pt[dom]);
   1.145 -        ret = cpufreq_governor_dbs(&xen_px_policy[i], CPUFREQ_GOV_START);
   1.146 +        cpu = first_cpu(cpufreq_dom_pt[dom]);
   1.147 +        ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event);
   1.148          if (ret)
   1.149 -            goto out;
   1.150 +            return ret;
   1.151      }
   1.152 -
   1.153 -out:
   1.154 -    xfree(pt);
   1.155 -   
   1.156      return ret;
   1.157  }
   1.158 +
   1.159 +int acpi_cpufreq_init(void)
   1.160 +{
   1.161 +    int ret = 0;
   1.162 +    
   1.163 +    /* setup cpumask of psd dom and shared cpu map of cpu */
   1.164 +    ret = cpufreq_dom_init();
   1.165 +    if (ret)
   1.166 +        goto err;
   1.167 +
   1.168 +    /* setup cpufreq driver */
   1.169 +    cpufreq_driver = &acpi_cpufreq_driver;
   1.170 +
   1.171 +    /* setup cpufreq infrastructure */
   1.172 +    ret = cpufreq_cpu_init();
   1.173 +    if (ret)
   1.174 +        goto err;
   1.175 +
   1.176 +    /* setup cpufreq dbs according to dom coordiation */
   1.177 +    ret = cpufreq_dom_dbs(CPUFREQ_GOV_START);
   1.178 +    if (ret)
   1.179 +        goto err;
   1.180 +
   1.181 +    return ret;
   1.182 +
   1.183 +err:
   1.184 +    cpufreq_dom_exit();
   1.185 +    return ret;
   1.186 +}
     2.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c	Fri Jun 27 16:12:14 2008 +0100
     2.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c	Fri Jun 27 16:16:47 2008 +0100
     2.3 @@ -79,6 +79,12 @@ static void dbs_check_cpu(struct cpu_dbs
     2.4          return;
     2.5  
     2.6      policy = this_dbs_info->cur_policy;
     2.7 +
     2.8 +    if (unlikely(policy->resume)) {
     2.9 +        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
    2.10 +        return;
    2.11 +    }
    2.12 +
    2.13      cur_ns = NOW();
    2.14      total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
    2.15      this_dbs_info->prev_cpu_wall = NOW();
    2.16 @@ -217,8 +223,7 @@ int cpufreq_governor_dbs(struct cpufreq_
    2.17          break;
    2.18  
    2.19      case CPUFREQ_GOV_STOP:
    2.20 -        if (this_dbs_info->enable)
    2.21 -            dbs_timer_exit(this_dbs_info);
    2.22 +        dbs_timer_exit(this_dbs_info);
    2.23          dbs_enable--;
    2.24  
    2.25          break;
    2.26 @@ -233,5 +238,4 @@ int cpufreq_governor_dbs(struct cpufreq_
    2.27          break;
    2.28      }
    2.29      return 0;
    2.30 -}
    2.31 -             
    2.32 +} 
     3.1 --- a/xen/arch/x86/acpi/cpufreq/utility.c	Fri Jun 27 16:12:14 2008 +0100
     3.2 +++ b/xen/arch/x86/acpi/cpufreq/utility.c	Fri Jun 27 16:16:47 2008 +0100
     3.3 @@ -37,6 +37,33 @@ struct cpufreq_driver *cpufreq_driver;
     3.4   *                    Px STATISTIC INFO                              *
     3.5   *********************************************************************/
     3.6  
     3.7 +void px_statistic_suspend(void)
     3.8 +{
     3.9 +    int cpu;
    3.10 +    uint64_t now;
    3.11 +
    3.12 +    now = NOW();
    3.13 +
    3.14 +    for_each_online_cpu(cpu) {
    3.15 +        struct pm_px *pxpt = &px_statistic_data[cpu];
    3.16 +        pxpt->u.pt[pxpt->u.cur].residency +=
    3.17 +                    now - pxpt->prev_state_wall;
    3.18 +    }
    3.19 +}
    3.20 +
    3.21 +void px_statistic_resume(void)
    3.22 +{
    3.23 +    int cpu;
    3.24 +    uint64_t now;
    3.25 +
    3.26 +    now = NOW();
    3.27 +
    3.28 +    for_each_online_cpu(cpu) {
    3.29 +        struct pm_px *pxpt = &px_statistic_data[cpu];
    3.30 +        pxpt->prev_state_wall = now;
    3.31 +    }
    3.32 +}
    3.33 +
    3.34  void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
    3.35  {
    3.36      uint32_t i;
    3.37 @@ -242,3 +269,62 @@ int __cpufreq_driver_getavg(struct cpufr
    3.38  
    3.39      return ret;
    3.40  }
    3.41 +
    3.42 +
    3.43 +/*********************************************************************
    3.44 + *               CPUFREQ SUSPEND/RESUME                              *
    3.45 + *********************************************************************/
    3.46 +
    3.47 +void cpufreq_suspend(void)
    3.48 +{
    3.49 +    int cpu;
    3.50 +
    3.51 +    /* to protect the case when Px was controlled by dom0-kernel */
    3.52 +    /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
    3.53 +    for_each_online_cpu(cpu) {
    3.54 +        struct processor_performance *perf = &processor_pminfo[cpu].perf;
    3.55 +
    3.56 +        if (!perf->init)
    3.57 +            return;
    3.58 +    }
    3.59 +
    3.60 +    cpufreq_dom_dbs(CPUFREQ_GOV_STOP);
    3.61 +
    3.62 +    cpufreq_dom_exit();
    3.63 +
    3.64 +    px_statistic_suspend();
    3.65 +}
    3.66 +
    3.67 +int cpufreq_resume(void)
    3.68 +{
    3.69 +    int cpu, ret = 0;
    3.70 +
    3.71 +    /* 1. to protect the case when Px was controlled by dom0-kernel */
    3.72 +    /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
    3.73 +    /* 2. set state and resume flag to sync cpu to right state and freq */
    3.74 +    for_each_online_cpu(cpu) {
    3.75 +        struct processor_performance *perf = &processor_pminfo[cpu].perf;
    3.76 +        struct cpufreq_policy *policy = &xen_px_policy[cpu];
    3.77 +
    3.78 +        if (!perf->init)
    3.79 +            goto err;
    3.80 +        perf->state = 0;
    3.81 +        policy->resume = 1;
    3.82 +    }
    3.83 +
    3.84 +    px_statistic_resume();
    3.85 +
    3.86 +    ret = cpufreq_dom_init();
    3.87 +    if (ret)
    3.88 +        goto err;
    3.89 +
    3.90 +    ret = cpufreq_dom_dbs(CPUFREQ_GOV_START);
    3.91 +    if (ret)
    3.92 +        goto err;
    3.93 +
    3.94 +    return ret;
    3.95 +
    3.96 +err:
    3.97 +    cpufreq_dom_exit();
    3.98 +    return ret;
    3.99 +}
     4.1 --- a/xen/arch/x86/acpi/power.c	Fri Jun 27 16:12:14 2008 +0100
     4.2 +++ b/xen/arch/x86/acpi/power.c	Fri Jun 27 16:16:47 2008 +0100
     4.3 @@ -27,6 +27,8 @@
     4.4  #include <public/platform.h>
     4.5  #include <asm/tboot.h>
     4.6  
     4.7 +#include <acpi/cpufreq/cpufreq.h>
     4.8 +
     4.9  static char opt_acpi_sleep[20];
    4.10  string_param("acpi_sleep", opt_acpi_sleep);
    4.11  
    4.12 @@ -126,6 +128,8 @@ static int enter_state(u32 state)
    4.13  
    4.14      freeze_domains();
    4.15  
    4.16 +    cpufreq_suspend();
    4.17 +
    4.18      disable_nonboot_cpus();
    4.19      if ( num_online_cpus() != 1 )
    4.20      {
    4.21 @@ -181,6 +185,7 @@ static int enter_state(u32 state)
    4.22  
    4.23   enable_cpu:
    4.24      enable_nonboot_cpus();
    4.25 +    cpufreq_resume();
    4.26      thaw_domains();
    4.27      spin_unlock(&pm_lock);
    4.28      return error;
     5.1 --- a/xen/include/acpi/cpufreq/cpufreq.h	Fri Jun 27 16:12:14 2008 +0100
     5.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h	Fri Jun 27 16:16:47 2008 +0100
     5.3 @@ -36,7 +36,10 @@ struct cpufreq_policy {
     5.4      unsigned int        max;    /* in kHz */
     5.5      unsigned int        cur;    /* in kHz, only needed if cpufreq
     5.6                                   * governors are used */
     5.7 +    unsigned int        resume; /* flag for cpufreq 1st run
     5.8 +                                 * S3 wakeup, hotplug cpu, etc */
     5.9  };
    5.10 +extern struct cpufreq_policy xen_px_policy[NR_CPUS];
    5.11  
    5.12  #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
    5.13  #define CPUFREQ_SHARED_TYPE_HW   (1) /* HW does needed coordination */
     6.1 --- a/xen/include/acpi/cpufreq/processor_perf.h	Fri Jun 27 16:12:14 2008 +0100
     6.2 +++ b/xen/include/acpi/cpufreq/processor_perf.h	Fri Jun 27 16:16:47 2008 +0100
     6.3 @@ -10,6 +10,13 @@ int powernow_cpufreq_init(void);
     6.4  void px_statistic_update(cpumask_t, uint8_t, uint8_t);
     6.5  int  px_statistic_init(int);
     6.6  void px_statistic_reset(int);
     6.7 +void px_statistic_suspend(void);
     6.8 +void px_statistic_resume(void);
     6.9 +void cpufreq_dom_exit(void);
    6.10 +int  cpufreq_dom_init(void);
    6.11 +int  cpufreq_dom_dbs(unsigned int);
    6.12 +void cpufreq_suspend(void);
    6.13 +int  cpufreq_resume(void);
    6.14  
    6.15  struct processor_performance {
    6.16      uint32_t state;