ia64/xen-unstable

changeset 18484:f125e481d8b6

x86: Clean up cpufreq core logic

Clean up cpufreq core logic, which can now cope with CPU
online/offline events, and also dynamic platform limitation events
(_PPC).

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Sep 12 10:34:50 2008 +0100 (2008-09-12)
parents 346c073ed6a4
children 34aed15ba9df
files xen/arch/x86/acpi/cpufreq/cpufreq.c xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c xen/arch/x86/acpi/cpufreq/powernow.c xen/arch/x86/acpi/cpufreq/utility.c xen/arch/x86/acpi/pmstat.c xen/arch/x86/acpi/power.c xen/arch/x86/platform_hypercall.c xen/arch/x86/smpboot.c xen/include/acpi/cpufreq/cpufreq.h xen/include/acpi/cpufreq/processor_perf.h xen/include/public/platform.h
line diff
     1.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Thu Sep 11 18:00:06 2008 +0100
     1.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 12 10:34:50 2008 +0100
     1.3 @@ -32,6 +32,7 @@
     1.4  #include <xen/errno.h>
     1.5  #include <xen/delay.h>
     1.6  #include <xen/cpumask.h>
     1.7 +#include <xen/sched.h>
     1.8  #include <xen/timer.h>
     1.9  #include <xen/xmalloc.h>
    1.10  #include <asm/bug.h>
    1.11 @@ -44,12 +45,8 @@
    1.12  #include <acpi/acpi.h>
    1.13  #include <acpi/cpufreq/cpufreq.h>
    1.14  
    1.15 -struct processor_pminfo processor_pminfo[NR_CPUS];
    1.16 -struct cpufreq_policy xen_px_policy[NR_CPUS];
    1.17 -
    1.18 -static cpumask_t *cpufreq_dom_pt;
    1.19 -static unsigned long *cpufreq_dom_mask;
    1.20 -static unsigned int cpufreq_dom_max;
    1.21 +/* TODO: change to link list later as domain number may be sparse */
    1.22 +static cpumask_t cpufreq_dom_map[NR_CPUS];
    1.23  
    1.24  enum {
    1.25      UNDEFINED_CAPABLE = 0,
    1.26 @@ -335,7 +332,7 @@ static int acpi_cpufreq_target(struct cp
    1.27      if (unlikely(result))
    1.28          return -ENODEV;
    1.29  
    1.30 -    online_policy_cpus = policy->cpus;
    1.31 +    cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);
    1.32  
    1.33      next_perf_state = data->freq_table[next_state].index;
    1.34      if (perf->state == next_perf_state) {
    1.35 @@ -390,6 +387,20 @@ static int acpi_cpufreq_target(struct cp
    1.36      return result;
    1.37  }
    1.38  
    1.39 +static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
    1.40 +{
    1.41 +    struct acpi_cpufreq_data *data = drv_data[policy->cpu];
    1.42 +    struct processor_performance *perf = &processor_pminfo[policy->cpu].perf;
    1.43 +
    1.44 +    if (!policy || !data)
    1.45 +        return -EINVAL;
    1.46 +
    1.47 +    cpufreq_verify_within_limits(policy, 0, 
    1.48 +        perf->states[perf->platform_limit].core_frequency * 1000);
    1.49 +
    1.50 +    return cpufreq_frequency_table_verify(policy, data->freq_table);
    1.51 +}
    1.52 +
    1.53  static unsigned long
    1.54  acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
    1.55  {
    1.56 @@ -441,14 +452,6 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
    1.57      perf = data->acpi_data;
    1.58      policy->shared_type = perf->shared_type;
    1.59  
    1.60 -    /* 
    1.61 -     * Currently the latest linux (kernel version 2.6.26) 
    1.62 -     * still has issue when handle the situation _psd HW_ALL coordination.
    1.63 -     * In Xen hypervisor, we handle _psd HW_ALL coordination in same way as
    1.64 -     * _psd SW_ALL coordination for the seek of safety.
    1.65 -     */
    1.66 -    policy->cpus = perf->shared_cpu_map;
    1.67 -
    1.68      /* capability check */
    1.69      if (perf->state_count <= 1) {
    1.70          printk("No P-States\n");
    1.71 @@ -496,6 +499,7 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
    1.72              policy->cpuinfo.transition_latency =
    1.73                  perf->states[i].transition_latency * 1000;
    1.74      }
    1.75 +    policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
    1.76  
    1.77      data->max_freq = perf->states[0].core_frequency * 1000;
    1.78      /* table init */
    1.79 @@ -554,114 +558,173 @@ err_unreg:
    1.80      return result;
    1.81  }
    1.82  
    1.83 -static struct cpufreq_driver acpi_cpufreq_driver = {
    1.84 -    .target = acpi_cpufreq_target,
    1.85 -    .init   = acpi_cpufreq_cpu_init,
    1.86 -};
    1.87 -
    1.88 -void cpufreq_dom_exit(void)
    1.89 +static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
    1.90  {
    1.91 -    cpufreq_dom_max = 0;
    1.92 -    if (cpufreq_dom_mask)
    1.93 -        xfree(cpufreq_dom_mask);
    1.94 -    if (cpufreq_dom_pt)
    1.95 -        xfree(cpufreq_dom_pt);
    1.96 -}
    1.97 -
    1.98 -int cpufreq_dom_init(void)
    1.99 -{
   1.100 -    unsigned int i;
   1.101 -
   1.102 -    cpufreq_dom_max = 0;
   1.103 +    struct acpi_cpufreq_data *data = drv_data[policy->cpu];
   1.104  
   1.105 -    for_each_online_cpu(i) {
   1.106 -        if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain)
   1.107 -            cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain;
   1.108 +    if (data) {
   1.109 +        drv_data[policy->cpu] = NULL;
   1.110 +        xfree(data->freq_table);
   1.111 +        xfree(data);
   1.112      }
   1.113 -    cpufreq_dom_max++;
   1.114 -
   1.115 -    cpufreq_dom_mask = xmalloc_array(unsigned long,
   1.116 -                                     BITS_TO_LONGS(cpufreq_dom_max));
   1.117 -    if (!cpufreq_dom_mask)
   1.118 -        return -ENOMEM;
   1.119 -    bitmap_zero(cpufreq_dom_mask, cpufreq_dom_max);
   1.120 -
   1.121 -    cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max);
   1.122 -    if (!cpufreq_dom_pt)
   1.123 -        return -ENOMEM;
   1.124 -    memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t));
   1.125 -
   1.126 -    for_each_online_cpu(i) {
   1.127 -        __set_bit(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask);
   1.128 -        cpu_set(i, cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]);
   1.129 -    }
   1.130 -
   1.131 -    for_each_online_cpu(i)
   1.132 -        processor_pminfo[i].perf.shared_cpu_map =
   1.133 -            cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain];
   1.134  
   1.135      return 0;
   1.136  }
   1.137  
   1.138 -static int cpufreq_cpu_init(void)
   1.139 -{
   1.140 -    int i, ret = 0;
   1.141 -
   1.142 -    for_each_online_cpu(i) {
   1.143 -        xen_px_policy[i].cpu = i;
   1.144 +static struct cpufreq_driver acpi_cpufreq_driver = {
   1.145 +    .verify = acpi_cpufreq_verify,
   1.146 +    .target = acpi_cpufreq_target,
   1.147 +    .init   = acpi_cpufreq_cpu_init,
   1.148 +    .exit   = acpi_cpufreq_cpu_exit,
   1.149 +};
   1.150  
   1.151 -        ret = px_statistic_init(i);
   1.152 -        if (ret)
   1.153 -            return ret;
   1.154 +int cpufreq_limit_change(unsigned int cpu)
   1.155 +{
   1.156 +    struct processor_performance *perf = &processor_pminfo[cpu].perf;
   1.157 +    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
   1.158 +    struct cpufreq_policy policy;
   1.159  
   1.160 -        ret = acpi_cpufreq_cpu_init(&xen_px_policy[i]);
   1.161 +    if (!cpu_online(cpu) || !data)
   1.162 +        return -ENODEV;
   1.163 +
   1.164 +    if ((perf->platform_limit < 0) || 
   1.165 +        (perf->platform_limit >= perf->state_count))
   1.166 +        return -EINVAL;
   1.167 +
   1.168 +    memcpy(&policy, data, sizeof(struct cpufreq_policy)); 
   1.169 +
   1.170 +    policy.max =
   1.171 +        perf->states[perf->platform_limit].core_frequency * 1000;
   1.172 +
   1.173 +    return __cpufreq_set_policy(data, &policy);
   1.174 +}
   1.175 +
   1.176 +int cpufreq_add_cpu(unsigned int cpu)
   1.177 +{
   1.178 +    int ret = 0;
   1.179 +    unsigned int firstcpu;
   1.180 +    unsigned int dom;
   1.181 +    unsigned int j;
   1.182 +    struct cpufreq_policy new_policy;
   1.183 +    struct cpufreq_policy *policy;
   1.184 +    struct processor_performance *perf = &processor_pminfo[cpu].perf;
   1.185 +
   1.186 +    /* to protect the case when Px was not controlled by xen */
   1.187 +    if (!(perf->init & XEN_PX_INIT))
   1.188 +        return 0;
   1.189 +
   1.190 +    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
   1.191 +        return -EINVAL;
   1.192 +
   1.193 +    ret = px_statistic_init(cpu);
   1.194 +    if (ret)
   1.195 +        return ret;
   1.196 +
   1.197 +    dom = perf->domain_info.domain;
   1.198 +    if (cpus_weight(cpufreq_dom_map[dom])) {
   1.199 +        /* share policy with the first cpu since on same boat */
   1.200 +        firstcpu = first_cpu(cpufreq_dom_map[dom]);
   1.201 +        policy = cpufreq_cpu_policy[firstcpu];
   1.202 +
   1.203 +        cpufreq_cpu_policy[cpu] = policy;
   1.204 +        cpu_set(cpu, cpufreq_dom_map[dom]);
   1.205 +        cpu_set(cpu, policy->cpus);
   1.206 +
   1.207 +        printk(KERN_EMERG"adding CPU %u\n", cpu);
   1.208 +    } else {
   1.209 +        /* for the first cpu, setup policy and do init work */
   1.210 +        policy = xmalloc(struct cpufreq_policy);
   1.211 +        if (!policy) {
   1.212 +            px_statistic_exit(cpu);
   1.213 +            return -ENOMEM;
   1.214 +        }
   1.215 +        memset(policy, 0, sizeof(struct cpufreq_policy));
   1.216 +
   1.217 +        cpufreq_cpu_policy[cpu] = policy;
   1.218 +        cpu_set(cpu, cpufreq_dom_map[dom]);
   1.219 +        cpu_set(cpu, policy->cpus);
   1.220 +
   1.221 +        policy->cpu = cpu;
   1.222 +        ret = cpufreq_driver->init(policy);
   1.223          if (ret)
   1.224 -            return ret;
   1.225 +            goto err1;
   1.226 +        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
   1.227      }
   1.228 +
   1.229 +    /*
   1.230 +     * After get full cpumap of the coordination domain,
   1.231 +     * we can safely start gov here.
   1.232 +     */
   1.233 +    if (cpus_weight(cpufreq_dom_map[dom]) ==
   1.234 +        perf->domain_info.num_processors) {
   1.235 +        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
   1.236 +        policy->governor = NULL;
   1.237 +        ret = __cpufreq_set_policy(policy, &new_policy);
   1.238 +        if (ret)
   1.239 +            goto err2;
   1.240 +    }
   1.241 +
   1.242 +    return 0;
   1.243 +
   1.244 +err2:
   1.245 +    cpufreq_driver->exit(policy);
   1.246 +err1:
   1.247 +    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
   1.248 +        cpufreq_cpu_policy[j] = NULL;
   1.249 +        px_statistic_exit(j);
   1.250 +    }
   1.251 +
   1.252 +    cpus_clear(cpufreq_dom_map[dom]);
   1.253 +    xfree(policy);
   1.254      return ret;
   1.255  }
   1.256  
   1.257 -int cpufreq_dom_dbs(unsigned int event)
   1.258 +int cpufreq_del_cpu(unsigned int cpu)
   1.259  {
   1.260 -    unsigned int cpu, dom;
   1.261 -    int ret = 0;
   1.262 +    unsigned int dom;
   1.263 +    struct cpufreq_policy *policy;
   1.264 +    struct processor_performance *perf = &processor_pminfo[cpu].perf;
   1.265  
   1.266 -    for (dom = 0; dom < cpufreq_dom_max; dom++) {
   1.267 -        if (!test_bit(dom, cpufreq_dom_mask))
   1.268 -            continue;
   1.269 -        cpu = first_cpu(cpufreq_dom_pt[dom]);
   1.270 -        ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event);
   1.271 -        if (ret)
   1.272 -            return ret;
   1.273 +    /* to protect the case when Px was not controlled by xen */
   1.274 +    if (!(perf->init & XEN_PX_INIT))
   1.275 +        return 0;
   1.276 +
   1.277 +    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
   1.278 +        return -EINVAL;
   1.279 +
   1.280 +    dom = perf->domain_info.domain;
   1.281 +    policy = cpufreq_cpu_policy[cpu];
   1.282 +
   1.283 +    printk(KERN_EMERG"deleting CPU %u\n", cpu);
   1.284 +
   1.285 +    /* for the first cpu of the domain, stop gov */
   1.286 +    if (cpus_weight(cpufreq_dom_map[dom]) ==
   1.287 +        perf->domain_info.num_processors)
   1.288 +        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
   1.289 +
   1.290 +    cpufreq_cpu_policy[cpu] = NULL;
   1.291 +    cpu_clear(cpu, policy->cpus);
   1.292 +    cpu_clear(cpu, cpufreq_dom_map[dom]);
   1.293 +    px_statistic_exit(cpu);
   1.294 +
   1.295 +    /* for the last cpu of the domain, clean room */
   1.296 +    /* It's safe here to free freq_table, drv_data and policy */
   1.297 +    if (!cpus_weight(cpufreq_dom_map[dom])) {
   1.298 +        cpufreq_driver->exit(policy);
   1.299 +        xfree(policy);
   1.300      }
   1.301 -    return ret;
   1.302 +
   1.303 +    return 0;
   1.304  }
   1.305  
   1.306 -int acpi_cpufreq_init(void)
   1.307 +static int __init cpufreq_driver_init(void)
   1.308  {
   1.309      int ret = 0;
   1.310 -    
   1.311 -    /* setup cpumask of psd dom and shared cpu map of cpu */
   1.312 -    ret = cpufreq_dom_init();
   1.313 -    if (ret)
   1.314 -        goto err;
   1.315 -
   1.316 -    /* setup cpufreq driver */
   1.317 -    cpufreq_driver = &acpi_cpufreq_driver;
   1.318  
   1.319 -    /* setup cpufreq infrastructure */
   1.320 -    ret = cpufreq_cpu_init();
   1.321 -    if (ret)
   1.322 -        goto err;
   1.323 -
   1.324 -    /* setup cpufreq dbs according to dom coordiation */
   1.325 -    ret = cpufreq_dom_dbs(CPUFREQ_GOV_START);
   1.326 -    if (ret)
   1.327 -        goto err;
   1.328 +    if ((cpufreq_controller == FREQCTL_xen) &&
   1.329 +        (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
   1.330 +        ret = cpufreq_register_driver(&acpi_cpufreq_driver);
   1.331  
   1.332      return ret;
   1.333 -
   1.334 -err:
   1.335 -    cpufreq_dom_exit();
   1.336 -    return ret;
   1.337  }
   1.338 +__initcall(cpufreq_driver_init);
     2.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c	Thu Sep 11 18:00:06 2008 +0100
     2.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c	Fri Sep 12 10:34:50 2008 +0100
     2.3 @@ -238,4 +238,9 @@ int cpufreq_governor_dbs(struct cpufreq_
     2.4          break;
     2.5      }
     2.6      return 0;
     2.7 -} 
     2.8 +}
     2.9 +
    2.10 +struct cpufreq_governor cpufreq_gov_dbs = {
    2.11 +    .name = "ondemand",
    2.12 +    .governor = cpufreq_governor_dbs,
    2.13 +};
     3.1 --- a/xen/arch/x86/acpi/cpufreq/powernow.c	Thu Sep 11 18:00:06 2008 +0100
     3.2 +++ b/xen/arch/x86/acpi/cpufreq/powernow.c	Fri Sep 12 10:34:50 2008 +0100
     3.3 @@ -50,7 +50,7 @@
     3.4  #define MSR_PSTATE_CUR_LIMIT    0xc0010061 /* pstate current limit MSR */
     3.5  
     3.6  extern struct processor_pminfo processor_pminfo[NR_CPUS];
     3.7 -extern struct cpufreq_policy xen_px_policy[NR_CPUS];
     3.8 +extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS];
     3.9  
    3.10  struct powernow_cpufreq_data {
    3.11      struct processor_performance *acpi_data;
    3.12 @@ -281,9 +281,9 @@ int powernow_cpufreq_init(void)
    3.13  
    3.14      /* setup cpufreq infrastructure */
    3.15      for_each_online_cpu(i) {
    3.16 -        xen_px_policy[i].cpu = i;
    3.17 +        cpufreq_cpu_policy[i]->cpu = i;
    3.18  
    3.19 -        ret = powernow_cpufreq_cpu_init(&xen_px_policy[i]);
    3.20 +        ret = powernow_cpufreq_cpu_init(cpufreq_cpu_policy[i]);
    3.21          if (ret)
    3.22              goto cpufreq_init_out;
    3.23      }
    3.24 @@ -293,7 +293,7 @@ int powernow_cpufreq_init(void)
    3.25          if (!cpu_isset(dom, dom_mask))
    3.26              continue;
    3.27          i = first_cpu(pt[dom]);
    3.28 -        ret = cpufreq_governor_dbs(&xen_px_policy[i], CPUFREQ_GOV_START);
    3.29 +        ret = cpufreq_governor_dbs(cpufreq_cpu_policy[i], CPUFREQ_GOV_START);
    3.30          if (ret)
    3.31              goto cpufreq_init_out;
    3.32      }
     4.1 --- a/xen/arch/x86/acpi/cpufreq/utility.c	Thu Sep 11 18:00:06 2008 +0100
     4.2 +++ b/xen/arch/x86/acpi/cpufreq/utility.c	Fri Sep 12 10:34:50 2008 +0100
     4.3 @@ -31,47 +31,14 @@
     4.4  #include <acpi/cpufreq/cpufreq.h>
     4.5  #include <public/sysctl.h>
     4.6  
     4.7 -struct cpufreq_driver *cpufreq_driver;
     4.8 +struct cpufreq_driver   *cpufreq_driver;
     4.9 +struct processor_pminfo processor_pminfo[NR_CPUS];
    4.10 +struct cpufreq_policy   *cpufreq_cpu_policy[NR_CPUS];
    4.11  
    4.12  /*********************************************************************
    4.13   *                    Px STATISTIC INFO                              *
    4.14   *********************************************************************/
    4.15  
    4.16 -void px_statistic_suspend(void)
    4.17 -{
    4.18 -    int cpu;
    4.19 -    uint64_t now;
    4.20 -
    4.21 -    now = NOW();
    4.22 -
    4.23 -    for_each_online_cpu(cpu) {
    4.24 -        struct pm_px *pxpt = &px_statistic_data[cpu];
    4.25 -        uint64_t total_idle_ns;
    4.26 -        uint64_t tmp_idle_ns;
    4.27 -
    4.28 -        total_idle_ns = get_cpu_idle_time(cpu);
    4.29 -        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
    4.30 -
    4.31 -        pxpt->u.pt[pxpt->u.cur].residency +=
    4.32 -                    now - pxpt->prev_state_wall;
    4.33 -        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
    4.34 -    }
    4.35 -}
    4.36 -
    4.37 -void px_statistic_resume(void)
    4.38 -{
    4.39 -    int cpu;
    4.40 -    uint64_t now;
    4.41 -
    4.42 -    now = NOW();
    4.43 -
    4.44 -    for_each_online_cpu(cpu) {
    4.45 -        struct pm_px *pxpt = &px_statistic_data[cpu];
    4.46 -        pxpt->prev_state_wall = now;
    4.47 -        pxpt->prev_idle_wall = get_cpu_idle_time(cpu);
    4.48 -    }
    4.49 -}
    4.50 -
    4.51  void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
    4.52  {
    4.53      uint32_t i;
    4.54 @@ -101,7 +68,7 @@ void px_statistic_update(cpumask_t cpuma
    4.55      }
    4.56  }
    4.57  
    4.58 -int px_statistic_init(int cpuid)
    4.59 +int px_statistic_init(unsigned int cpuid)
    4.60  {
    4.61      uint32_t i, count;
    4.62      struct pm_px *pxpt = &px_statistic_data[cpuid];
    4.63 @@ -123,7 +90,7 @@ int px_statistic_init(int cpuid)
    4.64      memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
    4.65  
    4.66      pxpt->u.total = pmpt->perf.state_count;
    4.67 -    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc;
    4.68 +    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
    4.69  
    4.70      for (i=0; i < pmpt->perf.state_count; i++)
    4.71          pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
    4.72 @@ -134,7 +101,16 @@ int px_statistic_init(int cpuid)
    4.73      return 0;
    4.74  }
    4.75  
    4.76 -void px_statistic_reset(int cpuid)
    4.77 +void px_statistic_exit(unsigned int cpuid)
    4.78 +{
    4.79 +    struct pm_px *pxpt = &px_statistic_data[cpuid];
    4.80 +
    4.81 +    xfree(pxpt->u.trans_pt);
    4.82 +    xfree(pxpt->u.pt);
    4.83 +    memset(pxpt, 0, sizeof(struct pm_px));
    4.84 +}
    4.85 +
    4.86 +void px_statistic_reset(unsigned int cpuid)
    4.87  {
    4.88      uint32_t i, j, count;
    4.89      struct pm_px *pxpt = &px_statistic_data[cpuid];
    4.90 @@ -184,6 +160,38 @@ int cpufreq_frequency_table_cpuinfo(stru
    4.91          return 0;
    4.92  }
    4.93  
    4.94 +int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
    4.95 +                                   struct cpufreq_frequency_table *table)
    4.96 +{
    4.97 +    unsigned int next_larger = ~0;
    4.98 +    unsigned int i;
    4.99 +    unsigned int count = 0;
   4.100 +
   4.101 +    if (!cpu_online(policy->cpu))
   4.102 +        return -EINVAL;
   4.103 +
   4.104 +    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
   4.105 +                                 policy->cpuinfo.max_freq);
   4.106 +
   4.107 +    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
   4.108 +        unsigned int freq = table[i].frequency;
   4.109 +        if (freq == CPUFREQ_ENTRY_INVALID)
   4.110 +            continue;
   4.111 +        if ((freq >= policy->min) && (freq <= policy->max))
   4.112 +            count++;
   4.113 +        else if ((next_larger > freq) && (freq > policy->max))
   4.114 +            next_larger = freq;
   4.115 +    }
   4.116 +
   4.117 +    if (!count)
   4.118 +        policy->max = next_larger;
   4.119 +
   4.120 +    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
   4.121 +                                 policy->cpuinfo.max_freq);
   4.122 +
   4.123 +    return 0;
   4.124 +}
   4.125 +
   4.126  int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
   4.127                                     struct cpufreq_frequency_table *table,
   4.128                                     unsigned int target_freq,
   4.129 @@ -289,57 +297,51 @@ int __cpufreq_driver_getavg(struct cpufr
   4.130  
   4.131  
   4.132  /*********************************************************************
   4.133 - *               CPUFREQ SUSPEND/RESUME                              *
   4.134 + *                 POLICY                                            *
   4.135   *********************************************************************/
   4.136  
   4.137 -void cpufreq_suspend(void)
   4.138 +/*
   4.139 + * data   : current policy.
   4.140 + * policy : policy to be set.
   4.141 + */
   4.142 +int __cpufreq_set_policy(struct cpufreq_policy *data,
   4.143 +                                struct cpufreq_policy *policy)
   4.144  {
   4.145 -    int cpu;
   4.146 +    int ret = 0;
   4.147  
   4.148 -    /* to protect the case when Px was not controlled by xen */
   4.149 -    for_each_online_cpu(cpu) {
   4.150 -        struct processor_performance *perf = &processor_pminfo[cpu].perf;
   4.151 +    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
   4.152  
   4.153 -        if (!(perf->init & XEN_PX_INIT))
   4.154 -            return;
   4.155 +    if (policy->min > data->min && policy->min > policy->max)
   4.156 +        return -EINVAL;
   4.157 +
   4.158 +    /* verify the cpu speed can be set within this limit */
   4.159 +    ret = cpufreq_driver->verify(policy);
   4.160 +    if (ret)
   4.161 +        return ret;
   4.162 +
   4.163 +    data->min = policy->min;
   4.164 +    data->max = policy->max;
   4.165 +
   4.166 +    if (policy->governor != data->governor) {
   4.167 +        /* save old, working values */
   4.168 +        struct cpufreq_governor *old_gov = data->governor;
   4.169 +
   4.170 +        /* end old governor */
   4.171 +        if (data->governor)
   4.172 +            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
   4.173 +
   4.174 +        /* start new governor */
   4.175 +        data->governor = policy->governor;
   4.176 +        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
   4.177 +            /* new governor failed, so re-start old one */
   4.178 +            if (old_gov) {
   4.179 +                data->governor = old_gov;
   4.180 +                __cpufreq_governor(data, CPUFREQ_GOV_START);
   4.181 +            }
   4.182 +            return -EINVAL;
   4.183 +        }
   4.184 +        /* might be a policy change, too, so fall through */
   4.185      }
   4.186  
   4.187 -    cpufreq_dom_dbs(CPUFREQ_GOV_STOP);
   4.188 -
   4.189 -    cpufreq_dom_exit();
   4.190 -
   4.191 -    px_statistic_suspend();
   4.192 +    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
   4.193  }
   4.194 -
   4.195 -int cpufreq_resume(void)
   4.196 -{
   4.197 -    int cpu, ret = 0;
   4.198 -
   4.199 -    /* 1. to protect the case when Px was not controlled by xen */
   4.200 -    /* 2. set state and resume flag to sync cpu to right state and freq */
   4.201 -    for_each_online_cpu(cpu) {
   4.202 -        struct processor_performance *perf = &processor_pminfo[cpu].perf;
   4.203 -        struct cpufreq_policy *policy = &xen_px_policy[cpu];
   4.204 -
   4.205 -        if (!(perf->init & XEN_PX_INIT))
   4.206 -            goto err;
   4.207 -        perf->state = 0;
   4.208 -        policy->resume = 1;
   4.209 -    }
   4.210 -
   4.211 -    px_statistic_resume();
   4.212 -
   4.213 -    ret = cpufreq_dom_init();
   4.214 -    if (ret)
   4.215 -        goto err;
   4.216 -
   4.217 -    ret = cpufreq_dom_dbs(CPUFREQ_GOV_START);
   4.218 -    if (ret)
   4.219 -        goto err;
   4.220 -
   4.221 -    return ret;
   4.222 -
   4.223 -err:
   4.224 -    cpufreq_dom_exit();
   4.225 -    return ret;
   4.226 -}
     5.1 --- a/xen/arch/x86/acpi/pmstat.c	Thu Sep 11 18:00:06 2008 +0100
     5.2 +++ b/xen/arch/x86/acpi/pmstat.c	Fri Sep 12 10:34:50 2008 +0100
     5.3 @@ -78,7 +78,7 @@ int do_get_pm_info(struct xen_sysctl_get
     5.4          tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
     5.5  
     5.6          now = NOW();
     5.7 -        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc;
     5.8 +        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
     5.9          pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
    5.10          pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
    5.11          pxpt->prev_state_wall = now;
     6.1 --- a/xen/arch/x86/acpi/power.c	Thu Sep 11 18:00:06 2008 +0100
     6.2 +++ b/xen/arch/x86/acpi/power.c	Fri Sep 12 10:34:50 2008 +0100
     6.3 @@ -133,8 +133,6 @@ static int enter_state(u32 state)
     6.4  
     6.5      freeze_domains();
     6.6  
     6.7 -    cpufreq_suspend();
     6.8 -
     6.9      disable_nonboot_cpus();
    6.10      if ( num_online_cpus() != 1 )
    6.11      {
    6.12 @@ -142,6 +140,8 @@ static int enter_state(u32 state)
    6.13          goto enable_cpu;
    6.14      }
    6.15  
    6.16 +    cpufreq_del_cpu(0);
    6.17 +
    6.18      hvm_cpu_down();
    6.19  
    6.20      acpi_sleep_prepare(state);
    6.21 @@ -189,8 +189,8 @@ static int enter_state(u32 state)
    6.22          BUG();
    6.23  
    6.24   enable_cpu:
    6.25 +    cpufreq_add_cpu(0);
    6.26      enable_nonboot_cpus();
    6.27 -    cpufreq_resume();
    6.28      thaw_domains();
    6.29      spin_unlock(&pm_lock);
    6.30      return error;
     7.1 --- a/xen/arch/x86/platform_hypercall.c	Thu Sep 11 18:00:06 2008 +0100
     7.2 +++ b/xen/arch/x86/platform_hypercall.c	Fri Sep 12 10:34:50 2008 +0100
     7.3 @@ -393,7 +393,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
     7.4                  memcpy ((void *)&pxpt->status_register,
     7.5                      (void *)&xenpxpt->status_register,
     7.6                      sizeof(struct xen_pct_register));
     7.7 -                pxpt->init |= XEN_PX_PCT;
     7.8              }
     7.9              if ( xenpxpt->flags & XEN_PX_PSS ) 
    7.10              {
    7.11 @@ -411,7 +410,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
    7.12                      break;
    7.13                  }
    7.14                  pxpt->state_count = xenpxpt->state_count;
    7.15 -                pxpt->init |= XEN_PX_PSS;
    7.16              }
    7.17              if ( xenpxpt->flags & XEN_PX_PSD )
    7.18              {
    7.19 @@ -419,27 +417,34 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
    7.20                  memcpy ((void *)&pxpt->domain_info,
    7.21                      (void *)&xenpxpt->domain_info,
    7.22                      sizeof(struct xen_psd_package));
    7.23 -                pxpt->init |= XEN_PX_PSD;
    7.24              }
    7.25              if ( xenpxpt->flags & XEN_PX_PPC )
    7.26              {
    7.27 -                pxpt->ppc = xenpxpt->ppc;
    7.28 -                pxpt->init |= XEN_PX_PPC;
    7.29 +                pxpt->platform_limit = xenpxpt->platform_limit;
    7.30 +
    7.31 +                if ( pxpt->init == XEN_PX_INIT )
    7.32 +                {
    7.33 +                    ret = cpufreq_limit_change(cpuid);
    7.34 +                    break;
    7.35 +                }
    7.36              }
    7.37  
    7.38 -            if ( pxpt->init == ( XEN_PX_PCT | XEN_PX_PSS |
    7.39 -                                 XEN_PX_PSD | XEN_PX_PPC ) )
    7.40 +            if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS |
    7.41 +                                     XEN_PX_PSD | XEN_PX_PPC ) )
    7.42              {
    7.43 -                pxpt->init |= XEN_PX_INIT;
    7.44 +                pxpt->init = XEN_PX_INIT;
    7.45                  cpu_count++;
    7.46 -            }
    7.47 -            if ( cpu_count == num_online_cpus() )
    7.48 -            {
    7.49 -                if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
    7.50 +
    7.51 +                /* Currently we only handle Intel and AMD processor */
    7.52 +                if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
    7.53 +                    ret = cpufreq_add_cpu(cpuid);
    7.54 +                else if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
    7.55 +                    (cpu_count == num_online_cpus()) )
    7.56                      ret = powernow_cpufreq_init();
    7.57                  else
    7.58 -                    ret = acpi_cpufreq_init();
    7.59 +                    break;
    7.60              }
    7.61 +
    7.62              break;
    7.63          }
    7.64   
     8.1 --- a/xen/arch/x86/smpboot.c	Thu Sep 11 18:00:06 2008 +0100
     8.2 +++ b/xen/arch/x86/smpboot.c	Fri Sep 12 10:34:50 2008 +0100
     8.3 @@ -55,6 +55,7 @@
     8.4  #include <mach_wakecpu.h>
     8.5  #include <smpboot_hooks.h>
     8.6  #include <xen/stop_machine.h>
     8.7 +#include <acpi/cpufreq/processor_perf.h>
     8.8  
     8.9  #define set_kernel_exec(x, y) (0)
    8.10  #define setup_trampoline()    (bootsym_phys(trampoline_realmode_entry))
    8.11 @@ -1232,6 +1233,8 @@ int __cpu_disable(void)
    8.12  	mdelay(1);
    8.13  	local_irq_disable();
    8.14  
    8.15 +	cpufreq_del_cpu(cpu);
    8.16 +
    8.17  	time_suspend();
    8.18  
    8.19  	remove_siblinginfo(cpu);
    8.20 @@ -1421,6 +1424,8 @@ int __devinit __cpu_up(unsigned int cpu)
    8.21  		mb();
    8.22  		process_pending_timers();
    8.23  	}
    8.24 +
    8.25 +	cpufreq_add_cpu(cpu);
    8.26  	return 0;
    8.27  }
    8.28  
     9.1 --- a/xen/include/acpi/cpufreq/cpufreq.h	Thu Sep 11 18:00:06 2008 +0100
     9.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h	Fri Sep 12 10:34:50 2008 +0100
     9.3 @@ -19,6 +19,8 @@
     9.4  
     9.5  #define CPUFREQ_NAME_LEN 16
     9.6  
     9.7 +struct cpufreq_governor;
     9.8 +
     9.9  struct cpufreq_cpuinfo {
    9.10      unsigned int        max_freq;
    9.11      unsigned int        min_freq;
    9.12 @@ -30,16 +32,21 @@ struct cpufreq_policy {
    9.13      unsigned int        shared_type;   /* ANY or ALL affected CPUs
    9.14                                            should set cpufreq */
    9.15      unsigned int        cpu;           /* cpu nr of registered CPU */
    9.16 -    struct cpufreq_cpuinfo    cpuinfo; /* see above */
    9.17 +    struct cpufreq_cpuinfo    cpuinfo;
    9.18  
    9.19      unsigned int        min;    /* in kHz */
    9.20      unsigned int        max;    /* in kHz */
    9.21      unsigned int        cur;    /* in kHz, only needed if cpufreq
    9.22                                   * governors are used */
    9.23 +    struct cpufreq_governor     *governor;
    9.24 +
    9.25      unsigned int        resume; /* flag for cpufreq 1st run
    9.26                                   * S3 wakeup, hotplug cpu, etc */
    9.27  };
    9.28 -extern struct cpufreq_policy xen_px_policy[NR_CPUS];
    9.29 +extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS];
    9.30 +
    9.31 +extern int __cpufreq_set_policy(struct cpufreq_policy *data,
    9.32 +                                struct cpufreq_policy *policy);
    9.33  
    9.34  #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
    9.35  #define CPUFREQ_SHARED_TYPE_HW   (1) /* HW does needed coordination */
    9.36 @@ -64,12 +71,27 @@ struct cpufreq_freqs {
    9.37  #define CPUFREQ_GOV_STOP   2
    9.38  #define CPUFREQ_GOV_LIMITS 3
    9.39  
    9.40 +struct cpufreq_governor {
    9.41 +    char    name[CPUFREQ_NAME_LEN];
    9.42 +    int     (*governor)(struct cpufreq_policy *policy,
    9.43 +                        unsigned int event);
    9.44 +};
    9.45 +
    9.46 +extern struct cpufreq_governor cpufreq_gov_dbs;
    9.47 +#define CPUFREQ_DEFAULT_GOVERNOR &cpufreq_gov_dbs
    9.48 +
    9.49  /* pass a target to the cpufreq driver */
    9.50  extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
    9.51                                     unsigned int target_freq,
    9.52                                     unsigned int relation);
    9.53  extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy);
    9.54  
    9.55 +static __inline__ int 
    9.56 +__cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
    9.57 +{
    9.58 +    return policy->governor->governor(policy, event);
    9.59 +}
    9.60 +
    9.61  
    9.62  /*********************************************************************
    9.63   *                      CPUFREQ DRIVER INTERFACE                     *
    9.64 @@ -91,7 +113,50 @@ struct cpufreq_driver {
    9.65  
    9.66  extern struct cpufreq_driver *cpufreq_driver;
    9.67  
    9.68 -void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state);
    9.69 +static __inline__ 
    9.70 +int cpufreq_register_driver(struct cpufreq_driver *driver_data)
    9.71 +{
    9.72 +    if (!driver_data         || 
    9.73 +        !driver_data->init   || 
    9.74 +        !driver_data->exit   || 
    9.75 +        !driver_data->verify || 
    9.76 +        !driver_data->target)
    9.77 +        return -EINVAL;
    9.78 +
    9.79 +    if (cpufreq_driver)
    9.80 +        return -EBUSY;
    9.81 +
    9.82 +    cpufreq_driver = driver_data;
    9.83 +    return 0;
    9.84 +}
    9.85 +
    9.86 +static __inline__ 
    9.87 +int cpufreq_unregister_driver(struct cpufreq_driver *driver)
    9.88 +{
    9.89 +    if (!cpufreq_driver || (driver != cpufreq_driver))
    9.90 +        return -EINVAL;
    9.91 +
    9.92 +    cpufreq_driver = NULL;
    9.93 +    return 0;
    9.94 +}
    9.95 +
    9.96 +static __inline__
    9.97 +void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
    9.98 +                                  unsigned int min, unsigned int max)
    9.99 +{
   9.100 +    if (policy->min < min)
   9.101 +        policy->min = min;
   9.102 +    if (policy->max < min)
   9.103 +        policy->max = min;
   9.104 +    if (policy->min > max)
   9.105 +        policy->min = max;
   9.106 +    if (policy->max > max)
   9.107 +        policy->max = max;
   9.108 +    if (policy->min > policy->max)
   9.109 +        policy->min = policy->max;
   9.110 +    return;
   9.111 +}
   9.112 +
   9.113  
   9.114  /*********************************************************************
   9.115   *                     FREQUENCY TABLE HELPERS                       *
   9.116 @@ -109,6 +174,9 @@ struct cpufreq_frequency_table {
   9.117  int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
   9.118                     struct cpufreq_frequency_table *table);
   9.119  
   9.120 +int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
   9.121 +                   struct cpufreq_frequency_table *table);
   9.122 +
   9.123  int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
   9.124                     struct cpufreq_frequency_table *table,
   9.125                     unsigned int target_freq,
    10.1 --- a/xen/include/acpi/cpufreq/processor_perf.h	Thu Sep 11 18:00:06 2008 +0100
    10.2 +++ b/xen/include/acpi/cpufreq/processor_perf.h	Fri Sep 12 10:34:50 2008 +0100
    10.3 @@ -7,26 +7,23 @@
    10.4  #define XEN_PX_INIT 0x80000000
    10.5  
    10.6  int get_cpu_id(u8);
    10.7 -int acpi_cpufreq_init(void);
    10.8  int powernow_cpufreq_init(void);
    10.9  
   10.10  void px_statistic_update(cpumask_t, uint8_t, uint8_t);
   10.11 -int  px_statistic_init(int);
   10.12 -void px_statistic_reset(int);
   10.13 -void px_statistic_suspend(void);
   10.14 -void px_statistic_resume(void);
   10.15 +int  px_statistic_init(unsigned int);
   10.16 +void px_statistic_exit(unsigned int);
   10.17 +void px_statistic_reset(unsigned int);
   10.18  
   10.19 -void cpufreq_dom_exit(void);
   10.20 -int  cpufreq_dom_init(void);
   10.21 -int  cpufreq_dom_dbs(unsigned int);
   10.22 -void cpufreq_suspend(void);
   10.23 -int  cpufreq_resume(void);
   10.24 +int  cpufreq_limit_change(unsigned int);
   10.25 +
   10.26 +int  cpufreq_add_cpu(unsigned int);
   10.27 +int  cpufreq_del_cpu(unsigned int);
   10.28  
   10.29  uint64_t get_cpu_idle_time(unsigned int);
   10.30  
   10.31  struct processor_performance {
   10.32      uint32_t state;
   10.33 -    uint32_t ppc;
   10.34 +    uint32_t platform_limit;
   10.35      struct xen_pct_register control_register;
   10.36      struct xen_pct_register status_register;
   10.37      uint32_t state_count;
    11.1 --- a/xen/include/public/platform.h	Thu Sep 11 18:00:06 2008 +0100
    11.2 +++ b/xen/include/public/platform.h	Fri Sep 12 10:34:50 2008 +0100
    11.3 @@ -289,7 +289,7 @@ struct xen_psd_package {
    11.4  
    11.5  struct xen_processor_performance {
    11.6      uint32_t flags;     /* flag for Px sub info type */
    11.7 -    uint32_t ppc;       /* Platform limitation on freq usage */
    11.8 +    uint32_t platform_limit;  /* Platform limitation on freq usage */
    11.9      struct xen_pct_register control_register;
   11.10      struct xen_pct_register status_register;
   11.11      uint32_t state_count;     /* total available performance states */