ia64/xen-unstable

changeset 18553:08374be21318

X86 and IA64: Rebase cpufreq logic for supporting both x86 and ia64
arch

Rebase cpufreq logic for supporting both x86 and ia64 arch:
1. move cpufreq arch-independent logic into common dir
(xen/drivers/acpi
and xen/drivers/cpufreq dir);
2. leave cpufreq x86-dependent logic at xen/arch/x86/acpi/cpufreq dir;

Signed-off-by: Yu, Ke <ke.yu@intel.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Sep 26 14:04:38 2008 +0100 (2008-09-26)
parents 5274aa966231
children d1d9915041de
files xen/arch/x86/acpi/Makefile xen/arch/x86/acpi/cpufreq/Makefile xen/arch/x86/acpi/cpufreq/cpufreq.c xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c xen/arch/x86/acpi/cpufreq/utility.c xen/arch/x86/acpi/pmstat.c xen/drivers/Makefile xen/drivers/acpi/Makefile xen/drivers/acpi/pmstat.c xen/drivers/cpufreq/Makefile xen/drivers/cpufreq/cpufreq.c xen/drivers/cpufreq/cpufreq_ondemand.c xen/drivers/cpufreq/utility.c xen/include/acpi/cpufreq/cpufreq.h xen/include/acpi/cpufreq/processor_perf.h
line diff
     1.1 --- a/xen/arch/x86/acpi/Makefile	Fri Sep 26 11:12:29 2008 +0100
     1.2 +++ b/xen/arch/x86/acpi/Makefile	Fri Sep 26 14:04:38 2008 +0100
     1.3 @@ -2,4 +2,3 @@ subdir-y += cpufreq
     1.4  
     1.5  obj-y += boot.o
     1.6  obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
     1.7 -obj-y += pmstat.o
     2.1 --- a/xen/arch/x86/acpi/cpufreq/Makefile	Fri Sep 26 11:12:29 2008 +0100
     2.2 +++ b/xen/arch/x86/acpi/cpufreq/Makefile	Fri Sep 26 14:04:38 2008 +0100
     2.3 @@ -1,4 +1,2 @@
     2.4  obj-y += cpufreq.o
     2.5 -obj-y += utility.o
     2.6 -obj-y += cpufreq_ondemand.o
     2.7  obj-y += powernow.o
     3.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 26 11:12:29 2008 +0100
     3.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Sep 26 14:04:38 2008 +0100
     3.3 @@ -45,9 +45,6 @@
     3.4  #include <acpi/acpi.h>
     3.5  #include <acpi/cpufreq/cpufreq.h>
     3.6  
     3.7 -/* TODO: change to link list later as domain number may be sparse */
     3.8 -static cpumask_t cpufreq_dom_map[NR_CPUS];
     3.9 -
    3.10  enum {
    3.11      UNDEFINED_CAPABLE = 0,
    3.12      SYSTEM_INTEL_MSR_CAPABLE,
    3.13 @@ -57,13 +54,6 @@ enum {
    3.14  #define INTEL_MSR_RANGE         (0xffff)
    3.15  #define CPUID_6_ECX_APERFMPERF_CAPABILITY       (0x1)
    3.16  
    3.17 -struct acpi_cpufreq_data {
    3.18 -    struct processor_performance *acpi_data;
    3.19 -    struct cpufreq_frequency_table *freq_table;
    3.20 -    unsigned int max_freq;
    3.21 -    unsigned int cpu_feature;
    3.22 -};
    3.23 -
    3.24  static struct acpi_cpufreq_data *drv_data[NR_CPUS];
    3.25  
    3.26  static struct cpufreq_driver acpi_cpufreq_driver;
    3.27 @@ -342,7 +332,7 @@ static int acpi_cpufreq_target(struct cp
    3.28              policy->resume = 0;
    3.29          }
    3.30          else {
    3.31 -            printk(KERN_INFO "Already at target state (P%d)\n", 
    3.32 +            printk(KERN_DEBUG "Already at target state (P%d)\n", 
    3.33                  next_perf_state);
    3.34              return 0;
    3.35          }
    3.36 @@ -379,7 +369,7 @@ static int acpi_cpufreq_target(struct cp
    3.37      if (!check_freqs(cmd.mask, freqs.new, data))
    3.38          return -EAGAIN;
    3.39  
    3.40 -    px_statistic_update(cmd.mask, perf->state, next_perf_state);
    3.41 +    cpufreq_statistic_update(cmd.mask, perf->state, next_perf_state);
    3.42  
    3.43      perf->state = next_perf_state;
    3.44      policy->cur = freqs.new;
    3.45 @@ -581,145 +571,6 @@ static struct cpufreq_driver acpi_cpufre
    3.46      .exit   = acpi_cpufreq_cpu_exit,
    3.47  };
    3.48  
    3.49 -int cpufreq_limit_change(unsigned int cpu)
    3.50 -{
    3.51 -    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
    3.52 -    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
    3.53 -    struct cpufreq_policy policy;
    3.54 -
    3.55 -    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
    3.56 -        return -ENODEV;
    3.57 -
    3.58 -    if ((perf->platform_limit < 0) || 
    3.59 -        (perf->platform_limit >= perf->state_count))
    3.60 -        return -EINVAL;
    3.61 -
    3.62 -    memcpy(&policy, data, sizeof(struct cpufreq_policy)); 
    3.63 -
    3.64 -    policy.max =
    3.65 -        perf->states[perf->platform_limit].core_frequency * 1000;
    3.66 -
    3.67 -    return __cpufreq_set_policy(data, &policy);
    3.68 -}
    3.69 -
    3.70 -int cpufreq_add_cpu(unsigned int cpu)
    3.71 -{
    3.72 -    int ret = 0;
    3.73 -    unsigned int firstcpu;
    3.74 -    unsigned int dom;
    3.75 -    unsigned int j;
    3.76 -    struct cpufreq_policy new_policy;
    3.77 -    struct cpufreq_policy *policy;
    3.78 -    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
    3.79 -
    3.80 -    /* to protect the case when Px was not controlled by xen */
    3.81 -    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
    3.82 -        return 0;
    3.83 -
    3.84 -    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
    3.85 -        return -EINVAL;
    3.86 -
    3.87 -    ret = px_statistic_init(cpu);
    3.88 -    if (ret)
    3.89 -        return ret;
    3.90 -
    3.91 -    dom = perf->domain_info.domain;
    3.92 -    if (cpus_weight(cpufreq_dom_map[dom])) {
    3.93 -        /* share policy with the first cpu since on same boat */
    3.94 -        firstcpu = first_cpu(cpufreq_dom_map[dom]);
    3.95 -        policy = cpufreq_cpu_policy[firstcpu];
    3.96 -
    3.97 -        cpufreq_cpu_policy[cpu] = policy;
    3.98 -        cpu_set(cpu, cpufreq_dom_map[dom]);
    3.99 -        cpu_set(cpu, policy->cpus);
   3.100 -
   3.101 -        printk(KERN_EMERG"adding CPU %u\n", cpu);
   3.102 -    } else {
   3.103 -        /* for the first cpu, setup policy and do init work */
   3.104 -        policy = xmalloc(struct cpufreq_policy);
   3.105 -        if (!policy) {
   3.106 -            px_statistic_exit(cpu);
   3.107 -            return -ENOMEM;
   3.108 -        }
   3.109 -        memset(policy, 0, sizeof(struct cpufreq_policy));
   3.110 -
   3.111 -        cpufreq_cpu_policy[cpu] = policy;
   3.112 -        cpu_set(cpu, cpufreq_dom_map[dom]);
   3.113 -        cpu_set(cpu, policy->cpus);
   3.114 -
   3.115 -        policy->cpu = cpu;
   3.116 -        ret = cpufreq_driver->init(policy);
   3.117 -        if (ret)
   3.118 -            goto err1;
   3.119 -        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
   3.120 -    }
   3.121 -
   3.122 -    /*
   3.123 -     * After get full cpumap of the coordination domain,
   3.124 -     * we can safely start gov here.
   3.125 -     */
   3.126 -    if (cpus_weight(cpufreq_dom_map[dom]) ==
   3.127 -        perf->domain_info.num_processors) {
   3.128 -        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
   3.129 -        policy->governor = NULL;
   3.130 -        ret = __cpufreq_set_policy(policy, &new_policy);
   3.131 -        if (ret)
   3.132 -            goto err2;
   3.133 -    }
   3.134 -
   3.135 -    return 0;
   3.136 -
   3.137 -err2:
   3.138 -    cpufreq_driver->exit(policy);
   3.139 -err1:
   3.140 -    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
   3.141 -        cpufreq_cpu_policy[j] = NULL;
   3.142 -        px_statistic_exit(j);
   3.143 -    }
   3.144 -
   3.145 -    cpus_clear(cpufreq_dom_map[dom]);
   3.146 -    xfree(policy);
   3.147 -    return ret;
   3.148 -}
   3.149 -
   3.150 -int cpufreq_del_cpu(unsigned int cpu)
   3.151 -{
   3.152 -    unsigned int dom;
   3.153 -    struct cpufreq_policy *policy;
   3.154 -    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
   3.155 -
   3.156 -    /* to protect the case when Px was not controlled by xen */
   3.157 -    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
   3.158 -        return 0;
   3.159 -
   3.160 -    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
   3.161 -        return -EINVAL;
   3.162 -
   3.163 -    dom = perf->domain_info.domain;
   3.164 -    policy = cpufreq_cpu_policy[cpu];
   3.165 -
   3.166 -    printk(KERN_EMERG"deleting CPU %u\n", cpu);
   3.167 -
   3.168 -    /* for the first cpu of the domain, stop gov */
   3.169 -    if (cpus_weight(cpufreq_dom_map[dom]) ==
   3.170 -        perf->domain_info.num_processors)
   3.171 -        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
   3.172 -
   3.173 -    cpufreq_cpu_policy[cpu] = NULL;
   3.174 -    cpu_clear(cpu, policy->cpus);
   3.175 -    cpu_clear(cpu, cpufreq_dom_map[dom]);
   3.176 -    px_statistic_exit(cpu);
   3.177 -
   3.178 -    /* for the last cpu of the domain, clean room */
   3.179 -    /* It's safe here to free freq_table, drv_data and policy */
   3.180 -    if (!cpus_weight(cpufreq_dom_map[dom])) {
   3.181 -        cpufreq_driver->exit(policy);
   3.182 -        xfree(policy);
   3.183 -    }
   3.184 -
   3.185 -    return 0;
   3.186 -}
   3.187 -
   3.188  static int __init cpufreq_driver_init(void)
   3.189  {
   3.190      int ret = 0;
     4.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c	Fri Sep 26 11:12:29 2008 +0100
     4.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.3 @@ -1,246 +0,0 @@
     4.4 -/*
     4.5 - *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
     4.6 - *
     4.7 - *  Copyright (C)  2001 Russell King
     4.8 - *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
     4.9 - *                      Jun Nakajima <jun.nakajima@intel.com>
    4.10 - *             Feb 2008 Liu Jinsong <jinsong.liu@intel.com>
    4.11 - *             Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor 
    4.12 - *
    4.13 - * This program is free software; you can redistribute it and/or modify
    4.14 - * it under the terms of the GNU General Public License version 2 as
    4.15 - * published by the Free Software Foundation.
    4.16 - */
    4.17 -
    4.18 -#include <xen/types.h>
    4.19 -#include <xen/percpu.h>
    4.20 -#include <xen/cpumask.h>
    4.21 -#include <xen/types.h>
    4.22 -#include <xen/sched.h>
    4.23 -#include <xen/timer.h>
    4.24 -#include <asm/config.h>
    4.25 -#include <acpi/cpufreq/cpufreq.h>
    4.26 -
    4.27 -#define DEF_FREQUENCY_UP_THRESHOLD              (80)
    4.28 -
    4.29 -#define MIN_DBS_INTERVAL                        (MICROSECS(100))
    4.30 -#define MIN_SAMPLING_MILLISECS                  (20)
    4.31 -#define MIN_STAT_SAMPLING_RATE                   \
    4.32 -    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
    4.33 -#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
    4.34 -#define TRANSITION_LATENCY_LIMIT                (10 * 1000 )
    4.35 -
    4.36 -static uint64_t def_sampling_rate;
    4.37 -
    4.38 -/* Sampling types */
    4.39 -enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
    4.40 -
    4.41 -static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
    4.42 -
    4.43 -static unsigned int dbs_enable;    /* number of CPUs using this policy */
    4.44 -
    4.45 -static struct dbs_tuners {
    4.46 -    uint64_t     sampling_rate;
    4.47 -    unsigned int up_threshold;
    4.48 -    unsigned int ignore_nice;
    4.49 -    unsigned int powersave_bias;
    4.50 -} dbs_tuners_ins = {
    4.51 -    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
    4.52 -    .ignore_nice = 0,
    4.53 -    .powersave_bias = 0,
    4.54 -};
    4.55 -
    4.56 -static struct timer dbs_timer[NR_CPUS];
    4.57 -
    4.58 -uint64_t get_cpu_idle_time(unsigned int cpu)
    4.59 -{
    4.60 -    uint64_t idle_ns;
    4.61 -    struct vcpu *v;
    4.62 -
    4.63 -    if ((v = idle_vcpu[cpu]) == NULL)
    4.64 -        return 0;
    4.65 -
    4.66 -    idle_ns = v->runstate.time[RUNSTATE_running];
    4.67 -    if (v->is_running)
    4.68 -        idle_ns += NOW() - v->runstate.state_entry_time;
    4.69 -
    4.70 -    return idle_ns;
    4.71 -}
    4.72 -
    4.73 -static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
    4.74 -{
    4.75 -    unsigned int load = 0;
    4.76 -    uint64_t cur_ns, idle_ns, total_ns;
    4.77 -
    4.78 -    struct cpufreq_policy *policy;
    4.79 -    unsigned int j;
    4.80 -
    4.81 -    if (!this_dbs_info->enable)
    4.82 -        return;
    4.83 -
    4.84 -    policy = this_dbs_info->cur_policy;
    4.85 -
    4.86 -    if (unlikely(policy->resume)) {
    4.87 -        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
    4.88 -        return;
    4.89 -    }
    4.90 -
    4.91 -    cur_ns = NOW();
    4.92 -    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
    4.93 -    this_dbs_info->prev_cpu_wall = NOW();
    4.94 -
    4.95 -    if (total_ns < MIN_DBS_INTERVAL)
    4.96 -        return;
    4.97 -
    4.98 -    /* Get Idle Time */
    4.99 -    idle_ns = UINT_MAX;
   4.100 -    for_each_cpu_mask(j, policy->cpus) {
   4.101 -        uint64_t total_idle_ns;
   4.102 -        unsigned int tmp_idle_ns;
   4.103 -        struct cpu_dbs_info_s *j_dbs_info;
   4.104 -
   4.105 -        j_dbs_info = &per_cpu(cpu_dbs_info, j);
   4.106 -        total_idle_ns = get_cpu_idle_time(j);
   4.107 -        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
   4.108 -        j_dbs_info->prev_cpu_idle = total_idle_ns;
   4.109 -
   4.110 -        if (tmp_idle_ns < idle_ns)
   4.111 -            idle_ns = tmp_idle_ns;
   4.112 -    }
   4.113 -
   4.114 -    if (likely(total_ns > idle_ns))
   4.115 -        load = (100 * (total_ns - idle_ns)) / total_ns;
   4.116 -
   4.117 -    /* Check for frequency increase */
   4.118 -    if (load > dbs_tuners_ins.up_threshold) {
   4.119 -        /* if we are already at full speed then break out early */
   4.120 -        if (policy->cur == policy->max)
   4.121 -            return;
   4.122 -        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
   4.123 -        return;
   4.124 -    }
   4.125 -
   4.126 -    /* Check for frequency decrease */
   4.127 -    /* if we cannot reduce the frequency anymore, break out early */
   4.128 -    if (policy->cur == policy->min)
   4.129 -        return;
   4.130 -
   4.131 -    /*
   4.132 -     * The optimal frequency is the frequency that is the lowest that
   4.133 -     * can support the current CPU usage without triggering the up
   4.134 -     * policy. To be safe, we focus 10 points under the threshold.
   4.135 -     */
   4.136 -    if (load < (dbs_tuners_ins.up_threshold - 10)) {
   4.137 -        unsigned int freq_next, freq_cur;
   4.138 -
   4.139 -        freq_cur = __cpufreq_driver_getavg(policy);
   4.140 -        if (!freq_cur)
   4.141 -            freq_cur = policy->cur;
   4.142 -
   4.143 -        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
   4.144 -
   4.145 -        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
   4.146 -    }
   4.147 -}
   4.148 -
   4.149 -static void do_dbs_timer(void *dbs)
   4.150 -{
   4.151 -    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
   4.152 -
   4.153 -    if (!dbs_info->enable)
   4.154 -        return;
   4.155 -
   4.156 -    dbs_check_cpu(dbs_info);
   4.157 -
   4.158 -    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
   4.159 -}
   4.160 -
   4.161 -static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
   4.162 -{
   4.163 -    dbs_info->enable = 1;
   4.164 -
   4.165 -    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 
   4.166 -        (void *)dbs_info, dbs_info->cpu);
   4.167 -
   4.168 -    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
   4.169 -}
   4.170 -
   4.171 -static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
   4.172 -{
   4.173 -    dbs_info->enable = 0;
   4.174 -    stop_timer(&dbs_timer[dbs_info->cpu]);
   4.175 -}
   4.176 -
   4.177 -int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
   4.178 -{
   4.179 -    unsigned int cpu = policy->cpu;
   4.180 -    struct cpu_dbs_info_s *this_dbs_info;
   4.181 -    unsigned int j;
   4.182 -
   4.183 -    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
   4.184 -
   4.185 -    switch (event) {
   4.186 -    case CPUFREQ_GOV_START:
   4.187 -        if ((!cpu_online(cpu)) || (!policy->cur))
   4.188 -            return -EINVAL;
   4.189 -
   4.190 -        if (policy->cpuinfo.transition_latency >
   4.191 -            (TRANSITION_LATENCY_LIMIT * 1000)) {
   4.192 -            printk(KERN_WARNING "ondemand governor failed to load "
   4.193 -                "due to too long transition latency\n");
   4.194 -            return -EINVAL;
   4.195 -        }
   4.196 -        if (this_dbs_info->enable)
   4.197 -            /* Already enabled */
   4.198 -            break;
   4.199 -
   4.200 -        dbs_enable++;
   4.201 -
   4.202 -        for_each_cpu_mask(j, policy->cpus) {
   4.203 -            struct cpu_dbs_info_s *j_dbs_info;
   4.204 -            j_dbs_info = &per_cpu(cpu_dbs_info, j);
   4.205 -            j_dbs_info->cur_policy = policy;
   4.206 -
   4.207 -            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
   4.208 -            j_dbs_info->prev_cpu_wall = NOW();
   4.209 -        }
   4.210 -        this_dbs_info->cpu = cpu;
   4.211 -        /*
   4.212 -         * Start the timerschedule work, when this governor
   4.213 -         * is used for first time
   4.214 -         */
   4.215 -        if (dbs_enable == 1) {
   4.216 -            def_sampling_rate = policy->cpuinfo.transition_latency *
   4.217 -                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
   4.218 -
   4.219 -            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
   4.220 -                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
   4.221 -
   4.222 -            dbs_tuners_ins.sampling_rate = def_sampling_rate;
   4.223 -        }
   4.224 -        dbs_timer_init(this_dbs_info);
   4.225 -
   4.226 -        break;
   4.227 -
   4.228 -    case CPUFREQ_GOV_STOP:
   4.229 -        dbs_timer_exit(this_dbs_info);
   4.230 -        dbs_enable--;
   4.231 -
   4.232 -        break;
   4.233 -
   4.234 -    case CPUFREQ_GOV_LIMITS:
   4.235 -        if (policy->max < this_dbs_info->cur_policy->cur)
   4.236 -            __cpufreq_driver_target(this_dbs_info->cur_policy,
   4.237 -                policy->max, CPUFREQ_RELATION_H);
   4.238 -        else if (policy->min > this_dbs_info->cur_policy->cur)
   4.239 -            __cpufreq_driver_target(this_dbs_info->cur_policy,
   4.240 -                policy->min, CPUFREQ_RELATION_L);
   4.241 -        break;
   4.242 -    }
   4.243 -    return 0;
   4.244 -}
   4.245 -
   4.246 -struct cpufreq_governor cpufreq_gov_dbs = {
   4.247 -    .name = "ondemand",
   4.248 -    .governor = cpufreq_governor_dbs,
   4.249 -};
     5.1 --- a/xen/arch/x86/acpi/cpufreq/utility.c	Fri Sep 26 11:12:29 2008 +0100
     5.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.3 @@ -1,368 +0,0 @@
     5.4 -/*
     5.5 - *  utility.c - misc functions for cpufreq driver and Px statistic
     5.6 - *
     5.7 - *  Copyright (C) 2001 Russell King
     5.8 - *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
     5.9 - *
    5.10 - *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
    5.11 - *    Added handling for CPU hotplug
    5.12 - *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
    5.13 - *    Fix handling for CPU hotplug -- affected CPUs
    5.14 - *  Feb 2008 - Liu Jinsong <jinsong.liu@intel.com>
    5.15 - *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
    5.16 - *    And poring to Xen hypervisor
    5.17 - *    2. some Px statistic interface funcdtions
    5.18 - *
    5.19 - * This program is free software; you can redistribute it and/or modify
    5.20 - * it under the terms of the GNU General Public License version 2 as
    5.21 - * published by the Free Software Foundation.
    5.22 - *
    5.23 - */
    5.24 -
    5.25 -#include <xen/errno.h>
    5.26 -#include <xen/cpumask.h>
    5.27 -#include <xen/types.h>
    5.28 -#include <xen/spinlock.h>
    5.29 -#include <xen/percpu.h>
    5.30 -#include <xen/types.h>
    5.31 -#include <xen/sched.h>
    5.32 -#include <xen/timer.h>
    5.33 -#include <asm/config.h>
    5.34 -#include <acpi/cpufreq/cpufreq.h>
    5.35 -#include <public/sysctl.h>
    5.36 -
    5.37 -struct cpufreq_driver   *cpufreq_driver;
    5.38 -struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
    5.39 -struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
    5.40 -
    5.41 -/*********************************************************************
    5.42 - *                    Px STATISTIC INFO                              *
    5.43 - *********************************************************************/
    5.44 -
    5.45 -void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
    5.46 -{
    5.47 -    uint32_t i;
    5.48 -    uint64_t now;
    5.49 -
    5.50 -    now = NOW();
    5.51 -
    5.52 -    for_each_cpu_mask(i, cpumask) {
    5.53 -        struct pm_px *pxpt = px_statistic_data[i];
    5.54 -        struct processor_pminfo *pmpt = processor_pminfo[i];
    5.55 -        uint64_t total_idle_ns;
    5.56 -        uint64_t tmp_idle_ns;
    5.57 -
    5.58 -        if ( !pxpt || !pmpt )
    5.59 -            continue;
    5.60 -
    5.61 -        total_idle_ns = get_cpu_idle_time(i);
    5.62 -        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
    5.63 -
    5.64 -        pxpt->u.last = from;
    5.65 -        pxpt->u.cur = to;
    5.66 -        pxpt->u.pt[to].count++;
    5.67 -        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
    5.68 -        pxpt->u.pt[from].residency -= tmp_idle_ns;
    5.69 -
    5.70 -        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
    5.71 -
    5.72 -        pxpt->prev_state_wall = now;
    5.73 -        pxpt->prev_idle_wall = total_idle_ns;
    5.74 -    }
    5.75 -}
    5.76 -
    5.77 -int px_statistic_init(unsigned int cpuid)
    5.78 -{
    5.79 -    uint32_t i, count;
    5.80 -    struct pm_px *pxpt = px_statistic_data[cpuid];
    5.81 -    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
    5.82 -
    5.83 -    count = pmpt->perf.state_count;
    5.84 -
    5.85 -    if ( !pmpt )
    5.86 -        return -EINVAL;
    5.87 -
    5.88 -    if ( !pxpt )
    5.89 -    {
    5.90 -        pxpt = xmalloc(struct pm_px);
    5.91 -        if ( !pxpt )
    5.92 -            return -ENOMEM;
    5.93 -        memset(pxpt, 0, sizeof(*pxpt));
    5.94 -        px_statistic_data[cpuid] = pxpt;
    5.95 -    }
    5.96 -
    5.97 -    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
    5.98 -    if (!pxpt->u.trans_pt)
    5.99 -        return -ENOMEM;
   5.100 -
   5.101 -    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
   5.102 -    if (!pxpt->u.pt) {
   5.103 -        xfree(pxpt->u.trans_pt);
   5.104 -        return -ENOMEM;
   5.105 -    }
   5.106 -
   5.107 -    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
   5.108 -    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
   5.109 -
   5.110 -    pxpt->u.total = pmpt->perf.state_count;
   5.111 -    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
   5.112 -
   5.113 -    for (i=0; i < pmpt->perf.state_count; i++)
   5.114 -        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
   5.115 -
   5.116 -    pxpt->prev_state_wall = NOW();
   5.117 -    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
   5.118 -
   5.119 -    return 0;
   5.120 -}
   5.121 -
   5.122 -void px_statistic_exit(unsigned int cpuid)
   5.123 -{
   5.124 -    struct pm_px *pxpt = px_statistic_data[cpuid];
   5.125 -
   5.126 -    if (!pxpt)
   5.127 -        return;
   5.128 -    xfree(pxpt->u.trans_pt);
   5.129 -    xfree(pxpt->u.pt);
   5.130 -    memset(pxpt, 0, sizeof(struct pm_px));
   5.131 -}
   5.132 -
   5.133 -void px_statistic_reset(unsigned int cpuid)
   5.134 -{
   5.135 -    uint32_t i, j, count;
   5.136 -    struct pm_px *pxpt = px_statistic_data[cpuid];
   5.137 -    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
   5.138 -
   5.139 -    if ( !pxpt || !pmpt )
   5.140 -        return;
   5.141 -
   5.142 -    count = pmpt->perf.state_count;
   5.143 -
   5.144 -    for (i=0; i < count; i++) {
   5.145 -        pxpt->u.pt[i].residency = 0;
   5.146 -        pxpt->u.pt[i].count = 0;
   5.147 -
   5.148 -        for (j=0; j < count; j++)
   5.149 -            *(pxpt->u.trans_pt + i*count + j) = 0;
   5.150 -    }
   5.151 -
   5.152 -    pxpt->prev_state_wall = NOW();
   5.153 -    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
   5.154 -}
   5.155 -
   5.156 -
   5.157 -/*********************************************************************
   5.158 - *                   FREQUENCY TABLE HELPERS                         *
   5.159 - *********************************************************************/
   5.160 -
   5.161 -int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
   5.162 -                                    struct cpufreq_frequency_table *table)
   5.163 -{
   5.164 -    unsigned int min_freq = ~0;
   5.165 -    unsigned int max_freq = 0;
   5.166 -    unsigned int i;
   5.167 -
   5.168 -    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
   5.169 -        unsigned int freq = table[i].frequency;
   5.170 -        if (freq == CPUFREQ_ENTRY_INVALID)
   5.171 -            continue;
   5.172 -        if (freq < min_freq)
   5.173 -            min_freq = freq;
   5.174 -        if (freq > max_freq)
   5.175 -            max_freq = freq;
   5.176 -    }
   5.177 -
   5.178 -    policy->min = policy->cpuinfo.min_freq = min_freq;
   5.179 -    policy->max = policy->cpuinfo.max_freq = max_freq;
   5.180 -
   5.181 -    if (policy->min == ~0)
   5.182 -        return -EINVAL;
   5.183 -    else
   5.184 -        return 0;
   5.185 -}
   5.186 -
   5.187 -int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
   5.188 -                                   struct cpufreq_frequency_table *table)
   5.189 -{
   5.190 -    unsigned int next_larger = ~0;
   5.191 -    unsigned int i;
   5.192 -    unsigned int count = 0;
   5.193 -
   5.194 -    if (!cpu_online(policy->cpu))
   5.195 -        return -EINVAL;
   5.196 -
   5.197 -    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
   5.198 -                                 policy->cpuinfo.max_freq);
   5.199 -
   5.200 -    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
   5.201 -        unsigned int freq = table[i].frequency;
   5.202 -        if (freq == CPUFREQ_ENTRY_INVALID)
   5.203 -            continue;
   5.204 -        if ((freq >= policy->min) && (freq <= policy->max))
   5.205 -            count++;
   5.206 -        else if ((next_larger > freq) && (freq > policy->max))
   5.207 -            next_larger = freq;
   5.208 -    }
   5.209 -
   5.210 -    if (!count)
   5.211 -        policy->max = next_larger;
   5.212 -
   5.213 -    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
   5.214 -                                 policy->cpuinfo.max_freq);
   5.215 -
   5.216 -    return 0;
   5.217 -}
   5.218 -
   5.219 -int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
   5.220 -                                   struct cpufreq_frequency_table *table,
   5.221 -                                   unsigned int target_freq,
   5.222 -                                   unsigned int relation,
   5.223 -                                   unsigned int *index)
   5.224 -{
   5.225 -    struct cpufreq_frequency_table optimal = {
   5.226 -        .index = ~0,
   5.227 -        .frequency = 0,
   5.228 -    };
   5.229 -    struct cpufreq_frequency_table suboptimal = {
   5.230 -        .index = ~0,
   5.231 -        .frequency = 0,
   5.232 -    };
   5.233 -    unsigned int i;
   5.234 -
   5.235 -    switch (relation) {
   5.236 -    case CPUFREQ_RELATION_H:
   5.237 -        suboptimal.frequency = ~0;
   5.238 -        break;
   5.239 -    case CPUFREQ_RELATION_L:
   5.240 -        optimal.frequency = ~0;
   5.241 -        break;
   5.242 -    }
   5.243 -
   5.244 -    if (!cpu_online(policy->cpu))
   5.245 -        return -EINVAL;
   5.246 -
   5.247 -    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
   5.248 -        unsigned int freq = table[i].frequency;
   5.249 -        if (freq == CPUFREQ_ENTRY_INVALID)
   5.250 -            continue;
   5.251 -        if ((freq < policy->min) || (freq > policy->max))
   5.252 -            continue;
   5.253 -        switch(relation) {
   5.254 -        case CPUFREQ_RELATION_H:
   5.255 -            if (freq <= target_freq) {
   5.256 -                if (freq >= optimal.frequency) {
   5.257 -                    optimal.frequency = freq;
   5.258 -                    optimal.index = i;
   5.259 -                }
   5.260 -            } else {
   5.261 -                if (freq <= suboptimal.frequency) {
   5.262 -                    suboptimal.frequency = freq;
   5.263 -                    suboptimal.index = i;
   5.264 -                }
   5.265 -            }
   5.266 -            break;
   5.267 -        case CPUFREQ_RELATION_L:
   5.268 -            if (freq >= target_freq) {
   5.269 -                if (freq <= optimal.frequency) {
   5.270 -                    optimal.frequency = freq;
   5.271 -                    optimal.index = i;
   5.272 -                }
   5.273 -            } else {
   5.274 -                if (freq >= suboptimal.frequency) {
   5.275 -                    suboptimal.frequency = freq;
   5.276 -                    suboptimal.index = i;
   5.277 -                }
   5.278 -            }
   5.279 -            break;
   5.280 -        }
   5.281 -    }
   5.282 -    if (optimal.index > i) {
   5.283 -        if (suboptimal.index > i)
   5.284 -            return -EINVAL;
   5.285 -        *index = suboptimal.index;
   5.286 -    } else
   5.287 -        *index = optimal.index;
   5.288 -
   5.289 -    return 0;
   5.290 -}
   5.291 -
   5.292 -
   5.293 -/*********************************************************************
   5.294 - *               GOVERNORS                                           *
   5.295 - *********************************************************************/
   5.296 -
   5.297 -int __cpufreq_driver_target(struct cpufreq_policy *policy,
   5.298 -                            unsigned int target_freq,
   5.299 -                            unsigned int relation)
   5.300 -{
   5.301 -    int retval = -EINVAL;
   5.302 -
   5.303 -    if (cpu_online(policy->cpu) && cpufreq_driver->target)
   5.304 -        retval = cpufreq_driver->target(policy, target_freq, relation);
   5.305 -
   5.306 -    return retval;
   5.307 -}
   5.308 -
   5.309 -int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
   5.310 -{
   5.311 -    int ret = 0;
   5.312 -
   5.313 -    if (!policy)
   5.314 -        return -EINVAL;
   5.315 -
   5.316 -    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
   5.317 -        ret = cpufreq_driver->getavg(policy->cpu);
   5.318 -
   5.319 -    return ret;
   5.320 -}
   5.321 -
   5.322 -
   5.323 -/*********************************************************************
   5.324 - *                 POLICY                                            *
   5.325 - *********************************************************************/
   5.326 -
   5.327 -/*
   5.328 - * data   : current policy.
   5.329 - * policy : policy to be set.
   5.330 - */
   5.331 -int __cpufreq_set_policy(struct cpufreq_policy *data,
   5.332 -                                struct cpufreq_policy *policy)
   5.333 -{
   5.334 -    int ret = 0;
   5.335 -
   5.336 -    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
   5.337 -
   5.338 -    if (policy->min > data->min && policy->min > policy->max)
   5.339 -        return -EINVAL;
   5.340 -
   5.341 -    /* verify the cpu speed can be set within this limit */
   5.342 -    ret = cpufreq_driver->verify(policy);
   5.343 -    if (ret)
   5.344 -        return ret;
   5.345 -
   5.346 -    data->min = policy->min;
   5.347 -    data->max = policy->max;
   5.348 -
   5.349 -    if (policy->governor != data->governor) {
   5.350 -        /* save old, working values */
   5.351 -        struct cpufreq_governor *old_gov = data->governor;
   5.352 -
   5.353 -        /* end old governor */
   5.354 -        if (data->governor)
   5.355 -            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
   5.356 -
   5.357 -        /* start new governor */
   5.358 -        data->governor = policy->governor;
   5.359 -        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
   5.360 -            /* new governor failed, so re-start old one */
   5.361 -            if (old_gov) {
   5.362 -                data->governor = old_gov;
   5.363 -                __cpufreq_governor(data, CPUFREQ_GOV_START);
   5.364 -            }
   5.365 -            return -EINVAL;
   5.366 -        }
   5.367 -        /* might be a policy change, too, so fall through */
   5.368 -    }
   5.369 -
   5.370 -    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
   5.371 -}
     6.1 --- a/xen/arch/x86/acpi/pmstat.c	Fri Sep 26 11:12:29 2008 +0100
     6.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.3 @@ -1,155 +0,0 @@
     6.4 -/*****************************************************************************
     6.5 -#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
     6.6 -#
     6.7 -#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@intel.com>
     6.8 -#
     6.9 -# This program is free software; you can redistribute it and/or modify it 
    6.10 -# under the terms of the GNU General Public License as published by the Free 
    6.11 -# Software Foundation; either version 2 of the License, or (at your option) 
    6.12 -# any later version.
    6.13 -#
    6.14 -# This program is distributed in the hope that it will be useful, but WITHOUT 
    6.15 -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
    6.16 -# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
    6.17 -# more details.
    6.18 -#
    6.19 -# You should have received a copy of the GNU General Public License along with
    6.20 -# this program; if not, write to the Free Software Foundation, Inc., 59 
    6.21 -# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
    6.22 -#
    6.23 -# The full GNU General Public License is included in this distribution in the
    6.24 -# file called LICENSE.
    6.25 -#
    6.26 -*****************************************************************************/
    6.27 -
    6.28 -#include <xen/config.h>
    6.29 -#include <xen/lib.h>
    6.30 -#include <xen/errno.h>
    6.31 -#include <xen/sched.h>
    6.32 -#include <xen/event.h>
    6.33 -#include <xen/irq.h>
    6.34 -#include <xen/iocap.h>
    6.35 -#include <xen/compat.h>
    6.36 -#include <xen/guest_access.h>
    6.37 -#include <asm/current.h>
    6.38 -#include <public/xen.h>
    6.39 -#include <xen/cpumask.h>
    6.40 -#include <asm/processor.h>
    6.41 -#include <xen/percpu.h>
    6.42 -#include <xen/domain.h>
    6.43 -
    6.44 -#include <public/sysctl.h>
    6.45 -#include <acpi/cpufreq/cpufreq.h>
    6.46 -
    6.47 -struct pm_px *__read_mostly px_statistic_data[NR_CPUS];
    6.48 -
    6.49 -extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
    6.50 -extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
    6.51 -extern int pmstat_reset_cx_stat(uint32_t cpuid);
    6.52 -
    6.53 -int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
    6.54 -{
    6.55 -    int ret = 0;
    6.56 -    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
    6.57 -
    6.58 -    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
    6.59 -        return -EINVAL;
    6.60 -
    6.61 -    switch ( op->type & PMSTAT_CATEGORY_MASK )
    6.62 -    {
    6.63 -    case PMSTAT_CX:
    6.64 -        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
    6.65 -            return -ENODEV;
    6.66 -        break;
    6.67 -    case PMSTAT_PX:
    6.68 -        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
    6.69 -            return -ENODEV;
    6.70 -        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
    6.71 -            return -EINVAL;
    6.72 -        break;
    6.73 -    default:
    6.74 -        return -ENODEV;
    6.75 -    }
    6.76 -
    6.77 -    switch ( op->type )
    6.78 -    {
    6.79 -    case PMSTAT_get_max_px:
    6.80 -    {
    6.81 -        op->u.getpx.total = pmpt->perf.state_count;
    6.82 -        break;
    6.83 -    }
    6.84 -
    6.85 -    case PMSTAT_get_pxstat:
    6.86 -    {
    6.87 -        uint64_t now, ct;
    6.88 -        uint64_t total_idle_ns;
    6.89 -        uint64_t tmp_idle_ns;
    6.90 -        struct pm_px *pxpt = px_statistic_data[op->cpuid];
    6.91 -
    6.92 -        if ( !pxpt )
    6.93 -            return -ENODATA;
    6.94 -
    6.95 -        total_idle_ns = get_cpu_idle_time(op->cpuid);
    6.96 -        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
    6.97 -
    6.98 -        now = NOW();
    6.99 -        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
   6.100 -        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
   6.101 -        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
   6.102 -        pxpt->prev_state_wall = now;
   6.103 -        pxpt->prev_idle_wall = total_idle_ns;
   6.104 -
   6.105 -        ct = pmpt->perf.state_count;
   6.106 -        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
   6.107 -        {
   6.108 -            ret = -EFAULT;
   6.109 -            break;
   6.110 -        }
   6.111 -
   6.112 -        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
   6.113 -        {
   6.114 -            ret = -EFAULT;
   6.115 -            break;
   6.116 -        }
   6.117 -
   6.118 -        op->u.getpx.total = pxpt->u.total;
   6.119 -        op->u.getpx.usable = pxpt->u.usable;
   6.120 -        op->u.getpx.last = pxpt->u.last;
   6.121 -        op->u.getpx.cur = pxpt->u.cur;
   6.122 -
   6.123 -        break;
   6.124 -    }
   6.125 -
   6.126 -    case PMSTAT_reset_pxstat:
   6.127 -    {
   6.128 -        px_statistic_reset(op->cpuid);
   6.129 -        break;
   6.130 -    }
   6.131 -
   6.132 -    case PMSTAT_get_max_cx:
   6.133 -    {
   6.134 -        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
   6.135 -        ret = 0;
   6.136 -        break;
   6.137 -    }
   6.138 -
   6.139 -    case PMSTAT_get_cxstat:
   6.140 -    {
   6.141 -        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
   6.142 -        break;
   6.143 -    }
   6.144 -
   6.145 -    case PMSTAT_reset_cxstat:
   6.146 -    {
   6.147 -        ret = pmstat_reset_cx_stat(op->cpuid);
   6.148 -        break;
   6.149 -    }
   6.150 -
   6.151 -    default:
   6.152 -        printk("not defined sub-hypercall @ do_get_pm_info\n");
   6.153 -        ret = -ENOSYS;
   6.154 -        break;
   6.155 -    }
   6.156 -
   6.157 -    return ret;
   6.158 -}
     7.1 --- a/xen/drivers/Makefile	Fri Sep 26 11:12:29 2008 +0100
     7.2 +++ b/xen/drivers/Makefile	Fri Sep 26 14:04:38 2008 +0100
     7.3 @@ -1,4 +1,5 @@
     7.4  subdir-y += char
     7.5 +subdir-y += cpufreq
     7.6  subdir-$(x86) += passthrough
     7.7  subdir-$(HAS_ACPI) += acpi
     7.8  subdir-$(HAS_VGA) += video
     8.1 --- a/xen/drivers/acpi/Makefile	Fri Sep 26 11:12:29 2008 +0100
     8.2 +++ b/xen/drivers/acpi/Makefile	Fri Sep 26 14:04:38 2008 +0100
     8.3 @@ -4,6 +4,7 @@ subdir-y += utilities
     8.4  obj-y += tables.o
     8.5  obj-y += numa.o
     8.6  obj-y += osl.o
     8.7 +obj-y += pmstat.o
     8.8  
     8.9  obj-$(x86) += hwregs.o
    8.10  obj-$(x86) += reboot.o
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/xen/drivers/acpi/pmstat.c	Fri Sep 26 14:04:38 2008 +0100
     9.3 @@ -0,0 +1,155 @@
     9.4 +/*****************************************************************************
     9.5 +#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
     9.6 +#
     9.7 +#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@intel.com>
     9.8 +#
     9.9 +# This program is free software; you can redistribute it and/or modify it 
    9.10 +# under the terms of the GNU General Public License as published by the Free 
    9.11 +# Software Foundation; either version 2 of the License, or (at your option) 
    9.12 +# any later version.
    9.13 +#
    9.14 +# This program is distributed in the hope that it will be useful, but WITHOUT 
    9.15 +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
    9.16 +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
    9.17 +# more details.
    9.18 +#
    9.19 +# You should have received a copy of the GNU General Public License along with
    9.20 +# this program; if not, write to the Free Software Foundation, Inc., 59 
    9.21 +# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
    9.22 +#
    9.23 +# The full GNU General Public License is included in this distribution in the
    9.24 +# file called LICENSE.
    9.25 +#
    9.26 +*****************************************************************************/
    9.27 +
    9.28 +#include <xen/config.h>
    9.29 +#include <xen/lib.h>
    9.30 +#include <xen/errno.h>
    9.31 +#include <xen/sched.h>
    9.32 +#include <xen/event.h>
    9.33 +#include <xen/irq.h>
    9.34 +#include <xen/iocap.h>
    9.35 +#include <xen/compat.h>
    9.36 +#include <xen/guest_access.h>
    9.37 +#include <asm/current.h>
    9.38 +#include <public/xen.h>
    9.39 +#include <xen/cpumask.h>
    9.40 +#include <asm/processor.h>
    9.41 +#include <xen/percpu.h>
    9.42 +#include <xen/domain.h>
    9.43 +
    9.44 +#include <public/sysctl.h>
    9.45 +#include <acpi/cpufreq/cpufreq.h>
    9.46 +
    9.47 +struct pm_px *__read_mostly cpufreq_statistic_data[NR_CPUS];
    9.48 +
    9.49 +extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
    9.50 +extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
    9.51 +extern int pmstat_reset_cx_stat(uint32_t cpuid);
    9.52 +
    9.53 +int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
    9.54 +{
    9.55 +    int ret = 0;
    9.56 +    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
    9.57 +
    9.58 +    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
    9.59 +        return -EINVAL;
    9.60 +
    9.61 +    switch ( op->type & PMSTAT_CATEGORY_MASK )
    9.62 +    {
    9.63 +    case PMSTAT_CX:
    9.64 +        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
    9.65 +            return -ENODEV;
    9.66 +        break;
    9.67 +    case PMSTAT_PX:
    9.68 +        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
    9.69 +            return -ENODEV;
    9.70 +        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
    9.71 +            return -EINVAL;
    9.72 +        break;
    9.73 +    default:
    9.74 +        return -ENODEV;
    9.75 +    }
    9.76 +
    9.77 +    switch ( op->type )
    9.78 +    {
    9.79 +    case PMSTAT_get_max_px:
    9.80 +    {
    9.81 +        op->u.getpx.total = pmpt->perf.state_count;
    9.82 +        break;
    9.83 +    }
    9.84 +
    9.85 +    case PMSTAT_get_pxstat:
    9.86 +    {
    9.87 +        uint64_t now, ct;
    9.88 +        uint64_t total_idle_ns;
    9.89 +        uint64_t tmp_idle_ns;
    9.90 +        struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid];
    9.91 +
    9.92 +        if ( !pxpt )
    9.93 +            return -ENODATA;
    9.94 +
    9.95 +        total_idle_ns = get_cpu_idle_time(op->cpuid);
    9.96 +        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
    9.97 +
    9.98 +        now = NOW();
    9.99 +        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
   9.100 +        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
   9.101 +        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
   9.102 +        pxpt->prev_state_wall = now;
   9.103 +        pxpt->prev_idle_wall = total_idle_ns;
   9.104 +
   9.105 +        ct = pmpt->perf.state_count;
   9.106 +        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
   9.107 +        {
   9.108 +            ret = -EFAULT;
   9.109 +            break;
   9.110 +        }
   9.111 +
   9.112 +        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
   9.113 +        {
   9.114 +            ret = -EFAULT;
   9.115 +            break;
   9.116 +        }
   9.117 +
   9.118 +        op->u.getpx.total = pxpt->u.total;
   9.119 +        op->u.getpx.usable = pxpt->u.usable;
   9.120 +        op->u.getpx.last = pxpt->u.last;
   9.121 +        op->u.getpx.cur = pxpt->u.cur;
   9.122 +
   9.123 +        break;
   9.124 +    }
   9.125 +
   9.126 +    case PMSTAT_reset_pxstat:
   9.127 +    {
   9.128 +        cpufreq_statistic_reset(op->cpuid);
   9.129 +        break;
   9.130 +    }
   9.131 +
   9.132 +    case PMSTAT_get_max_cx:
   9.133 +    {
   9.134 +        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
   9.135 +        ret = 0;
   9.136 +        break;
   9.137 +    }
   9.138 +
   9.139 +    case PMSTAT_get_cxstat:
   9.140 +    {
   9.141 +        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
   9.142 +        break;
   9.143 +    }
   9.144 +
   9.145 +    case PMSTAT_reset_cxstat:
   9.146 +    {
   9.147 +        ret = pmstat_reset_cx_stat(op->cpuid);
   9.148 +        break;
   9.149 +    }
   9.150 +
   9.151 +    default:
   9.152 +        printk("not defined sub-hypercall @ do_get_pm_info\n");
   9.153 +        ret = -ENOSYS;
   9.154 +        break;
   9.155 +    }
   9.156 +
   9.157 +    return ret;
   9.158 +}
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/xen/drivers/cpufreq/Makefile	Fri Sep 26 14:04:38 2008 +0100
    10.3 @@ -0,0 +1,3 @@
    10.4 +obj-y += cpufreq.o
    10.5 +obj-y += cpufreq_ondemand.o
    10.6 +obj-y += utility.o
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xen/drivers/cpufreq/cpufreq.c	Fri Sep 26 14:04:38 2008 +0100
    11.3 @@ -0,0 +1,188 @@
    11.4 +/*
    11.5 + *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
    11.6 + *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
    11.7 + *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
    11.8 + *  Copyright (C) 2006        Denis Sadykov <denis.m.sadykov@intel.com>
    11.9 + *
   11.10 + *  Feb 2008 - Liu Jinsong <jinsong.liu@intel.com>
   11.11 + *      Add cpufreq limit change handle and per-cpu cpufreq add/del
   11.12 + *      to cope with cpu hotplug
   11.13 + *
   11.14 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   11.15 + *
   11.16 + *  This program is free software; you can redistribute it and/or modify
   11.17 + *  it under the terms of the GNU General Public License as published by
   11.18 + *  the Free Software Foundation; either version 2 of the License, or (at
   11.19 + *  your option) any later version.
   11.20 + *
   11.21 + *  This program is distributed in the hope that it will be useful, but
   11.22 + *  WITHOUT ANY WARRANTY; without even the implied warranty of
   11.23 + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   11.24 + *  General Public License for more details.
   11.25 + *
   11.26 + *  You should have received a copy of the GNU General Public License along
   11.27 + *  with this program; if not, write to the Free Software Foundation, Inc.,
   11.28 + *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
   11.29 + *
   11.30 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   11.31 + */
   11.32 +
   11.33 +#include <xen/types.h>
   11.34 +#include <xen/errno.h>
   11.35 +#include <xen/delay.h>
   11.36 +#include <xen/cpumask.h>
   11.37 +#include <xen/sched.h>
   11.38 +#include <xen/timer.h>
   11.39 +#include <xen/xmalloc.h>
   11.40 +#include <asm/bug.h>
   11.41 +#include <asm/msr.h>
   11.42 +#include <asm/io.h>
   11.43 +#include <asm/config.h>
   11.44 +#include <asm/processor.h>
   11.45 +#include <asm/percpu.h>
   11.46 +#include <asm/cpufeature.h>
   11.47 +#include <acpi/acpi.h>
   11.48 +#include <acpi/cpufreq/cpufreq.h>
   11.49 +
   11.50 +/* TODO: change to link list later as domain number may be sparse */
   11.51 +static cpumask_t cpufreq_dom_map[NR_CPUS];
   11.52 +
   11.53 +int cpufreq_limit_change(unsigned int cpu)
   11.54 +{
   11.55 +    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
   11.56 +    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
   11.57 +    struct cpufreq_policy policy;
   11.58 +
   11.59 +    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
   11.60 +        return -ENODEV;
   11.61 +
   11.62 +    if ((perf->platform_limit < 0) || 
   11.63 +        (perf->platform_limit >= perf->state_count))
   11.64 +        return -EINVAL;
   11.65 +
   11.66 +    memcpy(&policy, data, sizeof(struct cpufreq_policy)); 
   11.67 +
   11.68 +    policy.max =
   11.69 +        perf->states[perf->platform_limit].core_frequency * 1000;
   11.70 +
   11.71 +    return __cpufreq_set_policy(data, &policy);
   11.72 +}
   11.73 +
   11.74 +int cpufreq_add_cpu(unsigned int cpu)
   11.75 +{
   11.76 +    int ret = 0;
   11.77 +    unsigned int firstcpu;
   11.78 +    unsigned int dom;
   11.79 +    unsigned int j;
   11.80 +    struct cpufreq_policy new_policy;
   11.81 +    struct cpufreq_policy *policy;
   11.82 +    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
   11.83 +
   11.84 +    /* to protect the case when Px was not controlled by xen */
   11.85 +    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
   11.86 +        return 0;
   11.87 +
   11.88 +    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
   11.89 +        return -EINVAL;
   11.90 +
   11.91 +    ret = cpufreq_statistic_init(cpu);
   11.92 +    if (ret)
   11.93 +        return ret;
   11.94 +
   11.95 +    dom = perf->domain_info.domain;
   11.96 +    if (cpus_weight(cpufreq_dom_map[dom])) {
   11.97 +        /* share policy with the first cpu since on same boat */
   11.98 +        firstcpu = first_cpu(cpufreq_dom_map[dom]);
   11.99 +        policy = cpufreq_cpu_policy[firstcpu];
  11.100 +
  11.101 +        cpufreq_cpu_policy[cpu] = policy;
  11.102 +        cpu_set(cpu, cpufreq_dom_map[dom]);
  11.103 +        cpu_set(cpu, policy->cpus);
  11.104 +
  11.105 +        printk(KERN_EMERG"adding CPU %u\n", cpu);
  11.106 +    } else {
  11.107 +        /* for the first cpu, setup policy and do init work */
  11.108 +        policy = xmalloc(struct cpufreq_policy);
  11.109 +        if (!policy) {
  11.110 +            cpufreq_statistic_exit(cpu);
  11.111 +            return -ENOMEM;
  11.112 +        }
  11.113 +        memset(policy, 0, sizeof(struct cpufreq_policy));
  11.114 +
  11.115 +        cpufreq_cpu_policy[cpu] = policy;
  11.116 +        cpu_set(cpu, cpufreq_dom_map[dom]);
  11.117 +        cpu_set(cpu, policy->cpus);
  11.118 +
  11.119 +        policy->cpu = cpu;
  11.120 +        ret = cpufreq_driver->init(policy);
  11.121 +        if (ret)
  11.122 +            goto err1;
  11.123 +        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
  11.124 +    }
  11.125 +
  11.126 +    /*
  11.127 +     * After get full cpumap of the coordination domain,
  11.128 +     * we can safely start gov here.
  11.129 +     */
  11.130 +    if (cpus_weight(cpufreq_dom_map[dom]) ==
  11.131 +        perf->domain_info.num_processors) {
  11.132 +        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
  11.133 +        policy->governor = NULL;
  11.134 +        ret = __cpufreq_set_policy(policy, &new_policy);
  11.135 +        if (ret)
  11.136 +            goto err2;
  11.137 +    }
  11.138 +
  11.139 +    return 0;
  11.140 +
  11.141 +err2:
  11.142 +    cpufreq_driver->exit(policy);
  11.143 +err1:
  11.144 +    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
  11.145 +        cpufreq_cpu_policy[j] = NULL;
  11.146 +        cpufreq_statistic_exit(j);
  11.147 +    }
  11.148 +
  11.149 +    cpus_clear(cpufreq_dom_map[dom]);
  11.150 +    xfree(policy);
  11.151 +    return ret;
  11.152 +}
  11.153 +
  11.154 +int cpufreq_del_cpu(unsigned int cpu)
  11.155 +{
  11.156 +    unsigned int dom;
  11.157 +    struct cpufreq_policy *policy;
  11.158 +    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
  11.159 +
  11.160 +    /* to protect the case when Px was not controlled by xen */
  11.161 +    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
  11.162 +        return 0;
  11.163 +
  11.164 +    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
  11.165 +        return -EINVAL;
  11.166 +
  11.167 +    dom = perf->domain_info.domain;
  11.168 +    policy = cpufreq_cpu_policy[cpu];
  11.169 +
  11.170 +    printk(KERN_EMERG"deleting CPU %u\n", cpu);
  11.171 +
  11.172 +    /* for the first cpu of the domain, stop gov */
  11.173 +    if (cpus_weight(cpufreq_dom_map[dom]) ==
  11.174 +        perf->domain_info.num_processors)
  11.175 +        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
  11.176 +
  11.177 +    cpufreq_cpu_policy[cpu] = NULL;
  11.178 +    cpu_clear(cpu, policy->cpus);
  11.179 +    cpu_clear(cpu, cpufreq_dom_map[dom]);
  11.180 +    cpufreq_statistic_exit(cpu);
  11.181 +
  11.182 +    /* for the last cpu of the domain, clean room */
  11.183 +    /* It's safe here to free freq_table, drv_data and policy */
  11.184 +    if (!cpus_weight(cpufreq_dom_map[dom])) {
  11.185 +        cpufreq_driver->exit(policy);
  11.186 +        xfree(policy);
  11.187 +    }
  11.188 +
  11.189 +    return 0;
  11.190 +}
  11.191 +
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c	Fri Sep 26 14:04:38 2008 +0100
    12.3 @@ -0,0 +1,246 @@
    12.4 +/*
    12.5 + *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
    12.6 + *
    12.7 + *  Copyright (C)  2001 Russell King
    12.8 + *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
    12.9 + *                      Jun Nakajima <jun.nakajima@intel.com>
   12.10 + *             Feb 2008 Liu Jinsong <jinsong.liu@intel.com>
   12.11 + *             Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor 
   12.12 + *
   12.13 + * This program is free software; you can redistribute it and/or modify
   12.14 + * it under the terms of the GNU General Public License version 2 as
   12.15 + * published by the Free Software Foundation.
   12.16 + */
   12.17 +
   12.18 +#include <xen/types.h>
   12.19 +#include <xen/percpu.h>
   12.20 +#include <xen/cpumask.h>
   12.21 +#include <xen/types.h>
   12.22 +#include <xen/sched.h>
   12.23 +#include <xen/timer.h>
   12.24 +#include <asm/config.h>
   12.25 +#include <acpi/cpufreq/cpufreq.h>
   12.26 +
   12.27 +#define DEF_FREQUENCY_UP_THRESHOLD              (80)
   12.28 +
   12.29 +#define MIN_DBS_INTERVAL                        (MICROSECS(100))
   12.30 +#define MIN_SAMPLING_MILLISECS                  (20)
   12.31 +#define MIN_STAT_SAMPLING_RATE                   \
   12.32 +    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
   12.33 +#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
   12.34 +#define TRANSITION_LATENCY_LIMIT                (10 * 1000 )
   12.35 +
   12.36 +static uint64_t def_sampling_rate;
   12.37 +
   12.38 +/* Sampling types */
   12.39 +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
   12.40 +
   12.41 +static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
   12.42 +
   12.43 +static unsigned int dbs_enable;    /* number of CPUs using this policy */
   12.44 +
   12.45 +static struct dbs_tuners {
   12.46 +    uint64_t     sampling_rate;
   12.47 +    unsigned int up_threshold;
   12.48 +    unsigned int ignore_nice;
   12.49 +    unsigned int powersave_bias;
   12.50 +} dbs_tuners_ins = {
   12.51 +    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
   12.52 +    .ignore_nice = 0,
   12.53 +    .powersave_bias = 0,
   12.54 +};
   12.55 +
   12.56 +static struct timer dbs_timer[NR_CPUS];
   12.57 +
   12.58 +uint64_t get_cpu_idle_time(unsigned int cpu)
   12.59 +{
   12.60 +    uint64_t idle_ns;
   12.61 +    struct vcpu *v;
   12.62 +
   12.63 +    if ((v = idle_vcpu[cpu]) == NULL)
   12.64 +        return 0;
   12.65 +
   12.66 +    idle_ns = v->runstate.time[RUNSTATE_running];
   12.67 +    if (v->is_running)
   12.68 +        idle_ns += NOW() - v->runstate.state_entry_time;
   12.69 +
   12.70 +    return idle_ns;
   12.71 +}
   12.72 +
   12.73 +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
   12.74 +{
   12.75 +    unsigned int load = 0;
   12.76 +    uint64_t cur_ns, idle_ns, total_ns;
   12.77 +
   12.78 +    struct cpufreq_policy *policy;
   12.79 +    unsigned int j;
   12.80 +
   12.81 +    if (!this_dbs_info->enable)
   12.82 +        return;
   12.83 +
   12.84 +    policy = this_dbs_info->cur_policy;
   12.85 +
   12.86 +    if (unlikely(policy->resume)) {
   12.87 +        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
   12.88 +        return;
   12.89 +    }
   12.90 +
   12.91 +    cur_ns = NOW();
   12.92 +    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
   12.93 +    this_dbs_info->prev_cpu_wall = NOW();
   12.94 +
   12.95 +    if (total_ns < MIN_DBS_INTERVAL)
   12.96 +        return;
   12.97 +
   12.98 +    /* Get Idle Time */
   12.99 +    idle_ns = UINT_MAX;
  12.100 +    for_each_cpu_mask(j, policy->cpus) {
  12.101 +        uint64_t total_idle_ns;
  12.102 +        unsigned int tmp_idle_ns;
  12.103 +        struct cpu_dbs_info_s *j_dbs_info;
  12.104 +
  12.105 +        j_dbs_info = &per_cpu(cpu_dbs_info, j);
  12.106 +        total_idle_ns = get_cpu_idle_time(j);
  12.107 +        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
  12.108 +        j_dbs_info->prev_cpu_idle = total_idle_ns;
  12.109 +
  12.110 +        if (tmp_idle_ns < idle_ns)
  12.111 +            idle_ns = tmp_idle_ns;
  12.112 +    }
  12.113 +
  12.114 +    if (likely(total_ns > idle_ns))
  12.115 +        load = (100 * (total_ns - idle_ns)) / total_ns;
  12.116 +
  12.117 +    /* Check for frequency increase */
  12.118 +    if (load > dbs_tuners_ins.up_threshold) {
  12.119 +        /* if we are already at full speed then break out early */
  12.120 +        if (policy->cur == policy->max)
  12.121 +            return;
  12.122 +        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
  12.123 +        return;
  12.124 +    }
  12.125 +
  12.126 +    /* Check for frequency decrease */
  12.127 +    /* if we cannot reduce the frequency anymore, break out early */
  12.128 +    if (policy->cur == policy->min)
  12.129 +        return;
  12.130 +
  12.131 +    /*
  12.132 +     * The optimal frequency is the frequency that is the lowest that
  12.133 +     * can support the current CPU usage without triggering the up
  12.134 +     * policy. To be safe, we focus 10 points under the threshold.
  12.135 +     */
  12.136 +    if (load < (dbs_tuners_ins.up_threshold - 10)) {
  12.137 +        unsigned int freq_next, freq_cur;
  12.138 +
  12.139 +        freq_cur = __cpufreq_driver_getavg(policy);
  12.140 +        if (!freq_cur)
  12.141 +            freq_cur = policy->cur;
  12.142 +
  12.143 +        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
  12.144 +
  12.145 +        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
  12.146 +    }
  12.147 +}
  12.148 +
  12.149 +static void do_dbs_timer(void *dbs)
  12.150 +{
  12.151 +    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
  12.152 +
  12.153 +    if (!dbs_info->enable)
  12.154 +        return;
  12.155 +
  12.156 +    dbs_check_cpu(dbs_info);
  12.157 +
  12.158 +    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
  12.159 +}
  12.160 +
  12.161 +static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
  12.162 +{
  12.163 +    dbs_info->enable = 1;
  12.164 +
  12.165 +    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 
  12.166 +        (void *)dbs_info, dbs_info->cpu);
  12.167 +
  12.168 +    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
  12.169 +}
  12.170 +
  12.171 +static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
  12.172 +{
  12.173 +    dbs_info->enable = 0;
  12.174 +    stop_timer(&dbs_timer[dbs_info->cpu]);
  12.175 +}
  12.176 +
  12.177 +int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
  12.178 +{
  12.179 +    unsigned int cpu = policy->cpu;
  12.180 +    struct cpu_dbs_info_s *this_dbs_info;
  12.181 +    unsigned int j;
  12.182 +
  12.183 +    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
  12.184 +
  12.185 +    switch (event) {
  12.186 +    case CPUFREQ_GOV_START:
  12.187 +        if ((!cpu_online(cpu)) || (!policy->cur))
  12.188 +            return -EINVAL;
  12.189 +
  12.190 +        if (policy->cpuinfo.transition_latency >
  12.191 +            (TRANSITION_LATENCY_LIMIT * 1000)) {
  12.192 +            printk(KERN_WARNING "ondemand governor failed to load "
  12.193 +                "due to too long transition latency\n");
  12.194 +            return -EINVAL;
  12.195 +        }
  12.196 +        if (this_dbs_info->enable)
  12.197 +            /* Already enabled */
  12.198 +            break;
  12.199 +
  12.200 +        dbs_enable++;
  12.201 +
  12.202 +        for_each_cpu_mask(j, policy->cpus) {
  12.203 +            struct cpu_dbs_info_s *j_dbs_info;
  12.204 +            j_dbs_info = &per_cpu(cpu_dbs_info, j);
  12.205 +            j_dbs_info->cur_policy = policy;
  12.206 +
  12.207 +            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
  12.208 +            j_dbs_info->prev_cpu_wall = NOW();
  12.209 +        }
  12.210 +        this_dbs_info->cpu = cpu;
  12.211 +        /*
  12.212 +         * Start the timerschedule work, when this governor
  12.213 +         * is used for first time
  12.214 +         */
  12.215 +        if (dbs_enable == 1) {
  12.216 +            def_sampling_rate = policy->cpuinfo.transition_latency *
  12.217 +                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
  12.218 +
  12.219 +            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
  12.220 +                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
  12.221 +
  12.222 +            dbs_tuners_ins.sampling_rate = def_sampling_rate;
  12.223 +        }
  12.224 +        dbs_timer_init(this_dbs_info);
  12.225 +
  12.226 +        break;
  12.227 +
  12.228 +    case CPUFREQ_GOV_STOP:
  12.229 +        dbs_timer_exit(this_dbs_info);
  12.230 +        dbs_enable--;
  12.231 +
  12.232 +        break;
  12.233 +
  12.234 +    case CPUFREQ_GOV_LIMITS:
  12.235 +        if (policy->max < this_dbs_info->cur_policy->cur)
  12.236 +            __cpufreq_driver_target(this_dbs_info->cur_policy,
  12.237 +                policy->max, CPUFREQ_RELATION_H);
  12.238 +        else if (policy->min > this_dbs_info->cur_policy->cur)
  12.239 +            __cpufreq_driver_target(this_dbs_info->cur_policy,
  12.240 +                policy->min, CPUFREQ_RELATION_L);
  12.241 +        break;
  12.242 +    }
  12.243 +    return 0;
  12.244 +}
  12.245 +
  12.246 +struct cpufreq_governor cpufreq_gov_dbs = {
  12.247 +    .name = "ondemand",
  12.248 +    .governor = cpufreq_governor_dbs,
  12.249 +};
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xen/drivers/cpufreq/utility.c	Fri Sep 26 14:04:38 2008 +0100
    13.3 @@ -0,0 +1,368 @@
    13.4 +/*
    13.5 + *  utility.c - misc functions for cpufreq driver and Px statistic
    13.6 + *
    13.7 + *  Copyright (C) 2001 Russell King
    13.8 + *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
    13.9 + *
   13.10 + *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
   13.11 + *    Added handling for CPU hotplug
   13.12 + *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
   13.13 + *    Fix handling for CPU hotplug -- affected CPUs
   13.14 + *  Feb 2008 - Liu Jinsong <jinsong.liu@intel.com>
   13.15 + *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
   13.16 + *    And porting to Xen hypervisor
   13.17 + *    2. some Px statistic interface functions
   13.18 + *
   13.19 + * This program is free software; you can redistribute it and/or modify
   13.20 + * it under the terms of the GNU General Public License version 2 as
   13.21 + * published by the Free Software Foundation.
   13.22 + *
   13.23 + */
   13.24 +
   13.25 +#include <xen/errno.h>
   13.26 +#include <xen/cpumask.h>
   13.27 +#include <xen/types.h>
   13.28 +#include <xen/spinlock.h>
   13.29 +#include <xen/percpu.h>
   13.30 +#include <xen/types.h>
   13.31 +#include <xen/sched.h>
   13.32 +#include <xen/timer.h>
   13.33 +#include <asm/config.h>
   13.34 +#include <acpi/cpufreq/cpufreq.h>
   13.35 +#include <public/sysctl.h>
   13.36 +
   13.37 +struct cpufreq_driver   *cpufreq_driver;
   13.38 +struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
   13.39 +struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
   13.40 +
   13.41 +/*********************************************************************
   13.42 + *                    Px STATISTIC INFO                              *
   13.43 + *********************************************************************/
   13.44 +
   13.45 +void cpufreq_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
   13.46 +{
   13.47 +    uint32_t i;
   13.48 +    uint64_t now;
   13.49 +
   13.50 +    now = NOW();
   13.51 +
   13.52 +    for_each_cpu_mask(i, cpumask) {
   13.53 +        struct pm_px *pxpt = cpufreq_statistic_data[i];
   13.54 +        struct processor_pminfo *pmpt = processor_pminfo[i];
   13.55 +        uint64_t total_idle_ns;
   13.56 +        uint64_t tmp_idle_ns;
   13.57 +
   13.58 +        if ( !pxpt || !pmpt )
   13.59 +            continue;
   13.60 +
   13.61 +        total_idle_ns = get_cpu_idle_time(i);
   13.62 +        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
   13.63 +
   13.64 +        pxpt->u.last = from;
   13.65 +        pxpt->u.cur = to;
   13.66 +        pxpt->u.pt[to].count++;
   13.67 +        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
   13.68 +        pxpt->u.pt[from].residency -= tmp_idle_ns;
   13.69 +
   13.70 +        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
   13.71 +
   13.72 +        pxpt->prev_state_wall = now;
   13.73 +        pxpt->prev_idle_wall = total_idle_ns;
   13.74 +    }
   13.75 +}
   13.76 +
   13.77 +int cpufreq_statistic_init(unsigned int cpuid)
   13.78 +{
   13.79 +    uint32_t i, count;
   13.80 +    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
   13.81 +    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
   13.82 +
   13.83 +    count = pmpt->perf.state_count;
   13.84 +
   13.85 +    if ( !pmpt )
   13.86 +        return -EINVAL;
   13.87 +
   13.88 +    if ( !pxpt )
   13.89 +    {
   13.90 +        pxpt = xmalloc(struct pm_px);
   13.91 +        if ( !pxpt )
   13.92 +            return -ENOMEM;
   13.93 +        memset(pxpt, 0, sizeof(*pxpt));
   13.94 +        cpufreq_statistic_data[cpuid] = pxpt;
   13.95 +    }
   13.96 +
   13.97 +    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
   13.98 +    if (!pxpt->u.trans_pt)
   13.99 +        return -ENOMEM;
  13.100 +
  13.101 +    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
  13.102 +    if (!pxpt->u.pt) {
  13.103 +        xfree(pxpt->u.trans_pt);
  13.104 +        return -ENOMEM;
  13.105 +    }
  13.106 +
  13.107 +    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
  13.108 +    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
  13.109 +
  13.110 +    pxpt->u.total = pmpt->perf.state_count;
  13.111 +    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
  13.112 +
  13.113 +    for (i=0; i < pmpt->perf.state_count; i++)
  13.114 +        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
  13.115 +
  13.116 +    pxpt->prev_state_wall = NOW();
  13.117 +    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
  13.118 +
  13.119 +    return 0;
  13.120 +}
  13.121 +
  13.122 +void cpufreq_statistic_exit(unsigned int cpuid)
  13.123 +{
  13.124 +    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
  13.125 +
  13.126 +    if (!pxpt)
  13.127 +        return;
  13.128 +    xfree(pxpt->u.trans_pt);
  13.129 +    xfree(pxpt->u.pt);
  13.130 +    memset(pxpt, 0, sizeof(struct pm_px));
  13.131 +}
  13.132 +
  13.133 +void cpufreq_statistic_reset(unsigned int cpuid)
  13.134 +{
  13.135 +    uint32_t i, j, count;
  13.136 +    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
  13.137 +    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
  13.138 +
  13.139 +    if ( !pxpt || !pmpt )
  13.140 +        return;
  13.141 +
  13.142 +    count = pmpt->perf.state_count;
  13.143 +
  13.144 +    for (i=0; i < count; i++) {
  13.145 +        pxpt->u.pt[i].residency = 0;
  13.146 +        pxpt->u.pt[i].count = 0;
  13.147 +
  13.148 +        for (j=0; j < count; j++)
  13.149 +            *(pxpt->u.trans_pt + i*count + j) = 0;
  13.150 +    }
  13.151 +
  13.152 +    pxpt->prev_state_wall = NOW();
  13.153 +    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
  13.154 +}
  13.155 +
  13.156 +
  13.157 +/*********************************************************************
  13.158 + *                   FREQUENCY TABLE HELPERS                         *
  13.159 + *********************************************************************/
  13.160 +
  13.161 +int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
  13.162 +                                    struct cpufreq_frequency_table *table)
  13.163 +{
  13.164 +    unsigned int min_freq = ~0;
  13.165 +    unsigned int max_freq = 0;
  13.166 +    unsigned int i;
  13.167 +
  13.168 +    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
  13.169 +        unsigned int freq = table[i].frequency;
  13.170 +        if (freq == CPUFREQ_ENTRY_INVALID)
  13.171 +            continue;
  13.172 +        if (freq < min_freq)
  13.173 +            min_freq = freq;
  13.174 +        if (freq > max_freq)
  13.175 +            max_freq = freq;
  13.176 +    }
  13.177 +
  13.178 +    policy->min = policy->cpuinfo.min_freq = min_freq;
  13.179 +    policy->max = policy->cpuinfo.max_freq = max_freq;
  13.180 +
  13.181 +    if (policy->min == ~0)
  13.182 +        return -EINVAL;
  13.183 +    else
  13.184 +        return 0;
  13.185 +}
  13.186 +
  13.187 +int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
  13.188 +                                   struct cpufreq_frequency_table *table)
  13.189 +{
  13.190 +    unsigned int next_larger = ~0;
  13.191 +    unsigned int i;
  13.192 +    unsigned int count = 0;
  13.193 +
  13.194 +    if (!cpu_online(policy->cpu))
  13.195 +        return -EINVAL;
  13.196 +
  13.197 +    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
  13.198 +                                 policy->cpuinfo.max_freq);
  13.199 +
  13.200 +    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
  13.201 +        unsigned int freq = table[i].frequency;
  13.202 +        if (freq == CPUFREQ_ENTRY_INVALID)
  13.203 +            continue;
  13.204 +        if ((freq >= policy->min) && (freq <= policy->max))
  13.205 +            count++;
  13.206 +        else if ((next_larger > freq) && (freq > policy->max))
  13.207 +            next_larger = freq;
  13.208 +    }
  13.209 +
  13.210 +    if (!count)
  13.211 +        policy->max = next_larger;
  13.212 +
  13.213 +    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
  13.214 +                                 policy->cpuinfo.max_freq);
  13.215 +
  13.216 +    return 0;
  13.217 +}
  13.218 +
/*
 * Pick the table index whose frequency best matches @target_freq within
 * [policy->min, policy->max], honouring @relation:
 *   CPUFREQ_RELATION_H: highest frequency <= target (round down);
 *   CPUFREQ_RELATION_L: lowest frequency >= target (round up).
 * The chosen index is stored in *index.  Returns -EINVAL when the CPU
 * is offline or no in-range entry exists at all.
 */
int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
                                   struct cpufreq_frequency_table *table,
                                   unsigned int target_freq,
                                   unsigned int relation,
                                   unsigned int *index)
{
    /* Best entry satisfying the relation; fallback from the other side. */
    struct cpufreq_frequency_table optimal = {
        .index = ~0,
        .frequency = 0,
    };
    struct cpufreq_frequency_table suboptimal = {
        .index = ~0,
        .frequency = 0,
    };
    unsigned int i;

    /* Seed the comparisons so the first in-range entry always wins. */
    switch (relation) {
    case CPUFREQ_RELATION_H:
        suboptimal.frequency = ~0;
        break;
    case CPUFREQ_RELATION_L:
        optimal.frequency = ~0;
        break;
    }

    if (!cpu_online(policy->cpu))
        return -EINVAL;

    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
        unsigned int freq = table[i].frequency;
        if (freq == CPUFREQ_ENTRY_INVALID)
            continue;
        /* Only consider entries inside the policy limits. */
        if ((freq < policy->min) || (freq > policy->max))
            continue;
        switch(relation) {
        case CPUFREQ_RELATION_H:
            if (freq <= target_freq) {
                if (freq >= optimal.frequency) {
                    optimal.frequency = freq;
                    optimal.index = i;
                }
            } else {
                if (freq <= suboptimal.frequency) {
                    suboptimal.frequency = freq;
                    suboptimal.index = i;
                }
            }
            break;
        case CPUFREQ_RELATION_L:
            if (freq >= target_freq) {
                if (freq <= optimal.frequency) {
                    optimal.frequency = freq;
                    optimal.index = i;
                }
            } else {
                if (freq >= suboptimal.frequency) {
                    suboptimal.frequency = freq;
                    suboptimal.index = i;
                }
            }
            break;
        }
    }
    /* After the loop, i is the table length; an .index still at ~0 was
     * never assigned, so ".index > i" detects "no candidate found". */
    if (optimal.index > i) {
        if (suboptimal.index > i)
            return -EINVAL;
        *index = suboptimal.index;
    } else
        *index = optimal.index;

    return 0;
}
  13.291 +
  13.292 +
  13.293 +/*********************************************************************
  13.294 + *               GOVERNORS                                           *
  13.295 + *********************************************************************/
  13.296 +
  13.297 +int __cpufreq_driver_target(struct cpufreq_policy *policy,
  13.298 +                            unsigned int target_freq,
  13.299 +                            unsigned int relation)
  13.300 +{
  13.301 +    int retval = -EINVAL;
  13.302 +
  13.303 +    if (cpu_online(policy->cpu) && cpufreq_driver->target)
  13.304 +        retval = cpufreq_driver->target(policy, target_freq, relation);
  13.305 +
  13.306 +    return retval;
  13.307 +}
  13.308 +
  13.309 +int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
  13.310 +{
  13.311 +    int ret = 0;
  13.312 +
  13.313 +    if (!policy)
  13.314 +        return -EINVAL;
  13.315 +
  13.316 +    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
  13.317 +        ret = cpufreq_driver->getavg(policy->cpu);
  13.318 +
  13.319 +    return ret;
  13.320 +}
  13.321 +
  13.322 +
  13.323 +/*********************************************************************
  13.324 + *                 POLICY                                            *
  13.325 + *********************************************************************/
  13.326 +
  13.327 +/*
  13.328 + * data   : current policy.
  13.329 + * policy : policy to be set.
  13.330 + */
/*
 * Apply a requested policy onto the live one, switching governors if
 * needed.  The stop-old / start-new ordering (with rollback to the old
 * governor on failure) must be preserved exactly.
 *
 * data   : current policy.
 * policy : policy to be set.
 * Returns 0 on success or a negative errno-style value.
 */
int __cpufreq_set_policy(struct cpufreq_policy *data,
                                struct cpufreq_policy *policy)
{
    int ret = 0;

    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));

    /* Reject an inconsistent request (min raised above max). */
    if (policy->min > data->min && policy->min > policy->max)
        return -EINVAL;

    /* verify the cpu speed can be set within this limit */
    ret = cpufreq_driver->verify(policy);
    if (ret)
        return ret;

    data->min = policy->min;
    data->max = policy->max;

    if (policy->governor != data->governor) {
        /* save old, working values */
        struct cpufreq_governor *old_gov = data->governor;

        /* end old governor */
        if (data->governor)
            __cpufreq_governor(data, CPUFREQ_GOV_STOP);

        /* start new governor */
        data->governor = policy->governor;
        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
            /* new governor failed, so re-start old one */
            if (old_gov) {
                data->governor = old_gov;
                __cpufreq_governor(data, CPUFREQ_GOV_START);
            }
            return -EINVAL;
        }
        /* might be a policy change, too, so fall through */
    }

    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
}
    14.1 --- a/xen/include/acpi/cpufreq/cpufreq.h	Fri Sep 26 11:12:29 2008 +0100
    14.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h	Fri Sep 26 14:04:38 2008 +0100
    14.3 @@ -21,6 +21,13 @@
    14.4  
    14.5  struct cpufreq_governor;
    14.6  
/* Driver-private data for the ACPI cpufreq driver: a pointer to the
 * ACPI processor performance info plus the frequency table derived
 * from it.  (Semantics of cpu_feature not visible here — presumably a
 * per-CPU capability flag; confirm against the x86 driver.) */
struct acpi_cpufreq_data {
    struct processor_performance *acpi_data;
    struct cpufreq_frequency_table *freq_table;
    unsigned int max_freq;
    unsigned int cpu_feature;
};
   14.13 +
   14.14  struct cpufreq_cpuinfo {
   14.15      unsigned int        max_freq;
   14.16      unsigned int        min_freq;
    15.1 --- a/xen/include/acpi/cpufreq/processor_perf.h	Fri Sep 26 11:12:29 2008 +0100
    15.2 +++ b/xen/include/acpi/cpufreq/processor_perf.h	Fri Sep 26 14:04:38 2008 +0100
    15.3 @@ -9,10 +9,10 @@
    15.4  int get_cpu_id(u8);
    15.5  int powernow_cpufreq_init(void);
    15.6  
    15.7 -void px_statistic_update(cpumask_t, uint8_t, uint8_t);
    15.8 -int  px_statistic_init(unsigned int);
    15.9 -void px_statistic_exit(unsigned int);
   15.10 -void px_statistic_reset(unsigned int);
   15.11 +void cpufreq_statistic_update(cpumask_t, uint8_t, uint8_t);
   15.12 +int  cpufreq_statistic_init(unsigned int);
   15.13 +void cpufreq_statistic_exit(unsigned int);
   15.14 +void cpufreq_statistic_reset(unsigned int);
   15.15  
   15.16  int  cpufreq_limit_change(unsigned int);
   15.17  
   15.18 @@ -58,6 +58,6 @@ struct pm_px {
   15.19      uint64_t prev_idle_wall;
   15.20  };
   15.21  
   15.22 -extern struct pm_px *px_statistic_data[NR_CPUS];
   15.23 +extern struct pm_px *cpufreq_statistic_data[NR_CPUS];
   15.24  
   15.25  #endif /* __XEN_PROCESSOR_PM_H__ */