ia64/xen-unstable

changeset 19412:532e25fda238

cpufreq: Update the cpufreq aperf and mperf reads so that they can be used
by both the ondemand governor and user programs

Currently, __get_measured_perf reads the aperf and mperf MSRs and then
clears them for the sake of the ondemand governor. This approach prevents
user programs from reading aperf and mperf for their own purposes. With
this patch, the aperf and mperf MSRs are no longer cleared; instead,
per-consumer snapshots are kept, so the counters can be used by both the
ondemand governor and user programs.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Mar 20 08:48:17 2009 +0000 (2009-03-20)
parents c657fc593306
children 0fc0de02856a
files xen/arch/x86/acpi/cpufreq/cpufreq.c xen/drivers/cpufreq/cpufreq_ondemand.c xen/drivers/cpufreq/utility.c xen/include/acpi/cpufreq/cpufreq.h
line diff
     1.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Mar 20 08:44:54 2009 +0000
     1.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Mar 20 08:48:17 2009 +0000
     1.3 @@ -232,6 +232,26 @@ static u32 get_cur_val(cpumask_t mask)
     1.4      return cmd.val;
     1.5  }
     1.6  
     1.7 +struct perf_pair {
     1.8 +    union {
     1.9 +        struct {
    1.10 +            uint32_t lo;
    1.11 +            uint32_t hi;
    1.12 +        } split;
    1.13 +        uint64_t whole;
    1.14 +    } aperf, mperf;
    1.15 +};
    1.16 +static DEFINE_PER_CPU(struct perf_pair, gov_perf_pair);
    1.17 +static DEFINE_PER_CPU(struct perf_pair, usr_perf_pair);
    1.18 +
    1.19 +static void read_measured_perf_ctrs(void *_readin)
    1.20 +{
    1.21 +    struct perf_pair *readin = _readin;
    1.22 +
    1.23 +    rdmsr(MSR_IA32_APERF, readin->aperf.split.lo, readin->aperf.split.hi);
    1.24 +    rdmsr(MSR_IA32_MPERF, readin->mperf.split.lo, readin->mperf.split.hi);
    1.25 +}
    1.26 +
    1.27  /*
    1.28   * Return the measured active (C0) frequency on this CPU since last call
    1.29   * to this function.
    1.30 @@ -245,40 +265,13 @@ static u32 get_cur_val(cpumask_t mask)
    1.31   * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
    1.32   * no meaning should be associated with absolute values of these MSRs.
    1.33   */
    1.34 -static void  __get_measured_perf(void *perf_percent)
    1.35 +static unsigned int get_measured_perf(unsigned int cpu, unsigned int flag)
    1.36  {
    1.37 -    unsigned int *ratio = perf_percent;
    1.38 -    union {
    1.39 -        struct {
    1.40 -            uint32_t lo;
    1.41 -            uint32_t hi;
    1.42 -        } split;
    1.43 -        uint64_t whole;
    1.44 -    } aperf_cur, mperf_cur;
    1.45 -
    1.46 -    rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
    1.47 -    rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
    1.48 -
    1.49 -    wrmsr(MSR_IA32_APERF, 0,0);
    1.50 -    wrmsr(MSR_IA32_MPERF, 0,0);
    1.51 -
    1.52 -    if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
    1.53 -        int shift_count = 7;
    1.54 -        aperf_cur.whole >>= shift_count;
    1.55 -        mperf_cur.whole >>= shift_count;
    1.56 -    }
    1.57 -
    1.58 -    if (aperf_cur.whole && mperf_cur.whole)
    1.59 -        *ratio = (aperf_cur.whole * 100) / mperf_cur.whole;
    1.60 -    else
    1.61 -        *ratio = 0;
    1.62 -}
    1.63 -
    1.64 -static unsigned int get_measured_perf(unsigned int cpu)
    1.65 -{
    1.66 -    struct cpufreq_policy *policy;
    1.67 +    struct cpufreq_policy *policy;    
    1.68 +    struct perf_pair readin, cur, *saved;
    1.69      unsigned int perf_percent;
    1.70      cpumask_t cpumask;
    1.71 +    unsigned int retval;
    1.72  
    1.73      if (!cpu_online(cpu))
    1.74          return 0;
    1.75 @@ -287,16 +280,80 @@ static unsigned int get_measured_perf(un
    1.76      if (!policy)
    1.77          return 0;
    1.78  
    1.79 -    /* Usually we take the short path (no IPI) for the sake of performance. */
    1.80 +    switch (flag)
    1.81 +    {
    1.82 +    case GOV_GETAVG:
    1.83 +    {
    1.84 +        saved = &per_cpu(gov_perf_pair, cpu);
    1.85 +        break;
    1.86 +    }
    1.87 +    case USR_GETAVG:
    1.88 +    {
    1.89 +        saved = &per_cpu(usr_perf_pair, cpu);
    1.90 +        break;
    1.91 +    }
    1.92 +    default:
    1.93 +        return 0;
    1.94 +    }
    1.95 +
    1.96      if (cpu == smp_processor_id()) {
    1.97 -        __get_measured_perf((void *)&perf_percent);
    1.98 +        read_measured_perf_ctrs((void *)&readin);
    1.99      } else {
   1.100          cpumask = cpumask_of_cpu(cpu);
   1.101 -        on_selected_cpus(cpumask, __get_measured_perf, 
   1.102 -                        (void *)&perf_percent,0,1);
   1.103 +        on_selected_cpus(cpumask, read_measured_perf_ctrs, 
   1.104 +                        (void *)&readin, 0, 1);
   1.105      }
   1.106  
   1.107 -    return drv_data[cpu]->max_freq * perf_percent / 100;
   1.108 +    cur.aperf.whole = readin.aperf.whole - saved->aperf.whole;
   1.109 +    cur.mperf.whole = readin.mperf.whole - saved->mperf.whole;
   1.110 +    saved->aperf.whole = readin.aperf.whole;
   1.111 +    saved->mperf.whole = readin.mperf.whole;
   1.112 +
   1.113 +#ifdef __i386__
   1.114 +    /*
   1.115 +     * We dont want to do 64 bit divide with 32 bit kernel
   1.116 +     * Get an approximate value. Return failure in case we cannot get
   1.117 +     * an approximate value.
   1.118 +     */
   1.119 +    if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
   1.120 +        int shift_count;
   1.121 +        uint32_t h;
   1.122 +
   1.123 +        h = max_t(uint32_t, cur.aperf.split.hi, cur.mperf.split.hi);
   1.124 +        shift_count = fls(h);
   1.125 +
   1.126 +        cur.aperf.whole >>= shift_count;
   1.127 +        cur.mperf.whole >>= shift_count;
   1.128 +    }
   1.129 +
   1.130 +    if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
   1.131 +        int shift_count = 7;
   1.132 +        cur.aperf.split.lo >>= shift_count;
   1.133 +        cur.mperf.split.lo >>= shift_count;
   1.134 +    }
   1.135 +
   1.136 +    if (cur.aperf.split.lo && cur.mperf.split.lo)
   1.137 +        perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
   1.138 +    else
   1.139 +        perf_percent = 0;
   1.140 +
   1.141 +#else
   1.142 +    if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
   1.143 +        int shift_count = 7;
   1.144 +        cur.aperf.whole >>= shift_count;
   1.145 +        cur.mperf.whole >>= shift_count;
   1.146 +    }
   1.147 +
   1.148 +    if (cur.aperf.whole && cur.mperf.whole)
   1.149 +        perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
   1.150 +    else
   1.151 +        perf_percent = 0;
   1.152 +
   1.153 +#endif
   1.154 +
   1.155 +    retval = drv_data[policy->cpu]->max_freq * perf_percent / 100;
   1.156 +
   1.157 +    return retval;
   1.158  }
   1.159  
   1.160  static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
     2.1 --- a/xen/drivers/cpufreq/cpufreq_ondemand.c	Fri Mar 20 08:44:54 2009 +0000
     2.2 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c	Fri Mar 20 08:48:17 2009 +0000
     2.3 @@ -161,9 +161,7 @@ static void dbs_check_cpu(struct cpu_dbs
     2.4      if (load < (dbs_tuners_ins.up_threshold - 10)) {
     2.5          unsigned int freq_next, freq_cur;
     2.6  
     2.7 -        freq_cur = __cpufreq_driver_getavg(policy);
     2.8 -        if (!freq_cur)
     2.9 -            freq_cur = policy->cur;
    2.10 +        freq_cur = cpufreq_driver_getavg(policy->cpu, GOV_GETAVG);
    2.11  
    2.12          freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
    2.13  
     3.1 --- a/xen/drivers/cpufreq/utility.c	Fri Mar 20 08:44:54 2009 +0000
     3.2 +++ b/xen/drivers/cpufreq/utility.c	Fri Mar 20 08:48:17 2009 +0000
     3.3 @@ -357,17 +357,23 @@ int __cpufreq_driver_target(struct cpufr
     3.4      return retval;
     3.5  }
     3.6  
     3.7 -int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
     3.8 +int cpufreq_driver_getavg(unsigned int cpu, unsigned int flag)
     3.9  {
    3.10 -    int ret = 0;
    3.11 +    struct cpufreq_policy *policy;
    3.12 +    int freq_avg;
    3.13  
    3.14 -    if (!policy)
    3.15 -        return -EINVAL;
    3.16 +    policy = cpufreq_cpu_policy[cpu];
    3.17 +    if (!cpu_online(cpu) || !policy)
    3.18 +        return 0;
    3.19  
    3.20 -    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
    3.21 -        ret = cpufreq_driver->getavg(policy->cpu);
    3.22 +    if (cpufreq_driver->getavg)
    3.23 +    {
    3.24 +        freq_avg = cpufreq_driver->getavg(cpu, flag);
    3.25 +        if (freq_avg > 0)
    3.26 +            return freq_avg;
    3.27 +    }
    3.28  
    3.29 -    return ret;
    3.30 +    return policy->cur;
    3.31  }
    3.32  
    3.33  
     4.1 --- a/xen/include/acpi/cpufreq/cpufreq.h	Fri Mar 20 08:44:54 2009 +0000
     4.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h	Fri Mar 20 08:48:17 2009 +0000
     4.3 @@ -106,7 +106,10 @@ extern struct cpufreq_governor *__find_g
     4.4  extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
     4.5                                     unsigned int target_freq,
     4.6                                     unsigned int relation);
     4.7 -extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy);
     4.8 +
     4.9 +#define GOV_GETAVG     1
    4.10 +#define USR_GETAVG     2
    4.11 +extern int cpufreq_driver_getavg(unsigned int cpu, unsigned int flag);
    4.12  
    4.13  static __inline__ int 
    4.14  __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
    4.15 @@ -130,7 +133,7 @@ struct cpufreq_driver {
    4.16                       unsigned int target_freq,
    4.17                       unsigned int relation);
    4.18      unsigned int    (*get)(unsigned int cpu);
    4.19 -    unsigned int    (*getavg)(unsigned int cpu);
    4.20 +    unsigned int    (*getavg)(unsigned int cpu, unsigned int flag);
    4.21      int    (*exit)(struct cpufreq_policy *policy);
    4.22  };
    4.23