ia64/xen-unstable

changeset 18889:de7fd862ada2

cpufreq: allow customization of some parameters

Short of having a way for powersaved to dynamically adjust these
values, at least allow specifying them on the command line. In
particular, always running at an up-threshold of 80% is perhaps nice
for laptop use, but certainly not desirable on servers. On shell
scripts invoking large numbers of short-lived processes I noticed a
50% performance degradation on a dual-socket quad-core Barcelona just
because of the load of an individual core never crossing the 80%
boundary that would have resulted in increasing the frequency.

(Powersaved on SLE10 sets this on native kernels to 60% or 80%,
depending on whether performance or power reduction is preferred,
*divided* by the number of CPUs, but capped at the lower limit of
20%.)

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Dec 05 15:23:32 2008 +0000 (2008-12-05)
parents 3905cbf523b2
children d206692cbcbe
files xen/common/domain.c xen/drivers/cpufreq/cpufreq_ondemand.c xen/include/acpi/cpufreq/cpufreq.h
line diff
     1.1 --- a/xen/common/domain.c	Fri Dec 05 15:22:43 2008 +0000
     1.2 +++ b/xen/common/domain.c	Fri Dec 05 15:23:32 2008 +0000
     1.3 @@ -25,6 +25,7 @@
     1.4  #include <xen/percpu.h>
     1.5  #include <xen/multicall.h>
     1.6  #include <xen/rcupdate.h>
     1.7 +#include <acpi/cpufreq/cpufreq.h>
     1.8  #include <asm/debugger.h>
     1.9  #include <public/sched.h>
    1.10  #include <public/vcpu.h>
    1.11 @@ -41,16 +42,25 @@ boolean_param("dom0_vcpus_pin", opt_dom0
    1.12  enum cpufreq_controller cpufreq_controller;
    1.13  static void __init setup_cpufreq_option(char *str)
    1.14  {
    1.15 +    char *arg;
    1.16 +
    1.17      if ( !strcmp(str, "dom0-kernel") )
    1.18      {
    1.19          xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
    1.20          cpufreq_controller = FREQCTL_dom0_kernel;
    1.21          opt_dom0_vcpus_pin = 1;
    1.22 +        return;
    1.23      }
    1.24 -    else if ( !strcmp(str, "xen") )
    1.25 +
    1.26 +    if ( (arg = strpbrk(str, ",:")) != NULL )
    1.27 +        *arg++ = '\0';
    1.28 +
    1.29 +    if ( !strcmp(str, "xen") )
    1.30      {
    1.31          xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
    1.32          cpufreq_controller = FREQCTL_xen;
    1.33 +        if ( arg && *arg )
    1.34 +            cpufreq_cmdline_parse(arg);
    1.35      }
    1.36  }
    1.37  custom_param("cpufreq", setup_cpufreq_option);
     2.1 --- a/xen/drivers/cpufreq/cpufreq_ondemand.c	Fri Dec 05 15:22:43 2008 +0000
     2.2 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c	Fri Dec 05 15:23:32 2008 +0000
     2.3 @@ -22,15 +22,22 @@
     2.4  #include <acpi/cpufreq/cpufreq.h>
     2.5  
     2.6  #define DEF_FREQUENCY_UP_THRESHOLD              (80)
     2.7 +#define MIN_FREQUENCY_UP_THRESHOLD              (11)
     2.8 +#define MAX_FREQUENCY_UP_THRESHOLD              (100)
     2.9  
    2.10  #define MIN_DBS_INTERVAL                        (MICROSECS(100))
    2.11 -#define MIN_SAMPLING_MILLISECS                  (20)
    2.12 -#define MIN_STAT_SAMPLING_RATE                   \
    2.13 +#define MIN_SAMPLING_RATE_RATIO                 (2)
    2.14 +#define MIN_SAMPLING_MILLISECS                  (MIN_SAMPLING_RATE_RATIO * 10)
    2.15 +#define MIN_STAT_SAMPLING_RATE                  \
    2.16      (MIN_SAMPLING_MILLISECS * MILLISECS(1))
    2.17 +#define MIN_SAMPLING_RATE                       \
    2.18 +    (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
    2.19 +#define MAX_SAMPLING_RATE                       (500 * def_sampling_rate)
    2.20  #define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
    2.21  #define TRANSITION_LATENCY_LIMIT                (10 * 1000 )
    2.22  
    2.23  static uint64_t def_sampling_rate;
    2.24 +static uint64_t usr_sampling_rate;
    2.25  
    2.26  /* Sampling types */
    2.27  enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
    2.28 @@ -42,11 +49,9 @@ static unsigned int dbs_enable;    /* nu
    2.29  static struct dbs_tuners {
    2.30      uint64_t     sampling_rate;
    2.31      unsigned int up_threshold;
    2.32 -    unsigned int ignore_nice;
    2.33      unsigned int powersave_bias;
    2.34  } dbs_tuners_ins = {
    2.35      .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
    2.36 -    .ignore_nice = 0,
    2.37      .powersave_bias = 0,
    2.38  };
    2.39  
    2.40 @@ -216,7 +221,20 @@ int cpufreq_governor_dbs(struct cpufreq_
    2.41              if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
    2.42                  def_sampling_rate = MIN_STAT_SAMPLING_RATE;
    2.43  
    2.44 -            dbs_tuners_ins.sampling_rate = def_sampling_rate;
    2.45 +            if (!usr_sampling_rate)
    2.46 +                dbs_tuners_ins.sampling_rate = def_sampling_rate;
    2.47 +            else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
    2.48 +                printk(KERN_WARNING "cpufreq/ondemand: "
    2.49 +                       "specified sampling rate too low, using %"PRIu64"\n",
    2.50 +                       MIN_SAMPLING_RATE);
    2.51 +                dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
    2.52 +            } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
    2.53 +                printk(KERN_WARNING "cpufreq/ondemand: "
    2.54 +                       "specified sampling rate too high, using %"PRIu64"\n",
    2.55 +                       MAX_SAMPLING_RATE);
    2.56 +                dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
    2.57 +            } else
    2.58 +                dbs_tuners_ins.sampling_rate = usr_sampling_rate;
    2.59          }
    2.60          dbs_timer_init(this_dbs_info);
    2.61  
    2.62 @@ -244,3 +262,55 @@ struct cpufreq_governor cpufreq_gov_dbs 
    2.63      .name = "ondemand",
    2.64      .governor = cpufreq_governor_dbs,
    2.65  };
    2.66 +
    2.67 +void __init cpufreq_cmdline_parse(char *str)
    2.68 +{
    2.69 +    do {
    2.70 +        char *val, *end = strchr(str, ',');
    2.71 +
    2.72 +        if ( end )
    2.73 +            *end++ = '\0';
    2.74 +        val = strchr(str, '=');
    2.75 +        if ( val )
    2.76 +            *val++ = '\0'; /* split name/value; val must point past '=' */
    2.77 +
    2.78 +        if ( !strcmp(str, "rate") && val )
    2.79 +        {
    2.80 +            usr_sampling_rate = simple_strtoull(val, NULL, 0);
    2.81 +        }
    2.82 +        else if ( !strcmp(str, "threshold") && val )
    2.83 +        {
    2.84 +            unsigned long tmp = simple_strtoul(val, NULL, 0);
    2.85 +
    2.86 +            if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
    2.87 +            {
    2.88 +                printk(XENLOG_WARNING "cpufreq/ondemand: "
    2.89 +                       "specified threshold too low, using %d\n",
    2.90 +                       MIN_FREQUENCY_UP_THRESHOLD);
    2.91 +                tmp = MIN_FREQUENCY_UP_THRESHOLD;
    2.92 +            }
    2.93 +            else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
    2.94 +            {
    2.95 +                printk(XENLOG_WARNING "cpufreq/ondemand: "
    2.96 +                       "specified threshold too high, using %d\n",
    2.97 +                       MAX_FREQUENCY_UP_THRESHOLD);
    2.98 +                tmp = MAX_FREQUENCY_UP_THRESHOLD;
    2.99 +            }
   2.100 +            dbs_tuners_ins.up_threshold = tmp;
   2.101 +        }
   2.102 +        else if ( !strcmp(str, "bias") && val )
   2.103 +        {
   2.104 +            unsigned long tmp = simple_strtoul(val, NULL, 0);
   2.105 +
   2.106 +            if ( tmp > 1000 )
   2.107 +            {
   2.108 +                printk(XENLOG_WARNING "cpufreq/ondemand: "
   2.109 +                       "specified bias too high, using 1000\n");
   2.110 +                tmp = 1000;
   2.111 +            }
   2.112 +            dbs_tuners_ins.powersave_bias = tmp;
   2.113 +        }
   2.114 +
   2.115 +        str = end; /* next comma-separated option, or NULL when done */
   2.116 +    } while ( str );
   2.117 +}
     3.1 --- a/xen/include/acpi/cpufreq/cpufreq.h	Fri Dec 05 15:22:43 2008 +0000
     3.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h	Fri Dec 05 15:23:32 2008 +0000
     3.3 @@ -55,6 +55,8 @@ extern struct cpufreq_policy *cpufreq_cp
     3.4  extern int __cpufreq_set_policy(struct cpufreq_policy *data,
     3.5                                  struct cpufreq_policy *policy);
     3.6  
     3.7 +void cpufreq_cmdline_parse(char *);
     3.8 +
     3.9  #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
    3.10  #define CPUFREQ_SHARED_TYPE_HW   (1) /* HW does needed coordination */
    3.11  #define CPUFREQ_SHARED_TYPE_ALL  (2) /* All dependent CPUs should set freq */