ia64/xen-unstable

changeset 19541:0108af6efdae

Fix cpufreq HW-ALL coordination handling

Currently, cpufreq HW-ALL coordination is handled the same way as SW-ALL.
However, SW-ALL causes more IPIs, which is bad for cpuidle.
This patch implements HW-ALL coordination handling in a different way from
SW-ALL, for the sake of performance and to reduce IPIs. We also
suspend/resume the HW-ALL dbs timer during idle.

Signed-off-by: Yu, Ke <ke.yu@intel.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Tian, Kevin <kevin.tian@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Apr 14 11:20:55 2009 +0100 (2009-04-14)
parents e15d30dfb600
children cbaae05c2902
files xen/arch/x86/acpi/cpu_idle.c xen/arch/x86/acpi/cpufreq/cpufreq.c xen/drivers/cpufreq/cpufreq.c xen/drivers/cpufreq/cpufreq_ondemand.c xen/include/acpi/cpufreq/cpufreq.h
line diff
     1.1 --- a/xen/arch/x86/acpi/cpu_idle.c	Tue Apr 14 11:20:02 2009 +0100
     1.2 +++ b/xen/arch/x86/acpi/cpu_idle.c	Tue Apr 14 11:20:55 2009 +0100
     1.3 @@ -47,6 +47,7 @@
     1.4  #include <asm/processor.h>
     1.5  #include <public/platform.h>
     1.6  #include <public/sysctl.h>
     1.7 +#include <acpi/cpufreq/cpufreq.h>
     1.8  
     1.9  /*#define DEBUG_PM_CX*/
    1.10  
    1.11 @@ -195,6 +196,8 @@ static void acpi_processor_idle(void)
    1.12      int sleep_ticks = 0;
    1.13      u32 t1, t2 = 0;
    1.14  
    1.15 +    cpufreq_dbs_timer_suspend();
    1.16 +
    1.17      sched_tick_suspend();
    1.18      /*
    1.19       * sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer,
    1.20 @@ -214,6 +217,7 @@ static void acpi_processor_idle(void)
    1.21      {
    1.22          local_irq_enable();
    1.23          sched_tick_resume();
    1.24 +        cpufreq_dbs_timer_resume();
    1.25          return;
    1.26      }
    1.27  
    1.28 @@ -234,6 +238,7 @@ static void acpi_processor_idle(void)
    1.29          else
    1.30              acpi_safe_halt();
    1.31          sched_tick_resume();
    1.32 +        cpufreq_dbs_timer_resume();
    1.33          return;
    1.34      }
    1.35  
    1.36 @@ -341,6 +346,7 @@ static void acpi_processor_idle(void)
    1.37      default:
    1.38          local_irq_enable();
    1.39          sched_tick_resume();
    1.40 +        cpufreq_dbs_timer_resume();
    1.41          return;
    1.42      }
    1.43  
    1.44 @@ -352,6 +358,7 @@ static void acpi_processor_idle(void)
    1.45      }
    1.46  
    1.47      sched_tick_resume();
    1.48 +    cpufreq_dbs_timer_resume();
    1.49  
    1.50      if ( cpuidle_current_governor->reflect )
    1.51          cpuidle_current_governor->reflect(power);
     2.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Tue Apr 14 11:20:02 2009 +0100
     2.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Tue Apr 14 11:20:55 2009 +0100
     2.3 @@ -191,7 +191,11 @@ static void drv_read(struct drv_cmd *cmd
     2.4  
     2.5  static void drv_write(struct drv_cmd *cmd)
     2.6  {
     2.7 -    on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
     2.8 +    if ((cpus_weight(cmd->mask) ==  1) &&
     2.9 +        cpu_isset(smp_processor_id(), cmd->mask))
    2.10 +        do_drv_write((void *)cmd);
    2.11 +    else
    2.12 +        on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
    2.13  }
    2.14  
    2.15  static u32 get_cur_val(cpumask_t mask)
     3.1 --- a/xen/drivers/cpufreq/cpufreq.c	Tue Apr 14 11:20:02 2009 +0100
     3.2 +++ b/xen/drivers/cpufreq/cpufreq.c	Tue Apr 14 11:20:55 2009 +0100
     3.3 @@ -130,7 +130,7 @@ int cpufreq_add_cpu(unsigned int cpu)
     3.4      int ret = 0;
     3.5      unsigned int firstcpu;
     3.6      unsigned int dom, domexist = 0;
     3.7 -    unsigned int j;
     3.8 +    unsigned int hw_all = 0;
     3.9      struct list_head *pos;
    3.10      struct cpufreq_dom *cpufreq_dom = NULL;
    3.11      struct cpufreq_policy new_policy;
    3.12 @@ -146,9 +146,8 @@ int cpufreq_add_cpu(unsigned int cpu)
    3.13      if (cpufreq_cpu_policy[cpu])
    3.14          return 0;
    3.15  
    3.16 -    ret = cpufreq_statistic_init(cpu);
    3.17 -    if (ret)
    3.18 -        return ret;
    3.19 +    if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
    3.20 +        hw_all = 1;
    3.21  
    3.22      dom = perf->domain_info.domain;
    3.23  
    3.24 @@ -160,61 +159,57 @@ int cpufreq_add_cpu(unsigned int cpu)
    3.25          }
    3.26      }
    3.27  
    3.28 -    if (domexist) {
    3.29 -        /* share policy with the first cpu since on same boat */
    3.30 +    if (!domexist) {
    3.31 +        cpufreq_dom = xmalloc(struct cpufreq_dom);
    3.32 +        if (!cpufreq_dom)
    3.33 +            return -ENOMEM;
    3.34 +
    3.35 +        memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
    3.36 +        cpufreq_dom->dom = dom;
    3.37 +        list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
    3.38 +    } else {
    3.39 +        /* domain sanity check under whatever coordination type */
    3.40 +        firstcpu = first_cpu(cpufreq_dom->map);
    3.41 +        if ((perf->domain_info.coord_type !=
    3.42 +            processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
    3.43 +            (perf->domain_info.num_processors !=
    3.44 +            processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
    3.45 +            return -EINVAL;
    3.46 +        }
    3.47 +    }
    3.48 +
    3.49 +    if (!domexist || hw_all) {
    3.50 +        policy = xmalloc(struct cpufreq_policy);
    3.51 +        if (!policy)
    3.52 +            ret = -ENOMEM;
    3.53 +
    3.54 +        memset(policy, 0, sizeof(struct cpufreq_policy));
    3.55 +        policy->cpu = cpu;
    3.56 +        cpufreq_cpu_policy[cpu] = policy;
    3.57 +
    3.58 +        ret = cpufreq_driver->init(policy);
    3.59 +        if (ret) {
    3.60 +            xfree(policy);
    3.61 +            return ret;
    3.62 +        }
    3.63 +        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
    3.64 +    } else {
    3.65          firstcpu = first_cpu(cpufreq_dom->map);
    3.66          policy = cpufreq_cpu_policy[firstcpu];
    3.67  
    3.68          cpufreq_cpu_policy[cpu] = policy;
    3.69 -        cpu_set(cpu, cpufreq_dom->map);
    3.70 -        cpu_set(cpu, policy->cpus);
    3.71 -
    3.72 -        /* domain coordination sanity check */
    3.73 -        if ((perf->domain_info.coord_type !=
    3.74 -             processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
    3.75 -            (perf->domain_info.num_processors !=
    3.76 -             processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
    3.77 -            ret = -EINVAL;
    3.78 -            goto err2;
    3.79 -        }
    3.80 -
    3.81          printk(KERN_EMERG"adding CPU %u\n", cpu);
    3.82 -    } else {
    3.83 -        cpufreq_dom = xmalloc(struct cpufreq_dom);
    3.84 -        if (!cpufreq_dom) {
    3.85 -            cpufreq_statistic_exit(cpu);
    3.86 -            return -ENOMEM;
    3.87 -        }
    3.88 -        memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
    3.89 -        cpufreq_dom->dom = dom;
    3.90 -        cpu_set(cpu, cpufreq_dom->map);
    3.91 -        list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
    3.92 -
    3.93 -        /* for the first cpu, setup policy and do init work */
    3.94 -        policy = xmalloc(struct cpufreq_policy);
    3.95 -        if (!policy) {
    3.96 -            list_del(&cpufreq_dom->node);
    3.97 -            xfree(cpufreq_dom);
    3.98 -            cpufreq_statistic_exit(cpu);
    3.99 -            return -ENOMEM;
   3.100 -        }
   3.101 -        memset(policy, 0, sizeof(struct cpufreq_policy));
   3.102 -        policy->cpu = cpu;
   3.103 -        cpu_set(cpu, policy->cpus);
   3.104 -        cpufreq_cpu_policy[cpu] = policy;
   3.105 -
   3.106 -        ret = cpufreq_driver->init(policy);
   3.107 -        if (ret)
   3.108 -            goto err1;
   3.109 -        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
   3.110      }
   3.111  
   3.112 -    /*
   3.113 -     * After get full cpumap of the coordination domain,
   3.114 -     * we can safely start gov here.
   3.115 -     */
   3.116 -    if (cpus_weight(cpufreq_dom->map) ==
   3.117 -        perf->domain_info.num_processors) {
   3.118 +    cpu_set(cpu, policy->cpus);
   3.119 +    cpu_set(cpu, cpufreq_dom->map);
   3.120 +
   3.121 +    ret = cpufreq_statistic_init(cpu);
   3.122 +    if (ret)
   3.123 +        goto err1;
   3.124 +
   3.125 +    if (hw_all ||
   3.126 +        (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) {
   3.127          memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
   3.128          policy->governor = NULL;
   3.129  
   3.130 @@ -240,22 +235,29 @@ int cpufreq_add_cpu(unsigned int cpu)
   3.131      return 0;
   3.132  
   3.133  err2:
   3.134 -    cpufreq_driver->exit(policy);
   3.135 +    cpufreq_statistic_exit(cpu);
   3.136  err1:
   3.137 -    for_each_cpu_mask(j, cpufreq_dom->map) {
   3.138 -        cpufreq_cpu_policy[j] = NULL;
   3.139 -        cpufreq_statistic_exit(j);
   3.140 +    cpufreq_cpu_policy[cpu] = NULL;
   3.141 +    cpu_clear(cpu, policy->cpus);
   3.142 +    cpu_clear(cpu, cpufreq_dom->map);
   3.143 +
   3.144 +    if (cpus_empty(policy->cpus)) {
   3.145 +        cpufreq_driver->exit(policy);
   3.146 +        xfree(policy);
   3.147      }
   3.148  
   3.149 -    list_del(&cpufreq_dom->node);
   3.150 -    xfree(cpufreq_dom);
   3.151 -    xfree(policy);
   3.152 +    if (cpus_empty(cpufreq_dom->map)) {
   3.153 +        list_del(&cpufreq_dom->node);
   3.154 +        xfree(cpufreq_dom);
   3.155 +    }
   3.156 +
   3.157      return ret;
   3.158  }
   3.159  
   3.160  int cpufreq_del_cpu(unsigned int cpu)
   3.161  {
   3.162      unsigned int dom, domexist = 0;
   3.163 +    unsigned int hw_all = 0;
   3.164      struct list_head *pos;
   3.165      struct cpufreq_dom *cpufreq_dom = NULL;
   3.166      struct cpufreq_policy *policy;
   3.167 @@ -270,6 +272,9 @@ int cpufreq_del_cpu(unsigned int cpu)
   3.168      if (!cpufreq_cpu_policy[cpu])
   3.169          return 0;
   3.170  
   3.171 +    if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
   3.172 +        hw_all = 1;
   3.173 +
   3.174      dom = perf->domain_info.domain;
   3.175      policy = cpufreq_cpu_policy[cpu];
   3.176  
   3.177 @@ -284,23 +289,27 @@ int cpufreq_del_cpu(unsigned int cpu)
   3.178      if (!domexist)
   3.179          return -EINVAL;
   3.180  
   3.181 -    /* for the first cpu of the domain, stop gov */
   3.182 -    if (cpus_weight(cpufreq_dom->map) ==
   3.183 -        perf->domain_info.num_processors)
   3.184 +    /* for HW_ALL, stop gov for each core of the _PSD domain */
   3.185 +    /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */
   3.186 +    if (hw_all ||
   3.187 +        (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors))
   3.188          __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
   3.189  
   3.190 +    cpufreq_statistic_exit(cpu);
   3.191      cpufreq_cpu_policy[cpu] = NULL;
   3.192      cpu_clear(cpu, policy->cpus);
   3.193      cpu_clear(cpu, cpufreq_dom->map);
   3.194 -    cpufreq_statistic_exit(cpu);
   3.195 +
   3.196 +    if (cpus_empty(policy->cpus)) {
   3.197 +        cpufreq_driver->exit(policy);
   3.198 +        xfree(policy);
   3.199 +    }
   3.200  
   3.201      /* for the last cpu of the domain, clean room */
   3.202      /* It's safe here to free freq_table, drv_data and policy */
   3.203 -    if (!cpus_weight(cpufreq_dom->map)) {
   3.204 -        cpufreq_driver->exit(policy);
   3.205 +    if (cpus_empty(cpufreq_dom->map)) {
   3.206          list_del(&cpufreq_dom->node);
   3.207          xfree(cpufreq_dom);
   3.208 -        xfree(policy);
   3.209      }
   3.210  
   3.211      printk(KERN_EMERG"deleting CPU %u\n", cpu);
     4.1 --- a/xen/drivers/cpufreq/cpufreq_ondemand.c	Tue Apr 14 11:20:02 2009 +0100
     4.2 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c	Tue Apr 14 11:20:55 2009 +0100
     4.3 @@ -190,6 +190,12 @@ static void dbs_timer_init(struct cpu_db
     4.4          (void *)dbs_info, dbs_info->cpu);
     4.5  
     4.6      set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
     4.7 +
     4.8 +    if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
     4.9 +            == CPUFREQ_SHARED_TYPE_HW )
    4.10 +    {
    4.11 +        dbs_info->stoppable = 1;
    4.12 +    }
    4.13  }
    4.14  
    4.15  static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
    4.16 @@ -337,3 +343,38 @@ static void __exit cpufreq_gov_dbs_exit(
    4.17      cpufreq_unregister_governor(&cpufreq_gov_dbs);
    4.18  }
    4.19  __exitcall(cpufreq_gov_dbs_exit);
    4.20 +
    4.21 +void cpufreq_dbs_timer_suspend(void)
    4.22 +{
    4.23 +    int cpu;
    4.24 +
    4.25 +    cpu = smp_processor_id();
    4.26 +
    4.27 +    if ( per_cpu(cpu_dbs_info,cpu).stoppable )
    4.28 +    {
    4.29 +        stop_timer( &dbs_timer[cpu] );
    4.30 +    }
    4.31 +}
    4.32 +
    4.33 +void cpufreq_dbs_timer_resume(void)
    4.34 +{
    4.35 +    int cpu;
    4.36 +    struct timer* t;
    4.37 +    s_time_t now;
    4.38 +
    4.39 +    cpu = smp_processor_id();
    4.40 +
    4.41 +    if ( per_cpu(cpu_dbs_info,cpu).stoppable )
    4.42 +    {
    4.43 +        now = NOW();
    4.44 +        t = &dbs_timer[cpu];
    4.45 +        if (t->expires <= now)
    4.46 +        {
    4.47 +            t->function(t->data);
    4.48 +        }
    4.49 +        else
    4.50 +        {
    4.51 +            set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate));
    4.52 +        }
    4.53 +    }
    4.54 +}
     5.1 --- a/xen/include/acpi/cpufreq/cpufreq.h	Tue Apr 14 11:20:02 2009 +0100
     5.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h	Tue Apr 14 11:20:55 2009 +0100
     5.3 @@ -221,6 +221,7 @@ struct cpu_dbs_info_s {
     5.4      struct cpufreq_frequency_table *freq_table;
     5.5      int cpu;
     5.6      unsigned int enable:1;
     5.7 +    unsigned int stoppable:1;
     5.8  };
     5.9  
    5.10  int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
    5.11 @@ -232,4 +233,7 @@ int write_ondemand_sampling_rate(unsigne
    5.12  int write_ondemand_up_threshold(unsigned int up_threshold);
    5.13  
    5.14  int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq);
    5.15 +
    5.16 +void cpufreq_dbs_timer_suspend(void);
    5.17 +void cpufreq_dbs_timer_resume(void);
    5.18  #endif /* __XEN_CPUFREQ_PM_H__ */