ia64/xen-unstable

changeset 18529:d8a2d117225c

x86/cpufreq: don't use static array for large per-CPU data structures

... as this is rather wasteful when Xen is configured to support many
CPUs but is running on a system with only a few.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Sep 22 15:24:03 2008 +0100 (2008-09-22)
parents 1c09b810f977
children ae29cd95ba7d
files xen/arch/x86/acpi/cpu_idle.c xen/arch/x86/acpi/cpufreq/cpufreq.c xen/arch/x86/acpi/cpufreq/powernow.c xen/arch/x86/acpi/cpufreq/utility.c xen/arch/x86/acpi/pmstat.c xen/arch/x86/platform_hypercall.c xen/include/acpi/cpufreq/processor_perf.h
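
The pattern applied in every touched file is the same: a statically sized
NR_CPUS array of structures becomes an array of pointers, the structure for a
given CPU is allocated the first time that CPU's data is set up, and every
consumer checks the pointer for NULL before dereferencing it. Below is a
minimal standalone sketch of that allocate-on-first-use idea, not the actual
Xen code: the names (demo_power, get_power, get_count) are hypothetical, and
plain calloc() stands in for Xen's xmalloc()/memset() with the -ENOMEM error
handling the real hunks use.

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CPUS 256                 /* compile-time maximum, as in Xen */

    /* Hypothetical stand-in for struct acpi_processor_power and friends. */
    struct demo_power {
        unsigned int count;
        unsigned long idle_time;
    };

    /* Was: static struct demo_power powers[NR_CPUS];  (NR_CPUS full structs)
     * Now: only NR_CPUS pointers are reserved up front. */
    static struct demo_power *powers[NR_CPUS];

    /* Allocate a CPU's structure on first use (cf. set_cx_pminfo below). */
    static struct demo_power *get_power(unsigned int cpu)
    {
        struct demo_power *p;

        if (cpu >= NR_CPUS)
            return NULL;
        p = powers[cpu];
        if (!p) {
            p = calloc(1, sizeof(*p));  /* Xen: xmalloc() + memset() */
            if (!p)
                return NULL;            /* caller reports -ENOMEM */
            powers[cpu] = p;
        }
        return p;
    }

    /* Consumers must tolerate CPUs whose data was never initialised. */
    static unsigned int get_count(unsigned int cpu)
    {
        return (cpu < NR_CPUS && powers[cpu]) ? powers[cpu]->count : 0;
    }

    int main(void)
    {
        struct demo_power *p = get_power(2);

        if (p)
            p->count = 3;
        printf("cpu2 count=%u, cpu5 count=%u\n", get_count(2), get_count(5));
        return 0;
    }

On a machine with only a handful of CPUs this reduces the static footprint
from NR_CPUS full structures to NR_CPUS pointers plus one allocation per CPU
actually brought up, which is the saving the commit message refers to.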
line diff
     1.1 --- a/xen/arch/x86/acpi/cpu_idle.c	Mon Sep 22 15:20:25 2008 +0100
     1.2 +++ b/xen/arch/x86/acpi/cpu_idle.c	Mon Sep 22 15:24:03 2008 +0100
     1.3 @@ -66,7 +66,7 @@ integer_param("max_cstate", max_cstate);
     1.4  static int local_apic_timer_c2_ok __read_mostly = 0;
     1.5  boolean_param("lapic_timer_c2_ok", local_apic_timer_c2_ok);
     1.6  
     1.7 -static struct acpi_processor_power processor_powers[NR_CPUS];
     1.8 +static struct acpi_processor_power *__read_mostly processor_powers[NR_CPUS];
     1.9  
    1.10  static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
    1.11  {
    1.12 @@ -91,8 +91,11 @@ static void print_acpi_power(uint32_t cp
    1.13  
    1.14  static void dump_cx(unsigned char key)
    1.15  {
    1.16 -    for( int i = 0; i < num_online_cpus(); i++ )
    1.17 -        print_acpi_power(i, &processor_powers[i]);
    1.18 +    unsigned int cpu;
    1.19 +
    1.20 +    for_each_online_cpu ( cpu )
    1.21 +        if (processor_powers[cpu])
    1.22 +            print_acpi_power(cpu, processor_powers[cpu]);
    1.23  }
    1.24  
    1.25  static int __init cpu_idle_key_init(void)
    1.26 @@ -193,14 +196,12 @@ static struct {
    1.27  
    1.28  static void acpi_processor_idle(void)
    1.29  {
    1.30 -    struct acpi_processor_power *power = NULL;
    1.31 +    struct acpi_processor_power *power = processor_powers[smp_processor_id()];
    1.32      struct acpi_processor_cx *cx = NULL;
    1.33      int next_state;
    1.34      int sleep_ticks = 0;
    1.35      u32 t1, t2 = 0;
    1.36  
    1.37 -    power = &processor_powers[smp_processor_id()];
    1.38 -
    1.39      /*
    1.40       * Interrupts must be disabled during bus mastering calculations and
    1.41       * for C2/C3 transitions.
    1.42 @@ -213,7 +214,7 @@ static void acpi_processor_idle(void)
    1.43          return;
    1.44      }
    1.45  
    1.46 -    next_state = cpuidle_current_governor->select(power);
    1.47 +    next_state = power ? cpuidle_current_governor->select(power) : -1;
    1.48      if ( next_state > 0 )
    1.49      {
    1.50          cx = &power->states[next_state];
    1.51 @@ -675,7 +676,15 @@ long set_cx_pminfo(uint32_t cpu, struct 
    1.52          return -EFAULT;
    1.53      }
    1.54  
    1.55 -    acpi_power = &processor_powers[cpu_id];
    1.56 +    acpi_power = processor_powers[cpu_id];
    1.57 +    if ( !acpi_power )
    1.58 +    {
    1.59 +        acpi_power = xmalloc(struct acpi_processor_power);
    1.60 +        if ( !acpi_power )
    1.61 +            return -ENOMEM;
    1.62 +        memset(acpi_power, 0, sizeof(*acpi_power));
    1.63 +        processor_powers[cpu_id] = acpi_power;
    1.64 +    }
    1.65  
    1.66      init_cx_pminfo(acpi_power);
    1.67  
    1.68 @@ -713,19 +722,27 @@ long set_cx_pminfo(uint32_t cpu, struct 
    1.69  
    1.70  uint32_t pmstat_get_cx_nr(uint32_t cpuid)
    1.71  {
    1.72 -    return processor_powers[cpuid].count;
    1.73 +    return processor_powers[cpuid] ? processor_powers[cpuid]->count : 0;
    1.74  }
    1.75  
    1.76  int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
    1.77  {
    1.78 -    struct acpi_processor_power *power = &processor_powers[cpuid];
    1.79 +    const struct acpi_processor_power *power = processor_powers[cpuid];
    1.80      struct vcpu *v = idle_vcpu[cpuid];
    1.81      uint64_t usage;
    1.82      int i;
    1.83  
    1.84 +    if ( power == NULL )
    1.85 +    {
    1.86 +        stat->last = 0;
    1.87 +        stat->nr = 0;
    1.88 +        stat->idle_time = 0;
    1.89 +        return 0;
    1.90 +    }
    1.91 +
    1.92      stat->last = (power->last_state) ?
    1.93          (int)(power->last_state - &power->states[0]) : 0;
    1.94 -    stat->nr = processor_powers[cpuid].count;
    1.95 +    stat->nr = power->count;
    1.96      stat->idle_time = v->runstate.time[RUNSTATE_running];
    1.97      if ( v->is_running )
    1.98          stat->idle_time += NOW() - v->runstate.state_entry_time;
     2.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Mon Sep 22 15:20:25 2008 +0100
     2.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Mon Sep 22 15:24:03 2008 +0100
     2.3 @@ -389,12 +389,15 @@ static int acpi_cpufreq_target(struct cp
     2.4  
     2.5  static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
     2.6  {
     2.7 -    struct acpi_cpufreq_data *data = drv_data[policy->cpu];
     2.8 -    struct processor_performance *perf = &processor_pminfo[policy->cpu].perf;
     2.9 +    struct acpi_cpufreq_data *data;
    2.10 +    struct processor_performance *perf;
    2.11  
    2.12 -    if (!policy || !data)
    2.13 +    if (!policy || !(data = drv_data[policy->cpu]) ||
    2.14 +        !processor_pminfo[policy->cpu])
    2.15          return -EINVAL;
    2.16  
    2.17 +    perf = &processor_pminfo[policy->cpu]->perf;
    2.18 +
    2.19      cpufreq_verify_within_limits(policy, 0, 
    2.20          perf->states[perf->platform_limit].core_frequency * 1000);
    2.21  
    2.22 @@ -447,7 +450,7 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
    2.23  
    2.24      drv_data[cpu] = data;
    2.25  
    2.26 -    data->acpi_data = &processor_pminfo[cpu].perf;
    2.27 +    data->acpi_data = &processor_pminfo[cpu]->perf;
    2.28  
    2.29      perf = data->acpi_data;
    2.30      policy->shared_type = perf->shared_type;
    2.31 @@ -580,11 +583,11 @@ static struct cpufreq_driver acpi_cpufre
    2.32  
    2.33  int cpufreq_limit_change(unsigned int cpu)
    2.34  {
    2.35 -    struct processor_performance *perf = &processor_pminfo[cpu].perf;
    2.36 +    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
    2.37      struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
    2.38      struct cpufreq_policy policy;
    2.39  
    2.40 -    if (!cpu_online(cpu) || !data)
    2.41 +    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
    2.42          return -ENODEV;
    2.43  
    2.44      if ((perf->platform_limit < 0) || 
    2.45 @@ -607,10 +610,10 @@ int cpufreq_add_cpu(unsigned int cpu)
    2.46      unsigned int j;
    2.47      struct cpufreq_policy new_policy;
    2.48      struct cpufreq_policy *policy;
    2.49 -    struct processor_performance *perf = &processor_pminfo[cpu].perf;
    2.50 +    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
    2.51  
    2.52      /* to protect the case when Px was not controlled by xen */
    2.53 -    if (!(perf->init & XEN_PX_INIT))
    2.54 +    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
    2.55          return 0;
    2.56  
    2.57      if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
    2.58 @@ -683,10 +686,10 @@ int cpufreq_del_cpu(unsigned int cpu)
    2.59  {
    2.60      unsigned int dom;
    2.61      struct cpufreq_policy *policy;
    2.62 -    struct processor_performance *perf = &processor_pminfo[cpu].perf;
    2.63 +    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
    2.64  
    2.65      /* to protect the case when Px was not controlled by xen */
    2.66 -    if (!(perf->init & XEN_PX_INIT))
    2.67 +    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
    2.68          return 0;
    2.69  
    2.70      if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
     3.1 --- a/xen/arch/x86/acpi/cpufreq/powernow.c	Mon Sep 22 15:20:25 2008 +0100
     3.2 +++ b/xen/arch/x86/acpi/cpufreq/powernow.c	Mon Sep 22 15:24:03 2008 +0100
     3.3 @@ -49,9 +49,6 @@
     3.4  #define MSR_PSTATE_CTRL         0xc0010062 /* Pstate control MSR */
     3.5  #define MSR_PSTATE_CUR_LIMIT    0xc0010061 /* pstate current limit MSR */
     3.6  
     3.7 -extern struct processor_pminfo processor_pminfo[NR_CPUS];
     3.8 -extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS];
     3.9 -
    3.10  struct powernow_cpufreq_data {
    3.11      struct processor_performance *acpi_data;
    3.12      struct cpufreq_frequency_table *freq_table;
    3.13 @@ -149,7 +146,7 @@ static int powernow_cpufreq_cpu_init(str
    3.14  
    3.15      drv_data[cpu] = data;
    3.16  
    3.17 -    data->acpi_data = &processor_pminfo[cpu].perf;
    3.18 +    data->acpi_data = &processor_pminfo[cpu]->perf;
    3.19  
    3.20      perf = data->acpi_data;
    3.21      policy->shared_type = perf->shared_type;
    3.22 @@ -257,8 +254,8 @@ int powernow_cpufreq_init(void)
    3.23  	}
    3.24          if (ret)
    3.25              return ret;
    3.26 -        if (max_dom < processor_pminfo[i].perf.domain_info.domain)
    3.27 -            max_dom = processor_pminfo[i].perf.domain_info.domain;
    3.28 +        if (max_dom < processor_pminfo[i]->perf.domain_info.domain)
    3.29 +            max_dom = processor_pminfo[i]->perf.domain_info.domain;
    3.30      }
    3.31      max_dom++;
    3.32  
    3.33 @@ -274,13 +271,13 @@ int powernow_cpufreq_init(void)
    3.34  
    3.35      /* get cpumask of each psd domain */
    3.36      for_each_online_cpu(i) {
    3.37 -        __set_bit(processor_pminfo[i].perf.domain_info.domain, dom_mask);
    3.38 -        cpu_set(i, pt[processor_pminfo[i].perf.domain_info.domain]);
    3.39 +        __set_bit(processor_pminfo[i]->perf.domain_info.domain, dom_mask);
    3.40 +        cpu_set(i, pt[processor_pminfo[i]->perf.domain_info.domain]);
    3.41      }
    3.42  
    3.43      for_each_online_cpu(i)
    3.44 -        processor_pminfo[i].perf.shared_cpu_map = 
    3.45 -            pt[processor_pminfo[i].perf.domain_info.domain];
    3.46 +        processor_pminfo[i]->perf.shared_cpu_map =
    3.47 +            pt[processor_pminfo[i]->perf.domain_info.domain];
    3.48  
    3.49      cpufreq_driver = &powernow_cpufreq_driver;
    3.50  
     4.1 --- a/xen/arch/x86/acpi/cpufreq/utility.c	Mon Sep 22 15:20:25 2008 +0100
     4.2 +++ b/xen/arch/x86/acpi/cpufreq/utility.c	Mon Sep 22 15:24:03 2008 +0100
     4.3 @@ -32,8 +32,8 @@
     4.4  #include <public/sysctl.h>
     4.5  
     4.6  struct cpufreq_driver   *cpufreq_driver;
     4.7 -struct processor_pminfo processor_pminfo[NR_CPUS];
     4.8 -struct cpufreq_policy   *cpufreq_cpu_policy[NR_CPUS];
     4.9 +struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
    4.10 +struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
    4.11  
    4.12  /*********************************************************************
    4.13   *                    Px STATISTIC INFO                              *
    4.14 @@ -47,11 +47,14 @@ void px_statistic_update(cpumask_t cpuma
    4.15      now = NOW();
    4.16  
    4.17      for_each_cpu_mask(i, cpumask) {
    4.18 -        struct pm_px *pxpt = &px_statistic_data[i];
    4.19 -        uint32_t statnum = processor_pminfo[i].perf.state_count;
    4.20 +        struct pm_px *pxpt = px_statistic_data[i];
    4.21 +        struct processor_pminfo *pmpt = processor_pminfo[i];
    4.22          uint64_t total_idle_ns;
    4.23          uint64_t tmp_idle_ns;
    4.24  
    4.25 +        if ( !pxpt || !pmpt )
    4.26 +            continue;
    4.27 +
    4.28          total_idle_ns = get_cpu_idle_time(i);
    4.29          tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
    4.30  
    4.31 @@ -61,7 +64,7 @@ void px_statistic_update(cpumask_t cpuma
    4.32          pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
    4.33          pxpt->u.pt[from].residency -= tmp_idle_ns;
    4.34  
    4.35 -        (*(pxpt->u.trans_pt + from*statnum + to))++;
    4.36 +        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
    4.37  
    4.38          pxpt->prev_state_wall = now;
    4.39          pxpt->prev_idle_wall = total_idle_ns;
    4.40 @@ -71,11 +74,23 @@ void px_statistic_update(cpumask_t cpuma
    4.41  int px_statistic_init(unsigned int cpuid)
    4.42  {
    4.43      uint32_t i, count;
    4.44 -    struct pm_px *pxpt = &px_statistic_data[cpuid];
    4.45 -    struct processor_pminfo *pmpt = &processor_pminfo[cpuid];
    4.46 +    struct pm_px *pxpt = px_statistic_data[cpuid];
    4.47 +    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
    4.48  
     4.49 +    if ( !pmpt )
     4.50 +        return -EINVAL;
     4.51 +
     4.52      count = pmpt->perf.state_count;
     4.53  
    4.54 +    if ( !pxpt )
    4.55 +    {
    4.56 +        pxpt = xmalloc(struct pm_px);
    4.57 +        if ( !pxpt )
    4.58 +            return -ENOMEM;
    4.59 +        memset(pxpt, 0, sizeof(*pxpt));
    4.60 +        px_statistic_data[cpuid] = pxpt;
    4.61 +    }
    4.62 +
    4.63      pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
    4.64      if (!pxpt->u.trans_pt)
    4.65          return -ENOMEM;
    4.66 @@ -103,8 +118,10 @@ int px_statistic_init(unsigned int cpuid
    4.67  
    4.68  void px_statistic_exit(unsigned int cpuid)
    4.69  {
    4.70 -    struct pm_px *pxpt = &px_statistic_data[cpuid];
    4.71 +    struct pm_px *pxpt = px_statistic_data[cpuid];
    4.72  
    4.73 +    if (!pxpt)
    4.74 +        return;
    4.75      xfree(pxpt->u.trans_pt);
    4.76      xfree(pxpt->u.pt);
    4.77      memset(pxpt, 0, sizeof(struct pm_px));
    4.78 @@ -113,9 +130,13 @@ void px_statistic_exit(unsigned int cpui
    4.79  void px_statistic_reset(unsigned int cpuid)
    4.80  {
    4.81      uint32_t i, j, count;
    4.82 -    struct pm_px *pxpt = &px_statistic_data[cpuid];
    4.83 +    struct pm_px *pxpt = px_statistic_data[cpuid];
    4.84 +    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
    4.85  
    4.86 -    count = processor_pminfo[cpuid].perf.state_count;
    4.87 +    if ( !pxpt || !pmpt )
    4.88 +        return;
    4.89 +
    4.90 +    count = pmpt->perf.state_count;
    4.91  
    4.92      for (i=0; i < count; i++) {
    4.93          pxpt->u.pt[i].residency = 0;
     5.1 --- a/xen/arch/x86/acpi/pmstat.c	Mon Sep 22 15:20:25 2008 +0100
     5.2 +++ b/xen/arch/x86/acpi/pmstat.c	Mon Sep 22 15:24:03 2008 +0100
     5.3 @@ -40,7 +40,7 @@
     5.4  #include <public/sysctl.h>
     5.5  #include <acpi/cpufreq/cpufreq.h>
     5.6  
     5.7 -struct pm_px px_statistic_data[NR_CPUS];
     5.8 +struct pm_px *__read_mostly px_statistic_data[NR_CPUS];
     5.9  
    5.10  extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
    5.11  extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
    5.12 @@ -49,15 +49,14 @@ extern int pmstat_reset_cx_stat(uint32_t
    5.13  int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
    5.14  {
    5.15      int ret = 0;
    5.16 -    struct pm_px *pxpt = &px_statistic_data[op->cpuid];
    5.17 -    struct processor_pminfo *pmpt = &processor_pminfo[op->cpuid];
    5.18 +    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
    5.19  
    5.20      /* to protect the case when Px was not controlled by xen */
    5.21 -    if ( (!(pmpt->perf.init & XEN_PX_INIT)) && 
    5.22 +    if ( (!pmpt || !(pmpt->perf.init & XEN_PX_INIT)) &&
    5.23          (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX )
    5.24          return -EINVAL;
    5.25  
    5.26 -    if ( !cpu_online(op->cpuid) )
    5.27 +    if ( op->cpuid >= NR_CPUS || !cpu_online(op->cpuid) )
    5.28          return -EINVAL;
    5.29  
    5.30      switch( op->type )
    5.31 @@ -73,6 +72,10 @@ int do_get_pm_info(struct xen_sysctl_get
    5.32          uint64_t now, ct;
    5.33          uint64_t total_idle_ns;
    5.34          uint64_t tmp_idle_ns;
    5.35 +        struct pm_px *pxpt = px_statistic_data[op->cpuid];
    5.36 +
    5.37 +        if ( !pxpt )
    5.38 +            return -ENODATA;
    5.39  
    5.40          total_idle_ns = get_cpu_idle_time(op->cpuid);
    5.41          tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
     6.1 --- a/xen/arch/x86/platform_hypercall.c	Mon Sep 22 15:20:25 2008 +0100
     6.2 +++ b/xen/arch/x86/platform_hypercall.c	Mon Sep 22 15:24:03 2008 +0100
     6.3 @@ -380,8 +380,19 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
     6.4                  ret = -EINVAL;
     6.5                  break;
     6.6              }
     6.7 -            pmpt = &processor_pminfo[cpuid];
     6.8 -            pxpt = &processor_pminfo[cpuid].perf;
     6.9 +            pmpt = processor_pminfo[cpuid];
    6.10 +            if ( !pmpt )
    6.11 +            {
    6.12 +                pmpt = xmalloc(struct processor_pminfo);
    6.13 +                if ( !pmpt )
    6.14 +                {
    6.15 +                    ret = -ENOMEM;
    6.16 +                    break;
    6.17 +                }
    6.18 +                memset(pmpt, 0, sizeof(*pmpt));
    6.19 +                processor_pminfo[cpuid] = pmpt;
    6.20 +            }
    6.21 +            pxpt = &pmpt->perf;
    6.22              pmpt->acpi_id = xenpmpt->id;
    6.23              pmpt->id = cpuid;
    6.24  
     7.1 --- a/xen/include/acpi/cpufreq/processor_perf.h	Mon Sep 22 15:20:25 2008 +0100
     7.2 +++ b/xen/include/acpi/cpufreq/processor_perf.h	Mon Sep 22 15:24:03 2008 +0100
     7.3 @@ -41,7 +41,7 @@ struct processor_pminfo {
     7.4      struct processor_performance    perf;
     7.5  };
     7.6  
     7.7 -extern struct processor_pminfo processor_pminfo[NR_CPUS];
     7.8 +extern struct processor_pminfo *processor_pminfo[NR_CPUS];
     7.9  
    7.10  struct px_stat {
    7.11      uint8_t total;        /* total Px states */
    7.12 @@ -58,6 +58,6 @@ struct pm_px {
    7.13      uint64_t prev_idle_wall;
    7.14  };
    7.15  
    7.16 -extern struct pm_px px_statistic_data[NR_CPUS];
    7.17 +extern struct pm_px *px_statistic_data[NR_CPUS];
    7.18  
    7.19  #endif /* __XEN_PROCESSOR_PM_H__ */