ia64/xen-unstable
changeset 18553:08374be21318
X86 and IA64: Rebase cpufreq logic for supporting both x86 and ia64 arch
Rebase cpufreq logic for supporting both x86 and ia64 arch:
1. move cpufreq arch-independent logic into the common directories
   (xen/drivers/acpi and xen/drivers/cpufreq);
2. leave cpufreq x86-dependent logic in the xen/arch/x86/acpi/cpufreq
   directory.
Signed-off-by: Yu, Ke <ke.yu@intel.com>
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Fri Sep 26 14:04:38 2008 +0100 (2008-09-26) |
parents | 5274aa966231 |
children | d1d9915041de |
files | xen/arch/x86/acpi/Makefile xen/arch/x86/acpi/cpufreq/Makefile xen/arch/x86/acpi/cpufreq/cpufreq.c xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c xen/arch/x86/acpi/cpufreq/utility.c xen/arch/x86/acpi/pmstat.c xen/drivers/Makefile xen/drivers/acpi/Makefile xen/drivers/acpi/pmstat.c xen/drivers/cpufreq/Makefile xen/drivers/cpufreq/cpufreq.c xen/drivers/cpufreq/cpufreq_ondemand.c xen/drivers/cpufreq/utility.c xen/include/acpi/cpufreq/cpufreq.h xen/include/acpi/cpufreq/processor_perf.h |
line diff
--- a/xen/arch/x86/acpi/Makefile	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -2,4 +2,3 @@ subdir-y += cpufreq
 
 obj-y += boot.o
 obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
-obj-y += pmstat.o
--- a/xen/arch/x86/acpi/cpufreq/Makefile	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -1,4 +1,2 @@
 obj-y += cpufreq.o
-obj-y += utility.o
-obj-y += cpufreq_ondemand.o
 obj-y += powernow.o
3.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Sep 26 11:12:29 2008 +0100 3.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Sep 26 14:04:38 2008 +0100 3.3 @@ -45,9 +45,6 @@ 3.4 #include <acpi/acpi.h> 3.5 #include <acpi/cpufreq/cpufreq.h> 3.6 3.7 -/* TODO: change to link list later as domain number may be sparse */ 3.8 -static cpumask_t cpufreq_dom_map[NR_CPUS]; 3.9 - 3.10 enum { 3.11 UNDEFINED_CAPABLE = 0, 3.12 SYSTEM_INTEL_MSR_CAPABLE, 3.13 @@ -57,13 +54,6 @@ enum { 3.14 #define INTEL_MSR_RANGE (0xffff) 3.15 #define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) 3.16 3.17 -struct acpi_cpufreq_data { 3.18 - struct processor_performance *acpi_data; 3.19 - struct cpufreq_frequency_table *freq_table; 3.20 - unsigned int max_freq; 3.21 - unsigned int cpu_feature; 3.22 -}; 3.23 - 3.24 static struct acpi_cpufreq_data *drv_data[NR_CPUS]; 3.25 3.26 static struct cpufreq_driver acpi_cpufreq_driver; 3.27 @@ -342,7 +332,7 @@ static int acpi_cpufreq_target(struct cp 3.28 policy->resume = 0; 3.29 } 3.30 else { 3.31 - printk(KERN_INFO "Already at target state (P%d)\n", 3.32 + printk(KERN_DEBUG "Already at target state (P%d)\n", 3.33 next_perf_state); 3.34 return 0; 3.35 } 3.36 @@ -379,7 +369,7 @@ static int acpi_cpufreq_target(struct cp 3.37 if (!check_freqs(cmd.mask, freqs.new, data)) 3.38 return -EAGAIN; 3.39 3.40 - px_statistic_update(cmd.mask, perf->state, next_perf_state); 3.41 + cpufreq_statistic_update(cmd.mask, perf->state, next_perf_state); 3.42 3.43 perf->state = next_perf_state; 3.44 policy->cur = freqs.new; 3.45 @@ -581,145 +571,6 @@ static struct cpufreq_driver acpi_cpufre 3.46 .exit = acpi_cpufreq_cpu_exit, 3.47 }; 3.48 3.49 -int cpufreq_limit_change(unsigned int cpu) 3.50 -{ 3.51 - struct processor_performance *perf = &processor_pminfo[cpu]->perf; 3.52 - struct cpufreq_policy *data = cpufreq_cpu_policy[cpu]; 3.53 - struct cpufreq_policy policy; 3.54 - 3.55 - if (!cpu_online(cpu) || !data || !processor_pminfo[cpu]) 3.56 - return -ENODEV; 3.57 - 3.58 - if ((perf->platform_limit < 0) || 3.59 - (perf->platform_limit >= perf->state_count)) 3.60 - return -EINVAL; 3.61 - 3.62 - memcpy(&policy, data, sizeof(struct cpufreq_policy)); 3.63 - 3.64 - policy.max = 3.65 - perf->states[perf->platform_limit].core_frequency * 1000; 3.66 - 3.67 - return __cpufreq_set_policy(data, &policy); 3.68 -} 3.69 - 3.70 -int cpufreq_add_cpu(unsigned int cpu) 3.71 -{ 3.72 - int ret = 0; 3.73 - unsigned int firstcpu; 3.74 - unsigned int dom; 3.75 - unsigned int j; 3.76 - struct cpufreq_policy new_policy; 3.77 - struct cpufreq_policy *policy; 3.78 - struct processor_performance *perf = &processor_pminfo[cpu]->perf; 3.79 - 3.80 - /* to protect the case when Px was not controlled by xen */ 3.81 - if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT)) 3.82 - return 0; 3.83 - 3.84 - if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu]) 3.85 - return -EINVAL; 3.86 - 3.87 - ret = px_statistic_init(cpu); 3.88 - if (ret) 3.89 - return ret; 3.90 - 3.91 - dom = perf->domain_info.domain; 3.92 - if (cpus_weight(cpufreq_dom_map[dom])) { 3.93 - /* share policy with the first cpu since on same boat */ 3.94 - firstcpu = first_cpu(cpufreq_dom_map[dom]); 3.95 - policy = cpufreq_cpu_policy[firstcpu]; 3.96 - 3.97 - cpufreq_cpu_policy[cpu] = policy; 3.98 - cpu_set(cpu, cpufreq_dom_map[dom]); 3.99 - cpu_set(cpu, policy->cpus); 3.100 - 3.101 - printk(KERN_EMERG"adding CPU %u\n", cpu); 3.102 - } else { 3.103 - /* for the first cpu, setup policy and do init work */ 3.104 - policy = xmalloc(struct cpufreq_policy); 3.105 - if (!policy) { 3.106 - 
px_statistic_exit(cpu); 3.107 - return -ENOMEM; 3.108 - } 3.109 - memset(policy, 0, sizeof(struct cpufreq_policy)); 3.110 - 3.111 - cpufreq_cpu_policy[cpu] = policy; 3.112 - cpu_set(cpu, cpufreq_dom_map[dom]); 3.113 - cpu_set(cpu, policy->cpus); 3.114 - 3.115 - policy->cpu = cpu; 3.116 - ret = cpufreq_driver->init(policy); 3.117 - if (ret) 3.118 - goto err1; 3.119 - printk(KERN_EMERG"CPU %u initialization completed\n", cpu); 3.120 - } 3.121 - 3.122 - /* 3.123 - * After get full cpumap of the coordination domain, 3.124 - * we can safely start gov here. 3.125 - */ 3.126 - if (cpus_weight(cpufreq_dom_map[dom]) == 3.127 - perf->domain_info.num_processors) { 3.128 - memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); 3.129 - policy->governor = NULL; 3.130 - ret = __cpufreq_set_policy(policy, &new_policy); 3.131 - if (ret) 3.132 - goto err2; 3.133 - } 3.134 - 3.135 - return 0; 3.136 - 3.137 -err2: 3.138 - cpufreq_driver->exit(policy); 3.139 -err1: 3.140 - for_each_cpu_mask(j, cpufreq_dom_map[dom]) { 3.141 - cpufreq_cpu_policy[j] = NULL; 3.142 - px_statistic_exit(j); 3.143 - } 3.144 - 3.145 - cpus_clear(cpufreq_dom_map[dom]); 3.146 - xfree(policy); 3.147 - return ret; 3.148 -} 3.149 - 3.150 -int cpufreq_del_cpu(unsigned int cpu) 3.151 -{ 3.152 - unsigned int dom; 3.153 - struct cpufreq_policy *policy; 3.154 - struct processor_performance *perf = &processor_pminfo[cpu]->perf; 3.155 - 3.156 - /* to protect the case when Px was not controlled by xen */ 3.157 - if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT)) 3.158 - return 0; 3.159 - 3.160 - if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu]) 3.161 - return -EINVAL; 3.162 - 3.163 - dom = perf->domain_info.domain; 3.164 - policy = cpufreq_cpu_policy[cpu]; 3.165 - 3.166 - printk(KERN_EMERG"deleting CPU %u\n", cpu); 3.167 - 3.168 - /* for the first cpu of the domain, stop gov */ 3.169 - if (cpus_weight(cpufreq_dom_map[dom]) == 3.170 - perf->domain_info.num_processors) 3.171 - __cpufreq_governor(policy, CPUFREQ_GOV_STOP); 3.172 - 3.173 - cpufreq_cpu_policy[cpu] = NULL; 3.174 - cpu_clear(cpu, policy->cpus); 3.175 - cpu_clear(cpu, cpufreq_dom_map[dom]); 3.176 - px_statistic_exit(cpu); 3.177 - 3.178 - /* for the last cpu of the domain, clean room */ 3.179 - /* It's safe here to free freq_table, drv_data and policy */ 3.180 - if (!cpus_weight(cpufreq_dom_map[dom])) { 3.181 - cpufreq_driver->exit(policy); 3.182 - xfree(policy); 3.183 - } 3.184 - 3.185 - return 0; 3.186 -} 3.187 - 3.188 static int __init cpufreq_driver_init(void) 3.189 { 3.190 int ret = 0;
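After this hunk, xen/arch/x86/acpi/cpufreq/cpufreq.c keeps only the hardware-facing side (acpi_cpufreq_driver and its init/exit/target hooks), while the generic add/del/limit logic moves to the common core. The split works because the common code reaches the hardware only through the struct cpufreq_driver hook table that each arch registers. A minimal user-space sketch of that boundary, with simplified stand-in types and a hypothetical stub driver:

    #include <stdio.h>

    /* Simplified stand-ins for the Xen types involved. */
    struct cpufreq_policy { unsigned int cpu, cur, min, max; };

    /* Hook table: arch code (e.g. xen/arch/x86/acpi/cpufreq) fills this
     * in; the common core (xen/drivers/cpufreq) only calls through it. */
    struct cpufreq_driver {
        int (*init)(struct cpufreq_policy *);
        int (*target)(struct cpufreq_policy *, unsigned int freq);
    };

    /* Hypothetical arch driver with made-up frequency limits (kHz). */
    static int stub_init(struct cpufreq_policy *p)
    {
        p->min = 800000; p->max = 2000000; p->cur = p->max;
        return 0;
    }
    static int stub_target(struct cpufreq_policy *p, unsigned int f)
    {
        p->cur = f;   /* a real driver would program MSRs/IO ports here */
        return 0;
    }
    static struct cpufreq_driver stub_driver = { stub_init, stub_target };

    /* What the arch-independent core does, identical on x86 and ia64. */
    static struct cpufreq_driver *cpufreq_driver = &stub_driver;

    int main(void)
    {
        struct cpufreq_policy pol = { .cpu = 0 };
        cpufreq_driver->init(&pol);
        cpufreq_driver->target(&pol, pol.min);    /* scale down */
        printf("cpu%u now at %u kHz\n", pol.cpu, pol.cur);
        return 0;
    }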
4.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c Fri Sep 26 11:12:29 2008 +0100 4.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 4.3 @@ -1,246 +0,0 @@ 4.4 -/* 4.5 - * xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c 4.6 - * 4.7 - * Copyright (C) 2001 Russell King 4.8 - * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 4.9 - * Jun Nakajima <jun.nakajima@intel.com> 4.10 - * Feb 2008 Liu Jinsong <jinsong.liu@intel.com> 4.11 - * Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor 4.12 - * 4.13 - * This program is free software; you can redistribute it and/or modify 4.14 - * it under the terms of the GNU General Public License version 2 as 4.15 - * published by the Free Software Foundation. 4.16 - */ 4.17 - 4.18 -#include <xen/types.h> 4.19 -#include <xen/percpu.h> 4.20 -#include <xen/cpumask.h> 4.21 -#include <xen/types.h> 4.22 -#include <xen/sched.h> 4.23 -#include <xen/timer.h> 4.24 -#include <asm/config.h> 4.25 -#include <acpi/cpufreq/cpufreq.h> 4.26 - 4.27 -#define DEF_FREQUENCY_UP_THRESHOLD (80) 4.28 - 4.29 -#define MIN_DBS_INTERVAL (MICROSECS(100)) 4.30 -#define MIN_SAMPLING_MILLISECS (20) 4.31 -#define MIN_STAT_SAMPLING_RATE \ 4.32 - (MIN_SAMPLING_MILLISECS * MILLISECS(1)) 4.33 -#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) 4.34 -#define TRANSITION_LATENCY_LIMIT (10 * 1000 ) 4.35 - 4.36 -static uint64_t def_sampling_rate; 4.37 - 4.38 -/* Sampling types */ 4.39 -enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; 4.40 - 4.41 -static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); 4.42 - 4.43 -static unsigned int dbs_enable; /* number of CPUs using this policy */ 4.44 - 4.45 -static struct dbs_tuners { 4.46 - uint64_t sampling_rate; 4.47 - unsigned int up_threshold; 4.48 - unsigned int ignore_nice; 4.49 - unsigned int powersave_bias; 4.50 -} dbs_tuners_ins = { 4.51 - .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 4.52 - .ignore_nice = 0, 4.53 - .powersave_bias = 0, 4.54 -}; 4.55 - 4.56 -static struct timer dbs_timer[NR_CPUS]; 4.57 - 4.58 -uint64_t get_cpu_idle_time(unsigned int cpu) 4.59 -{ 4.60 - uint64_t idle_ns; 4.61 - struct vcpu *v; 4.62 - 4.63 - if ((v = idle_vcpu[cpu]) == NULL) 4.64 - return 0; 4.65 - 4.66 - idle_ns = v->runstate.time[RUNSTATE_running]; 4.67 - if (v->is_running) 4.68 - idle_ns += NOW() - v->runstate.state_entry_time; 4.69 - 4.70 - return idle_ns; 4.71 -} 4.72 - 4.73 -static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) 4.74 -{ 4.75 - unsigned int load = 0; 4.76 - uint64_t cur_ns, idle_ns, total_ns; 4.77 - 4.78 - struct cpufreq_policy *policy; 4.79 - unsigned int j; 4.80 - 4.81 - if (!this_dbs_info->enable) 4.82 - return; 4.83 - 4.84 - policy = this_dbs_info->cur_policy; 4.85 - 4.86 - if (unlikely(policy->resume)) { 4.87 - __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H); 4.88 - return; 4.89 - } 4.90 - 4.91 - cur_ns = NOW(); 4.92 - total_ns = cur_ns - this_dbs_info->prev_cpu_wall; 4.93 - this_dbs_info->prev_cpu_wall = NOW(); 4.94 - 4.95 - if (total_ns < MIN_DBS_INTERVAL) 4.96 - return; 4.97 - 4.98 - /* Get Idle Time */ 4.99 - idle_ns = UINT_MAX; 4.100 - for_each_cpu_mask(j, policy->cpus) { 4.101 - uint64_t total_idle_ns; 4.102 - unsigned int tmp_idle_ns; 4.103 - struct cpu_dbs_info_s *j_dbs_info; 4.104 - 4.105 - j_dbs_info = &per_cpu(cpu_dbs_info, j); 4.106 - total_idle_ns = get_cpu_idle_time(j); 4.107 - tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle; 4.108 - j_dbs_info->prev_cpu_idle = total_idle_ns; 4.109 - 4.110 - if (tmp_idle_ns < idle_ns) 4.111 - idle_ns = tmp_idle_ns; 4.112 - } 4.113 - 4.114 - if 
(likely(total_ns > idle_ns)) 4.115 - load = (100 * (total_ns - idle_ns)) / total_ns; 4.116 - 4.117 - /* Check for frequency increase */ 4.118 - if (load > dbs_tuners_ins.up_threshold) { 4.119 - /* if we are already at full speed then break out early */ 4.120 - if (policy->cur == policy->max) 4.121 - return; 4.122 - __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H); 4.123 - return; 4.124 - } 4.125 - 4.126 - /* Check for frequency decrease */ 4.127 - /* if we cannot reduce the frequency anymore, break out early */ 4.128 - if (policy->cur == policy->min) 4.129 - return; 4.130 - 4.131 - /* 4.132 - * The optimal frequency is the frequency that is the lowest that 4.133 - * can support the current CPU usage without triggering the up 4.134 - * policy. To be safe, we focus 10 points under the threshold. 4.135 - */ 4.136 - if (load < (dbs_tuners_ins.up_threshold - 10)) { 4.137 - unsigned int freq_next, freq_cur; 4.138 - 4.139 - freq_cur = __cpufreq_driver_getavg(policy); 4.140 - if (!freq_cur) 4.141 - freq_cur = policy->cur; 4.142 - 4.143 - freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10); 4.144 - 4.145 - __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); 4.146 - } 4.147 -} 4.148 - 4.149 -static void do_dbs_timer(void *dbs) 4.150 -{ 4.151 - struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs; 4.152 - 4.153 - if (!dbs_info->enable) 4.154 - return; 4.155 - 4.156 - dbs_check_cpu(dbs_info); 4.157 - 4.158 - set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate); 4.159 -} 4.160 - 4.161 -static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) 4.162 -{ 4.163 - dbs_info->enable = 1; 4.164 - 4.165 - init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 4.166 - (void *)dbs_info, dbs_info->cpu); 4.167 - 4.168 - set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate); 4.169 -} 4.170 - 4.171 -static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 4.172 -{ 4.173 - dbs_info->enable = 0; 4.174 - stop_timer(&dbs_timer[dbs_info->cpu]); 4.175 -} 4.176 - 4.177 -int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) 4.178 -{ 4.179 - unsigned int cpu = policy->cpu; 4.180 - struct cpu_dbs_info_s *this_dbs_info; 4.181 - unsigned int j; 4.182 - 4.183 - this_dbs_info = &per_cpu(cpu_dbs_info, cpu); 4.184 - 4.185 - switch (event) { 4.186 - case CPUFREQ_GOV_START: 4.187 - if ((!cpu_online(cpu)) || (!policy->cur)) 4.188 - return -EINVAL; 4.189 - 4.190 - if (policy->cpuinfo.transition_latency > 4.191 - (TRANSITION_LATENCY_LIMIT * 1000)) { 4.192 - printk(KERN_WARNING "ondemand governor failed to load " 4.193 - "due to too long transition latency\n"); 4.194 - return -EINVAL; 4.195 - } 4.196 - if (this_dbs_info->enable) 4.197 - /* Already enabled */ 4.198 - break; 4.199 - 4.200 - dbs_enable++; 4.201 - 4.202 - for_each_cpu_mask(j, policy->cpus) { 4.203 - struct cpu_dbs_info_s *j_dbs_info; 4.204 - j_dbs_info = &per_cpu(cpu_dbs_info, j); 4.205 - j_dbs_info->cur_policy = policy; 4.206 - 4.207 - j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j); 4.208 - j_dbs_info->prev_cpu_wall = NOW(); 4.209 - } 4.210 - this_dbs_info->cpu = cpu; 4.211 - /* 4.212 - * Start the timerschedule work, when this governor 4.213 - * is used for first time 4.214 - */ 4.215 - if (dbs_enable == 1) { 4.216 - def_sampling_rate = policy->cpuinfo.transition_latency * 4.217 - DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; 4.218 - 4.219 - if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) 4.220 - def_sampling_rate = MIN_STAT_SAMPLING_RATE; 4.221 - 4.222 - 
dbs_tuners_ins.sampling_rate = def_sampling_rate; 4.223 - } 4.224 - dbs_timer_init(this_dbs_info); 4.225 - 4.226 - break; 4.227 - 4.228 - case CPUFREQ_GOV_STOP: 4.229 - dbs_timer_exit(this_dbs_info); 4.230 - dbs_enable--; 4.231 - 4.232 - break; 4.233 - 4.234 - case CPUFREQ_GOV_LIMITS: 4.235 - if (policy->max < this_dbs_info->cur_policy->cur) 4.236 - __cpufreq_driver_target(this_dbs_info->cur_policy, 4.237 - policy->max, CPUFREQ_RELATION_H); 4.238 - else if (policy->min > this_dbs_info->cur_policy->cur) 4.239 - __cpufreq_driver_target(this_dbs_info->cur_policy, 4.240 - policy->min, CPUFREQ_RELATION_L); 4.241 - break; 4.242 - } 4.243 - return 0; 4.244 -} 4.245 - 4.246 -struct cpufreq_governor cpufreq_gov_dbs = { 4.247 - .name = "ondemand", 4.248 - .governor = cpufreq_governor_dbs, 4.249 -};
5.1 --- a/xen/arch/x86/acpi/cpufreq/utility.c Fri Sep 26 11:12:29 2008 +0100 5.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 5.3 @@ -1,368 +0,0 @@ 5.4 -/* 5.5 - * utility.c - misc functions for cpufreq driver and Px statistic 5.6 - * 5.7 - * Copyright (C) 2001 Russell King 5.8 - * (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> 5.9 - * 5.10 - * Oct 2005 - Ashok Raj <ashok.raj@intel.com> 5.11 - * Added handling for CPU hotplug 5.12 - * Feb 2006 - Jacob Shin <jacob.shin@amd.com> 5.13 - * Fix handling for CPU hotplug -- affected CPUs 5.14 - * Feb 2008 - Liu Jinsong <jinsong.liu@intel.com> 5.15 - * 1. Merge cpufreq.c and freq_table.c of linux 2.6.23 5.16 - * And poring to Xen hypervisor 5.17 - * 2. some Px statistic interface funcdtions 5.18 - * 5.19 - * This program is free software; you can redistribute it and/or modify 5.20 - * it under the terms of the GNU General Public License version 2 as 5.21 - * published by the Free Software Foundation. 5.22 - * 5.23 - */ 5.24 - 5.25 -#include <xen/errno.h> 5.26 -#include <xen/cpumask.h> 5.27 -#include <xen/types.h> 5.28 -#include <xen/spinlock.h> 5.29 -#include <xen/percpu.h> 5.30 -#include <xen/types.h> 5.31 -#include <xen/sched.h> 5.32 -#include <xen/timer.h> 5.33 -#include <asm/config.h> 5.34 -#include <acpi/cpufreq/cpufreq.h> 5.35 -#include <public/sysctl.h> 5.36 - 5.37 -struct cpufreq_driver *cpufreq_driver; 5.38 -struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS]; 5.39 -struct cpufreq_policy *__read_mostly cpufreq_cpu_policy[NR_CPUS]; 5.40 - 5.41 -/********************************************************************* 5.42 - * Px STATISTIC INFO * 5.43 - *********************************************************************/ 5.44 - 5.45 -void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to) 5.46 -{ 5.47 - uint32_t i; 5.48 - uint64_t now; 5.49 - 5.50 - now = NOW(); 5.51 - 5.52 - for_each_cpu_mask(i, cpumask) { 5.53 - struct pm_px *pxpt = px_statistic_data[i]; 5.54 - struct processor_pminfo *pmpt = processor_pminfo[i]; 5.55 - uint64_t total_idle_ns; 5.56 - uint64_t tmp_idle_ns; 5.57 - 5.58 - if ( !pxpt || !pmpt ) 5.59 - continue; 5.60 - 5.61 - total_idle_ns = get_cpu_idle_time(i); 5.62 - tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall; 5.63 - 5.64 - pxpt->u.last = from; 5.65 - pxpt->u.cur = to; 5.66 - pxpt->u.pt[to].count++; 5.67 - pxpt->u.pt[from].residency += now - pxpt->prev_state_wall; 5.68 - pxpt->u.pt[from].residency -= tmp_idle_ns; 5.69 - 5.70 - (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++; 5.71 - 5.72 - pxpt->prev_state_wall = now; 5.73 - pxpt->prev_idle_wall = total_idle_ns; 5.74 - } 5.75 -} 5.76 - 5.77 -int px_statistic_init(unsigned int cpuid) 5.78 -{ 5.79 - uint32_t i, count; 5.80 - struct pm_px *pxpt = px_statistic_data[cpuid]; 5.81 - const struct processor_pminfo *pmpt = processor_pminfo[cpuid]; 5.82 - 5.83 - count = pmpt->perf.state_count; 5.84 - 5.85 - if ( !pmpt ) 5.86 - return -EINVAL; 5.87 - 5.88 - if ( !pxpt ) 5.89 - { 5.90 - pxpt = xmalloc(struct pm_px); 5.91 - if ( !pxpt ) 5.92 - return -ENOMEM; 5.93 - memset(pxpt, 0, sizeof(*pxpt)); 5.94 - px_statistic_data[cpuid] = pxpt; 5.95 - } 5.96 - 5.97 - pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count); 5.98 - if (!pxpt->u.trans_pt) 5.99 - return -ENOMEM; 5.100 - 5.101 - pxpt->u.pt = xmalloc_array(struct pm_px_val, count); 5.102 - if (!pxpt->u.pt) { 5.103 - xfree(pxpt->u.trans_pt); 5.104 - return -ENOMEM; 5.105 - } 5.106 - 5.107 - memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t))); 5.108 - memset(pxpt->u.pt, 0, 
count * (sizeof(struct pm_px_val))); 5.109 - 5.110 - pxpt->u.total = pmpt->perf.state_count; 5.111 - pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit; 5.112 - 5.113 - for (i=0; i < pmpt->perf.state_count; i++) 5.114 - pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency; 5.115 - 5.116 - pxpt->prev_state_wall = NOW(); 5.117 - pxpt->prev_idle_wall = get_cpu_idle_time(cpuid); 5.118 - 5.119 - return 0; 5.120 -} 5.121 - 5.122 -void px_statistic_exit(unsigned int cpuid) 5.123 -{ 5.124 - struct pm_px *pxpt = px_statistic_data[cpuid]; 5.125 - 5.126 - if (!pxpt) 5.127 - return; 5.128 - xfree(pxpt->u.trans_pt); 5.129 - xfree(pxpt->u.pt); 5.130 - memset(pxpt, 0, sizeof(struct pm_px)); 5.131 -} 5.132 - 5.133 -void px_statistic_reset(unsigned int cpuid) 5.134 -{ 5.135 - uint32_t i, j, count; 5.136 - struct pm_px *pxpt = px_statistic_data[cpuid]; 5.137 - const struct processor_pminfo *pmpt = processor_pminfo[cpuid]; 5.138 - 5.139 - if ( !pxpt || !pmpt ) 5.140 - return; 5.141 - 5.142 - count = pmpt->perf.state_count; 5.143 - 5.144 - for (i=0; i < count; i++) { 5.145 - pxpt->u.pt[i].residency = 0; 5.146 - pxpt->u.pt[i].count = 0; 5.147 - 5.148 - for (j=0; j < count; j++) 5.149 - *(pxpt->u.trans_pt + i*count + j) = 0; 5.150 - } 5.151 - 5.152 - pxpt->prev_state_wall = NOW(); 5.153 - pxpt->prev_idle_wall = get_cpu_idle_time(cpuid); 5.154 -} 5.155 - 5.156 - 5.157 -/********************************************************************* 5.158 - * FREQUENCY TABLE HELPERS * 5.159 - *********************************************************************/ 5.160 - 5.161 -int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, 5.162 - struct cpufreq_frequency_table *table) 5.163 -{ 5.164 - unsigned int min_freq = ~0; 5.165 - unsigned int max_freq = 0; 5.166 - unsigned int i; 5.167 - 5.168 - for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { 5.169 - unsigned int freq = table[i].frequency; 5.170 - if (freq == CPUFREQ_ENTRY_INVALID) 5.171 - continue; 5.172 - if (freq < min_freq) 5.173 - min_freq = freq; 5.174 - if (freq > max_freq) 5.175 - max_freq = freq; 5.176 - } 5.177 - 5.178 - policy->min = policy->cpuinfo.min_freq = min_freq; 5.179 - policy->max = policy->cpuinfo.max_freq = max_freq; 5.180 - 5.181 - if (policy->min == ~0) 5.182 - return -EINVAL; 5.183 - else 5.184 - return 0; 5.185 -} 5.186 - 5.187 -int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, 5.188 - struct cpufreq_frequency_table *table) 5.189 -{ 5.190 - unsigned int next_larger = ~0; 5.191 - unsigned int i; 5.192 - unsigned int count = 0; 5.193 - 5.194 - if (!cpu_online(policy->cpu)) 5.195 - return -EINVAL; 5.196 - 5.197 - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, 5.198 - policy->cpuinfo.max_freq); 5.199 - 5.200 - for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { 5.201 - unsigned int freq = table[i].frequency; 5.202 - if (freq == CPUFREQ_ENTRY_INVALID) 5.203 - continue; 5.204 - if ((freq >= policy->min) && (freq <= policy->max)) 5.205 - count++; 5.206 - else if ((next_larger > freq) && (freq > policy->max)) 5.207 - next_larger = freq; 5.208 - } 5.209 - 5.210 - if (!count) 5.211 - policy->max = next_larger; 5.212 - 5.213 - cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, 5.214 - policy->cpuinfo.max_freq); 5.215 - 5.216 - return 0; 5.217 -} 5.218 - 5.219 -int cpufreq_frequency_table_target(struct cpufreq_policy *policy, 5.220 - struct cpufreq_frequency_table *table, 5.221 - unsigned int target_freq, 5.222 - unsigned int relation, 5.223 - unsigned int *index) 
5.224 -{ 5.225 - struct cpufreq_frequency_table optimal = { 5.226 - .index = ~0, 5.227 - .frequency = 0, 5.228 - }; 5.229 - struct cpufreq_frequency_table suboptimal = { 5.230 - .index = ~0, 5.231 - .frequency = 0, 5.232 - }; 5.233 - unsigned int i; 5.234 - 5.235 - switch (relation) { 5.236 - case CPUFREQ_RELATION_H: 5.237 - suboptimal.frequency = ~0; 5.238 - break; 5.239 - case CPUFREQ_RELATION_L: 5.240 - optimal.frequency = ~0; 5.241 - break; 5.242 - } 5.243 - 5.244 - if (!cpu_online(policy->cpu)) 5.245 - return -EINVAL; 5.246 - 5.247 - for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { 5.248 - unsigned int freq = table[i].frequency; 5.249 - if (freq == CPUFREQ_ENTRY_INVALID) 5.250 - continue; 5.251 - if ((freq < policy->min) || (freq > policy->max)) 5.252 - continue; 5.253 - switch(relation) { 5.254 - case CPUFREQ_RELATION_H: 5.255 - if (freq <= target_freq) { 5.256 - if (freq >= optimal.frequency) { 5.257 - optimal.frequency = freq; 5.258 - optimal.index = i; 5.259 - } 5.260 - } else { 5.261 - if (freq <= suboptimal.frequency) { 5.262 - suboptimal.frequency = freq; 5.263 - suboptimal.index = i; 5.264 - } 5.265 - } 5.266 - break; 5.267 - case CPUFREQ_RELATION_L: 5.268 - if (freq >= target_freq) { 5.269 - if (freq <= optimal.frequency) { 5.270 - optimal.frequency = freq; 5.271 - optimal.index = i; 5.272 - } 5.273 - } else { 5.274 - if (freq >= suboptimal.frequency) { 5.275 - suboptimal.frequency = freq; 5.276 - suboptimal.index = i; 5.277 - } 5.278 - } 5.279 - break; 5.280 - } 5.281 - } 5.282 - if (optimal.index > i) { 5.283 - if (suboptimal.index > i) 5.284 - return -EINVAL; 5.285 - *index = suboptimal.index; 5.286 - } else 5.287 - *index = optimal.index; 5.288 - 5.289 - return 0; 5.290 -} 5.291 - 5.292 - 5.293 -/********************************************************************* 5.294 - * GOVERNORS * 5.295 - *********************************************************************/ 5.296 - 5.297 -int __cpufreq_driver_target(struct cpufreq_policy *policy, 5.298 - unsigned int target_freq, 5.299 - unsigned int relation) 5.300 -{ 5.301 - int retval = -EINVAL; 5.302 - 5.303 - if (cpu_online(policy->cpu) && cpufreq_driver->target) 5.304 - retval = cpufreq_driver->target(policy, target_freq, relation); 5.305 - 5.306 - return retval; 5.307 -} 5.308 - 5.309 -int __cpufreq_driver_getavg(struct cpufreq_policy *policy) 5.310 -{ 5.311 - int ret = 0; 5.312 - 5.313 - if (!policy) 5.314 - return -EINVAL; 5.315 - 5.316 - if (cpu_online(policy->cpu) && cpufreq_driver->getavg) 5.317 - ret = cpufreq_driver->getavg(policy->cpu); 5.318 - 5.319 - return ret; 5.320 -} 5.321 - 5.322 - 5.323 -/********************************************************************* 5.324 - * POLICY * 5.325 - *********************************************************************/ 5.326 - 5.327 -/* 5.328 - * data : current policy. 5.329 - * policy : policy to be set. 
5.330 - */ 5.331 -int __cpufreq_set_policy(struct cpufreq_policy *data, 5.332 - struct cpufreq_policy *policy) 5.333 -{ 5.334 - int ret = 0; 5.335 - 5.336 - memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo)); 5.337 - 5.338 - if (policy->min > data->min && policy->min > policy->max) 5.339 - return -EINVAL; 5.340 - 5.341 - /* verify the cpu speed can be set within this limit */ 5.342 - ret = cpufreq_driver->verify(policy); 5.343 - if (ret) 5.344 - return ret; 5.345 - 5.346 - data->min = policy->min; 5.347 - data->max = policy->max; 5.348 - 5.349 - if (policy->governor != data->governor) { 5.350 - /* save old, working values */ 5.351 - struct cpufreq_governor *old_gov = data->governor; 5.352 - 5.353 - /* end old governor */ 5.354 - if (data->governor) 5.355 - __cpufreq_governor(data, CPUFREQ_GOV_STOP); 5.356 - 5.357 - /* start new governor */ 5.358 - data->governor = policy->governor; 5.359 - if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { 5.360 - /* new governor failed, so re-start old one */ 5.361 - if (old_gov) { 5.362 - data->governor = old_gov; 5.363 - __cpufreq_governor(data, CPUFREQ_GOV_START); 5.364 - } 5.365 - return -EINVAL; 5.366 - } 5.367 - /* might be a policy change, too, so fall through */ 5.368 - } 5.369 - 5.370 - return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); 5.371 -}
6.1 --- a/xen/arch/x86/acpi/pmstat.c Fri Sep 26 11:12:29 2008 +0100 6.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 6.3 @@ -1,155 +0,0 @@ 6.4 -/***************************************************************************** 6.5 -# pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.) 6.6 -# 6.7 -# Copyright (c) 2008, Liu Jinsong <jinsong.liu@intel.com> 6.8 -# 6.9 -# This program is free software; you can redistribute it and/or modify it 6.10 -# under the terms of the GNU General Public License as published by the Free 6.11 -# Software Foundation; either version 2 of the License, or (at your option) 6.12 -# any later version. 6.13 -# 6.14 -# This program is distributed in the hope that it will be useful, but WITHOUT 6.15 -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 6.16 -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 6.17 -# more details. 6.18 -# 6.19 -# You should have received a copy of the GNU General Public License along with 6.20 -# this program; if not, write to the Free Software Foundation, Inc., 59 6.21 -# Temple Place - Suite 330, Boston, MA 02111-1307, USA. 6.22 -# 6.23 -# The full GNU General Public License is included in this distribution in the 6.24 -# file called LICENSE. 6.25 -# 6.26 -*****************************************************************************/ 6.27 - 6.28 -#include <xen/config.h> 6.29 -#include <xen/lib.h> 6.30 -#include <xen/errno.h> 6.31 -#include <xen/sched.h> 6.32 -#include <xen/event.h> 6.33 -#include <xen/irq.h> 6.34 -#include <xen/iocap.h> 6.35 -#include <xen/compat.h> 6.36 -#include <xen/guest_access.h> 6.37 -#include <asm/current.h> 6.38 -#include <public/xen.h> 6.39 -#include <xen/cpumask.h> 6.40 -#include <asm/processor.h> 6.41 -#include <xen/percpu.h> 6.42 -#include <xen/domain.h> 6.43 - 6.44 -#include <public/sysctl.h> 6.45 -#include <acpi/cpufreq/cpufreq.h> 6.46 - 6.47 -struct pm_px *__read_mostly px_statistic_data[NR_CPUS]; 6.48 - 6.49 -extern uint32_t pmstat_get_cx_nr(uint32_t cpuid); 6.50 -extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat); 6.51 -extern int pmstat_reset_cx_stat(uint32_t cpuid); 6.52 - 6.53 -int do_get_pm_info(struct xen_sysctl_get_pmstat *op) 6.54 -{ 6.55 - int ret = 0; 6.56 - const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid]; 6.57 - 6.58 - if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) ) 6.59 - return -EINVAL; 6.60 - 6.61 - switch ( op->type & PMSTAT_CATEGORY_MASK ) 6.62 - { 6.63 - case PMSTAT_CX: 6.64 - if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) ) 6.65 - return -ENODEV; 6.66 - break; 6.67 - case PMSTAT_PX: 6.68 - if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) ) 6.69 - return -ENODEV; 6.70 - if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) ) 6.71 - return -EINVAL; 6.72 - break; 6.73 - default: 6.74 - return -ENODEV; 6.75 - } 6.76 - 6.77 - switch ( op->type ) 6.78 - { 6.79 - case PMSTAT_get_max_px: 6.80 - { 6.81 - op->u.getpx.total = pmpt->perf.state_count; 6.82 - break; 6.83 - } 6.84 - 6.85 - case PMSTAT_get_pxstat: 6.86 - { 6.87 - uint64_t now, ct; 6.88 - uint64_t total_idle_ns; 6.89 - uint64_t tmp_idle_ns; 6.90 - struct pm_px *pxpt = px_statistic_data[op->cpuid]; 6.91 - 6.92 - if ( !pxpt ) 6.93 - return -ENODATA; 6.94 - 6.95 - total_idle_ns = get_cpu_idle_time(op->cpuid); 6.96 - tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall; 6.97 - 6.98 - now = NOW(); 6.99 - pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit; 6.100 - pxpt->u.pt[pxpt->u.cur].residency += now - 
pxpt->prev_state_wall; 6.101 - pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns; 6.102 - pxpt->prev_state_wall = now; 6.103 - pxpt->prev_idle_wall = total_idle_ns; 6.104 - 6.105 - ct = pmpt->perf.state_count; 6.106 - if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) ) 6.107 - { 6.108 - ret = -EFAULT; 6.109 - break; 6.110 - } 6.111 - 6.112 - if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) ) 6.113 - { 6.114 - ret = -EFAULT; 6.115 - break; 6.116 - } 6.117 - 6.118 - op->u.getpx.total = pxpt->u.total; 6.119 - op->u.getpx.usable = pxpt->u.usable; 6.120 - op->u.getpx.last = pxpt->u.last; 6.121 - op->u.getpx.cur = pxpt->u.cur; 6.122 - 6.123 - break; 6.124 - } 6.125 - 6.126 - case PMSTAT_reset_pxstat: 6.127 - { 6.128 - px_statistic_reset(op->cpuid); 6.129 - break; 6.130 - } 6.131 - 6.132 - case PMSTAT_get_max_cx: 6.133 - { 6.134 - op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid); 6.135 - ret = 0; 6.136 - break; 6.137 - } 6.138 - 6.139 - case PMSTAT_get_cxstat: 6.140 - { 6.141 - ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx); 6.142 - break; 6.143 - } 6.144 - 6.145 - case PMSTAT_reset_cxstat: 6.146 - { 6.147 - ret = pmstat_reset_cx_stat(op->cpuid); 6.148 - break; 6.149 - } 6.150 - 6.151 - default: 6.152 - printk("not defined sub-hypercall @ do_get_pm_info\n"); 6.153 - ret = -ENOSYS; 6.154 - break; 6.155 - } 6.156 - 6.157 - return ret; 6.158 -}
--- a/xen/drivers/Makefile	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/drivers/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -1,4 +1,5 @@
 subdir-y += char
+subdir-y += cpufreq
 subdir-$(x86) += passthrough
 subdir-$(HAS_ACPI) += acpi
 subdir-$(HAS_VGA) += video
--- a/xen/drivers/acpi/Makefile	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/drivers/acpi/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -4,6 +4,7 @@ subdir-y += utilities
 obj-y += tables.o
 obj-y += numa.o
 obj-y += osl.o
+obj-y += pmstat.o
 
 obj-$(x86) += hwregs.o
 obj-$(x86) += reboot.o
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/xen/drivers/acpi/pmstat.c Fri Sep 26 14:04:38 2008 +0100 9.3 @@ -0,0 +1,155 @@ 9.4 +/***************************************************************************** 9.5 +# pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.) 9.6 +# 9.7 +# Copyright (c) 2008, Liu Jinsong <jinsong.liu@intel.com> 9.8 +# 9.9 +# This program is free software; you can redistribute it and/or modify it 9.10 +# under the terms of the GNU General Public License as published by the Free 9.11 +# Software Foundation; either version 2 of the License, or (at your option) 9.12 +# any later version. 9.13 +# 9.14 +# This program is distributed in the hope that it will be useful, but WITHOUT 9.15 +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 9.16 +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 9.17 +# more details. 9.18 +# 9.19 +# You should have received a copy of the GNU General Public License along with 9.20 +# this program; if not, write to the Free Software Foundation, Inc., 59 9.21 +# Temple Place - Suite 330, Boston, MA 02111-1307, USA. 9.22 +# 9.23 +# The full GNU General Public License is included in this distribution in the 9.24 +# file called LICENSE. 9.25 +# 9.26 +*****************************************************************************/ 9.27 + 9.28 +#include <xen/config.h> 9.29 +#include <xen/lib.h> 9.30 +#include <xen/errno.h> 9.31 +#include <xen/sched.h> 9.32 +#include <xen/event.h> 9.33 +#include <xen/irq.h> 9.34 +#include <xen/iocap.h> 9.35 +#include <xen/compat.h> 9.36 +#include <xen/guest_access.h> 9.37 +#include <asm/current.h> 9.38 +#include <public/xen.h> 9.39 +#include <xen/cpumask.h> 9.40 +#include <asm/processor.h> 9.41 +#include <xen/percpu.h> 9.42 +#include <xen/domain.h> 9.43 + 9.44 +#include <public/sysctl.h> 9.45 +#include <acpi/cpufreq/cpufreq.h> 9.46 + 9.47 +struct pm_px *__read_mostly cpufreq_statistic_data[NR_CPUS]; 9.48 + 9.49 +extern uint32_t pmstat_get_cx_nr(uint32_t cpuid); 9.50 +extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat); 9.51 +extern int pmstat_reset_cx_stat(uint32_t cpuid); 9.52 + 9.53 +int do_get_pm_info(struct xen_sysctl_get_pmstat *op) 9.54 +{ 9.55 + int ret = 0; 9.56 + const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid]; 9.57 + 9.58 + if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) ) 9.59 + return -EINVAL; 9.60 + 9.61 + switch ( op->type & PMSTAT_CATEGORY_MASK ) 9.62 + { 9.63 + case PMSTAT_CX: 9.64 + if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) ) 9.65 + return -ENODEV; 9.66 + break; 9.67 + case PMSTAT_PX: 9.68 + if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) ) 9.69 + return -ENODEV; 9.70 + if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) ) 9.71 + return -EINVAL; 9.72 + break; 9.73 + default: 9.74 + return -ENODEV; 9.75 + } 9.76 + 9.77 + switch ( op->type ) 9.78 + { 9.79 + case PMSTAT_get_max_px: 9.80 + { 9.81 + op->u.getpx.total = pmpt->perf.state_count; 9.82 + break; 9.83 + } 9.84 + 9.85 + case PMSTAT_get_pxstat: 9.86 + { 9.87 + uint64_t now, ct; 9.88 + uint64_t total_idle_ns; 9.89 + uint64_t tmp_idle_ns; 9.90 + struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid]; 9.91 + 9.92 + if ( !pxpt ) 9.93 + return -ENODATA; 9.94 + 9.95 + total_idle_ns = get_cpu_idle_time(op->cpuid); 9.96 + tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall; 9.97 + 9.98 + now = NOW(); 9.99 + pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit; 9.100 + pxpt->u.pt[pxpt->u.cur].residency += now - 
pxpt->prev_state_wall; 9.101 + pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns; 9.102 + pxpt->prev_state_wall = now; 9.103 + pxpt->prev_idle_wall = total_idle_ns; 9.104 + 9.105 + ct = pmpt->perf.state_count; 9.106 + if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) ) 9.107 + { 9.108 + ret = -EFAULT; 9.109 + break; 9.110 + } 9.111 + 9.112 + if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) ) 9.113 + { 9.114 + ret = -EFAULT; 9.115 + break; 9.116 + } 9.117 + 9.118 + op->u.getpx.total = pxpt->u.total; 9.119 + op->u.getpx.usable = pxpt->u.usable; 9.120 + op->u.getpx.last = pxpt->u.last; 9.121 + op->u.getpx.cur = pxpt->u.cur; 9.122 + 9.123 + break; 9.124 + } 9.125 + 9.126 + case PMSTAT_reset_pxstat: 9.127 + { 9.128 + cpufreq_statistic_reset(op->cpuid); 9.129 + break; 9.130 + } 9.131 + 9.132 + case PMSTAT_get_max_cx: 9.133 + { 9.134 + op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid); 9.135 + ret = 0; 9.136 + break; 9.137 + } 9.138 + 9.139 + case PMSTAT_get_cxstat: 9.140 + { 9.141 + ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx); 9.142 + break; 9.143 + } 9.144 + 9.145 + case PMSTAT_reset_cxstat: 9.146 + { 9.147 + ret = pmstat_reset_cx_stat(op->cpuid); 9.148 + break; 9.149 + } 9.150 + 9.151 + default: 9.152 + printk("not defined sub-hypercall @ do_get_pm_info\n"); 9.153 + ret = -ENOSYS; 9.154 + break; 9.155 + } 9.156 + 9.157 + return ret; 9.158 +}
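The PMSTAT_get_pxstat handler above brings the residency counters up to date before copying them out: the wall time elapsed since the last snapshot is charged to the current P-state, minus whatever part of that window the CPU spent idle. A standalone sketch of that accounting, with hypothetical timestamps in ns:

    #include <stdio.h>
    #include <stdint.h>

    /* Per-CPU snapshot state, as kept in struct pm_px. */
    static uint64_t prev_state_wall, prev_idle_wall;
    static uint64_t residency[8];    /* ns spent busy in each P-state */
    static unsigned int cur;         /* current P-state index */

    /* Same accounting as the PMSTAT_get_pxstat path: charge the window
     * to the current P-state, net of idle time within the window. */
    static void px_account(uint64_t now, uint64_t total_idle_ns)
    {
        uint64_t window = now - prev_state_wall;
        uint64_t idle   = total_idle_ns - prev_idle_wall;

        residency[cur] += window - idle;
        prev_state_wall = now;
        prev_idle_wall  = total_idle_ns;
    }

    int main(void)
    {
        cur = 2;
        px_account(1000000, 250000);   /* 1 ms elapsed, 0.25 ms idle */
        printf("P%u busy residency: %llu ns\n", cur,
               (unsigned long long)residency[2]);   /* 750000 */
        return 0;
    }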
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/Makefile	Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,3 @@
+obj-y += cpufreq.o
+obj-y += cpufreq_ondemand.o
+obj-y += utility.o
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/xen/drivers/cpufreq/cpufreq.c Fri Sep 26 14:04:38 2008 +0100 11.3 @@ -0,0 +1,188 @@ 11.4 +/* 11.5 + * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com> 11.6 + * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> 11.7 + * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de> 11.8 + * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com> 11.9 + * 11.10 + * Feb 2008 - Liu Jinsong <jinsong.liu@intel.com> 11.11 + * Add cpufreq limit change handle and per-cpu cpufreq add/del 11.12 + * to cope with cpu hotplug 11.13 + * 11.14 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11.15 + * 11.16 + * This program is free software; you can redistribute it and/or modify 11.17 + * it under the terms of the GNU General Public License as published by 11.18 + * the Free Software Foundation; either version 2 of the License, or (at 11.19 + * your option) any later version. 11.20 + * 11.21 + * This program is distributed in the hope that it will be useful, but 11.22 + * WITHOUT ANY WARRANTY; without even the implied warranty of 11.23 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11.24 + * General Public License for more details. 11.25 + * 11.26 + * You should have received a copy of the GNU General Public License along 11.27 + * with this program; if not, write to the Free Software Foundation, Inc., 11.28 + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. 11.29 + * 11.30 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 11.31 + */ 11.32 + 11.33 +#include <xen/types.h> 11.34 +#include <xen/errno.h> 11.35 +#include <xen/delay.h> 11.36 +#include <xen/cpumask.h> 11.37 +#include <xen/sched.h> 11.38 +#include <xen/timer.h> 11.39 +#include <xen/xmalloc.h> 11.40 +#include <asm/bug.h> 11.41 +#include <asm/msr.h> 11.42 +#include <asm/io.h> 11.43 +#include <asm/config.h> 11.44 +#include <asm/processor.h> 11.45 +#include <asm/percpu.h> 11.46 +#include <asm/cpufeature.h> 11.47 +#include <acpi/acpi.h> 11.48 +#include <acpi/cpufreq/cpufreq.h> 11.49 + 11.50 +/* TODO: change to link list later as domain number may be sparse */ 11.51 +static cpumask_t cpufreq_dom_map[NR_CPUS]; 11.52 + 11.53 +int cpufreq_limit_change(unsigned int cpu) 11.54 +{ 11.55 + struct processor_performance *perf = &processor_pminfo[cpu]->perf; 11.56 + struct cpufreq_policy *data = cpufreq_cpu_policy[cpu]; 11.57 + struct cpufreq_policy policy; 11.58 + 11.59 + if (!cpu_online(cpu) || !data || !processor_pminfo[cpu]) 11.60 + return -ENODEV; 11.61 + 11.62 + if ((perf->platform_limit < 0) || 11.63 + (perf->platform_limit >= perf->state_count)) 11.64 + return -EINVAL; 11.65 + 11.66 + memcpy(&policy, data, sizeof(struct cpufreq_policy)); 11.67 + 11.68 + policy.max = 11.69 + perf->states[perf->platform_limit].core_frequency * 1000; 11.70 + 11.71 + return __cpufreq_set_policy(data, &policy); 11.72 +} 11.73 + 11.74 +int cpufreq_add_cpu(unsigned int cpu) 11.75 +{ 11.76 + int ret = 0; 11.77 + unsigned int firstcpu; 11.78 + unsigned int dom; 11.79 + unsigned int j; 11.80 + struct cpufreq_policy new_policy; 11.81 + struct cpufreq_policy *policy; 11.82 + struct processor_performance *perf = &processor_pminfo[cpu]->perf; 11.83 + 11.84 + /* to protect the case when Px was not controlled by xen */ 11.85 + if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT)) 11.86 + return 0; 11.87 + 11.88 + if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu]) 11.89 + return -EINVAL; 11.90 + 
11.91 + ret = cpufreq_statistic_init(cpu); 11.92 + if (ret) 11.93 + return ret; 11.94 + 11.95 + dom = perf->domain_info.domain; 11.96 + if (cpus_weight(cpufreq_dom_map[dom])) { 11.97 + /* share policy with the first cpu since on same boat */ 11.98 + firstcpu = first_cpu(cpufreq_dom_map[dom]); 11.99 + policy = cpufreq_cpu_policy[firstcpu]; 11.100 + 11.101 + cpufreq_cpu_policy[cpu] = policy; 11.102 + cpu_set(cpu, cpufreq_dom_map[dom]); 11.103 + cpu_set(cpu, policy->cpus); 11.104 + 11.105 + printk(KERN_EMERG"adding CPU %u\n", cpu); 11.106 + } else { 11.107 + /* for the first cpu, setup policy and do init work */ 11.108 + policy = xmalloc(struct cpufreq_policy); 11.109 + if (!policy) { 11.110 + cpufreq_statistic_exit(cpu); 11.111 + return -ENOMEM; 11.112 + } 11.113 + memset(policy, 0, sizeof(struct cpufreq_policy)); 11.114 + 11.115 + cpufreq_cpu_policy[cpu] = policy; 11.116 + cpu_set(cpu, cpufreq_dom_map[dom]); 11.117 + cpu_set(cpu, policy->cpus); 11.118 + 11.119 + policy->cpu = cpu; 11.120 + ret = cpufreq_driver->init(policy); 11.121 + if (ret) 11.122 + goto err1; 11.123 + printk(KERN_EMERG"CPU %u initialization completed\n", cpu); 11.124 + } 11.125 + 11.126 + /* 11.127 + * After get full cpumap of the coordination domain, 11.128 + * we can safely start gov here. 11.129 + */ 11.130 + if (cpus_weight(cpufreq_dom_map[dom]) == 11.131 + perf->domain_info.num_processors) { 11.132 + memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); 11.133 + policy->governor = NULL; 11.134 + ret = __cpufreq_set_policy(policy, &new_policy); 11.135 + if (ret) 11.136 + goto err2; 11.137 + } 11.138 + 11.139 + return 0; 11.140 + 11.141 +err2: 11.142 + cpufreq_driver->exit(policy); 11.143 +err1: 11.144 + for_each_cpu_mask(j, cpufreq_dom_map[dom]) { 11.145 + cpufreq_cpu_policy[j] = NULL; 11.146 + cpufreq_statistic_exit(j); 11.147 + } 11.148 + 11.149 + cpus_clear(cpufreq_dom_map[dom]); 11.150 + xfree(policy); 11.151 + return ret; 11.152 +} 11.153 + 11.154 +int cpufreq_del_cpu(unsigned int cpu) 11.155 +{ 11.156 + unsigned int dom; 11.157 + struct cpufreq_policy *policy; 11.158 + struct processor_performance *perf = &processor_pminfo[cpu]->perf; 11.159 + 11.160 + /* to protect the case when Px was not controlled by xen */ 11.161 + if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT)) 11.162 + return 0; 11.163 + 11.164 + if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu]) 11.165 + return -EINVAL; 11.166 + 11.167 + dom = perf->domain_info.domain; 11.168 + policy = cpufreq_cpu_policy[cpu]; 11.169 + 11.170 + printk(KERN_EMERG"deleting CPU %u\n", cpu); 11.171 + 11.172 + /* for the first cpu of the domain, stop gov */ 11.173 + if (cpus_weight(cpufreq_dom_map[dom]) == 11.174 + perf->domain_info.num_processors) 11.175 + __cpufreq_governor(policy, CPUFREQ_GOV_STOP); 11.176 + 11.177 + cpufreq_cpu_policy[cpu] = NULL; 11.178 + cpu_clear(cpu, policy->cpus); 11.179 + cpu_clear(cpu, cpufreq_dom_map[dom]); 11.180 + cpufreq_statistic_exit(cpu); 11.181 + 11.182 + /* for the last cpu of the domain, clean room */ 11.183 + /* It's safe here to free freq_table, drv_data and policy */ 11.184 + if (!cpus_weight(cpufreq_dom_map[dom])) { 11.185 + cpufreq_driver->exit(policy); 11.186 + xfree(policy); 11.187 + } 11.188 + 11.189 + return 0; 11.190 +} 11.191 +
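The relocated cpufreq_add_cpu()/cpufreq_del_cpu() manage one policy per hardware coordination domain: the first CPU of a domain allocates and initializes the policy, later members just attach to it, and the governor is started only once the domain's cpumask is complete (and stopped on the way down before the last member frees the policy). A minimal sketch of that bookkeeping, using plain bitmasks and GCC builtins in place of Xen's cpumask_t (CPU count and domain sizes are hypothetical):

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_CPUS 8

    struct policy { unsigned int first_cpu; unsigned long cpus; };

    static struct policy *cpu_policy[NR_CPUS];
    static unsigned long dom_map[NR_CPUS];   /* dom -> member bitmask */

    /* Mirrors cpufreq_add_cpu(): first CPU in a domain allocates the
     * policy, the rest share it; the governor starts when the domain's
     * mask is full. */
    static void add_cpu(unsigned int cpu, unsigned int dom, int dom_size)
    {
        struct policy *p;

        if (dom_map[dom]) {
            /* domain already has a policy: share the first CPU's */
            p = cpu_policy[__builtin_ctzl(dom_map[dom])];
        } else {
            /* first CPU of the domain: allocate and init */
            p = calloc(1, sizeof(*p));
            p->first_cpu = cpu;
        }
        cpu_policy[cpu] = p;
        dom_map[dom] |= 1UL << cpu;
        p->cpus |= 1UL << cpu;

        if (__builtin_popcountl(dom_map[dom]) == dom_size)
            printf("dom %u complete: start governor on cpu%u's policy\n",
                   dom, p->first_cpu);
    }

    int main(void)
    {
        add_cpu(0, 0, 2);   /* allocates the policy for domain 0 */
        add_cpu(1, 0, 2);   /* shares it; domain full -> governor starts */
        return 0;
    }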
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 12.2 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c Fri Sep 26 14:04:38 2008 +0100 12.3 @@ -0,0 +1,246 @@ 12.4 +/* 12.5 + * xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c 12.6 + * 12.7 + * Copyright (C) 2001 Russell King 12.8 + * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. 12.9 + * Jun Nakajima <jun.nakajima@intel.com> 12.10 + * Feb 2008 Liu Jinsong <jinsong.liu@intel.com> 12.11 + * Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor 12.12 + * 12.13 + * This program is free software; you can redistribute it and/or modify 12.14 + * it under the terms of the GNU General Public License version 2 as 12.15 + * published by the Free Software Foundation. 12.16 + */ 12.17 + 12.18 +#include <xen/types.h> 12.19 +#include <xen/percpu.h> 12.20 +#include <xen/cpumask.h> 12.21 +#include <xen/types.h> 12.22 +#include <xen/sched.h> 12.23 +#include <xen/timer.h> 12.24 +#include <asm/config.h> 12.25 +#include <acpi/cpufreq/cpufreq.h> 12.26 + 12.27 +#define DEF_FREQUENCY_UP_THRESHOLD (80) 12.28 + 12.29 +#define MIN_DBS_INTERVAL (MICROSECS(100)) 12.30 +#define MIN_SAMPLING_MILLISECS (20) 12.31 +#define MIN_STAT_SAMPLING_RATE \ 12.32 + (MIN_SAMPLING_MILLISECS * MILLISECS(1)) 12.33 +#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) 12.34 +#define TRANSITION_LATENCY_LIMIT (10 * 1000 ) 12.35 + 12.36 +static uint64_t def_sampling_rate; 12.37 + 12.38 +/* Sampling types */ 12.39 +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; 12.40 + 12.41 +static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); 12.42 + 12.43 +static unsigned int dbs_enable; /* number of CPUs using this policy */ 12.44 + 12.45 +static struct dbs_tuners { 12.46 + uint64_t sampling_rate; 12.47 + unsigned int up_threshold; 12.48 + unsigned int ignore_nice; 12.49 + unsigned int powersave_bias; 12.50 +} dbs_tuners_ins = { 12.51 + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, 12.52 + .ignore_nice = 0, 12.53 + .powersave_bias = 0, 12.54 +}; 12.55 + 12.56 +static struct timer dbs_timer[NR_CPUS]; 12.57 + 12.58 +uint64_t get_cpu_idle_time(unsigned int cpu) 12.59 +{ 12.60 + uint64_t idle_ns; 12.61 + struct vcpu *v; 12.62 + 12.63 + if ((v = idle_vcpu[cpu]) == NULL) 12.64 + return 0; 12.65 + 12.66 + idle_ns = v->runstate.time[RUNSTATE_running]; 12.67 + if (v->is_running) 12.68 + idle_ns += NOW() - v->runstate.state_entry_time; 12.69 + 12.70 + return idle_ns; 12.71 +} 12.72 + 12.73 +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) 12.74 +{ 12.75 + unsigned int load = 0; 12.76 + uint64_t cur_ns, idle_ns, total_ns; 12.77 + 12.78 + struct cpufreq_policy *policy; 12.79 + unsigned int j; 12.80 + 12.81 + if (!this_dbs_info->enable) 12.82 + return; 12.83 + 12.84 + policy = this_dbs_info->cur_policy; 12.85 + 12.86 + if (unlikely(policy->resume)) { 12.87 + __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H); 12.88 + return; 12.89 + } 12.90 + 12.91 + cur_ns = NOW(); 12.92 + total_ns = cur_ns - this_dbs_info->prev_cpu_wall; 12.93 + this_dbs_info->prev_cpu_wall = NOW(); 12.94 + 12.95 + if (total_ns < MIN_DBS_INTERVAL) 12.96 + return; 12.97 + 12.98 + /* Get Idle Time */ 12.99 + idle_ns = UINT_MAX; 12.100 + for_each_cpu_mask(j, policy->cpus) { 12.101 + uint64_t total_idle_ns; 12.102 + unsigned int tmp_idle_ns; 12.103 + struct cpu_dbs_info_s *j_dbs_info; 12.104 + 12.105 + j_dbs_info = &per_cpu(cpu_dbs_info, j); 12.106 + total_idle_ns = get_cpu_idle_time(j); 12.107 + tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle; 12.108 + j_dbs_info->prev_cpu_idle = total_idle_ns; 
12.109 + 12.110 + if (tmp_idle_ns < idle_ns) 12.111 + idle_ns = tmp_idle_ns; 12.112 + } 12.113 + 12.114 + if (likely(total_ns > idle_ns)) 12.115 + load = (100 * (total_ns - idle_ns)) / total_ns; 12.116 + 12.117 + /* Check for frequency increase */ 12.118 + if (load > dbs_tuners_ins.up_threshold) { 12.119 + /* if we are already at full speed then break out early */ 12.120 + if (policy->cur == policy->max) 12.121 + return; 12.122 + __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H); 12.123 + return; 12.124 + } 12.125 + 12.126 + /* Check for frequency decrease */ 12.127 + /* if we cannot reduce the frequency anymore, break out early */ 12.128 + if (policy->cur == policy->min) 12.129 + return; 12.130 + 12.131 + /* 12.132 + * The optimal frequency is the frequency that is the lowest that 12.133 + * can support the current CPU usage without triggering the up 12.134 + * policy. To be safe, we focus 10 points under the threshold. 12.135 + */ 12.136 + if (load < (dbs_tuners_ins.up_threshold - 10)) { 12.137 + unsigned int freq_next, freq_cur; 12.138 + 12.139 + freq_cur = __cpufreq_driver_getavg(policy); 12.140 + if (!freq_cur) 12.141 + freq_cur = policy->cur; 12.142 + 12.143 + freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10); 12.144 + 12.145 + __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L); 12.146 + } 12.147 +} 12.148 + 12.149 +static void do_dbs_timer(void *dbs) 12.150 +{ 12.151 + struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs; 12.152 + 12.153 + if (!dbs_info->enable) 12.154 + return; 12.155 + 12.156 + dbs_check_cpu(dbs_info); 12.157 + 12.158 + set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate); 12.159 +} 12.160 + 12.161 +static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) 12.162 +{ 12.163 + dbs_info->enable = 1; 12.164 + 12.165 + init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 12.166 + (void *)dbs_info, dbs_info->cpu); 12.167 + 12.168 + set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate); 12.169 +} 12.170 + 12.171 +static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) 12.172 +{ 12.173 + dbs_info->enable = 0; 12.174 + stop_timer(&dbs_timer[dbs_info->cpu]); 12.175 +} 12.176 + 12.177 +int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) 12.178 +{ 12.179 + unsigned int cpu = policy->cpu; 12.180 + struct cpu_dbs_info_s *this_dbs_info; 12.181 + unsigned int j; 12.182 + 12.183 + this_dbs_info = &per_cpu(cpu_dbs_info, cpu); 12.184 + 12.185 + switch (event) { 12.186 + case CPUFREQ_GOV_START: 12.187 + if ((!cpu_online(cpu)) || (!policy->cur)) 12.188 + return -EINVAL; 12.189 + 12.190 + if (policy->cpuinfo.transition_latency > 12.191 + (TRANSITION_LATENCY_LIMIT * 1000)) { 12.192 + printk(KERN_WARNING "ondemand governor failed to load " 12.193 + "due to too long transition latency\n"); 12.194 + return -EINVAL; 12.195 + } 12.196 + if (this_dbs_info->enable) 12.197 + /* Already enabled */ 12.198 + break; 12.199 + 12.200 + dbs_enable++; 12.201 + 12.202 + for_each_cpu_mask(j, policy->cpus) { 12.203 + struct cpu_dbs_info_s *j_dbs_info; 12.204 + j_dbs_info = &per_cpu(cpu_dbs_info, j); 12.205 + j_dbs_info->cur_policy = policy; 12.206 + 12.207 + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j); 12.208 + j_dbs_info->prev_cpu_wall = NOW(); 12.209 + } 12.210 + this_dbs_info->cpu = cpu; 12.211 + /* 12.212 + * Start the timerschedule work, when this governor 12.213 + * is used for first time 12.214 + */ 12.215 + if (dbs_enable == 1) { 12.216 + def_sampling_rate = 
policy->cpuinfo.transition_latency * 12.217 + DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; 12.218 + 12.219 + if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) 12.220 + def_sampling_rate = MIN_STAT_SAMPLING_RATE; 12.221 + 12.222 + dbs_tuners_ins.sampling_rate = def_sampling_rate; 12.223 + } 12.224 + dbs_timer_init(this_dbs_info); 12.225 + 12.226 + break; 12.227 + 12.228 + case CPUFREQ_GOV_STOP: 12.229 + dbs_timer_exit(this_dbs_info); 12.230 + dbs_enable--; 12.231 + 12.232 + break; 12.233 + 12.234 + case CPUFREQ_GOV_LIMITS: 12.235 + if (policy->max < this_dbs_info->cur_policy->cur) 12.236 + __cpufreq_driver_target(this_dbs_info->cur_policy, 12.237 + policy->max, CPUFREQ_RELATION_H); 12.238 + else if (policy->min > this_dbs_info->cur_policy->cur) 12.239 + __cpufreq_driver_target(this_dbs_info->cur_policy, 12.240 + policy->min, CPUFREQ_RELATION_L); 12.241 + break; 12.242 + } 12.243 + return 0; 12.244 +} 12.245 + 12.246 +struct cpufreq_governor cpufreq_gov_dbs = { 12.247 + .name = "ondemand", 12.248 + .governor = cpufreq_governor_dbs, 12.249 +};
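The governor's decision rule in dbs_check_cpu() is compact: compute the load over the sampling window, jump straight to policy->max when it exceeds up_threshold (80), and when it falls 10 points below the threshold, pick the lowest frequency that can still carry the load. A standalone sketch of that arithmetic, using policy->cur in place of __cpufreq_driver_getavg() and clamping to min explicitly (the real code lets CPUFREQ_RELATION_L do that); the sample numbers are hypothetical:

    #include <stdio.h>
    #include <stdint.h>

    #define UP_THRESHOLD 80u

    /* Returns the frequency (kHz) the ondemand rule would target. */
    static unsigned int ondemand_target(uint64_t total_ns, uint64_t idle_ns,
                                        unsigned int cur, unsigned int min,
                                        unsigned int max)
    {
        unsigned int load = 0;

        if (total_ns > idle_ns)
            load = (unsigned int)((100 * (total_ns - idle_ns)) / total_ns);

        if (load > UP_THRESHOLD)          /* busy: go to full speed */
            return max;
        if (cur == min)                   /* cannot go lower anyway */
            return cur;
        if (load < UP_THRESHOLD - 10) {   /* idle-ish: lowest freq that
                                           * still carries the load */
            unsigned int next = (cur * load) / (UP_THRESHOLD - 10);
            return next < min ? min : next;
        }
        return cur;                       /* inside the hysteresis band */
    }

    int main(void)
    {
        /* 10 ms window, 7 ms idle => 30% load on a 2 GHz CPU. */
        printf("%u kHz\n", ondemand_target(10000000, 7000000,
                                           2000000, 800000, 2000000));
        return 0;
    }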
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/utility.c	Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,367 @@
+/*
+ *  utility.c - misc functions for cpufreq driver and Px statistic
+ *
+ *  Copyright (C) 2001 Russell King
+ *            (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de>
+ *
+ *  Oct 2005 - Ashok Raj <ashok.raj@intel.com>
+ *    Added handling for CPU hotplug
+ *  Feb 2006 - Jacob Shin <jacob.shin@amd.com>
+ *    Fix handling for CPU hotplug -- affected CPUs
+ *  Feb 2008 - Liu Jinsong <jinsong.liu@intel.com>
+ *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
+ *    and port to Xen hypervisor
+ *    2. some Px statistic interface functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <xen/errno.h>
+#include <xen/cpumask.h>
+#include <xen/types.h>
+#include <xen/spinlock.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <asm/config.h>
+#include <acpi/cpufreq/cpufreq.h>
+#include <public/sysctl.h>
+
+struct cpufreq_driver *cpufreq_driver;
+struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
+struct cpufreq_policy *__read_mostly cpufreq_cpu_policy[NR_CPUS];
+
+/*********************************************************************
+ *                    Px STATISTIC INFO                              *
+ *********************************************************************/
+
+void cpufreq_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
+{
+    uint32_t i;
+    uint64_t now;
+
+    now = NOW();
+
+    for_each_cpu_mask(i, cpumask) {
+        struct pm_px *pxpt = cpufreq_statistic_data[i];
+        struct processor_pminfo *pmpt = processor_pminfo[i];
+        uint64_t total_idle_ns;
+        uint64_t tmp_idle_ns;
+
+        if ( !pxpt || !pmpt )
+            continue;
+
+        total_idle_ns = get_cpu_idle_time(i);
+        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+
+        pxpt->u.last = from;
+        pxpt->u.cur = to;
+        pxpt->u.pt[to].count++;
+        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
+        pxpt->u.pt[from].residency -= tmp_idle_ns;
+
+        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
+
+        pxpt->prev_state_wall = now;
+        pxpt->prev_idle_wall = total_idle_ns;
+    }
+}
+
+int cpufreq_statistic_init(unsigned int cpuid)
+{
+    uint32_t i, count;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    if ( !pmpt )
+        return -EINVAL;
+
+    count = pmpt->perf.state_count;
+
+    if ( !pxpt )
+    {
+        pxpt = xmalloc(struct pm_px);
+        if ( !pxpt )
+            return -ENOMEM;
+        memset(pxpt, 0, sizeof(*pxpt));
+        cpufreq_statistic_data[cpuid] = pxpt;
+    }
+
+    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
+    if (!pxpt->u.trans_pt)
+        return -ENOMEM;
+
+    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
+    if (!pxpt->u.pt) {
+        xfree(pxpt->u.trans_pt);
+        return -ENOMEM;
+    }
+
+    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
+    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
+
+    pxpt->u.total = pmpt->perf.state_count;
+    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
+
+    for (i=0; i < pmpt->perf.state_count; i++)
+        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
+
+    pxpt->prev_state_wall = NOW();
+    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+
+    return 0;
+}
+
+void cpufreq_statistic_exit(unsigned int cpuid)
+{
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+
+    if (!pxpt)
+        return;
+    xfree(pxpt->u.trans_pt);
+    xfree(pxpt->u.pt);
+    memset(pxpt, 0, sizeof(struct pm_px));
+}
+
+void cpufreq_statistic_reset(unsigned int cpuid)
+{
+    uint32_t i, j, count;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    if ( !pxpt || !pmpt )
+        return;
+
+    count = pmpt->perf.state_count;
+
+    for (i=0; i < count; i++) {
+        pxpt->u.pt[i].residency = 0;
+        pxpt->u.pt[i].count = 0;
+
+        for (j=0; j < count; j++)
+            *(pxpt->u.trans_pt + i*count + j) = 0;
+    }
+
+    pxpt->prev_state_wall = NOW();
+    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+}
+
+
+/*********************************************************************
+ *                   FREQUENCY TABLE HELPERS                         *
+ *********************************************************************/
+
+int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
+                                    struct cpufreq_frequency_table *table)
+{
+    unsigned int min_freq = ~0;
+    unsigned int max_freq = 0;
+    unsigned int i;
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if (freq < min_freq)
+            min_freq = freq;
+        if (freq > max_freq)
+            max_freq = freq;
+    }
+
+    policy->min = policy->cpuinfo.min_freq = min_freq;
+    policy->max = policy->cpuinfo.max_freq = max_freq;
+
+    if (policy->min == ~0)
+        return -EINVAL;
+    else
+        return 0;
+}
+
+int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table)
+{
+    unsigned int next_larger = ~0;
+    unsigned int i;
+    unsigned int count = 0;
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq >= policy->min) && (freq <= policy->max))
+            count++;
+        else if ((next_larger > freq) && (freq > policy->max))
+            next_larger = freq;
+    }
+
+    if (!count)
+        policy->max = next_larger;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    return 0;
+}
+
+int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table,
+                                   unsigned int target_freq,
+                                   unsigned int relation,
+                                   unsigned int *index)
+{
+    struct cpufreq_frequency_table optimal = {
+        .index = ~0,
+        .frequency = 0,
+    };
+    struct cpufreq_frequency_table suboptimal = {
+        .index = ~0,
+        .frequency = 0,
+    };
+    unsigned int i;
+
+    switch (relation) {
+    case CPUFREQ_RELATION_H:
+        suboptimal.frequency = ~0;
+        break;
+    case CPUFREQ_RELATION_L:
+        optimal.frequency = ~0;
+        break;
+    }
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq < policy->min) || (freq > policy->max))
+            continue;
+        switch(relation) {
+        case CPUFREQ_RELATION_H:
+            if (freq <= target_freq) {
+                if (freq >= optimal.frequency) {
+                    optimal.frequency = freq;
+                    optimal.index = i;
+                }
+            } else {
+                if (freq <= suboptimal.frequency) {
+                    suboptimal.frequency = freq;
+                    suboptimal.index = i;
+                }
+            }
+            break;
+        case CPUFREQ_RELATION_L:
+            if (freq >= target_freq) {
+                if (freq <= optimal.frequency) {
+                    optimal.frequency = freq;
+                    optimal.index = i;
+                }
+            } else {
+                if (freq >= suboptimal.frequency) {
+                    suboptimal.frequency = freq;
+                    suboptimal.index = i;
+                }
+            }
+            break;
+        }
+    }
+    if (optimal.index > i) {
+        if (suboptimal.index > i)
+            return -EINVAL;
+        *index = suboptimal.index;
+    } else
+        *index = optimal.index;
+
+    return 0;
+}
+
+
+/*********************************************************************
+ *                          GOVERNORS                                *
+ *********************************************************************/
+
+int __cpufreq_driver_target(struct cpufreq_policy *policy,
+                            unsigned int target_freq,
+                            unsigned int relation)
+{
+    int retval = -EINVAL;
+
+    if (cpu_online(policy->cpu) && cpufreq_driver->target)
+        retval = cpufreq_driver->target(policy, target_freq, relation);
+
+    return retval;
+}
+
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    if (!policy)
+        return -EINVAL;
+
+    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
+        ret = cpufreq_driver->getavg(policy->cpu);
+
+    return ret;
+}
+
+
+/*********************************************************************
+ *                           POLICY                                  *
+ *********************************************************************/
+
+/*
+ * data   : current policy.
+ * policy : policy to be set.
+ */
+int __cpufreq_set_policy(struct cpufreq_policy *data,
+                         struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
+
+    if (policy->min > data->min && policy->min > policy->max)
+        return -EINVAL;
+
+    /* verify the cpu speed can be set within this limit */
+    ret = cpufreq_driver->verify(policy);
+    if (ret)
+        return ret;
+
+    data->min = policy->min;
+    data->max = policy->max;
+
+    if (policy->governor != data->governor) {
+        /* save old, working values */
+        struct cpufreq_governor *old_gov = data->governor;
+
+        /* end old governor */
+        if (data->governor)
+            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+
+        /* start new governor */
+        data->governor = policy->governor;
+        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
+            /* new governor failed, so re-start old one */
+            if (old_gov) {
+                data->governor = old_gov;
+                __cpufreq_governor(data, CPUFREQ_GOV_START);
+            }
+            return -EINVAL;
+        }
+        /* might be a policy change, too, so fall through */
+    }
+
+    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+}
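[Editor's note] For orientation, the pieces above combine roughly as follows in a platform driver's ->target hook: the frequency-table helper maps the request onto a table index, the hardware is programmed, and the switch is accounted through cpufreq_statistic_update(). This is a minimal sketch, not code from this changeset; set_px_state() and example_freq_table are hypothetical stand-ins for the driver's hardware write and the table it builds at init time.

    #include <xen/errno.h>
    #include <acpi/cpufreq/cpufreq.h>
    #include <acpi/cpufreq/processor_perf.h>

    /* Hypothetical: the driver's frequency table, built in its ->init hook. */
    extern struct cpufreq_frequency_table *example_freq_table;
    /* Hypothetical: program P-state 'next' on the CPUs covered by 'policy'. */
    extern int set_px_state(struct cpufreq_policy *policy, unsigned int next);

    static int example_target(struct cpufreq_policy *policy,
                              unsigned int target_freq, unsigned int relation)
    {
        struct processor_performance *perf =
            &processor_pminfo[policy->cpu]->perf;
        unsigned int next_state = 0;
        int ret;

        /* Map the requested frequency onto a valid table entry. */
        ret = cpufreq_frequency_table_target(policy, example_freq_table,
                                             target_freq, relation,
                                             &next_state);
        if (ret)
            return ret;

        ret = set_px_state(policy, next_state);
        if (ret)
            return ret;

        /* Record the transition in the per-CPU Px statistics. */
        cpufreq_statistic_update(policy->cpus, perf->state, next_state);
        perf->state = next_state;

        return 0;
    }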
--- a/xen/include/acpi/cpufreq/cpufreq.h	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/include/acpi/cpufreq/cpufreq.h	Fri Sep 26 14:04:38 2008 +0100
@@ -21,6 +21,13 @@
 
 struct cpufreq_governor;
 
+struct acpi_cpufreq_data {
+    struct processor_performance *acpi_data;
+    struct cpufreq_frequency_table *freq_table;
+    unsigned int max_freq;
+    unsigned int cpu_feature;
+};
+
 struct cpufreq_cpuinfo {
     unsigned int max_freq;
     unsigned int min_freq;
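[Editor's note] With acpi_cpufreq_data exported from the shared header rather than kept private to the x86 driver, each arch driver can allocate and fill one instance per CPU in its ->init hook. A minimal sketch, assuming the usual Xen allocation helpers; the function name and error handling are illustrative only:

    #include <xen/errno.h>
    #include <xen/string.h>
    #include <xen/xmalloc.h>
    #include <acpi/cpufreq/cpufreq.h>
    #include <acpi/cpufreq/processor_perf.h>

    static int example_cpu_init(struct cpufreq_policy *policy)
    {
        struct acpi_cpufreq_data *data = xmalloc(struct acpi_cpufreq_data);

        if (!data)
            return -ENOMEM;
        memset(data, 0, sizeof(*data));

        /* Bind the ACPI Px information parsed for this CPU. */
        data->acpi_data = &processor_pminfo[policy->cpu]->perf;

        /* A real driver would go on to build data->freq_table from
         * data->acpi_data->states[] and fill max_freq/cpu_feature. */
        return 0;
    }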
--- a/xen/include/acpi/cpufreq/processor_perf.h	Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/include/acpi/cpufreq/processor_perf.h	Fri Sep 26 14:04:38 2008 +0100
@@ -9,10 +9,10 @@
 int get_cpu_id(u8);
 int powernow_cpufreq_init(void);
 
-void px_statistic_update(cpumask_t, uint8_t, uint8_t);
-int px_statistic_init(unsigned int);
-void px_statistic_exit(unsigned int);
-void px_statistic_reset(unsigned int);
+void cpufreq_statistic_update(cpumask_t, uint8_t, uint8_t);
+int cpufreq_statistic_init(unsigned int);
+void cpufreq_statistic_exit(unsigned int);
+void cpufreq_statistic_reset(unsigned int);
 
 int cpufreq_limit_change(unsigned int);
 
@@ -58,6 +58,6 @@ struct pm_px {
     uint64_t prev_idle_wall;
 };
 
-extern struct pm_px *px_statistic_data[NR_CPUS];
+extern struct pm_px *cpufreq_statistic_data[NR_CPUS];
 
 #endif /* __XEN_PROCESSOR_PM_H__ */
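[Editor's note] The px_statistic_* entry points are renamed, not changed in signature, so callers only need a mechanical substitution. For example, a hypothetical helper that clears the Px counters on every online CPU now reads:

    #include <xen/cpumask.h>
    #include <acpi/cpufreq/processor_perf.h>

    /* Hypothetical helper: reset Px statistics on all online CPUs. */
    static void reset_all_px_statistics(void)
    {
        unsigned int cpu;

        for_each_online_cpu ( cpu )
            cpufreq_statistic_reset(cpu);
    }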