ia64/xen-unstable
changeset 18484:f125e481d8b6
x86: Clean up cpufreq core logic
Clean up cpufreq core logic, which now can cope with cpu
online/offline event, and also dynamic platform limitation event
(_PPC).
Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Fri Sep 12 10:34:50 2008 +0100 (2008-09-12) |
parents | 346c073ed6a4 |
children | 34aed15ba9df |
files | xen/arch/x86/acpi/cpufreq/cpufreq.c xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c xen/arch/x86/acpi/cpufreq/powernow.c xen/arch/x86/acpi/cpufreq/utility.c xen/arch/x86/acpi/pmstat.c xen/arch/x86/acpi/power.c xen/arch/x86/platform_hypercall.c xen/arch/x86/smpboot.c xen/include/acpi/cpufreq/cpufreq.h xen/include/acpi/cpufreq/processor_perf.h xen/include/public/platform.h |
line diff
1.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Thu Sep 11 18:00:06 2008 +0100 1.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Fri Sep 12 10:34:50 2008 +0100 1.3 @@ -32,6 +32,7 @@ 1.4 #include <xen/errno.h> 1.5 #include <xen/delay.h> 1.6 #include <xen/cpumask.h> 1.7 +#include <xen/sched.h> 1.8 #include <xen/timer.h> 1.9 #include <xen/xmalloc.h> 1.10 #include <asm/bug.h> 1.11 @@ -44,12 +45,8 @@ 1.12 #include <acpi/acpi.h> 1.13 #include <acpi/cpufreq/cpufreq.h> 1.14 1.15 -struct processor_pminfo processor_pminfo[NR_CPUS]; 1.16 -struct cpufreq_policy xen_px_policy[NR_CPUS]; 1.17 - 1.18 -static cpumask_t *cpufreq_dom_pt; 1.19 -static unsigned long *cpufreq_dom_mask; 1.20 -static unsigned int cpufreq_dom_max; 1.21 +/* TODO: change to link list later as domain number may be sparse */ 1.22 +static cpumask_t cpufreq_dom_map[NR_CPUS]; 1.23 1.24 enum { 1.25 UNDEFINED_CAPABLE = 0, 1.26 @@ -335,7 +332,7 @@ static int acpi_cpufreq_target(struct cp 1.27 if (unlikely(result)) 1.28 return -ENODEV; 1.29 1.30 - online_policy_cpus = policy->cpus; 1.31 + cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); 1.32 1.33 next_perf_state = data->freq_table[next_state].index; 1.34 if (perf->state == next_perf_state) { 1.35 @@ -390,6 +387,20 @@ static int acpi_cpufreq_target(struct cp 1.36 return result; 1.37 } 1.38 1.39 +static int acpi_cpufreq_verify(struct cpufreq_policy *policy) 1.40 +{ 1.41 + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; 1.42 + struct processor_performance *perf = &processor_pminfo[policy->cpu].perf; 1.43 + 1.44 + if (!policy || !data) 1.45 + return -EINVAL; 1.46 + 1.47 + cpufreq_verify_within_limits(policy, 0, 1.48 + perf->states[perf->platform_limit].core_frequency * 1000); 1.49 + 1.50 + return cpufreq_frequency_table_verify(policy, data->freq_table); 1.51 +} 1.52 + 1.53 static unsigned long 1.54 acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) 1.55 { 1.56 @@ -441,14 +452,6 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol 1.57 perf = 
data->acpi_data; 1.58 policy->shared_type = perf->shared_type; 1.59 1.60 - /* 1.61 - * Currently the latest linux (kernel version 2.6.26) 1.62 - * still has issue when handle the situation _psd HW_ALL coordination. 1.63 - * In Xen hypervisor, we handle _psd HW_ALL coordination in same way as 1.64 - * _psd SW_ALL coordination for the seek of safety. 1.65 - */ 1.66 - policy->cpus = perf->shared_cpu_map; 1.67 - 1.68 /* capability check */ 1.69 if (perf->state_count <= 1) { 1.70 printk("No P-States\n"); 1.71 @@ -496,6 +499,7 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol 1.72 policy->cpuinfo.transition_latency = 1.73 perf->states[i].transition_latency * 1000; 1.74 } 1.75 + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; 1.76 1.77 data->max_freq = perf->states[0].core_frequency * 1000; 1.78 /* table init */ 1.79 @@ -554,114 +558,173 @@ err_unreg: 1.80 return result; 1.81 } 1.82 1.83 -static struct cpufreq_driver acpi_cpufreq_driver = { 1.84 - .target = acpi_cpufreq_target, 1.85 - .init = acpi_cpufreq_cpu_init, 1.86 -}; 1.87 - 1.88 -void cpufreq_dom_exit(void) 1.89 +static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) 1.90 { 1.91 - cpufreq_dom_max = 0; 1.92 - if (cpufreq_dom_mask) 1.93 - xfree(cpufreq_dom_mask); 1.94 - if (cpufreq_dom_pt) 1.95 - xfree(cpufreq_dom_pt); 1.96 -} 1.97 - 1.98 -int cpufreq_dom_init(void) 1.99 -{ 1.100 - unsigned int i; 1.101 - 1.102 - cpufreq_dom_max = 0; 1.103 + struct acpi_cpufreq_data *data = drv_data[policy->cpu]; 1.104 1.105 - for_each_online_cpu(i) { 1.106 - if (cpufreq_dom_max < processor_pminfo[i].perf.domain_info.domain) 1.107 - cpufreq_dom_max = processor_pminfo[i].perf.domain_info.domain; 1.108 + if (data) { 1.109 + drv_data[policy->cpu] = NULL; 1.110 + xfree(data->freq_table); 1.111 + xfree(data); 1.112 } 1.113 - cpufreq_dom_max++; 1.114 - 1.115 - cpufreq_dom_mask = xmalloc_array(unsigned long, 1.116 - BITS_TO_LONGS(cpufreq_dom_max)); 1.117 - if (!cpufreq_dom_mask) 1.118 - return -ENOMEM; 1.119 - bitmap_zero(cpufreq_dom_mask, 
cpufreq_dom_max); 1.120 - 1.121 - cpufreq_dom_pt = xmalloc_array(cpumask_t, cpufreq_dom_max); 1.122 - if (!cpufreq_dom_pt) 1.123 - return -ENOMEM; 1.124 - memset(cpufreq_dom_pt, 0, cpufreq_dom_max * sizeof(cpumask_t)); 1.125 - 1.126 - for_each_online_cpu(i) { 1.127 - __set_bit(processor_pminfo[i].perf.domain_info.domain, cpufreq_dom_mask); 1.128 - cpu_set(i, cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]); 1.129 - } 1.130 - 1.131 - for_each_online_cpu(i) 1.132 - processor_pminfo[i].perf.shared_cpu_map = 1.133 - cpufreq_dom_pt[processor_pminfo[i].perf.domain_info.domain]; 1.134 1.135 return 0; 1.136 } 1.137 1.138 -static int cpufreq_cpu_init(void) 1.139 -{ 1.140 - int i, ret = 0; 1.141 - 1.142 - for_each_online_cpu(i) { 1.143 - xen_px_policy[i].cpu = i; 1.144 +static struct cpufreq_driver acpi_cpufreq_driver = { 1.145 + .verify = acpi_cpufreq_verify, 1.146 + .target = acpi_cpufreq_target, 1.147 + .init = acpi_cpufreq_cpu_init, 1.148 + .exit = acpi_cpufreq_cpu_exit, 1.149 +}; 1.150 1.151 - ret = px_statistic_init(i); 1.152 - if (ret) 1.153 - return ret; 1.154 +int cpufreq_limit_change(unsigned int cpu) 1.155 +{ 1.156 + struct processor_performance *perf = &processor_pminfo[cpu].perf; 1.157 + struct cpufreq_policy *data = cpufreq_cpu_policy[cpu]; 1.158 + struct cpufreq_policy policy; 1.159 1.160 - ret = acpi_cpufreq_cpu_init(&xen_px_policy[i]); 1.161 + if (!cpu_online(cpu) || !data) 1.162 + return -ENODEV; 1.163 + 1.164 + if ((perf->platform_limit < 0) || 1.165 + (perf->platform_limit >= perf->state_count)) 1.166 + return -EINVAL; 1.167 + 1.168 + memcpy(&policy, data, sizeof(struct cpufreq_policy)); 1.169 + 1.170 + policy.max = 1.171 + perf->states[perf->platform_limit].core_frequency * 1000; 1.172 + 1.173 + return __cpufreq_set_policy(data, &policy); 1.174 +} 1.175 + 1.176 +int cpufreq_add_cpu(unsigned int cpu) 1.177 +{ 1.178 + int ret = 0; 1.179 + unsigned int firstcpu; 1.180 + unsigned int dom; 1.181 + unsigned int j; 1.182 + struct cpufreq_policy 
new_policy; 1.183 + struct cpufreq_policy *policy; 1.184 + struct processor_performance *perf = &processor_pminfo[cpu].perf; 1.185 + 1.186 + /* to protect the case when Px was not controlled by xen */ 1.187 + if (!(perf->init & XEN_PX_INIT)) 1.188 + return 0; 1.189 + 1.190 + if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu]) 1.191 + return -EINVAL; 1.192 + 1.193 + ret = px_statistic_init(cpu); 1.194 + if (ret) 1.195 + return ret; 1.196 + 1.197 + dom = perf->domain_info.domain; 1.198 + if (cpus_weight(cpufreq_dom_map[dom])) { 1.199 + /* share policy with the first cpu since on same boat */ 1.200 + firstcpu = first_cpu(cpufreq_dom_map[dom]); 1.201 + policy = cpufreq_cpu_policy[firstcpu]; 1.202 + 1.203 + cpufreq_cpu_policy[cpu] = policy; 1.204 + cpu_set(cpu, cpufreq_dom_map[dom]); 1.205 + cpu_set(cpu, policy->cpus); 1.206 + 1.207 + printk(KERN_EMERG"adding CPU %u\n", cpu); 1.208 + } else { 1.209 + /* for the first cpu, setup policy and do init work */ 1.210 + policy = xmalloc(struct cpufreq_policy); 1.211 + if (!policy) { 1.212 + px_statistic_exit(cpu); 1.213 + return -ENOMEM; 1.214 + } 1.215 + memset(policy, 0, sizeof(struct cpufreq_policy)); 1.216 + 1.217 + cpufreq_cpu_policy[cpu] = policy; 1.218 + cpu_set(cpu, cpufreq_dom_map[dom]); 1.219 + cpu_set(cpu, policy->cpus); 1.220 + 1.221 + policy->cpu = cpu; 1.222 + ret = cpufreq_driver->init(policy); 1.223 if (ret) 1.224 - return ret; 1.225 + goto err1; 1.226 + printk(KERN_EMERG"CPU %u initialization completed\n", cpu); 1.227 } 1.228 + 1.229 + /* 1.230 + * After get full cpumap of the coordination domain, 1.231 + * we can safely start gov here. 
1.232 + */ 1.233 + if (cpus_weight(cpufreq_dom_map[dom]) == 1.234 + perf->domain_info.num_processors) { 1.235 + memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); 1.236 + policy->governor = NULL; 1.237 + ret = __cpufreq_set_policy(policy, &new_policy); 1.238 + if (ret) 1.239 + goto err2; 1.240 + } 1.241 + 1.242 + return 0; 1.243 + 1.244 +err2: 1.245 + cpufreq_driver->exit(policy); 1.246 +err1: 1.247 + for_each_cpu_mask(j, cpufreq_dom_map[dom]) { 1.248 + cpufreq_cpu_policy[j] = NULL; 1.249 + px_statistic_exit(j); 1.250 + } 1.251 + 1.252 + cpus_clear(cpufreq_dom_map[dom]); 1.253 + xfree(policy); 1.254 return ret; 1.255 } 1.256 1.257 -int cpufreq_dom_dbs(unsigned int event) 1.258 +int cpufreq_del_cpu(unsigned int cpu) 1.259 { 1.260 - unsigned int cpu, dom; 1.261 - int ret = 0; 1.262 + unsigned int dom; 1.263 + struct cpufreq_policy *policy; 1.264 + struct processor_performance *perf = &processor_pminfo[cpu].perf; 1.265 1.266 - for (dom = 0; dom < cpufreq_dom_max; dom++) { 1.267 - if (!test_bit(dom, cpufreq_dom_mask)) 1.268 - continue; 1.269 - cpu = first_cpu(cpufreq_dom_pt[dom]); 1.270 - ret = cpufreq_governor_dbs(&xen_px_policy[cpu], event); 1.271 - if (ret) 1.272 - return ret; 1.273 + /* to protect the case when Px was not controlled by xen */ 1.274 + if (!(perf->init & XEN_PX_INIT)) 1.275 + return 0; 1.276 + 1.277 + if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu]) 1.278 + return -EINVAL; 1.279 + 1.280 + dom = perf->domain_info.domain; 1.281 + policy = cpufreq_cpu_policy[cpu]; 1.282 + 1.283 + printk(KERN_EMERG"deleting CPU %u\n", cpu); 1.284 + 1.285 + /* for the first cpu of the domain, stop gov */ 1.286 + if (cpus_weight(cpufreq_dom_map[dom]) == 1.287 + perf->domain_info.num_processors) 1.288 + __cpufreq_governor(policy, CPUFREQ_GOV_STOP); 1.289 + 1.290 + cpufreq_cpu_policy[cpu] = NULL; 1.291 + cpu_clear(cpu, policy->cpus); 1.292 + cpu_clear(cpu, cpufreq_dom_map[dom]); 1.293 + px_statistic_exit(cpu); 1.294 + 1.295 + /* for the last cpu of the domain, 
clean room */ 1.296 + /* It's safe here to free freq_table, drv_data and policy */ 1.297 + if (!cpus_weight(cpufreq_dom_map[dom])) { 1.298 + cpufreq_driver->exit(policy); 1.299 + xfree(policy); 1.300 } 1.301 - return ret; 1.302 + 1.303 + return 0; 1.304 } 1.305 1.306 -int acpi_cpufreq_init(void) 1.307 +static int __init cpufreq_driver_init(void) 1.308 { 1.309 int ret = 0; 1.310 - 1.311 - /* setup cpumask of psd dom and shared cpu map of cpu */ 1.312 - ret = cpufreq_dom_init(); 1.313 - if (ret) 1.314 - goto err; 1.315 - 1.316 - /* setup cpufreq driver */ 1.317 - cpufreq_driver = &acpi_cpufreq_driver; 1.318 1.319 - /* setup cpufreq infrastructure */ 1.320 - ret = cpufreq_cpu_init(); 1.321 - if (ret) 1.322 - goto err; 1.323 - 1.324 - /* setup cpufreq dbs according to dom coordiation */ 1.325 - ret = cpufreq_dom_dbs(CPUFREQ_GOV_START); 1.326 - if (ret) 1.327 - goto err; 1.328 + if ((cpufreq_controller == FREQCTL_xen) && 1.329 + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) 1.330 + ret = cpufreq_register_driver(&acpi_cpufreq_driver); 1.331 1.332 return ret; 1.333 - 1.334 -err: 1.335 - cpufreq_dom_exit(); 1.336 - return ret; 1.337 } 1.338 +__initcall(cpufreq_driver_init);
2.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c Thu Sep 11 18:00:06 2008 +0100 2.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c Fri Sep 12 10:34:50 2008 +0100 2.3 @@ -238,4 +238,9 @@ int cpufreq_governor_dbs(struct cpufreq_ 2.4 break; 2.5 } 2.6 return 0; 2.7 -} 2.8 +} 2.9 + 2.10 +struct cpufreq_governor cpufreq_gov_dbs = { 2.11 + .name = "ondemand", 2.12 + .governor = cpufreq_governor_dbs, 2.13 +};
3.1 --- a/xen/arch/x86/acpi/cpufreq/powernow.c Thu Sep 11 18:00:06 2008 +0100 3.2 +++ b/xen/arch/x86/acpi/cpufreq/powernow.c Fri Sep 12 10:34:50 2008 +0100 3.3 @@ -50,7 +50,7 @@ 3.4 #define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */ 3.5 3.6 extern struct processor_pminfo processor_pminfo[NR_CPUS]; 3.7 -extern struct cpufreq_policy xen_px_policy[NR_CPUS]; 3.8 +extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS]; 3.9 3.10 struct powernow_cpufreq_data { 3.11 struct processor_performance *acpi_data; 3.12 @@ -281,9 +281,9 @@ int powernow_cpufreq_init(void) 3.13 3.14 /* setup cpufreq infrastructure */ 3.15 for_each_online_cpu(i) { 3.16 - xen_px_policy[i].cpu = i; 3.17 + cpufreq_cpu_policy[i]->cpu = i; 3.18 3.19 - ret = powernow_cpufreq_cpu_init(&xen_px_policy[i]); 3.20 + ret = powernow_cpufreq_cpu_init(cpufreq_cpu_policy[i]); 3.21 if (ret) 3.22 goto cpufreq_init_out; 3.23 } 3.24 @@ -293,7 +293,7 @@ int powernow_cpufreq_init(void) 3.25 if (!cpu_isset(dom, dom_mask)) 3.26 continue; 3.27 i = first_cpu(pt[dom]); 3.28 - ret = cpufreq_governor_dbs(&xen_px_policy[i], CPUFREQ_GOV_START); 3.29 + ret = cpufreq_governor_dbs(cpufreq_cpu_policy[i], CPUFREQ_GOV_START); 3.30 if (ret) 3.31 goto cpufreq_init_out; 3.32 }
4.1 --- a/xen/arch/x86/acpi/cpufreq/utility.c Thu Sep 11 18:00:06 2008 +0100 4.2 +++ b/xen/arch/x86/acpi/cpufreq/utility.c Fri Sep 12 10:34:50 2008 +0100 4.3 @@ -31,47 +31,14 @@ 4.4 #include <acpi/cpufreq/cpufreq.h> 4.5 #include <public/sysctl.h> 4.6 4.7 -struct cpufreq_driver *cpufreq_driver; 4.8 +struct cpufreq_driver *cpufreq_driver; 4.9 +struct processor_pminfo processor_pminfo[NR_CPUS]; 4.10 +struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS]; 4.11 4.12 /********************************************************************* 4.13 * Px STATISTIC INFO * 4.14 *********************************************************************/ 4.15 4.16 -void px_statistic_suspend(void) 4.17 -{ 4.18 - int cpu; 4.19 - uint64_t now; 4.20 - 4.21 - now = NOW(); 4.22 - 4.23 - for_each_online_cpu(cpu) { 4.24 - struct pm_px *pxpt = &px_statistic_data[cpu]; 4.25 - uint64_t total_idle_ns; 4.26 - uint64_t tmp_idle_ns; 4.27 - 4.28 - total_idle_ns = get_cpu_idle_time(cpu); 4.29 - tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall; 4.30 - 4.31 - pxpt->u.pt[pxpt->u.cur].residency += 4.32 - now - pxpt->prev_state_wall; 4.33 - pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns; 4.34 - } 4.35 -} 4.36 - 4.37 -void px_statistic_resume(void) 4.38 -{ 4.39 - int cpu; 4.40 - uint64_t now; 4.41 - 4.42 - now = NOW(); 4.43 - 4.44 - for_each_online_cpu(cpu) { 4.45 - struct pm_px *pxpt = &px_statistic_data[cpu]; 4.46 - pxpt->prev_state_wall = now; 4.47 - pxpt->prev_idle_wall = get_cpu_idle_time(cpu); 4.48 - } 4.49 -} 4.50 - 4.51 void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to) 4.52 { 4.53 uint32_t i; 4.54 @@ -101,7 +68,7 @@ void px_statistic_update(cpumask_t cpuma 4.55 } 4.56 } 4.57 4.58 -int px_statistic_init(int cpuid) 4.59 +int px_statistic_init(unsigned int cpuid) 4.60 { 4.61 uint32_t i, count; 4.62 struct pm_px *pxpt = &px_statistic_data[cpuid]; 4.63 @@ -123,7 +90,7 @@ int px_statistic_init(int cpuid) 4.64 memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val))); 4.65 4.66 
pxpt->u.total = pmpt->perf.state_count; 4.67 - pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc; 4.68 + pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit; 4.69 4.70 for (i=0; i < pmpt->perf.state_count; i++) 4.71 pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency; 4.72 @@ -134,7 +101,16 @@ int px_statistic_init(int cpuid) 4.73 return 0; 4.74 } 4.75 4.76 -void px_statistic_reset(int cpuid) 4.77 +void px_statistic_exit(unsigned int cpuid) 4.78 +{ 4.79 + struct pm_px *pxpt = &px_statistic_data[cpuid]; 4.80 + 4.81 + xfree(pxpt->u.trans_pt); 4.82 + xfree(pxpt->u.pt); 4.83 + memset(pxpt, 0, sizeof(struct pm_px)); 4.84 +} 4.85 + 4.86 +void px_statistic_reset(unsigned int cpuid) 4.87 { 4.88 uint32_t i, j, count; 4.89 struct pm_px *pxpt = &px_statistic_data[cpuid]; 4.90 @@ -184,6 +160,38 @@ int cpufreq_frequency_table_cpuinfo(stru 4.91 return 0; 4.92 } 4.93 4.94 +int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, 4.95 + struct cpufreq_frequency_table *table) 4.96 +{ 4.97 + unsigned int next_larger = ~0; 4.98 + unsigned int i; 4.99 + unsigned int count = 0; 4.100 + 4.101 + if (!cpu_online(policy->cpu)) 4.102 + return -EINVAL; 4.103 + 4.104 + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, 4.105 + policy->cpuinfo.max_freq); 4.106 + 4.107 + for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { 4.108 + unsigned int freq = table[i].frequency; 4.109 + if (freq == CPUFREQ_ENTRY_INVALID) 4.110 + continue; 4.111 + if ((freq >= policy->min) && (freq <= policy->max)) 4.112 + count++; 4.113 + else if ((next_larger > freq) && (freq > policy->max)) 4.114 + next_larger = freq; 4.115 + } 4.116 + 4.117 + if (!count) 4.118 + policy->max = next_larger; 4.119 + 4.120 + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, 4.121 + policy->cpuinfo.max_freq); 4.122 + 4.123 + return 0; 4.124 +} 4.125 + 4.126 int cpufreq_frequency_table_target(struct cpufreq_policy *policy, 4.127 struct cpufreq_frequency_table *table, 
4.128 unsigned int target_freq, 4.129 @@ -289,57 +297,51 @@ int __cpufreq_driver_getavg(struct cpufr 4.130 4.131 4.132 /********************************************************************* 4.133 - * CPUFREQ SUSPEND/RESUME * 4.134 + * POLICY * 4.135 *********************************************************************/ 4.136 4.137 -void cpufreq_suspend(void) 4.138 +/* 4.139 + * data : current policy. 4.140 + * policy : policy to be set. 4.141 + */ 4.142 +int __cpufreq_set_policy(struct cpufreq_policy *data, 4.143 + struct cpufreq_policy *policy) 4.144 { 4.145 - int cpu; 4.146 + int ret = 0; 4.147 4.148 - /* to protect the case when Px was not controlled by xen */ 4.149 - for_each_online_cpu(cpu) { 4.150 - struct processor_performance *perf = &processor_pminfo[cpu].perf; 4.151 + memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo)); 4.152 4.153 - if (!(perf->init & XEN_PX_INIT)) 4.154 - return; 4.155 + if (policy->min > data->min && policy->min > policy->max) 4.156 + return -EINVAL; 4.157 + 4.158 + /* verify the cpu speed can be set within this limit */ 4.159 + ret = cpufreq_driver->verify(policy); 4.160 + if (ret) 4.161 + return ret; 4.162 + 4.163 + data->min = policy->min; 4.164 + data->max = policy->max; 4.165 + 4.166 + if (policy->governor != data->governor) { 4.167 + /* save old, working values */ 4.168 + struct cpufreq_governor *old_gov = data->governor; 4.169 + 4.170 + /* end old governor */ 4.171 + if (data->governor) 4.172 + __cpufreq_governor(data, CPUFREQ_GOV_STOP); 4.173 + 4.174 + /* start new governor */ 4.175 + data->governor = policy->governor; 4.176 + if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { 4.177 + /* new governor failed, so re-start old one */ 4.178 + if (old_gov) { 4.179 + data->governor = old_gov; 4.180 + __cpufreq_governor(data, CPUFREQ_GOV_START); 4.181 + } 4.182 + return -EINVAL; 4.183 + } 4.184 + /* might be a policy change, too, so fall through */ 4.185 } 4.186 4.187 - cpufreq_dom_dbs(CPUFREQ_GOV_STOP); 4.188 - 
4.189 - cpufreq_dom_exit(); 4.190 - 4.191 - px_statistic_suspend(); 4.192 + return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); 4.193 } 4.194 - 4.195 -int cpufreq_resume(void) 4.196 -{ 4.197 - int cpu, ret = 0; 4.198 - 4.199 - /* 1. to protect the case when Px was not controlled by xen */ 4.200 - /* 2. set state and resume flag to sync cpu to right state and freq */ 4.201 - for_each_online_cpu(cpu) { 4.202 - struct processor_performance *perf = &processor_pminfo[cpu].perf; 4.203 - struct cpufreq_policy *policy = &xen_px_policy[cpu]; 4.204 - 4.205 - if (!(perf->init & XEN_PX_INIT)) 4.206 - goto err; 4.207 - perf->state = 0; 4.208 - policy->resume = 1; 4.209 - } 4.210 - 4.211 - px_statistic_resume(); 4.212 - 4.213 - ret = cpufreq_dom_init(); 4.214 - if (ret) 4.215 - goto err; 4.216 - 4.217 - ret = cpufreq_dom_dbs(CPUFREQ_GOV_START); 4.218 - if (ret) 4.219 - goto err; 4.220 - 4.221 - return ret; 4.222 - 4.223 -err: 4.224 - cpufreq_dom_exit(); 4.225 - return ret; 4.226 -}
5.1 --- a/xen/arch/x86/acpi/pmstat.c Thu Sep 11 18:00:06 2008 +0100 5.2 +++ b/xen/arch/x86/acpi/pmstat.c Fri Sep 12 10:34:50 2008 +0100 5.3 @@ -78,7 +78,7 @@ int do_get_pm_info(struct xen_sysctl_get 5.4 tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall; 5.5 5.6 now = NOW(); 5.7 - pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc; 5.8 + pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit; 5.9 pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall; 5.10 pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns; 5.11 pxpt->prev_state_wall = now;
6.1 --- a/xen/arch/x86/acpi/power.c Thu Sep 11 18:00:06 2008 +0100 6.2 +++ b/xen/arch/x86/acpi/power.c Fri Sep 12 10:34:50 2008 +0100 6.3 @@ -133,8 +133,6 @@ static int enter_state(u32 state) 6.4 6.5 freeze_domains(); 6.6 6.7 - cpufreq_suspend(); 6.8 - 6.9 disable_nonboot_cpus(); 6.10 if ( num_online_cpus() != 1 ) 6.11 { 6.12 @@ -142,6 +140,8 @@ static int enter_state(u32 state) 6.13 goto enable_cpu; 6.14 } 6.15 6.16 + cpufreq_del_cpu(0); 6.17 + 6.18 hvm_cpu_down(); 6.19 6.20 acpi_sleep_prepare(state); 6.21 @@ -189,8 +189,8 @@ static int enter_state(u32 state) 6.22 BUG(); 6.23 6.24 enable_cpu: 6.25 + cpufreq_add_cpu(0); 6.26 enable_nonboot_cpus(); 6.27 - cpufreq_resume(); 6.28 thaw_domains(); 6.29 spin_unlock(&pm_lock); 6.30 return error;
7.1 --- a/xen/arch/x86/platform_hypercall.c Thu Sep 11 18:00:06 2008 +0100 7.2 +++ b/xen/arch/x86/platform_hypercall.c Fri Sep 12 10:34:50 2008 +0100 7.3 @@ -393,7 +393,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe 7.4 memcpy ((void *)&pxpt->status_register, 7.5 (void *)&xenpxpt->status_register, 7.6 sizeof(struct xen_pct_register)); 7.7 - pxpt->init |= XEN_PX_PCT; 7.8 } 7.9 if ( xenpxpt->flags & XEN_PX_PSS ) 7.10 { 7.11 @@ -411,7 +410,6 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe 7.12 break; 7.13 } 7.14 pxpt->state_count = xenpxpt->state_count; 7.15 - pxpt->init |= XEN_PX_PSS; 7.16 } 7.17 if ( xenpxpt->flags & XEN_PX_PSD ) 7.18 { 7.19 @@ -419,27 +417,34 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe 7.20 memcpy ((void *)&pxpt->domain_info, 7.21 (void *)&xenpxpt->domain_info, 7.22 sizeof(struct xen_psd_package)); 7.23 - pxpt->init |= XEN_PX_PSD; 7.24 } 7.25 if ( xenpxpt->flags & XEN_PX_PPC ) 7.26 { 7.27 - pxpt->ppc = xenpxpt->ppc; 7.28 - pxpt->init |= XEN_PX_PPC; 7.29 + pxpt->platform_limit = xenpxpt->platform_limit; 7.30 + 7.31 + if ( pxpt->init == XEN_PX_INIT ) 7.32 + { 7.33 + ret = cpufreq_limit_change(cpuid); 7.34 + break; 7.35 + } 7.36 } 7.37 7.38 - if ( pxpt->init == ( XEN_PX_PCT | XEN_PX_PSS | 7.39 - XEN_PX_PSD | XEN_PX_PPC ) ) 7.40 + if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS | 7.41 + XEN_PX_PSD | XEN_PX_PPC ) ) 7.42 { 7.43 - pxpt->init |= XEN_PX_INIT; 7.44 + pxpt->init = XEN_PX_INIT; 7.45 cpu_count++; 7.46 - } 7.47 - if ( cpu_count == num_online_cpus() ) 7.48 - { 7.49 - if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) 7.50 + 7.51 + /* Currently we only handle Intel and AMD processor */ 7.52 + if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) 7.53 + ret = cpufreq_add_cpu(cpuid); 7.54 + else if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && 7.55 + (cpu_count == num_online_cpus()) ) 7.56 ret = powernow_cpufreq_init(); 7.57 else 7.58 - ret = acpi_cpufreq_init(); 7.59 + break; 7.60 } 7.61 + 7.62 break; 7.63 } 7.64
8.1 --- a/xen/arch/x86/smpboot.c Thu Sep 11 18:00:06 2008 +0100 8.2 +++ b/xen/arch/x86/smpboot.c Fri Sep 12 10:34:50 2008 +0100 8.3 @@ -55,6 +55,7 @@ 8.4 #include <mach_wakecpu.h> 8.5 #include <smpboot_hooks.h> 8.6 #include <xen/stop_machine.h> 8.7 +#include <acpi/cpufreq/processor_perf.h> 8.8 8.9 #define set_kernel_exec(x, y) (0) 8.10 #define setup_trampoline() (bootsym_phys(trampoline_realmode_entry)) 8.11 @@ -1232,6 +1233,8 @@ int __cpu_disable(void) 8.12 mdelay(1); 8.13 local_irq_disable(); 8.14 8.15 + cpufreq_del_cpu(cpu); 8.16 + 8.17 time_suspend(); 8.18 8.19 remove_siblinginfo(cpu); 8.20 @@ -1421,6 +1424,8 @@ int __devinit __cpu_up(unsigned int cpu) 8.21 mb(); 8.22 process_pending_timers(); 8.23 } 8.24 + 8.25 + cpufreq_add_cpu(cpu); 8.26 return 0; 8.27 } 8.28
9.1 --- a/xen/include/acpi/cpufreq/cpufreq.h Thu Sep 11 18:00:06 2008 +0100 9.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h Fri Sep 12 10:34:50 2008 +0100 9.3 @@ -19,6 +19,8 @@ 9.4 9.5 #define CPUFREQ_NAME_LEN 16 9.6 9.7 +struct cpufreq_governor; 9.8 + 9.9 struct cpufreq_cpuinfo { 9.10 unsigned int max_freq; 9.11 unsigned int min_freq; 9.12 @@ -30,16 +32,21 @@ struct cpufreq_policy { 9.13 unsigned int shared_type; /* ANY or ALL affected CPUs 9.14 should set cpufreq */ 9.15 unsigned int cpu; /* cpu nr of registered CPU */ 9.16 - struct cpufreq_cpuinfo cpuinfo; /* see above */ 9.17 + struct cpufreq_cpuinfo cpuinfo; 9.18 9.19 unsigned int min; /* in kHz */ 9.20 unsigned int max; /* in kHz */ 9.21 unsigned int cur; /* in kHz, only needed if cpufreq 9.22 * governors are used */ 9.23 + struct cpufreq_governor *governor; 9.24 + 9.25 unsigned int resume; /* flag for cpufreq 1st run 9.26 * S3 wakeup, hotplug cpu, etc */ 9.27 }; 9.28 -extern struct cpufreq_policy xen_px_policy[NR_CPUS]; 9.29 +extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS]; 9.30 + 9.31 +extern int __cpufreq_set_policy(struct cpufreq_policy *data, 9.32 + struct cpufreq_policy *policy); 9.33 9.34 #define CPUFREQ_SHARED_TYPE_NONE (0) /* None */ 9.35 #define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */ 9.36 @@ -64,12 +71,27 @@ struct cpufreq_freqs { 9.37 #define CPUFREQ_GOV_STOP 2 9.38 #define CPUFREQ_GOV_LIMITS 3 9.39 9.40 +struct cpufreq_governor { 9.41 + char name[CPUFREQ_NAME_LEN]; 9.42 + int (*governor)(struct cpufreq_policy *policy, 9.43 + unsigned int event); 9.44 +}; 9.45 + 9.46 +extern struct cpufreq_governor cpufreq_gov_dbs; 9.47 +#define CPUFREQ_DEFAULT_GOVERNOR &cpufreq_gov_dbs 9.48 + 9.49 /* pass a target to the cpufreq driver */ 9.50 extern int __cpufreq_driver_target(struct cpufreq_policy *policy, 9.51 unsigned int target_freq, 9.52 unsigned int relation); 9.53 extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy); 9.54 9.55 +static __inline__ int 9.56 
+__cpufreq_governor(struct cpufreq_policy *policy, unsigned int event) 9.57 +{ 9.58 + return policy->governor->governor(policy, event); 9.59 +} 9.60 + 9.61 9.62 /********************************************************************* 9.63 * CPUFREQ DRIVER INTERFACE * 9.64 @@ -91,7 +113,50 @@ struct cpufreq_driver { 9.65 9.66 extern struct cpufreq_driver *cpufreq_driver; 9.67 9.68 -void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state); 9.69 +static __inline__ 9.70 +int cpufreq_register_driver(struct cpufreq_driver *driver_data) 9.71 +{ 9.72 + if (!driver_data || 9.73 + !driver_data->init || 9.74 + !driver_data->exit || 9.75 + !driver_data->verify || 9.76 + !driver_data->target) 9.77 + return -EINVAL; 9.78 + 9.79 + if (cpufreq_driver) 9.80 + return -EBUSY; 9.81 + 9.82 + cpufreq_driver = driver_data; 9.83 + return 0; 9.84 +} 9.85 + 9.86 +static __inline__ 9.87 +int cpufreq_unregister_driver(struct cpufreq_driver *driver) 9.88 +{ 9.89 + if (!cpufreq_driver || (driver != cpufreq_driver)) 9.90 + return -EINVAL; 9.91 + 9.92 + cpufreq_driver = NULL; 9.93 + return 0; 9.94 +} 9.95 + 9.96 +static __inline__ 9.97 +void cpufreq_verify_within_limits(struct cpufreq_policy *policy, 9.98 + unsigned int min, unsigned int max) 9.99 +{ 9.100 + if (policy->min < min) 9.101 + policy->min = min; 9.102 + if (policy->max < min) 9.103 + policy->max = min; 9.104 + if (policy->min > max) 9.105 + policy->min = max; 9.106 + if (policy->max > max) 9.107 + policy->max = max; 9.108 + if (policy->min > policy->max) 9.109 + policy->min = policy->max; 9.110 + return; 9.111 +} 9.112 + 9.113 9.114 /********************************************************************* 9.115 * FREQUENCY TABLE HELPERS * 9.116 @@ -109,6 +174,9 @@ struct cpufreq_frequency_table { 9.117 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, 9.118 struct cpufreq_frequency_table *table); 9.119 9.120 +int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, 9.121 + struct 
cpufreq_frequency_table *table); 9.122 + 9.123 int cpufreq_frequency_table_target(struct cpufreq_policy *policy, 9.124 struct cpufreq_frequency_table *table, 9.125 unsigned int target_freq,
10.1 --- a/xen/include/acpi/cpufreq/processor_perf.h Thu Sep 11 18:00:06 2008 +0100 10.2 +++ b/xen/include/acpi/cpufreq/processor_perf.h Fri Sep 12 10:34:50 2008 +0100 10.3 @@ -7,26 +7,23 @@ 10.4 #define XEN_PX_INIT 0x80000000 10.5 10.6 int get_cpu_id(u8); 10.7 -int acpi_cpufreq_init(void); 10.8 int powernow_cpufreq_init(void); 10.9 10.10 void px_statistic_update(cpumask_t, uint8_t, uint8_t); 10.11 -int px_statistic_init(int); 10.12 -void px_statistic_reset(int); 10.13 -void px_statistic_suspend(void); 10.14 -void px_statistic_resume(void); 10.15 +int px_statistic_init(unsigned int); 10.16 +void px_statistic_exit(unsigned int); 10.17 +void px_statistic_reset(unsigned int); 10.18 10.19 -void cpufreq_dom_exit(void); 10.20 -int cpufreq_dom_init(void); 10.21 -int cpufreq_dom_dbs(unsigned int); 10.22 -void cpufreq_suspend(void); 10.23 -int cpufreq_resume(void); 10.24 +int cpufreq_limit_change(unsigned int); 10.25 + 10.26 +int cpufreq_add_cpu(unsigned int); 10.27 +int cpufreq_del_cpu(unsigned int); 10.28 10.29 uint64_t get_cpu_idle_time(unsigned int); 10.30 10.31 struct processor_performance { 10.32 uint32_t state; 10.33 - uint32_t ppc; 10.34 + uint32_t platform_limit; 10.35 struct xen_pct_register control_register; 10.36 struct xen_pct_register status_register; 10.37 uint32_t state_count;
11.1 --- a/xen/include/public/platform.h Thu Sep 11 18:00:06 2008 +0100 11.2 +++ b/xen/include/public/platform.h Fri Sep 12 10:34:50 2008 +0100 11.3 @@ -289,7 +289,7 @@ struct xen_psd_package { 11.4 11.5 struct xen_processor_performance { 11.6 uint32_t flags; /* flag for Px sub info type */ 11.7 - uint32_t ppc; /* Platform limitation on freq usage */ 11.8 + uint32_t platform_limit; /* Platform limitation on freq usage */ 11.9 struct xen_pct_register control_register; 11.10 struct xen_pct_register status_register; 11.11 uint32_t state_count; /* total available performance states */