xen/arch/x86/acpi/cpufreq/cpufreq.c @ 18721:c3362551a424 (ia64/xen-unstable)

Cpufreq statistic update for SW_ANY coordination

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Author:   Keir Fraser <keir.fraser@citrix.com>
Date:     Mon Oct 27 10:06:58 2008 +0000
Parent:   19b0a4f91712
Child:    303b1014f91e

/*
 * cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $)
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * Feb 2008 - Liu Jinsong <jinsong.liu@intel.com>
 *     porting acpi-cpufreq.c from Linux 2.6.23 to Xen hypervisor
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <xen/types.h>
#include <xen/errno.h>
#include <xen/delay.h>
#include <xen/cpumask.h>
#include <xen/sched.h>
#include <xen/timer.h>
#include <xen/xmalloc.h>
#include <asm/bug.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/config.h>
#include <asm/processor.h>
#include <asm/percpu.h>
#include <asm/cpufeature.h>
#include <acpi/acpi.h>
#include <acpi/cpufreq/cpufreq.h>

enum {
    UNDEFINED_CAPABLE = 0,
    SYSTEM_INTEL_MSR_CAPABLE,
    SYSTEM_IO_CAPABLE,
};

#define INTEL_MSR_RANGE (0xffff)
#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1)

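/*
 * PERF_CTL/PERF_STATUS carry the P-state value in their low 16 bits
 * (INTEL_MSR_RANGE), and CPUID.06H:ECX bit 0 advertises the
 * APERF/MPERF MSR pair used by get_measured_perf() below.
 */
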
static struct acpi_cpufreq_data *drv_data[NR_CPUS];

static struct cpufreq_driver acpi_cpufreq_driver;

static int check_est_cpu(unsigned int cpuid)
{
    struct cpuinfo_x86 *cpu = &cpu_data[cpuid];

    if (cpu->x86_vendor != X86_VENDOR_INTEL ||
        !cpu_has(cpu, X86_FEATURE_EST))
        return 0;

    return 1;
}

static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
    struct processor_performance *perf;
    int i;

    perf = data->acpi_data;

    for (i=0; i<perf->state_count; i++) {
        if (value == perf->states[i].status)
            return data->freq_table[i].frequency;
    }
    return 0;
}

static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
    int i;
    struct processor_performance *perf;

    msr &= INTEL_MSR_RANGE;
    perf = data->acpi_data;

    for (i=0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
        if (msr == perf->states[data->freq_table[i].index].status)
            return data->freq_table[i].frequency;
    }
    return data->freq_table[0].frequency;
}

static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
    switch (data->cpu_feature) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        return extract_msr(val, data);
    case SYSTEM_IO_CAPABLE:
        return extract_io(val, data);
    default:
        return 0;
    }
}

struct msr_addr {
    u32 reg;
};

struct io_addr {
    u16 port;
    u8 bit_width;
};

typedef union {
    struct msr_addr msr;
    struct io_addr io;
} drv_addr_union;

struct drv_cmd {
    unsigned int type;
    cpumask_t mask;
    drv_addr_union addr;
    u32 val;
};

static void do_drv_read(struct drv_cmd *cmd)
{
    u32 h;

    switch (cmd->type) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        rdmsr(cmd->addr.msr.reg, cmd->val, h);
        break;
    case SYSTEM_IO_CAPABLE:
        acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
                          &cmd->val, (u32)cmd->addr.io.bit_width);
        break;
    default:
        break;
    }
}

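/*
 * do_drv_write() is invoked through on_selected_cpus() (see drv_write()
 * below), so the read-modify-write of PERF_CTL executes locally on each
 * CPU that has to be reprogrammed.
 */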
static void do_drv_write(void *drvcmd)
{
    struct drv_cmd *cmd;
    u32 lo, hi;

    cmd = (struct drv_cmd *)drvcmd;

    switch (cmd->type) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        rdmsr(cmd->addr.msr.reg, lo, hi);
        lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
        wrmsr(cmd->addr.msr.reg, lo, hi);
        break;
    case SYSTEM_IO_CAPABLE:
        acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
                           cmd->val, (u32)cmd->addr.io.bit_width);
        break;
    default:
        break;
    }
}

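/*
 * Note the asymmetry: drv_read() performs the access on the calling CPU,
 * while drv_write() fans the command out to every CPU in cmd->mask.
 */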
static void drv_read(struct drv_cmd *cmd)
{
    cmd->val = 0;

    do_drv_read(cmd);
}

static void drv_write(struct drv_cmd *cmd)
{
    on_selected_cpus(cmd->mask, do_drv_write, (void *)cmd, 0, 0);
}

static u32 get_cur_val(cpumask_t mask)
{
    struct processor_performance *perf;
    struct drv_cmd cmd;

    if (unlikely(cpus_empty(mask)))
        return 0;

    switch (drv_data[first_cpu(mask)]->cpu_feature) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
        cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
        break;
    case SYSTEM_IO_CAPABLE:
        cmd.type = SYSTEM_IO_CAPABLE;
        perf = drv_data[first_cpu(mask)]->acpi_data;
        cmd.addr.io.port = perf->control_register.address;
        cmd.addr.io.bit_width = perf->control_register.bit_width;
        break;
    default:
        return 0;
    }

    cmd.mask = mask;

    drv_read(&cmd);
    return cmd.val;
}

/*
 * Return the measured active (C0) frequency on this CPU since last call
 * to this function.
 * Input: cpu number
 * Return: Average CPU frequency in terms of max frequency (zero on error)
 *
 * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
 * over a period of time, while CPU is in C0 state.
 * IA32_MPERF counts at the rate of max advertised frequency
 * IA32_APERF counts at the rate of actual CPU frequency
 * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
 * no meaning should be associated with absolute values of these MSRs.
 */
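/*
 * For example, if APERF advanced by 80 while MPERF advanced by 100 since
 * the last call, the CPU averaged 80% of its maximum frequency while in C0.
 */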
static void __get_measured_perf(void *perf_percent)
{
    unsigned int *ratio = perf_percent;
    union {
        struct {
            uint32_t lo;
            uint32_t hi;
        } split;
        uint64_t whole;
    } aperf_cur, mperf_cur;

    rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
    rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);

    wrmsr(MSR_IA32_APERF, 0, 0);
    wrmsr(MSR_IA32_MPERF, 0, 0);

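    /*
     * Guard against overflow in the "* 100" below: if APERF is within a
     * factor of 100 of the 64-bit limit, scale both counters down;
     * shifting both by the same amount leaves the ratio intact.
     */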
    if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
        int shift_count = 7;
        aperf_cur.whole >>= shift_count;
        mperf_cur.whole >>= shift_count;
    }

    if (aperf_cur.whole && mperf_cur.whole)
        *ratio = (aperf_cur.whole * 100) / mperf_cur.whole;
    else
        *ratio = 0;
}

static unsigned int get_measured_perf(unsigned int cpu)
{
    unsigned int retval, perf_percent;
    cpumask_t cpumask;

    if (!cpu_online(cpu))
        return 0;

    cpumask = cpumask_of_cpu(cpu);
    on_selected_cpus(cpumask, __get_measured_perf, (void *)&perf_percent, 0, 1);

    retval = drv_data[cpu]->max_freq * perf_percent / 100;
    return retval;
}

static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
    struct acpi_cpufreq_data *data = drv_data[cpu];
    unsigned int freq;

    if (unlikely(data == NULL ||
                 data->acpi_data == NULL || data->freq_table == NULL)) {
        return 0;
    }

    freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
    return freq;
}

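/*
 * Poll for the requested frequency to take effect: up to 100 reads,
 * 10us apart, i.e. roughly 1ms before giving up.
 */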
static unsigned int check_freqs(cpumask_t mask, unsigned int freq,
                                struct acpi_cpufreq_data *data)
{
    unsigned int cur_freq;
    unsigned int i;

    for (i=0; i<100; i++) {
        cur_freq = extract_freq(get_cur_val(mask), data);
        if (cur_freq == freq)
            return 1;
        udelay(10);
    }
    return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
                               unsigned int target_freq, unsigned int relation)
{
    struct acpi_cpufreq_data *data = drv_data[policy->cpu];
    struct processor_performance *perf;
    struct cpufreq_freqs freqs;
    cpumask_t online_policy_cpus;
    struct drv_cmd cmd;
    unsigned int next_state = 0; /* Index into freq_table */
    unsigned int next_perf_state = 0; /* Index into perf table */
    unsigned int j;
    int result = 0;

    if (unlikely(data == NULL ||
                 data->acpi_data == NULL || data->freq_table == NULL)) {
        return -ENODEV;
    }

    perf = data->acpi_data;
    result = cpufreq_frequency_table_target(policy,
                                            data->freq_table,
                                            target_freq,
                                            relation, &next_state);
    if (unlikely(result))
        return -ENODEV;

    cpus_and(online_policy_cpus, cpu_online_map, policy->cpus);

    next_perf_state = data->freq_table[next_state].index;
    if (perf->state == next_perf_state) {
        if (unlikely(policy->resume)) {
            printk(KERN_INFO "Called after resume, resetting to P%d\n",
                   next_perf_state);
            policy->resume = 0;
        }
        else {
            printk(KERN_DEBUG "Already at target state (P%d)\n",
                   next_perf_state);
            return 0;
        }
    }

    switch (data->cpu_feature) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
        cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
        cmd.val = (u32) perf->states[next_perf_state].control;
        break;
    case SYSTEM_IO_CAPABLE:
        cmd.type = SYSTEM_IO_CAPABLE;
        cmd.addr.io.port = perf->control_register.address;
        cmd.addr.io.bit_width = perf->control_register.bit_width;
        cmd.val = (u32) perf->states[next_perf_state].control;
        break;
    default:
        return -ENODEV;
    }

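    /*
     * With SW_ANY coordination a write on any one CPU of the domain is
     * enough, so only policy->cpu goes into the command mask; any other
     * shared_type requires programming every online CPU in the policy.
     * Either way, the per-CPU statistics are updated below for all online
     * CPUs in the policy -- the point of this changeset.
     */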
    cpus_clear(cmd.mask);

    if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
        cmd.mask = online_policy_cpus;
    else
        cpu_set(policy->cpu, cmd.mask);

    freqs.old = perf->states[perf->state].core_frequency * 1000;
    freqs.new = data->freq_table[next_state].frequency;

    drv_write(&cmd);

    if (!check_freqs(cmd.mask, freqs.new, data))
        return -EAGAIN;

    for_each_cpu_mask(j, online_policy_cpus)
        cpufreq_statistic_update(j, perf->state, next_perf_state);

    perf->state = next_perf_state;
    policy->cur = freqs.new;

    return result;
}

static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
{
    struct acpi_cpufreq_data *data;
    struct processor_performance *perf;

    if (!policy || !(data = drv_data[policy->cpu]) ||
        !processor_pminfo[policy->cpu])
        return -EINVAL;

    perf = &processor_pminfo[policy->cpu]->perf;

    cpufreq_verify_within_limits(policy, 0,
        perf->states[perf->platform_limit].core_frequency * 1000);

    return cpufreq_frequency_table_verify(policy, data->freq_table);
}

static unsigned long
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
{
    struct processor_performance *perf = data->acpi_data;

    if (cpu_khz) {
        /* search the closest match to cpu_khz */
        unsigned int i;
        unsigned long freq;
        unsigned long freqn = perf->states[0].core_frequency * 1000;

        for (i=0; i<(perf->state_count-1); i++) {
            freq = freqn;
            freqn = perf->states[i+1].core_frequency * 1000;
            if ((2 * cpu_khz) > (freqn + freq)) {
                perf->state = i;
                return freq;
            }
        }
        perf->state = perf->state_count-1;
        return freqn;
    } else {
        /* assume CPU is at P0... */
        perf->state = 0;
        return perf->states[0].core_frequency * 1000;
    }
}

static int
acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
    unsigned int i;
    unsigned int valid_states = 0;
    unsigned int cpu = policy->cpu;
    struct acpi_cpufreq_data *data;
    unsigned int result = 0;
    struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
    struct processor_performance *perf;

    data = xmalloc(struct acpi_cpufreq_data);
    if (!data)
        return -ENOMEM;
    memset(data, 0, sizeof(struct acpi_cpufreq_data));

    drv_data[cpu] = data;

    data->acpi_data = &processor_pminfo[cpu]->perf;

    perf = data->acpi_data;
    policy->shared_type = perf->shared_type;

    /* capability check */
    if (perf->state_count <= 1) {
        printk("No P-States\n");
        result = -ENODEV;
        goto err_unreg;
    }

    if (perf->control_register.space_id != perf->status_register.space_id) {
        result = -ENODEV;
        goto err_unreg;
    }

    switch (perf->control_register.space_id) {
    case ACPI_ADR_SPACE_SYSTEM_IO:
        printk("xen_pminfo: @acpi_cpufreq_cpu_init,"
               "SYSTEM IO addr space\n");
        data->cpu_feature = SYSTEM_IO_CAPABLE;
        break;
    case ACPI_ADR_SPACE_FIXED_HARDWARE:
        printk("xen_pminfo: @acpi_cpufreq_cpu_init,"
               "HARDWARE addr space\n");
        if (!check_est_cpu(cpu)) {
            result = -ENODEV;
            goto err_unreg;
        }
        data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
        break;
    default:
        result = -ENODEV;
        goto err_unreg;
    }

    data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
                                     (perf->state_count+1));
    if (!data->freq_table) {
        result = -ENOMEM;
        goto err_unreg;
    }

    /* detect transition latency */
    policy->cpuinfo.transition_latency = 0;
    for (i=0; i<perf->state_count; i++) {
        if ((perf->states[i].transition_latency * 1000) >
            policy->cpuinfo.transition_latency)
            policy->cpuinfo.transition_latency =
                perf->states[i].transition_latency * 1000;
    }
    policy->governor = CPUFREQ_DEFAULT_GOVERNOR;

    data->max_freq = perf->states[0].core_frequency * 1000;
    /* table init */
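    /*
     * ACPI sorts P-states from highest to lowest frequency; any state
     * whose frequency does not strictly decrease relative to the last
     * accepted entry is a duplicate and is skipped.
     */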
    for (i=0; i<perf->state_count; i++) {
        if (i>0 && perf->states[i].core_frequency >=
            data->freq_table[valid_states-1].frequency / 1000)
            continue;

        data->freq_table[valid_states].index = i;
        data->freq_table[valid_states].frequency =
            perf->states[i].core_frequency * 1000;
        valid_states++;
    }
    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
    perf->state = 0;

    result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
    if (result)
        goto err_freqfree;

    switch (perf->control_register.space_id) {
    case ACPI_ADR_SPACE_SYSTEM_IO:
        /* Current speed is unknown and not detectable by IO port */
        policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
        break;
    case ACPI_ADR_SPACE_FIXED_HARDWARE:
        acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
        policy->cur = get_cur_freq_on_cpu(cpu);
        break;
    default:
        break;
    }

    /* Check for APERF/MPERF support in hardware */
    if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
        unsigned int ecx;
        ecx = cpuid_ecx(6);
        if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
            acpi_cpufreq_driver.getavg = get_measured_perf;
    }

    /*
     * the first call to ->target() should result in us actually
     * writing something to the appropriate registers.
     */
    policy->resume = 1;

    return result;

err_freqfree:
    xfree(data->freq_table);
err_unreg:
    xfree(data);
    drv_data[cpu] = NULL;

    return result;
}

static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
    struct acpi_cpufreq_data *data = drv_data[policy->cpu];

    if (data) {
        drv_data[policy->cpu] = NULL;
        xfree(data->freq_table);
        xfree(data);
    }

    return 0;
}

static struct cpufreq_driver acpi_cpufreq_driver = {
    .verify = acpi_cpufreq_verify,
    .target = acpi_cpufreq_target,
    .init   = acpi_cpufreq_cpu_init,
    .exit   = acpi_cpufreq_cpu_exit,
};

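/*
 * Register the driver only when Xen itself (FREQCTL_xen), rather than
 * dom0, owns frequency scaling and the boot CPU is Intel; AMD is handled
 * separately via powernow in cpufreq_cpu_init() below.
 */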
static int __init cpufreq_driver_init(void)
{
    int ret = 0;

    if ((cpufreq_controller == FREQCTL_xen) &&
        (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
        ret = cpufreq_register_driver(&acpi_cpufreq_driver);

    return ret;
}
__initcall(cpufreq_driver_init);

int cpufreq_cpu_init(unsigned int cpuid)
{
    static int cpu_count=0;
    int ret;

    cpu_count++;

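    /*
     * Intel CPUs are added to cpufreq one at a time; for AMD, powernow
     * initialization is deferred until the last online CPU has called in
     * (cpu_count == num_online_cpus()), presumably because
     * powernow_cpufreq_init() sets up all CPUs in a single pass.
     */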
    /* Currently we only handle Intel and AMD processors */
    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
        ret = cpufreq_add_cpu(cpuid);
    else if ( (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
              (cpu_count == num_online_cpus()) )
        ret = powernow_cpufreq_init();
    else
        ret = -EFAULT;
    return ret;
}