ia64/xen-unstable: xen/drivers/cpufreq/cpufreq_ondemand.c @ 19149:efef232bbbdb

Consolidate cpufreq cmdline handling

... by moving as much of the option processing into cpufreq code as is
possible, by folding the cpufreq_governor option into the cpufreq one
(the governor name, if any, must be specified as the first thing after
the separator following "cpufreq=xen"), and by allowing each
governor to have an option processing routine.
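
For example (illustrative only: ',' is assumed as the separator here, and
the rate/threshold/bias names are the ones recognized by
cpufreq_dbs_handle_option() in the file below), the governor and its
tunables can now all be given through the one option:

    cpufreq=xen,ondemand,rate=20000,threshold=80,bias=100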

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Feb 03 18:12:51 2009 +0000 (2009-02-03)
parents 4035ea96ae2f
children 93d2193450c4
/*
 * xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 *           Feb 2008 Liu Jinsong <jinsong.liu@intel.com>
 *           Porting cpufreq_ondemand.c from Linux 2.6.23 to Xen hypervisor
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <xen/types.h>
#include <xen/percpu.h>
#include <xen/cpumask.h>
#include <xen/sched.h>
#include <xen/timer.h>
#include <asm/config.h>
#include <acpi/cpufreq/cpufreq.h>
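
/*
 * Thresholds are percentages of measured CPU load. All sampling
 * intervals are held internally in nanoseconds, the unit of NOW();
 * MILLISECS()/MICROSECS() do the scaling, and user-visible rates are
 * exchanged in microseconds (see the MICROSECS(1) conversions below).
 */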

#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

#define MIN_DBS_INTERVAL                        (MICROSECS(100))
#define MIN_SAMPLING_RATE_RATIO                 (2)
#define MIN_SAMPLING_MILLISECS                  (MIN_SAMPLING_RATE_RATIO * 10)
#define MIN_STAT_SAMPLING_RATE                  \
    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
#define MIN_SAMPLING_RATE                       \
    (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE                       (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000)

static uint64_t def_sampling_rate;
static uint64_t usr_sampling_rate;

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;    /* number of CPUs using this policy */
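
/*
 * Tunables shared by all CPUs: the sampling interval (ns), the load
 * percentage above which the maximum frequency is requested, and the
 * powersave bias (0-1000, settable via the "bias" option but not
 * otherwise consumed in this file).
 */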
static struct dbs_tuners {
    uint64_t     sampling_rate;
    unsigned int up_threshold;
    unsigned int powersave_bias;
} dbs_tuners_ins = {
    .sampling_rate  = 0,
    .up_threshold   = DEF_FREQUENCY_UP_THRESHOLD,
    .powersave_bias = 0,
};

static struct timer dbs_timer[NR_CPUS];

int write_ondemand_sampling_rate(unsigned int sampling_rate)
{
    if ( (sampling_rate > MAX_SAMPLING_RATE / MICROSECS(1)) ||
         (sampling_rate < MIN_SAMPLING_RATE / MICROSECS(1)) )
        return -EINVAL;

    dbs_tuners_ins.sampling_rate = sampling_rate * MICROSECS(1);
    return 0;
}

int write_ondemand_up_threshold(unsigned int up_threshold)
{
    if ( (up_threshold > MAX_FREQUENCY_UP_THRESHOLD) ||
         (up_threshold < MIN_FREQUENCY_UP_THRESHOLD) )
        return -EINVAL;

    dbs_tuners_ins.up_threshold = up_threshold;
    return 0;
}

int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max,
                              uint32_t *sampling_rate_min,
                              uint32_t *sampling_rate,
                              uint32_t *up_threshold)
{
    if (!sampling_rate_max || !sampling_rate_min ||
        !sampling_rate || !up_threshold)
        return -EINVAL;

    *sampling_rate_max = MAX_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate_min = MIN_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate = dbs_tuners_ins.sampling_rate / MICROSECS(1);
    *up_threshold = dbs_tuners_ins.up_threshold;

    return 0;
}
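
/*
 * Heart of the governor: compute the load over the interval since the
 * previous sample and pick a target frequency. The load is taken
 * against the busiest CPU sharing the policy (the smallest idle-time
 * delta). Above up_threshold, jump straight to the maximum frequency;
 * once load falls well below the threshold, scale the frequency down
 * proportionally, aiming about 10 points under the threshold so the
 * next sample does not immediately trigger an increase again.
 */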
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
    unsigned int load = 0;
    uint64_t cur_ns, idle_ns, total_ns;

    struct cpufreq_policy *policy;
    unsigned int j;

    if (!this_dbs_info->enable)
        return;

    policy = this_dbs_info->cur_policy;

    if (unlikely(policy->resume)) {
        __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
        return;
    }

    cur_ns = NOW();
    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
    this_dbs_info->prev_cpu_wall = NOW();

    if (total_ns < MIN_DBS_INTERVAL)
        return;

    /* Get Idle Time */
    idle_ns = UINT_MAX;
    for_each_cpu_mask(j, policy->cpus) {
        uint64_t total_idle_ns;
        unsigned int tmp_idle_ns;
        struct cpu_dbs_info_s *j_dbs_info;

        j_dbs_info = &per_cpu(cpu_dbs_info, j);
        total_idle_ns = get_cpu_idle_time(j);
        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
        j_dbs_info->prev_cpu_idle = total_idle_ns;

        if (tmp_idle_ns < idle_ns)
            idle_ns = tmp_idle_ns;
    }

    if (likely(total_ns > idle_ns))
        load = (100 * (total_ns - idle_ns)) / total_ns;

    /* Check for frequency increase */
    if (load > dbs_tuners_ins.up_threshold) {
        /* if we are already at full speed then break out early */
        if (policy->cur == policy->max)
            return;
        __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
        return;
    }

    /* Check for frequency decrease */
    /* if we cannot reduce the frequency anymore, break out early */
    if (policy->cur == policy->min)
        return;

    /*
     * The optimal frequency is the lowest frequency that can support
     * the current CPU usage without triggering the up policy. To be
     * safe, we aim 10 points under the threshold.
     */
    if (load < (dbs_tuners_ins.up_threshold - 10)) {
        unsigned int freq_next, freq_cur;

        freq_cur = __cpufreq_driver_getavg(policy);
        if (!freq_cur)
            freq_cur = policy->cur;

        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);

        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
    }
}
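
/*
 * Periodic sampling: the timer handler runs the load check and then
 * re-arms itself sampling_rate nanoseconds into the future, until
 * dbs_timer_exit() clears ->enable and stops the timer.
 */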
static void do_dbs_timer(void *dbs)
{
    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;

    if (!dbs_info->enable)
        return;

    dbs_check_cpu(dbs_info);

    set_timer(&dbs_timer[dbs_info->cpu], NOW() + dbs_tuners_ins.sampling_rate);
}

static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 1;

    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer,
               (void *)dbs_info, dbs_info->cpu);

    set_timer(&dbs_timer[dbs_info->cpu], NOW() + dbs_tuners_ins.sampling_rate);
}

static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 0;
    stop_timer(&dbs_timer[dbs_info->cpu]);
}
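
/*
 * Governor callback invoked by the cpufreq core:
 *   CPUFREQ_GOV_START  - validate the policy, snapshot idle/wall times
 *                        for every CPU it covers, choose a sampling
 *                        rate on first use, and start the timer;
 *   CPUFREQ_GOV_STOP   - stop the timer and drop the usage count;
 *   CPUFREQ_GOV_LIMITS - clamp the current frequency into the new
 *                        [min, max] range.
 */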
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
    unsigned int cpu = policy->cpu;
    struct cpu_dbs_info_s *this_dbs_info;
    unsigned int j;

    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

    switch (event) {
    case CPUFREQ_GOV_START:
        if ((!cpu_online(cpu)) || (!policy->cur))
            return -EINVAL;

        if (policy->cpuinfo.transition_latency >
            (TRANSITION_LATENCY_LIMIT * 1000)) {
            printk(KERN_WARNING "ondemand governor failed to load "
                   "due to too long transition latency\n");
            return -EINVAL;
        }
        if (this_dbs_info->enable)
            /* Already enabled */
            break;

        dbs_enable++;

        for_each_cpu_mask(j, policy->cpus) {
            struct cpu_dbs_info_s *j_dbs_info;
            j_dbs_info = &per_cpu(cpu_dbs_info, j);
            j_dbs_info->cur_policy = policy;

            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
            j_dbs_info->prev_cpu_wall = NOW();
        }
        this_dbs_info->cpu = cpu;
        /*
         * Start the timer when this governor is used
         * for the first time.
         */
        if ((dbs_enable == 1) && !dbs_tuners_ins.sampling_rate) {
            def_sampling_rate = policy->cpuinfo.transition_latency *
                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
                def_sampling_rate = MIN_STAT_SAMPLING_RATE;

            if (!usr_sampling_rate)
                dbs_tuners_ins.sampling_rate = def_sampling_rate;
            else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too low, using %"PRIu64"\n",
                       MIN_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
            } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too high, using %"PRIu64"\n",
                       MAX_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
            } else
                dbs_tuners_ins.sampling_rate = usr_sampling_rate;
        }
        dbs_timer_init(this_dbs_info);

        break;

    case CPUFREQ_GOV_STOP:
        dbs_timer_exit(this_dbs_info);
        dbs_enable--;

        break;

    case CPUFREQ_GOV_LIMITS:
        if (policy->max < this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                                    policy->max, CPUFREQ_RELATION_H);
        else if (policy->min > this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                                    policy->min, CPUFREQ_RELATION_L);
        break;
    }
    return 0;
}
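
/*
 * Command line option handler, attached via ->handle_option below and
 * called by the cpufreq core for each name[=value] pair following the
 * governor name. Recognized options:
 *   rate=<microseconds>  sampling interval (stored in nanoseconds);
 *   threshold=<percent>  up_threshold, clamped to [11, 100];
 *   bias=<0..1000>       powersave bias (stored, but not otherwise
 *                        used in this file).
 */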
static void __init cpufreq_dbs_handle_option(const char *name, const char *val)
{
    if ( !strcmp(name, "rate") && val )
    {
        usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
    }
    else if ( !strcmp(name, "threshold") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too low, using %d\n",
                   MIN_FREQUENCY_UP_THRESHOLD);
            tmp = MIN_FREQUENCY_UP_THRESHOLD;
        }
        else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too high, using %d\n",
                   MAX_FREQUENCY_UP_THRESHOLD);
            tmp = MAX_FREQUENCY_UP_THRESHOLD;
        }
        dbs_tuners_ins.up_threshold = tmp;
    }
    else if ( !strcmp(name, "bias") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp > 1000 )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified bias too high, using 1000\n");
            tmp = 1000;
        }
        dbs_tuners_ins.powersave_bias = tmp;
    }
}

struct cpufreq_governor cpufreq_gov_dbs = {
    .name = "ondemand",
    .governor = cpufreq_governor_dbs,
    .handle_option = cpufreq_dbs_handle_option
};

static int __init cpufreq_gov_dbs_init(void)
{
    return cpufreq_register_governor(&cpufreq_gov_dbs);
}
__initcall(cpufreq_gov_dbs_init);

static void cpufreq_gov_dbs_exit(void)
{
    cpufreq_unregister_governor(&cpufreq_gov_dbs);
}
__exitcall(cpufreq_gov_dbs_exit);