ia64/xen-unstable

view xen/common/sched_bvt.c @ 1589:9eda3ea2b4a0

bitkeeper revision 1.1019 (40de8639yjRdZbQS_JrcVsHsaPIwZw)

smp.h, irq.h, config.h, sched_bvt.c, setup.c, pci-pc.c:
Fix compile errors for latest GCC (3.4.0).
author kaf24@scramble.cl.cam.ac.uk
date Sun Jun 27 08:32:57 2004 +0000 (2004-06-27)
parents da46c0d041f1
children 083178f6cdfa
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
2 ****************************************************************************
3 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
4 * (C) 2002-2003 University of Cambridge
5 * (C) 2004 - Mark Williamson - Intel Research Cambridge
6 ****************************************************************************
7 *
 * File:         common/sched_bvt.c
9 * Author: Rolf Neugebauer & Keir Fraser
10 * Updated for generic API by Mark Williamson
11 *
12 * Description: CPU scheduling
13 * implements A Borrowed Virtual Time scheduler.
14 * (see Duda & Cheriton SOSP'99)
15 */
17 #include <xen/config.h>
18 #include <xen/init.h>
19 #include <xen/lib.h>
20 #include <xen/sched.h>
21 #include <xen/delay.h>
22 #include <xen/event.h>
23 #include <xen/time.h>
24 #include <xen/ac_timer.h>
25 #include <xen/perfc.h>
26 #include <xen/sched-if.h>
27 #include <xen/slab.h>
/*
 * All per-domain BVT-specific scheduling info is stored here
 * (hung off domain->sched_priv, see BVT_INFO()).
 */
struct bvt_dom_info
{
    unsigned long mcu_advance;  /* inverse of weight: avt grows by this per MCU run */
    u32           avt;          /* actual virtual time accumulated by the domain */
    u32           evt;          /* effective virtual time (avt minus warp credit) */
    int           warpback;     /* non-zero => domain currently allowed to warp */
    long          warp;         /* virtual time warp (evt = avt - warp while warping) */
    long          warpl;        /* warp limit: max continuous time spent warped */
    long          warpu;        /* unwarp requirement: min gap since last unwarp */
    s_time_t      warped;       /* time the current warp period started (set on wake) */
    s_time_t      uwarped;      /* time the domain last unwarped (set in __calc_evt) */
};
/* Per-CPU BVT-specific state. */
struct bvt_cpu_info
{
    /*
     * System virtual time: tracks the minimum avt over the runnable
     * domains on this CPU (updated in bvt_do_schedule).
     * XXX check this is unsigned long!
     */
    unsigned long svt;
};
/* Accessors for the per-domain and per-CPU private state above. */
#define BVT_INFO(p)   ((struct bvt_dom_info *)(p)->sched_priv)
#define CPU_INFO(cpu) ((struct bvt_cpu_info *)(schedule_data[cpu]).sched_priv)
#define CPU_SVT(cpu)  (CPU_INFO(cpu)->svt)

#define MCU            (s32)MICROSECS(100)   /* Minimum unit (charging quantum) */
#define MCU_ADVANCE    10                    /* default weight */
#define TIME_SLOP      (s32)MICROSECS(50)    /* allow time to slip a bit */
static s32 ctx_allow = (s32)MILLISECS(5);    /* context switch allowance */

/* SLAB cache for struct bvt_dom_info objects */
static kmem_cache_t *dom_info_cache;
61 /*
62 * Calculate the effective virtual time for a domain. Take into account
63 * warping limits
64 */
65 static void __calc_evt(struct bvt_dom_info *inf)
66 {
67 s_time_t now = NOW();
69 if ( inf->warpback )
70 {
71 if ( ((now - inf->warped) < inf->warpl) &&
72 ((now - inf->uwarped) > inf->warpu) )
73 {
74 /* allowed to warp */
75 inf->evt = inf->avt - inf->warp;
76 }
77 else
78 {
79 /* warped for too long -> unwarp */
80 inf->evt = inf->avt;
81 inf->uwarped = now;
82 inf->warpback = 0;
83 }
84 }
85 else
86 {
87 inf->evt = inf->avt;
88 }
89 }
91 /**
92 * bvt_alloc_task - allocate BVT private structures for a task
93 * @p: task to allocate private structures for
94 *
95 * Returns non-zero on failure.
96 */
97 int bvt_alloc_task(struct domain *p)
98 {
99 p->sched_priv = kmem_cache_alloc(dom_info_cache);
100 if ( p->sched_priv == NULL )
101 return -1;
103 return 0;
104 }
106 /*
107 * Add and remove a domain
108 */
109 void bvt_add_task(struct domain *p)
110 {
111 struct bvt_dom_info *inf = BVT_INFO(p);
113 ASSERT(inf != NULL);
114 ASSERT(p != NULL);
116 inf->mcu_advance = MCU_ADVANCE;
118 if ( p->domain == IDLE_DOMAIN_ID )
119 {
120 inf->avt = inf->evt = ~0U;
121 }
122 else
123 {
124 /* Set avt and evt to system virtual time. */
125 inf->avt = CPU_SVT(p->processor);
126 inf->evt = CPU_SVT(p->processor);
127 /* Set some default values here. */
128 inf->warpback = 0;
129 inf->warp = 0;
130 inf->warpl = 0;
131 inf->warpu = 0;
132 }
134 return;
135 }
137 /**
138 * bvt_free_task - free BVT private structures for a task
139 * @p: task
140 */
141 void bvt_free_task(struct domain *p)
142 {
143 ASSERT( p->sched_priv != NULL );
144 kmem_cache_free( dom_info_cache, p->sched_priv );
145 }
148 void bvt_wake_up(struct domain *p)
149 {
150 struct bvt_dom_info *inf = BVT_INFO(p);
152 ASSERT(inf != NULL);
155 /* set the BVT parameters */
156 if (inf->avt < CPU_SVT(p->processor))
157 inf->avt = CPU_SVT(p->processor);
159 /* deal with warping here */
160 inf->warpback = 1;
161 inf->warped = NOW();
162 __calc_evt(inf);
163 __add_to_runqueue_head(p);
164 }
/*
 * Block the currently-executing domain until a pertinent event occurs.
 * For BVT this just clears the warp flag, so __calc_evt() stops
 * subtracting the warp from the domain's virtual time.
 */
static void bvt_do_block(struct domain *p)
{
    BVT_INFO(p)->warpback = 0;
}
174 /* Control the scheduler. */
175 int bvt_ctl(struct sched_ctl_cmd *cmd)
176 {
177 struct bvt_ctl *params = &cmd->u.bvt;
179 if ( cmd->direction == SCHED_INFO_PUT )
180 {
181 ctx_allow = params->ctx_allow;
182 }
183 else
184 {
185 params->ctx_allow = ctx_allow;
186 }
188 return 0;
189 }
191 /* Adjust scheduling parameter for a given domain. */
192 int bvt_adjdom(struct domain *p,
193 struct sched_adjdom_cmd *cmd)
194 {
195 struct bvt_adjdom *params = &cmd->u.bvt;
196 unsigned long flags;
198 if ( cmd->direction == SCHED_INFO_PUT )
199 {
200 unsigned long mcu_adv = params->mcu_adv,
201 warp = params->warp,
202 warpl = params->warpl,
203 warpu = params->warpu;
205 struct bvt_dom_info *inf = BVT_INFO(p);
207 DPRINTK("Get domain %u bvt mcu_adv=%ld, warp=%ld, "
208 "warpl=%ld, warpu=%ld\n",
209 p->domain, inf->mcu_advance, inf->warp,
210 inf->warpl, inf->warpu );
212 /* Sanity -- this can avoid divide-by-zero. */
213 if ( mcu_adv == 0 )
214 return -EINVAL;
216 spin_lock_irqsave(&schedule_lock[p->processor], flags);
217 inf->mcu_advance = mcu_adv;
218 inf->warp = warp;
219 inf->warpl = warpl;
220 inf->warpu = warpu;
222 DPRINTK("Set domain %u bvt mcu_adv=%ld, warp=%ld, "
223 "warpl=%ld, warpu=%ld\n",
224 p->domain, inf->mcu_advance, inf->warp,
225 inf->warpl, inf->warpu );
227 spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
228 }
229 else if ( cmd->direction == SCHED_INFO_GET )
230 {
231 struct bvt_dom_info *inf = BVT_INFO(p);
233 spin_lock_irqsave(&schedule_lock[p->processor], flags);
234 params->mcu_adv = inf->mcu_advance;
235 params->warp = inf->warp;
236 params->warpl = inf->warpl;
237 params->warpu = inf->warpu;
238 spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
239 }
241 return 0;
242 }
/*
 * The main function
 * - deschedule the current domain.
 * - pick a new domain.
 *   i.e., the domain with lowest EVT.
 *   The runqueue should be ordered by EVT so that is easy.
 */
static task_slice_t bvt_do_schedule(s_time_t now)
{
    struct domain *prev = current, *next = NULL, *next_prime, *p;
    struct list_head *tmp;
    int                 cpu = prev->processor;
    s32                 r_time;     /* time for new dom to run */
    s32                 ranfor;     /* assume we never run longer than 2.1s! */
    s32                 mcus;
    u32                 next_evt, next_prime_evt, min_avt;
    struct bvt_dom_info *prev_inf       = BVT_INFO(prev),
                        *p_inf          = NULL,
                        *next_inf       = NULL,
                        *next_prime_inf = NULL;
    task_slice_t        ret;

    ASSERT(prev->sched_priv != NULL);
    ASSERT(prev_inf != NULL);

    if ( likely(!is_idle_task(prev)) )
    {
        /* Charge the outgoing domain for the time it actually ran. */
        ranfor = (s32)(now - prev->lastschd);
        /* Calculate mcu and update avt (rounding partial MCUs up). */
        mcus = (ranfor + MCU - 1) / MCU;
        prev_inf->avt += mcus * prev_inf->mcu_advance;

        __calc_evt(prev_inf);

        /* Re-queue at the tail if still runnable; drop it otherwise. */
        __del_from_runqueue(prev);

        if ( domain_runnable(prev) )
            __add_to_runqueue_tail(prev);
    }

    /* We should at least have the idle task */
    ASSERT(!list_empty(&schedule_data[cpu].runqueue));

    /*
     * scan through the run queue and pick the task with the lowest evt
     * *and* the task the second lowest evt.
     * this code is O(n) but we expect n to be small.
     */
    next       = schedule_data[cpu].idle;
    next_prime = NULL;

    next_evt       = ~0U;
    next_prime_evt = ~0U;
    min_avt        = ~0U;

    list_for_each ( tmp, &schedule_data[cpu].runqueue )
    {
        p     = list_entry(tmp, struct domain, run_list);
        p_inf = BVT_INFO(p);

        if ( p_inf->evt < next_evt )
        {
            /* New minimum: the old winner becomes the runner-up. */
            next_prime     = next;
            next_prime_evt = next_evt;
            next           = p;
            next_evt       = p_inf->evt;
        }
        else if ( next_prime_evt == ~0U )
        {
            /* First runner-up candidate seen so far. */
            next_prime_evt = p_inf->evt;
            next_prime     = p;
        }
        else if ( p_inf->evt < next_prime_evt )
        {
            next_prime_evt = p_inf->evt;
            next_prime     = p;
        }

        /* Determine system virtual time. */
        if ( p_inf->avt < min_avt )
            min_avt = p_inf->avt;
    }

    /* Update system virtual time. */
    if ( min_avt != ~0U )
        CPU_SVT(cpu) = min_avt;

    /* check for virtual time overrun on this cpu */
    if ( CPU_SVT(cpu) >= 0xf0000000 )
    {
        u_long t_flags;
        write_lock_irqsave(&tasklist_lock, t_flags);
        /* Shift every local domain's virtual times down by the same amount. */
        for_each_domain ( p )
        {
            if ( p->processor == cpu )
            {
                p_inf = BVT_INFO(p);
                p_inf->evt -= 0xe0000000;
                p_inf->avt -= 0xe0000000;
            }
        }
        write_unlock_irqrestore(&tasklist_lock, t_flags);
        CPU_SVT(cpu) -= 0xe0000000;
    }

    /* work out time for next run through scheduler */
    if ( is_idle_task(next) )
    {
        r_time = ctx_allow;
        goto sched_done;
    }

    if ( (next_prime == NULL) || is_idle_task(next_prime) )
    {
        /* We have only one runnable task besides the idle task. */
        r_time = 10 * ctx_allow; /* RN: random constant */
        goto sched_done;
    }

    next_prime_inf = BVT_INFO(next_prime);
    next_inf       = BVT_INFO(next);

    /*
     * If we are here then we have two runnable tasks.
     * Work out how long 'next' can run till its evt is greater than
     * 'next_prime's evt. Take context switch allowance into account.
     */
    ASSERT(next_prime_inf->evt >= next_inf->evt);

    r_time = ((next_prime_inf->evt - next_inf->evt)/next_inf->mcu_advance)
        + ctx_allow;

    ASSERT(r_time >= ctx_allow);

 sched_done:
    next->min_slice = ctx_allow;
    ret.task = next;
    ret.time = r_time;

    return ret;
}
388 static void bvt_dump_runq_el(struct domain *p)
389 {
390 struct bvt_dom_info *inf = BVT_INFO(p);
392 printk("mcua=0x%04lX ev=0x%08X av=0x%08X ",
393 inf->mcu_advance, inf->evt, inf->avt);
394 }
/* Dump the global BVT settings (MCU size and context-switch allowance). */
static void bvt_dump_settings(void)
{
    printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns ", (u32)MCU, (s32)ctx_allow );
}
/* Dump CPU i's system virtual time. */
static void bvt_dump_cpu_state(int i)
{
    printk("svt=0x%08lX ", CPU_SVT(i));
}
407 /* Initialise the data structures. */
408 int bvt_init_scheduler()
409 {
410 int i;
412 for ( i = 0; i < NR_CPUS; i++ )
413 {
414 schedule_data[i].sched_priv = kmalloc(sizeof(struct bvt_cpu_info));
415 if ( schedule_data[i].sched_priv == NULL )
416 {
417 printk("Failed to allocate BVT scheduler per-CPU memory!\n");
418 return -1;
419 }
421 CPU_SVT(i) = 0; /* XXX do I really need to do this? */
422 }
424 dom_info_cache = kmem_cache_create("BVT dom info",
425 sizeof(struct bvt_dom_info),
426 0, 0, NULL, NULL);
428 if ( dom_info_cache == NULL )
429 {
430 printk("BVT: Failed to allocate domain info SLAB cache");
431 return -1;
432 }
434 return 0;
435 }
/* Take a paused domain off the runqueue, if it is currently queued. */
static void bvt_pause(struct domain *p)
{
    if( __task_on_runqueue(p) )
        __del_from_runqueue(p);
}
/* Method table exporting the BVT scheduler entry points. */
struct scheduler sched_bvt_def = {
    .name     = "Borrowed Virtual Time",
    .opt_name = "bvt",
    .sched_id = SCHED_BVT,

    .init_scheduler = bvt_init_scheduler,
    .alloc_task     = bvt_alloc_task,
    .add_task       = bvt_add_task,
    .free_task      = bvt_free_task,
    .wake_up        = bvt_wake_up,
    .do_block       = bvt_do_block,
    .do_schedule    = bvt_do_schedule,
    .control        = bvt_ctl,
    .adjdom         = bvt_adjdom,
    .dump_settings  = bvt_dump_settings,
    .dump_cpu_state = bvt_dump_cpu_state,
    .dump_runq_el   = bvt_dump_runq_el,
    .pause          = bvt_pause,
};