ia64/xen-unstable

view xen/common/sched_bvt.c @ 9706:3c05406f5e0a

In some cases, say for instance for some bizarre reason
the tree was checked out of CVS, which doesn't necessarily
store file permissions, mkbuildtree may not be executable.
So run them explicitly via bash.

Signed-Off-By: Horms <horms@verge.net.au>
author kaf24@firebug.cl.cam.ac.uk
date Thu Apr 13 11:24:00 2006 +0100 (2006-04-13)
parents 2303fb4682e7
children 6993a0f91efc
line source
1 /****************************************************************************
2 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
3 * (C) 2002-2003 University of Cambridge
4 * (C) 2004 - Mark Williamson - Intel Research Cambridge
5 ****************************************************************************
6 *
7 * File: common/schedule.c
8 * Author: Rolf Neugebauer & Keir Fraser
9 * Updated for generic API by Mark Williamson
10 *
11 * Description: CPU scheduling
12 * implements A Borrowed Virtual Time scheduler.
13 * (see Duda & Cheriton SOSP'99)
14 */
16 #include <xen/config.h>
17 #include <xen/init.h>
18 #include <xen/lib.h>
19 #include <xen/sched.h>
20 #include <xen/delay.h>
21 #include <xen/event.h>
22 #include <xen/time.h>
23 #include <xen/timer.h>
24 #include <xen/perfc.h>
25 #include <xen/sched-if.h>
26 #include <xen/softirq.h>
28 /* all per-domain BVT-specific scheduling info is stored here */
/* Per-VCPU BVT scheduling state; lives inside the owning bvt_dom_info. */
struct bvt_vcpu_info
{
    struct list_head run_list;     /* runqueue list pointers */
    u32              avt;          /* actual virtual time */
    u32              evt;          /* effective virtual time (avt minus warp) */
    int              migrated;     /* migrated to a new CPU */
    struct vcpu *vcpu;             /* back-pointer to the VCPU itself */
    struct bvt_dom_info *inf;      /* back-pointer to owning domain info */
};
/* Per-domain BVT scheduling state, shared by all the domain's VCPUs. */
struct bvt_dom_info
{
    struct domain *domain;         /* domain this info belongs to */
    u32           mcu_advance;     /* inverse of weight */
    int           warpback;        /* warp? */
    int           warp;            /* warp set and within the warp
                                      limits */
    s32           warp_value;      /* virtual time warp */
    s_time_t      warpl;           /* warp limit */
    struct timer  warp_timer;      /* deals with warpl */
    s_time_t      warpu;           /* unwarp time requirement */
    struct timer  unwarp_timer;    /* deals with warpu */

    /* Per-VCPU state is embedded here, one slot per possible VCPU. */
    struct bvt_vcpu_info vcpu_inf[MAX_VIRT_CPUS];
};
/* Per-physical-CPU BVT state. */
struct bvt_cpu_info
{
    struct list_head runqueue;     /* run_list nodes of runnable VCPUs */
    unsigned long    svt;          /* system virtual time for this CPU */
};
/* Accessors for the scheduler-private state hung off domains/VCPUs/CPUs. */
#define BVT_INFO(p)   ((struct bvt_dom_info *)(p)->sched_priv)
#define EBVT_INFO(p)  ((struct bvt_vcpu_info *)(p)->sched_priv)
#define CPU_INFO(cpu) ((struct bvt_cpu_info *)(schedule_data[cpu]).sched_priv)
#define RUNLIST(p)    ((struct list_head *)&(EBVT_INFO(p)->run_list))
#define RUNQUEUE(cpu) ((struct list_head *)&(CPU_INFO(cpu)->runqueue))
#define CPU_SVT(cpu)  (CPU_INFO(cpu)->svt)

#define MCU (s32)MICROSECS(100)             /* Minimum unit (charging quantum) */
#define MCU_ADVANCE 10                      /* default weight */
#define TIME_SLOP (s32)MICROSECS(50)        /* allow time to slip a bit */
#define CTX_MIN (s32)MICROSECS(10)          /* Low limit for ctx_allow */
static s32 ctx_allow = (s32)MILLISECS(5);   /* context switch allowance */
/* Insert @d at the head of its CPU's runqueue. */
static inline void __add_to_runqueue_head(struct vcpu *d)
{
    list_add(RUNLIST(d), RUNQUEUE(d->processor));
}
/* Insert @d at the tail of its CPU's runqueue. */
static inline void __add_to_runqueue_tail(struct vcpu *d)
{
    list_add_tail(RUNLIST(d), RUNQUEUE(d->processor));
}
/* Unlink @d from the runqueue.  A NULL next pointer marks it off-queue;
 * this is exactly what __task_on_runqueue() tests. */
static inline void __del_from_runqueue(struct vcpu *d)
{
    struct list_head *runlist = RUNLIST(d);
    list_del(runlist);
    runlist->next = NULL;
}
/* Is @d currently linked on a runqueue?  Relies on __del_from_runqueue()
 * NULLing the next pointer when the VCPU is removed. */
static inline int __task_on_runqueue(struct vcpu *d)
{
    return (RUNLIST(d))->next != NULL;
}
97 /* Warp/unwarp timer functions */
98 static void warp_timer_fn(void *data)
99 {
100 struct bvt_dom_info *inf = data;
101 struct vcpu *v = inf->domain->vcpu[0];
103 vcpu_schedule_lock_irq(v);
105 inf->warp = 0;
107 /* unwarp equal to zero => stop warping */
108 if ( inf->warpu == 0 )
109 {
110 inf->warpback = 0;
111 cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
112 }
114 set_timer(&inf->unwarp_timer, NOW() + inf->warpu);
116 vcpu_schedule_unlock_irq(v);
117 }
/*
 * Timer callback: the unwarp period (warpu) has expired.  If the domain
 * still wants warping (warpback is set), re-enable the warp and kick the
 * scheduler so the new effective virtual time takes effect.  Warp state
 * is tracked via VCPU0.
 */
static void unwarp_timer_fn(void *data)
{
    struct bvt_dom_info *inf = data;
    struct vcpu *v = inf->domain->vcpu[0];

    vcpu_schedule_lock_irq(v);

    if ( inf->warpback )
    {
        inf->warp = 1;
        cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
    }

    vcpu_schedule_unlock_irq(v);
}
135 static inline u32 calc_avt(struct vcpu *v, s_time_t now)
136 {
137 u32 ranfor, mcus;
138 struct bvt_dom_info *inf = BVT_INFO(v->domain);
139 struct bvt_vcpu_info *einf = EBVT_INFO(v);
141 ranfor = (u32)(now - v->runstate.state_entry_time);
142 mcus = (ranfor + MCU - 1)/MCU;
144 return einf->avt + mcus * inf->mcu_advance;
145 }
147 /*
148 * Calculate the effective virtual time for a domain. Take into account
149 * warping limits
150 */
151 static inline u32 calc_evt(struct vcpu *d, u32 avt)
152 {
153 struct bvt_dom_info *inf = BVT_INFO(d->domain);
154 /* TODO The warp routines need to be rewritten GM */
156 if ( inf->warp )
157 return avt - inf->warp_value;
158 else
159 return avt;
160 }
/**
 * bvt_alloc_task - allocate BVT private structures for a task
 * @v: VCPU to allocate private structures for
 *
 * Allocates the domain's bvt_dom_info on first call (zero-filled),
 * points the VCPU's sched_priv at its slot in the embedded vcpu_inf[]
 * array, and - for VCPU0 only - initialises the domain-wide defaults
 * and the warp/unwarp timers on this VCPU's CPU.
 *
 * Returns non-zero on failure.
 */
static int bvt_alloc_task(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct bvt_dom_info *inf;

    /* One bvt_dom_info per domain, allocated when its first VCPU arrives. */
    if ( (d->sched_priv == NULL) )
    {
        if ( (d->sched_priv = xmalloc(struct bvt_dom_info)) == NULL )
            return -1;
        memset(d->sched_priv, 0, sizeof(struct bvt_dom_info));
    }

    inf = BVT_INFO(d);

    v->sched_priv = &inf->vcpu_inf[v->vcpu_id];

    /* Wire up the back-pointers for this VCPU's slot. */
    inf->vcpu_inf[v->vcpu_id].inf = BVT_INFO(d);
    inf->vcpu_inf[v->vcpu_id].vcpu = v;

    if ( v->vcpu_id == 0 )
    {
        inf->mcu_advance = MCU_ADVANCE;
        inf->domain = v->domain;
        inf->warpback = 0;
        /* Set some default values here. */
        inf->warp = 0;
        inf->warp_value = 0;
        inf->warpl = MILLISECS(2000);
        inf->warpu = MILLISECS(1000);
        /* Initialise the warp timers. */
        init_timer(&inf->warp_timer, warp_timer_fn, inf, v->processor);
        init_timer(&inf->unwarp_timer, unwarp_timer_fn, inf, v->processor);
    }

    return 0;
}
/*
 * Add and remove a domain
 */
static void bvt_add_task(struct vcpu *v)
{
    struct bvt_vcpu_info *einf = EBVT_INFO(v);

    /* Allocate per-CPU context if this is the first domain to be added. */
    if ( CPU_INFO(v->processor) == NULL )
    {
        schedule_data[v->processor].sched_priv = xmalloc(struct bvt_cpu_info);
        BUG_ON(CPU_INFO(v->processor) == NULL);
        INIT_LIST_HEAD(RUNQUEUE(v->processor));
        CPU_SVT(v->processor) = 0;
    }

    if ( is_idle_vcpu(v) )
    {
        /* Idle VCPU gets maximal virtual times so it never beats real
         * work, and sits permanently on the runqueue as the fallback. */
        einf->avt = einf->evt = ~0U;
        BUG_ON(__task_on_runqueue(v));
        __add_to_runqueue_head(v);
    }
    else
    {
        /* Set avt and evt to system virtual time. */
        einf->avt = CPU_SVT(v->processor);
        einf->evt = CPU_SVT(v->processor);
    }
}
/*
 * Wake a VCPU: put it on its CPU's runqueue, bring its virtual times up
 * to date, and decide whether it should preempt the currently running
 * VCPU or merely shorten the current timeslice.
 */
static void bvt_wake(struct vcpu *v)
{
    struct bvt_vcpu_info *einf = EBVT_INFO(v);
    struct vcpu *curr;
    s_time_t now, r_time;
    int cpu = v->processor;
    u32 curr_evt;

    /* Nothing to do if the VCPU is already queued. */
    if ( unlikely(__task_on_runqueue(v)) )
        return;

    __add_to_runqueue_head(v);

    now = NOW();

    /* Set the BVT parameters. AVT should always be updated
       if CPU migration occurred. */
    if ( (einf->avt < CPU_SVT(cpu)) || einf->migrated )
    {
        einf->avt = CPU_SVT(cpu);
        einf->migrated = 0;
    }

    /* Deal with warping here. */
    einf->evt = calc_evt(v, einf->avt);

    curr = schedule_data[cpu].curr;
    curr_evt = calc_evt(curr, calc_avt(curr, now));
    /* Calculate the time the current domain would run assuming
       the second smallest evt is of the newly woken domain */
    r_time = curr->runstate.state_entry_time +
        ((einf->evt - curr_evt) / BVT_INFO(curr->domain)->mcu_advance) +
        ctx_allow;

    /* Preempt now if the waker beats (or ties) the running VCPU's EVT;
     * otherwise just pull the scheduling timer forward if needed. */
    if ( is_idle_vcpu(curr) || (einf->evt <= curr_evt) )
        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
    else if ( schedule_data[cpu].s_timer.expires > r_time )
        set_timer(&schedule_data[cpu].s_timer, r_time);
}
276 static void bvt_sleep(struct vcpu *v)
277 {
278 if ( schedule_data[v->processor].curr == v )
279 cpu_raise_softirq(v->processor, SCHEDULE_SOFTIRQ);
280 else if ( __task_on_runqueue(v) )
281 __del_from_runqueue(v);
282 }
/*
 * Restrict @v to the CPUs in @affinity.  The VCPU is paused, moved to
 * the first allowed CPU, and flagged as migrated so bvt_wake() will
 * resynchronise its AVT with the new CPU's SVT.
 */
static int bvt_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
    /* Cannot move ourselves: succeed only if we already run in the mask. */
    if ( v == current )
        return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;

    vcpu_pause(v);
    v->cpu_affinity = *affinity;
    v->processor = first_cpu(v->cpu_affinity);
    EBVT_INFO(v)->migrated = 1;
    vcpu_unpause(v);

    return 0;
}
/**
 * bvt_free_task - free BVT private structures for a task
 * @d: task
 *
 * Kills both warp timers before releasing the bvt_dom_info; the
 * per-VCPU state is embedded in it, so a single xfree suffices.
 */
static void bvt_free_task(struct domain *d)
{
    struct bvt_dom_info *inf = BVT_INFO(d);

    ASSERT(inf != NULL);

    kill_timer(&inf->warp_timer);
    kill_timer(&inf->unwarp_timer);

    xfree(inf);
}
316 /* Control the scheduler. */
317 static int bvt_ctl(struct sched_ctl_cmd *cmd)
318 {
319 struct bvt_ctl *params = &cmd->u.bvt;
321 if ( cmd->direction == SCHED_INFO_PUT )
322 ctx_allow = params->ctx_allow;
323 else
324 {
325 if ( ctx_allow < CTX_MIN )
326 ctx_allow = CTX_MIN;
327 params->ctx_allow = ctx_allow;
328 }
330 return 0;
331 }
/* Adjust scheduling parameter for a given domain. */
static int bvt_adjdom(
    struct domain *d, struct sched_adjdom_cmd *cmd)
{
    struct bvt_adjdom *params = &cmd->u.bvt;

    if ( cmd->direction == SCHED_INFO_PUT )
    {
        u32 mcu_adv = params->mcu_adv;
        u32 warpback = params->warpback;
        s32 warpvalue = params->warpvalue;
        s_time_t warpl = params->warpl;
        s_time_t warpu = params->warpu;

        struct bvt_dom_info *inf = BVT_INFO(d);

        /* Sanity -- this can avoid divide-by-zero. */
        if ( (mcu_adv == 0) || (warpl < 0) || (warpu < 0) )
            return -EINVAL;

        inf->mcu_advance = mcu_adv;
        inf->warpback = warpback;
        /* The warp should be the same as warpback */
        inf->warp = warpback;
        inf->warp_value = warpvalue;
        inf->warpl = MILLISECS(warpl);
        inf->warpu = MILLISECS(warpu);

        /* If the unwarp timer set up it needs to be removed */
        stop_timer(&inf->unwarp_timer);
        /* If we stop warping the warp timer needs to be removed */
        if ( !warpback )
            stop_timer(&inf->warp_timer);
    }
    else if ( cmd->direction == SCHED_INFO_GET )
    {
        /* Report the current per-domain parameters back to the caller. */
        struct bvt_dom_info *inf = BVT_INFO(d);
        params->mcu_adv = inf->mcu_advance;
        params->warpvalue = inf->warp_value;
        params->warpback = inf->warpback;
        params->warpl = inf->warpl;
        params->warpu = inf->warpu;
    }

    return 0;
}
/*
 * The main function
 * - deschedule the current domain.
 * - pick a new domain.
 *   i.e., the domain with lowest EVT.
 *   The runqueue should be ordered by EVT so that is easy.
 */
static struct task_slice bvt_do_schedule(s_time_t now)
{
    struct domain *d;
    struct vcpu *prev = current, *next = NULL, *next_prime, *ed;
    int cpu = prev->processor;
    s32 r_time;     /* time for new dom to run */
    u32 next_evt, next_prime_evt, min_avt;
    struct bvt_dom_info *prev_inf = BVT_INFO(prev->domain);
    struct bvt_vcpu_info *prev_einf = EBVT_INFO(prev);
    struct bvt_vcpu_info *p_einf = NULL;
    struct bvt_vcpu_info *next_einf = NULL;
    struct bvt_vcpu_info *next_prime_einf = NULL;
    struct task_slice ret;

    ASSERT(prev->sched_priv != NULL);
    ASSERT(prev_einf != NULL);
    ASSERT(__task_on_runqueue(prev));

    if ( likely(!is_idle_vcpu(prev)) )
    {
        /* Charge the outgoing VCPU for the time it has just run. */
        prev_einf->avt = calc_avt(prev, now);
        prev_einf->evt = calc_evt(prev, prev_einf->avt);

        if(prev_inf->warpback && prev_inf->warpl > 0)
            stop_timer(&prev_inf->warp_timer);

        /* Requeue at the tail if still runnable, else drop it. */
        __del_from_runqueue(prev);

        if ( vcpu_runnable(prev) )
            __add_to_runqueue_tail(prev);
    }

    /* We should at least have the idle task */
    ASSERT(!list_empty(RUNQUEUE(cpu)));

    /*
     * scan through the run queue and pick the task with the lowest evt
     * *and* the task the second lowest evt.
     * this code is O(n) but we expect n to be small.
     */
    next_einf       = EBVT_INFO(schedule_data[cpu].idle);
    next_prime_einf = NULL;

    next_evt        = ~0U;
    next_prime_evt  = ~0U;
    min_avt         = ~0U;

    list_for_each_entry ( p_einf, RUNQUEUE(cpu), run_list )
    {
        if ( p_einf->evt < next_evt )
        {
            /* New minimum: old minimum becomes the runner-up. */
            next_prime_einf  = next_einf;
            next_prime_evt   = next_evt;
            next_einf        = p_einf;
            next_evt         = p_einf->evt;
        }
        else if ( next_prime_evt == ~0U )
        {
            next_prime_evt   = p_einf->evt;
            next_prime_einf  = p_einf;
        }
        else if ( p_einf->evt < next_prime_evt )
        {
            next_prime_evt   = p_einf->evt;
            next_prime_einf  = p_einf;
        }

        /* Determine system virtual time. */
        if ( p_einf->avt < min_avt )
            min_avt = p_einf->avt;
    }

    /* Re-arm the warp-limit timer for the warped incoming VCPU. */
    if ( next_einf->inf->warp && next_einf->inf->warpl > 0 )
        set_timer(&next_einf->inf->warp_timer, now + next_einf->inf->warpl);

    /* Extract the domain pointers from the dom infos */
    next        = next_einf->vcpu;
    next_prime  = next_prime_einf->vcpu;

    /* Update system virtual time. */
    if ( min_avt != ~0U )
        CPU_SVT(cpu) = min_avt;

    /* check for virtual time overrun on this cpu */
    if ( CPU_SVT(cpu) >= 0xf0000000 )
    {
        ASSERT(!local_irq_is_enabled());

        write_lock(&domlist_lock);

        /* Shift every VCPU on this CPU (and SVT below) down by
         * 0xe0000000 to prevent the u32 virtual times wrapping. */
        for_each_domain ( d )
        {
            for_each_vcpu (d, ed) {
                if ( ed->processor == cpu )
                {
                    p_einf = EBVT_INFO(ed);
                    p_einf->evt -= 0xe0000000;
                    p_einf->avt -= 0xe0000000;
                }
            }
        }

        write_unlock(&domlist_lock);

        CPU_SVT(cpu) -= 0xe0000000;
    }

    /* work out time for next run through scheduler */
    if ( is_idle_vcpu(next) )
    {
        r_time = ctx_allow;
        goto sched_done;
    }

    if ( (next_prime == NULL) || is_idle_vcpu(next_prime) )
    {
        /* We have only one runnable task besides the idle task. */
        r_time = 10 * ctx_allow; /* RN: random constant */
        goto sched_done;
    }

    /*
     * If we are here then we have two runnable tasks.
     * Work out how long 'next' can run till its evt is greater than
     * 'next_prime's evt. Take context switch allowance into account.
     */
    ASSERT(next_prime_einf->evt >= next_einf->evt);

    r_time = ((next_prime_einf->evt - next_einf->evt)/next_einf->inf->mcu_advance)
        + ctx_allow;

    ASSERT(r_time >= ctx_allow);

 sched_done:
    ret.task = next;
    ret.time = r_time;
    return ret;
}
529 static void bvt_dump_runq_el(struct vcpu *p)
530 {
531 struct bvt_vcpu_info *inf = EBVT_INFO(p);
533 printk("mcua=%d ev=0x%08X av=0x%08X ",
534 inf->inf->mcu_advance, inf->evt, inf->avt);
535 }
/* Print the global BVT settings: MCU length and context-switch allowance. */
static void bvt_dump_settings(void)
{
    printk("BVT: mcu=0x%08Xns ctx_allow=0x%08Xns ", (u32)MCU, (s32)ctx_allow );
}
/* Debug aid: dump CPU @i's SVT and every element on its runqueue. */
static void bvt_dump_cpu_state(int i)
{
    struct list_head *queue;
    int loop = 0;
    struct bvt_vcpu_info *vcpu_inf;
    struct vcpu *v;

    printk("svt=0x%08lX ", CPU_SVT(i));

    queue = RUNQUEUE(i);
    printk("QUEUE rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);

    list_for_each_entry ( vcpu_inf, queue, run_list )
    {
        v = vcpu_inf->vcpu;
        /* has=T while the VCPU is actually running on a CPU. */
        printk("%3d: %u has=%c ", loop++, v->domain->domain_id,
               test_bit(_VCPUF_running, &v->vcpu_flags) ? 'T':'F');
        bvt_dump_runq_el(v);
        printk(" l: %p n: %p p: %p\n",
               &vcpu_inf->run_list, vcpu_inf->run_list.next,
               vcpu_inf->run_list.prev);
    }
}
/* BVT scheduler method table, registered with the generic scheduler core. */
struct scheduler sched_bvt_def = {
    .name     = "Borrowed Virtual Time",
    .opt_name = "bvt",
    .sched_id = SCHED_BVT,

    .alloc_task     = bvt_alloc_task,
    .add_task       = bvt_add_task,
    .free_task      = bvt_free_task,
    .do_schedule    = bvt_do_schedule,
    .control        = bvt_ctl,
    .adjdom         = bvt_adjdom,
    .dump_settings  = bvt_dump_settings,
    .dump_cpu_state = bvt_dump_cpu_state,
    .sleep          = bvt_sleep,
    .wake           = bvt_wake,
    .set_affinity   = bvt_set_affinity
};
585 /*
586 * Local variables:
587 * mode: C
588 * c-set-style: "BSD"
589 * c-basic-offset: 4
590 * tab-width: 4
591 * indent-tabs-mode: nil
592 * End:
593 */