ia64/xen-unstable

view xen/common/schedule.c @ 9706:3c05406f5e0a

In some cases, for instance when for some bizarre reason
the tree was checked out of CVS, which doesn't necessarily
store file permissions, mkbuildtree may not be executable.
So run it explicitly via bash.

Signed-Off-By: Horms <horms@verge.net.au>
author kaf24@firebug.cl.cam.ac.uk
date Thu Apr 13 11:24:00 2006 +0100 (2006-04-13)
parents b128f55ca05c
children 3145b215598c
/****************************************************************************
 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
 * (C) 2002-2003 University of Cambridge
 * (C) 2004      - Mark Williamson - Intel Research Cambridge
 ****************************************************************************
 *
 *        File: common/schedule.c
 *      Author: Rolf Neugebauer & Keir Fraser
 *              Updated for generic API by Mark Williamson
 *
 * Description: Generic CPU scheduling code
 *              implements support functionality for the Xen scheduler API.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <xen/trace.h>
#include <xen/mm.h>
#include <xen/guest_access.h>
#include <public/sched.h>
#include <public/sched_ctl.h>

extern void arch_getdomaininfo_ctxt(struct vcpu *,
                                    struct vcpu_guest_context *);
/* opt_sched: scheduler - default to SEDF */
static char opt_sched[10] = "sedf";
string_param("sched", opt_sched);

#define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */

/* Various timer handlers. */
static void s_timer_fn(void *unused);
static void t_timer_fn(void *unused);
static void dom_timer_fn(void *data);
static void poll_timer_fn(void *data);

/* This is global for now so that private implementations can reach it */
struct schedule_data schedule_data[NR_CPUS];

extern struct scheduler sched_bvt_def;
extern struct scheduler sched_sedf_def;
static struct scheduler *schedulers[] = {
    &sched_bvt_def,
    &sched_sedf_def,
    NULL
};

static void __enter_scheduler(void);

static struct scheduler ops;

#define SCHED_OP(fn, ...)                                 \
         (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
          : (typeof(ops.fn(__VA_ARGS__)))0 )
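/*
 * SCHED_OP() is the indirection point between this generic layer and the
 * scheduler selected at boot: SCHED_OP(wake, v) expands to ops.wake(v) when
 * the active scheduler provides a wake hook, and otherwise evaluates to zero
 * of the hook's return type, so optional hooks can simply be left NULL.
 */
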
/* Per-CPU periodic timer sends an event to the currently-executing domain. */
static struct timer t_timer[NR_CPUS];

static inline void vcpu_runstate_change(
    struct vcpu *v, int new_state, s_time_t new_entry_time)
{
    ASSERT(v->runstate.state != new_state);
    ASSERT(spin_is_locked(&schedule_data[v->processor].schedule_lock));

    v->runstate.time[v->runstate.state] +=
        new_entry_time - v->runstate.state_entry_time;
    v->runstate.state_entry_time = new_entry_time;
    v->runstate.state = new_state;
}

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
{
    if ( likely(v == current) )
    {
        /* Fast lock-free path. */
        memcpy(runstate, &v->runstate, sizeof(*runstate));
        ASSERT(runstate->state == RUNSTATE_running);
        runstate->time[RUNSTATE_running] += NOW() - runstate->state_entry_time;
    }
    else
    {
        vcpu_schedule_lock_irq(v);
        memcpy(runstate, &v->runstate, sizeof(*runstate));
        runstate->time[runstate->state] += NOW() - runstate->state_entry_time;
        vcpu_schedule_unlock_irq(v);
    }
}
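/*
 * The lock-free path above appears safe because a running VCPU's runstate
 * is only rewritten by the scheduler on its own CPU (see __enter_scheduler()),
 * which cannot run concurrently with this code when v == current; any other
 * VCPU has to be sampled under its schedule lock.
 */
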
struct domain *alloc_domain(void)
{
    struct domain *d;

    if ( (d = xmalloc(struct domain)) != NULL )
        memset(d, 0, sizeof(*d));

    return d;
}

void free_domain(struct domain *d)
{
    struct vcpu *v;
    int i;

    for_each_vcpu ( d, v )
        sched_rem_domain(v);

    SCHED_OP(free_task, d);

    for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
        if ( (v = d->vcpu[i]) != NULL )
            free_vcpu_struct(v);

    xfree(d);
}

struct vcpu *alloc_vcpu(
    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
{
    struct vcpu *v;

    BUG_ON(d->vcpu[vcpu_id] != NULL);

    if ( (v = alloc_vcpu_struct(d, vcpu_id)) == NULL )
        return NULL;

    v->domain = d;
    v->vcpu_id = vcpu_id;
    v->processor = cpu_id;
    atomic_set(&v->pausecnt, 0);
    v->vcpu_info = &d->shared_info->vcpu_info[vcpu_id];

    v->cpu_affinity = is_idle_domain(d) ?
        cpumask_of_cpu(cpu_id) : CPU_MASK_ALL;

    v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
    v->runstate.state_entry_time = NOW();

    if ( (vcpu_id != 0) && !is_idle_domain(d) )
        set_bit(_VCPUF_down, &v->vcpu_flags);

    if ( SCHED_OP(alloc_task, v) < 0 )
    {
        free_vcpu_struct(v);
        return NULL;
    }

    d->vcpu[vcpu_id] = v;
    if ( vcpu_id != 0 )
        d->vcpu[v->vcpu_id-1]->next_in_list = v;

    sched_add_domain(v);

    return v;
}

void sched_add_domain(struct vcpu *v)
{
    /* Initialise the per-domain timers. */
    init_timer(&v->timer,      dom_timer_fn,  v, v->processor);
    init_timer(&v->poll_timer, poll_timer_fn, v, v->processor);

    if ( is_idle_vcpu(v) )
    {
        schedule_data[v->processor].curr = v;
        schedule_data[v->processor].idle = v;
        set_bit(_VCPUF_running, &v->vcpu_flags);
    }

    SCHED_OP(add_task, v);
    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
}

void sched_rem_domain(struct vcpu *v)
{
    kill_timer(&v->timer);
    kill_timer(&v->poll_timer);

    SCHED_OP(rem_task, v);
    TRACE_2D(TRC_SCHED_DOM_REM, v->domain->domain_id, v->vcpu_id);
}

void vcpu_sleep_nosync(struct vcpu *v)
{
    unsigned long flags;

    vcpu_schedule_lock_irqsave(v, flags);

    if ( likely(!vcpu_runnable(v)) )
    {
        if ( v->runstate.state == RUNSTATE_runnable )
            vcpu_runstate_change(v, RUNSTATE_offline, NOW());

        SCHED_OP(sleep, v);
    }

    vcpu_schedule_unlock_irqrestore(v, flags);

    TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
}

void vcpu_sleep_sync(struct vcpu *v)
{
    vcpu_sleep_nosync(v);

    while ( !vcpu_runnable(v) && test_bit(_VCPUF_running, &v->vcpu_flags) )
        cpu_relax();

    sync_vcpu_execstate(v);
}
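/*
 * vcpu_sleep_nosync() only asks the scheduler to deschedule the VCPU;
 * vcpu_sleep_sync() additionally spins until the VCPU has really stopped
 * running and then syncs its execution state, so on return the VCPU is
 * fully quiesced.
 */
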
void vcpu_wake(struct vcpu *v)
{
    unsigned long flags;

    vcpu_schedule_lock_irqsave(v, flags);

    if ( likely(vcpu_runnable(v)) )
    {
        if ( v->runstate.state >= RUNSTATE_blocked )
            vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
        SCHED_OP(wake, v);
    }
    else if ( !test_bit(_VCPUF_blocked, &v->vcpu_flags) )
    {
        if ( v->runstate.state == RUNSTATE_blocked )
            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
    }

    vcpu_schedule_unlock_irqrestore(v, flags);

    TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
}

int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
    cpumask_t online_affinity;

    cpus_and(online_affinity, *affinity, cpu_online_map);
    if ( cpus_empty(online_affinity) )
        return -EINVAL;

    return SCHED_OP(set_affinity, v, affinity);
}

/* Block the currently-executing domain until a pertinent event occurs. */
static long do_block(void)
{
    struct vcpu *v = current;

    v->vcpu_info->evtchn_upcall_mask = 0;
    set_bit(_VCPUF_blocked, &v->vcpu_flags);

    /* Check for events /after/ blocking: avoids wakeup waiting race. */
    if ( event_pending(v) )
    {
        clear_bit(_VCPUF_blocked, &v->vcpu_flags);
    }
    else
    {
        TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id);
        __enter_scheduler();
    }

    return 0;
}
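/*
 * The ordering above is what closes the wakeup-waiting race: upcalls are
 * re-enabled and _VCPUF_blocked is set before the final event_pending()
 * check, so an event arriving just before the check is noticed here, and
 * one arriving after it wakes the blocked VCPU instead of being lost.
 */
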
static long do_poll(struct sched_poll *sched_poll)
{
    struct vcpu   *v = current;
    evtchn_port_t  port;
    long           rc = 0;
    unsigned int   i;

    /* Fairly arbitrary limit. */
    if ( sched_poll->nr_ports > 128 )
        return -EINVAL;

    if ( !guest_handle_okay(sched_poll->ports, sched_poll->nr_ports) )
        return -EFAULT;

    /* Ensure that upcalls are disabled: tested by evtchn_set_pending(). */
    if ( !v->vcpu_info->evtchn_upcall_mask )
        return -EINVAL;

    set_bit(_VCPUF_blocked, &v->vcpu_flags);

    /* Check for events /after/ blocking: avoids wakeup waiting race. */
    for ( i = 0; i < sched_poll->nr_ports; i++ )
    {
        rc = -EFAULT;
        if ( __copy_from_guest_offset(&port, sched_poll->ports, i, 1) )
            goto out;

        rc = -EINVAL;
        if ( port >= MAX_EVTCHNS )
            goto out;

        rc = 0;
        if ( evtchn_pending(v->domain, port) )
            goto out;
    }

    if ( sched_poll->timeout != 0 )
        set_timer(&v->poll_timer, sched_poll->timeout);

    TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id);
    __enter_scheduler();

    stop_timer(&v->poll_timer);

 out:
    clear_bit(_VCPUF_blocked, &v->vcpu_flags);
    return rc;
}
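/*
 * SCHEDOP_poll therefore behaves much like a select() over event-channel
 * ports: the caller blocks until one of the listed ports is pending, the
 * optional timeout fires via poll_timer, or the request is rejected for a
 * bad port or an unreadable guest buffer.
 */
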
/* Voluntarily yield the processor for this allocation. */
static long do_yield(void)
{
    TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
    __enter_scheduler();
    return 0;
}

long do_sched_op_compat(int cmd, unsigned long arg)
{
    long ret = 0;

    switch ( cmd )
    {
    case SCHEDOP_yield:
    {
        ret = do_yield();
        break;
    }

    case SCHEDOP_block:
    {
        ret = do_block();
        break;
    }

    case SCHEDOP_shutdown:
    {
        TRACE_3D(TRC_SCHED_SHUTDOWN,
                 current->domain->domain_id, current->vcpu_id, arg);
        domain_shutdown(current->domain, (u8)arg);
        break;
    }

    default:
        ret = -ENOSYS;
    }

    return ret;
}
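/*
 * do_sched_op_compat() is the older entry point taking a raw unsigned long
 * argument; do_sched_op() below passes its argument through a guest handle
 * and copy_from_guest(), which is what allows the richer SCHEDOP_poll and
 * SCHEDOP_remote_shutdown operations.
 */
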
long do_sched_op(int cmd, GUEST_HANDLE(void) arg)
{
    long ret = 0;

    switch ( cmd )
    {
    case SCHEDOP_yield:
    {
        ret = do_yield();
        break;
    }

    case SCHEDOP_block:
    {
        ret = do_block();
        break;
    }

    case SCHEDOP_shutdown:
    {
        struct sched_shutdown sched_shutdown;

        ret = -EFAULT;
        if ( copy_from_guest(&sched_shutdown, arg, 1) )
            break;

        ret = 0;
        TRACE_3D(TRC_SCHED_SHUTDOWN,
                 current->domain->domain_id, current->vcpu_id,
                 sched_shutdown.reason);
        domain_shutdown(current->domain, (u8)sched_shutdown.reason);

        break;
    }

    case SCHEDOP_poll:
    {
        struct sched_poll sched_poll;

        ret = -EFAULT;
        if ( copy_from_guest(&sched_poll, arg, 1) )
            break;

        ret = do_poll(&sched_poll);

        break;
    }

    case SCHEDOP_remote_shutdown:
    {
        struct domain *d;
        struct sched_remote_shutdown sched_remote_shutdown;

        if ( !IS_PRIV(current->domain) )
            return -EPERM;

        ret = -EFAULT;
        if ( copy_from_guest(&sched_remote_shutdown, arg, 1) )
            break;

        ret = -ESRCH;
        d = find_domain_by_id(sched_remote_shutdown.domain_id);
        if ( d == NULL )
            break;

        domain_shutdown(d, (u8)sched_remote_shutdown.reason);
        put_domain(d);
        ret = 0;

        break;
    }

    default:
        ret = -ENOSYS;
    }

    return ret;
}

/* Per-domain one-shot-timer hypercall. */
long do_set_timer_op(s_time_t timeout)
{
    struct vcpu *v = current;

    if ( timeout == 0 )
        stop_timer(&v->timer);
    else
        set_timer(&v->timer, timeout);

    return 0;
}

/* sched_id - fetch ID of current scheduler */
int sched_id(void)
{
    return ops.sched_id;
}

long sched_ctl(struct sched_ctl_cmd *cmd)
{
    if ( cmd->sched_id != ops.sched_id )
        return -EINVAL;

    SCHED_OP(control, cmd);
    TRACE_0D(TRC_SCHED_CTL);
    return 0;
}

/* Adjust scheduling parameter for a given domain. */
long sched_adjdom(struct sched_adjdom_cmd *cmd)
{
    struct domain *d;
    struct vcpu *v;

    if ( (cmd->sched_id != ops.sched_id) ||
         ((cmd->direction != SCHED_INFO_PUT) &&
          (cmd->direction != SCHED_INFO_GET)) )
        return -EINVAL;

    d = find_domain_by_id(cmd->domain);
    if ( d == NULL )
        return -ESRCH;

    /*
     * Most VCPUs we can simply pause. If we are adjusting this VCPU then
     * we acquire the local schedule_lock to guard against concurrent updates.
     *
     * We only acquire the local schedule lock after we have paused all other
     * VCPUs in this domain. There are two reasons for this:
     * 1- We don't want to hold up interrupts as pausing a VCPU can
     *    trigger a tlb shootdown.
     * 2- Pausing other VCPUs involves briefly locking the schedule
     *    lock of the CPU they are running on. This CPU could be the
     *    same as ours.
     */

    for_each_vcpu ( d, v )
    {
        if ( v != current )
            vcpu_pause(v);
    }

    if ( d == current->domain )
        vcpu_schedule_lock_irq(current);

    SCHED_OP(adjdom, d, cmd);
    TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);

    if ( d == current->domain )
        vcpu_schedule_unlock_irq(current);

    for_each_vcpu ( d, v )
    {
        if ( v != current )
            vcpu_unpause(v);
    }

    put_domain(d);

    return 0;
}
/*
 * The main function
 * - deschedule the current domain (scheduler independent).
 * - pick a new domain (scheduler dependent).
 */
static void __enter_scheduler(void)
{
    struct vcpu       *prev = current, *next = NULL;
    int                cpu = smp_processor_id();
    s_time_t           now = NOW();
    struct task_slice  next_slice;
    s32                r_time;     /* time for new dom to run */

    ASSERT(!in_irq());

    perfc_incrc(sched_run);

    spin_lock_irq(&schedule_data[cpu].schedule_lock);

    stop_timer(&schedule_data[cpu].s_timer);

    /* get policy-specific decision on scheduling... */
    next_slice = ops.do_schedule(now);

    r_time = next_slice.time;
    next = next_slice.task;

    schedule_data[cpu].curr = next;

    set_timer(&schedule_data[cpu].s_timer, now + r_time);

    if ( unlikely(prev == next) )
    {
        spin_unlock_irq(&schedule_data[cpu].schedule_lock);
        return continue_running(prev);
    }

    TRACE_2D(TRC_SCHED_SWITCH_INFPREV,
             prev->domain->domain_id,
             now - prev->runstate.state_entry_time);
    TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
             next->domain->domain_id,
             (next->runstate.state == RUNSTATE_runnable) ?
             (now - next->runstate.state_entry_time) : 0,
             r_time);

    ASSERT(prev->runstate.state == RUNSTATE_running);
    vcpu_runstate_change(
        prev,
        (test_bit(_VCPUF_blocked, &prev->vcpu_flags) ? RUNSTATE_blocked :
         (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
        now);

    ASSERT(next->runstate.state != RUNSTATE_running);
    vcpu_runstate_change(next, RUNSTATE_running, now);

    ASSERT(!test_bit(_VCPUF_running, &next->vcpu_flags));
    set_bit(_VCPUF_running, &next->vcpu_flags);

    spin_unlock_irq(&schedule_data[cpu].schedule_lock);

    perfc_incrc(sched_ctx);

    prev->sleep_tick = schedule_data[cpu].tick;

    /* Ensure that the domain has an up-to-date time base. */
    if ( !is_idle_vcpu(next) )
    {
        update_vcpu_system_time(next);
        if ( next->sleep_tick != schedule_data[cpu].tick )
            send_timer_event(next);
    }

    TRACE_4D(TRC_SCHED_SWITCH,
             prev->domain->domain_id, prev->vcpu_id,
             next->domain->domain_id, next->vcpu_id);

    context_switch(prev, next);
}
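/*
 * __enter_scheduler() runs from SCHEDULE_SOFTIRQ context (see
 * scheduler_init() below): it asks the active scheduler for the next task
 * and its time slice, re-arms the per-CPU s_timer, updates runstate
 * accounting for both VCPUs under the schedule lock, and hands over to
 * context_switch(); if the same VCPU is chosen again it simply continues
 * running without a context switch.
 */
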
/****************************************************************************
 * Timers: the scheduler utilises a number of timers
 * - s_timer: per CPU timer for preemption and scheduling decisions
 * - t_timer: per CPU periodic timer to send timer interrupt to current dom
 * - dom_timer: per domain timer to specify timeout values
 ****************************************************************************/

/* The scheduler timer: force a run through the scheduler */
static void s_timer_fn(void *unused)
{
    raise_softirq(SCHEDULE_SOFTIRQ);
    perfc_incrc(sched_irq);
}

/* Periodic tick timer: send timer event to current domain */
static void t_timer_fn(void *unused)
{
    struct vcpu  *v   = current;
    unsigned int  cpu = smp_processor_id();

    schedule_data[cpu].tick++;

    if ( !is_idle_vcpu(v) )
    {
        update_vcpu_system_time(v);
        send_timer_event(v);
    }

    page_scrub_schedule_work();

    set_timer(&t_timer[cpu], NOW() + MILLISECS(10));
}

/* Domain timer function, sends a virtual timer interrupt to domain */
static void dom_timer_fn(void *data)
{
    struct vcpu *v = data;

    update_vcpu_system_time(v);
    send_timer_event(v);
}

/* SCHEDOP_poll timeout callback. */
static void poll_timer_fn(void *data)
{
    struct vcpu *v = data;
    vcpu_unblock(v);
}
/* Initialise the data structures. */
void __init scheduler_init(void)
{
    int i, rc;

    open_softirq(SCHEDULE_SOFTIRQ, __enter_scheduler);

    for ( i = 0; i < NR_CPUS; i++ )
    {
        spin_lock_init(&schedule_data[i].schedule_lock);
        init_timer(&schedule_data[i].s_timer, s_timer_fn, NULL, i);
        init_timer(&t_timer[i], t_timer_fn, NULL, i);
    }

    for ( i = 0; schedulers[i] != NULL; i++ )
    {
        ops = *schedulers[i];
        if ( strcmp(ops.opt_name, opt_sched) == 0 )
            break;
    }

    if ( schedulers[i] == NULL )
        printk("Could not find scheduler: %s\n", opt_sched);

    printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);

    if ( idle_vcpu[0] != NULL )
    {
        schedule_data[0].curr = idle_vcpu[0];
        schedule_data[0].idle = idle_vcpu[0];

        rc = SCHED_OP(alloc_task, idle_vcpu[0]);
        BUG_ON(rc < 0);

        sched_add_domain(idle_vcpu[0]);
    }
}
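/*
 * Scheduler selection: the "sched=" boot parameter is matched against each
 * entry's opt_name.  If nothing matches, ops is left holding the last
 * scheduler in the list, so boot continues with that scheduler after the
 * warning rather than failing.
 */
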
/*
 * Start a scheduler for each CPU
 * This has to be done *after* the timers, e.g., APICs, have been initialised
 */
void schedulers_start(void)
{
    t_timer_fn(0);
    smp_call_function((void *)t_timer_fn, NULL, 1, 1);
}
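/*
 * schedulers_start() arms the periodic tick everywhere: t_timer_fn() runs
 * directly on the boot CPU and via smp_call_function() on the others, and
 * each invocation re-arms its own t_timer 10ms into the future.
 */
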
void dump_runq(unsigned char key)
{
    s_time_t      now = NOW();
    int           i;
    unsigned long flags;

    local_irq_save(flags);

    printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
    SCHED_OP(dump_settings);
    printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);

    for_each_online_cpu ( i )
    {
        spin_lock(&schedule_data[i].schedule_lock);
        printk("CPU[%02d] ", i);
        SCHED_OP(dump_cpu_state,i);
        spin_unlock(&schedule_data[i].schedule_lock);
    }

    local_irq_restore(flags);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */