ia64/xen-unstable

view xen/common/schedule.c @ 18594:5e4e234d58be

x86: Define __per_cpu_shift label to help kdump/crashdump.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Oct 08 13:11:06 2008 +0100 (2008-10-08)
parents 366c78ff361b
children dc61548aa479
/****************************************************************************
 * (C) 2002-2003 - Rolf Neugebauer - Intel Research Cambridge
 * (C) 2002-2003 University of Cambridge
 * (C) 2004      - Mark Williamson - Intel Research Cambridge
 ****************************************************************************
 *
 *        File: common/schedule.c
 *      Author: Rolf Neugebauer & Keir Fraser
 *              Updated for generic API by Mark Williamson
 *
 * Description: Generic CPU scheduling code
 *              implements support functionality for the Xen scheduler API.
 *
 */
#ifndef COMPAT
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/domain.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/perfc.h>
#include <xen/sched-if.h>
#include <xen/softirq.h>
#include <xen/trace.h>
#include <xen/mm.h>
#include <xen/errno.h>
#include <xen/guest_access.h>
#include <xen/multicall.h>
#include <public/sched.h>
#include <xsm/xsm.h>
/* opt_sched: scheduler - default to credit */
static char opt_sched[10] = "credit";
string_param("sched", opt_sched);

#define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */

/* Various timer handlers. */
static void s_timer_fn(void *unused);
static void vcpu_periodic_timer_fn(void *data);
static void vcpu_singleshot_timer_fn(void *data);
static void poll_timer_fn(void *data);

/* This is global for now so that private implementations can reach it */
DEFINE_PER_CPU(struct schedule_data, schedule_data);

extern struct scheduler sched_sedf_def;
extern struct scheduler sched_credit_def;
static struct scheduler *schedulers[] = {
    &sched_sedf_def,
    &sched_credit_def,
    NULL
};

static struct scheduler ops;

#define SCHED_OP(fn, ...)                                 \
         (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
          : (typeof(ops.fn(__VA_ARGS__)))0 )
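
/*
 * Note: SCHED_OP() dispatches into whichever scheduler was copied into 'ops'
 * at boot via the "sched=" option (see scheduler_init() below). If the
 * selected scheduler leaves a hook NULL, the macro evaluates to a zero of
 * the hook's return type instead of calling through a NULL pointer.
 */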
static inline void trace_runstate_change(struct vcpu *v, int new_state)
{
    struct { uint32_t vcpu:16, domain:16; } d;
    uint32_t event;

    if ( likely(!tb_init_done) )
        return;

    d.vcpu = v->vcpu_id;
    d.domain = v->domain->domain_id;

    event = TRC_SCHED_RUNSTATE_CHANGE;
    event |= ( v->runstate.state & 0x3 ) << 8;
    event |= ( new_state & 0x3 ) << 4;

    __trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char *)&d);
}

static inline void vcpu_runstate_change(
    struct vcpu *v, int new_state, s_time_t new_entry_time)
{
    ASSERT(v->runstate.state != new_state);
    ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));

    trace_runstate_change(v, new_state);

    v->runstate.time[v->runstate.state] +=
        new_entry_time - v->runstate.state_entry_time;
    v->runstate.state_entry_time = new_entry_time;
    v->runstate.state = new_state;
}
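
/*
 * Runstate bookkeeping: time[] accumulates nanoseconds spent in each of the
 * four states (running, runnable, blocked, offline). Transitions must occur
 * under the per-CPU schedule_lock, as asserted above; vcpu_runstate_get()
 * below reads the record lock-free only for the currently-running VCPU.
 */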
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
{
    if ( likely(v == current) )
    {
        /* Fast lock-free path. */
        memcpy(runstate, &v->runstate, sizeof(*runstate));
        ASSERT(runstate->state == RUNSTATE_running);
        runstate->time[RUNSTATE_running] += NOW() - runstate->state_entry_time;
    }
    else
    {
        vcpu_schedule_lock_irq(v);
        memcpy(runstate, &v->runstate, sizeof(*runstate));
        runstate->time[runstate->state] += NOW() - runstate->state_entry_time;
        vcpu_schedule_unlock_irq(v);
    }
}
int sched_init_vcpu(struct vcpu *v, unsigned int processor)
{
    struct domain *d = v->domain;

    /*
     * Initialize processor and affinity settings. The idler, and potentially
     * domain-0 VCPUs, are pinned onto their respective physical CPUs.
     */
    v->processor = processor;
    if ( is_idle_domain(d) || d->is_pinned )
        v->cpu_affinity = cpumask_of_cpu(processor);
    else
        cpus_setall(v->cpu_affinity);

    /* Initialise the per-vcpu timers. */
    init_timer(&v->periodic_timer, vcpu_periodic_timer_fn,
               v, v->processor);
    init_timer(&v->singleshot_timer, vcpu_singleshot_timer_fn,
               v, v->processor);
    init_timer(&v->poll_timer, poll_timer_fn,
               v, v->processor);

    /* Idle VCPUs are scheduled immediately. */
    if ( is_idle_domain(d) )
    {
        per_cpu(schedule_data, v->processor).curr = v;
        per_cpu(schedule_data, v->processor).idle = v;
        v->is_running = 1;
    }

    TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);

    return SCHED_OP(init_vcpu, v);
}

void sched_destroy_vcpu(struct vcpu *v)
{
    kill_timer(&v->periodic_timer);
    kill_timer(&v->singleshot_timer);
    kill_timer(&v->poll_timer);
    SCHED_OP(destroy_vcpu, v);
}

int sched_init_domain(struct domain *d)
{
    return SCHED_OP(init_domain, d);
}

void sched_destroy_domain(struct domain *d)
{
    SCHED_OP(destroy_domain, d);
}
void vcpu_sleep_nosync(struct vcpu *v)
{
    unsigned long flags;

    vcpu_schedule_lock_irqsave(v, flags);

    if ( likely(!vcpu_runnable(v)) )
    {
        if ( v->runstate.state == RUNSTATE_runnable )
            vcpu_runstate_change(v, RUNSTATE_offline, NOW());

        SCHED_OP(sleep, v);
    }

    vcpu_schedule_unlock_irqrestore(v, flags);

    TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
}

void vcpu_sleep_sync(struct vcpu *v)
{
    vcpu_sleep_nosync(v);

    while ( !vcpu_runnable(v) && v->is_running )
        cpu_relax();

    sync_vcpu_execstate(v);
}
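
/*
 * The _nosync variant merely marks the VCPU asleep in the scheduler; the
 * _sync variant additionally spins until the VCPU is no longer running on
 * any CPU (or has become runnable again) and then synchronises its saved
 * execution state.
 */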
void vcpu_wake(struct vcpu *v)
{
    unsigned long flags;

    vcpu_schedule_lock_irqsave(v, flags);

    if ( likely(vcpu_runnable(v)) )
    {
        if ( v->runstate.state >= RUNSTATE_blocked )
            vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
        SCHED_OP(wake, v);
    }
    else if ( !test_bit(_VPF_blocked, &v->pause_flags) )
    {
        if ( v->runstate.state == RUNSTATE_blocked )
            vcpu_runstate_change(v, RUNSTATE_offline, NOW());
    }

    vcpu_schedule_unlock_irqrestore(v, flags);

    TRACE_2D(TRC_SCHED_WAKE, v->domain->domain_id, v->vcpu_id);
}

void vcpu_unblock(struct vcpu *v)
{
    if ( !test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
        return;

    /* Polling period ends when a VCPU is unblocked. */
    if ( unlikely(v->poll_evtchn != 0) )
    {
        v->poll_evtchn = 0;
        /*
         * We *must* re-clear _VPF_blocked to avoid racing other wakeups of
         * this VCPU (and it then going back to sleep on poll_mask).
         * Test-and-clear is used here so that the clear_bit() below runs at
         * most once and cannot be reordered ahead of the poll_mask update.
         */
        if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
            clear_bit(_VPF_blocked, &v->pause_flags);
    }

    vcpu_wake(v);
}
static void vcpu_migrate(struct vcpu *v)
{
    unsigned long flags;
    int old_cpu;

    vcpu_schedule_lock_irqsave(v, flags);

    /*
     * NB. Check of v->running happens /after/ setting migration flag
     * because they both happen in (different) spinlock regions, and those
     * regions are strictly serialised.
     */
    if ( v->is_running ||
         !test_and_clear_bit(_VPF_migrating, &v->pause_flags) )
    {
        vcpu_schedule_unlock_irqrestore(v, flags);
        return;
    }

    /* Switch to new CPU, then unlock old CPU. */
    old_cpu = v->processor;
    v->processor = SCHED_OP(pick_cpu, v);
    spin_unlock_irqrestore(
        &per_cpu(schedule_data, old_cpu).schedule_lock, flags);

    /* Wake on new CPU. */
    vcpu_wake(v);
}
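
/*
 * Migration protocol: callers (affinity changes, forced reschedule, CPU
 * offlining, context_saved) set _VPF_migrating and put the VCPU to sleep;
 * vcpu_migrate() then re-assigns v->processor under the old CPU's
 * schedule_lock before waking the VCPU so it is enqueued on the new CPU.
 */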
/*
 * Force a VCPU through a deschedule/reschedule path.
 * For example, using this when setting the periodic timer period means that
 * most periodic-timer state need only be touched from within the scheduler
 * which can thus be done without need for synchronisation.
 */
void vcpu_force_reschedule(struct vcpu *v)
{
    vcpu_schedule_lock_irq(v);
    if ( v->is_running )
        set_bit(_VPF_migrating, &v->pause_flags);
    vcpu_schedule_unlock_irq(v);

    if ( test_bit(_VPF_migrating, &v->pause_flags) )
    {
        vcpu_sleep_nosync(v);
        vcpu_migrate(v);
    }
}
/*
 * This function is used by cpu_hotplug code from stop_machine context.
 * Hence we can avoid needing to take the schedule_lock.
 */
void cpu_disable_scheduler(void)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int cpu = smp_processor_id();

    for_each_domain ( d )
    {
        for_each_vcpu ( d, v )
        {
            if ( is_idle_vcpu(v) )
                continue;

            if ( (cpus_weight(v->cpu_affinity) == 1) &&
                 cpu_isset(cpu, v->cpu_affinity) )
            {
                printk("Breaking vcpu affinity for domain %d vcpu %d\n",
                       v->domain->domain_id, v->vcpu_id);
                cpus_setall(v->cpu_affinity);
            }

            /*
             * Migrate single-shot timers to CPU0. A new cpu will automatically
             * be chosen when the timer is next re-set.
             */
            if ( v->singleshot_timer.cpu == cpu )
                migrate_timer(&v->singleshot_timer, 0);

            if ( v->processor == cpu )
            {
                set_bit(_VPF_migrating, &v->pause_flags);
                vcpu_sleep_nosync(v);
                vcpu_migrate(v);
            }
        }
    }
}
static int __vcpu_set_affinity(
    struct vcpu *v, cpumask_t *affinity,
    bool_t old_lock_status, bool_t new_lock_status)
{
    cpumask_t online_affinity, old_affinity;

    cpus_and(online_affinity, *affinity, cpu_online_map);
    if ( cpus_empty(online_affinity) )
        return -EINVAL;

    vcpu_schedule_lock_irq(v);

    if ( v->affinity_locked != old_lock_status )
    {
        BUG_ON(!v->affinity_locked);
        vcpu_schedule_unlock_irq(v);
        return -EBUSY;
    }

    v->affinity_locked = new_lock_status;

    old_affinity = v->cpu_affinity;
    v->cpu_affinity = *affinity;
    *affinity = old_affinity;
    if ( !cpu_isset(v->processor, v->cpu_affinity) )
        set_bit(_VPF_migrating, &v->pause_flags);

    vcpu_schedule_unlock_irq(v);

    if ( test_bit(_VPF_migrating, &v->pause_flags) )
    {
        vcpu_sleep_nosync(v);
        vcpu_migrate(v);
    }

    return 0;
}

int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
    if ( v->domain->is_pinned )
        return -EINVAL;
    return __vcpu_set_affinity(v, affinity, 0, 0);
}

int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
{
    return __vcpu_set_affinity(v, affinity, 0, 1);
}

int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
{
    return __vcpu_set_affinity(v, affinity, 1, 1);
}

void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
{
    cpumask_t online_affinity;

    /* Do not fail if no CPU in old affinity mask is online. */
    cpus_and(online_affinity, *affinity, cpu_online_map);
    if ( cpus_empty(online_affinity) )
        *affinity = cpu_online_map;

    if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
        BUG();
}
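
/*
 * The (old_lock_status, new_lock_status) pairs passed above encode the
 * allowed transitions: (0,0) plain affinity update, (0,1) take the affinity
 * lock, (1,1) update while locked, (1,0) release the lock. A mismatch with
 * the VCPU's current lock status yields -EBUSY.
 */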
/* Block the currently-executing domain until a pertinent event occurs. */
static long do_block(void)
{
    struct vcpu *v = current;

    local_event_delivery_enable();
    set_bit(_VPF_blocked, &v->pause_flags);

    /* Check for events /after/ blocking: avoids wakeup waiting race. */
    if ( local_events_need_delivery() )
    {
        clear_bit(_VPF_blocked, &v->pause_flags);
    }
    else
    {
        TRACE_2D(TRC_SCHED_BLOCK, v->domain->domain_id, v->vcpu_id);
        raise_softirq(SCHEDULE_SOFTIRQ);
    }

    return 0;
}
static long do_poll(struct sched_poll *sched_poll)
{
    struct vcpu *v = current;
    struct domain *d = v->domain;
    evtchn_port_t port;
    long rc;
    unsigned int i;

    /* Fairly arbitrary limit. */
    if ( sched_poll->nr_ports > 128 )
        return -EINVAL;

    if ( !guest_handle_okay(sched_poll->ports, sched_poll->nr_ports) )
        return -EFAULT;

    set_bit(_VPF_blocked, &v->pause_flags);
    v->poll_evtchn = -1;
    set_bit(v->vcpu_id, d->poll_mask);

#ifndef CONFIG_X86 /* set_bit() implies mb() on x86 */
    /* Check for events /after/ setting flags: avoids wakeup waiting race. */
    smp_mb();

    /*
     * Someone may have seen we are blocked but not that we are polling, or
     * vice versa. We are certainly being woken, so clean up and bail. Beyond
     * this point others can be guaranteed to clean up for us if they wake us.
     */
    rc = 0;
    if ( (v->poll_evtchn == 0) ||
         !test_bit(_VPF_blocked, &v->pause_flags) ||
         !test_bit(v->vcpu_id, d->poll_mask) )
        goto out;
#endif

    for ( i = 0; i < sched_poll->nr_ports; i++ )
    {
        rc = -EFAULT;
        if ( __copy_from_guest_offset(&port, sched_poll->ports, i, 1) )
            goto out;

        rc = -EINVAL;
        if ( port >= MAX_EVTCHNS(d) )
            goto out;

        rc = 0;
        if ( test_bit(port, &shared_info(d, evtchn_pending)) )
            goto out;
    }

    if ( sched_poll->nr_ports == 1 )
        v->poll_evtchn = port;

    if ( sched_poll->timeout != 0 )
        set_timer(&v->poll_timer, sched_poll->timeout);

    TRACE_2D(TRC_SCHED_BLOCK, d->domain_id, v->vcpu_id);
    raise_softirq(SCHEDULE_SOFTIRQ);

    return 0;

 out:
    v->poll_evtchn = 0;
    clear_bit(v->vcpu_id, d->poll_mask);
    clear_bit(_VPF_blocked, &v->pause_flags);
    return rc;
}
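
/*
 * Poll protocol: the VCPU blocks with its bit set in d->poll_mask; it is
 * woken either by poll_timer_fn() when the optional timeout fires or by
 * the event-channel code (via vcpu_unblock) when a polled port becomes
 * pending. For the common single-port case, poll_evtchn caches the port
 * being polled.
 */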
/* Voluntarily yield the processor for this allocation. */
static long do_yield(void)
{
    TRACE_2D(TRC_SCHED_YIELD, current->domain->domain_id, current->vcpu_id);
    raise_softirq(SCHEDULE_SOFTIRQ);
    return 0;
}

long do_sched_op_compat(int cmd, unsigned long arg)
{
    long ret = 0;

    switch ( cmd )
    {
    case SCHEDOP_yield:
    {
        ret = do_yield();
        break;
    }

    case SCHEDOP_block:
    {
        ret = do_block();
        break;
    }

    case SCHEDOP_shutdown:
    {
        TRACE_3D(TRC_SCHED_SHUTDOWN,
                 current->domain->domain_id, current->vcpu_id, arg);
        domain_shutdown(current->domain, (u8)arg);
        break;
    }

    default:
        ret = -ENOSYS;
    }

    return ret;
}
typedef long ret_t;

#endif /* !COMPAT */
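
/*
 * do_sched_op() below sits outside the !COMPAT region: when CONFIG_COMPAT
 * is enabled it is compiled a second time via the #include of
 * compat/schedule.c at the end of this file, with COMPAT defined and ret_t
 * sized for 32-bit guests on a 64-bit hypervisor.
 */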
ret_t do_sched_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
    ret_t ret = 0;

    switch ( cmd )
    {
    case SCHEDOP_yield:
    {
        ret = do_yield();
        break;
    }

    case SCHEDOP_block:
    {
        ret = do_block();
        break;
    }

    case SCHEDOP_shutdown:
    {
        struct sched_shutdown sched_shutdown;

        ret = -EFAULT;
        if ( copy_from_guest(&sched_shutdown, arg, 1) )
            break;

        ret = 0;
        TRACE_3D(TRC_SCHED_SHUTDOWN,
                 current->domain->domain_id, current->vcpu_id,
                 sched_shutdown.reason);
        domain_shutdown(current->domain, (u8)sched_shutdown.reason);

        break;
    }

    case SCHEDOP_poll:
    {
        struct sched_poll sched_poll;

        ret = -EFAULT;
        if ( copy_from_guest(&sched_poll, arg, 1) )
            break;

        ret = do_poll(&sched_poll);

        break;
    }

    case SCHEDOP_remote_shutdown:
    {
        struct domain *d;
        struct sched_remote_shutdown sched_remote_shutdown;

        ret = -EFAULT;
        if ( copy_from_guest(&sched_remote_shutdown, arg, 1) )
            break;

        ret = -ESRCH;
        d = rcu_lock_domain_by_id(sched_remote_shutdown.domain_id);
        if ( d == NULL )
            break;

        if ( !IS_PRIV_FOR(current->domain, d) )
        {
            rcu_unlock_domain(d);
            return -EPERM;
        }

        ret = xsm_schedop_shutdown(current->domain, d);
        if ( ret )
        {
            rcu_unlock_domain(d);
            return ret;
        }

        domain_shutdown(d, (u8)sched_remote_shutdown.reason);

        rcu_unlock_domain(d);
        ret = 0;

        break;
    }

    default:
        ret = -ENOSYS;
    }

    return ret;
}
#ifndef COMPAT

/* Per-vcpu oneshot-timer hypercall. */
long do_set_timer_op(s_time_t timeout)
{
    struct vcpu *v = current;
    s_time_t offset = timeout - NOW();

    if ( timeout == 0 )
    {
        stop_timer(&v->singleshot_timer);
    }
    else if ( unlikely(timeout < 0) || /* overflow into 64th bit? */
              unlikely((offset > 0) && ((uint32_t)(offset >> 50) != 0)) )
    {
        /*
         * Linux workaround: occasionally we will see timeouts a long way in
         * the future due to wrapping in Linux's jiffy time handling. We check
         * for timeouts wrapped negative, and for positive timeouts more than
         * about 13 days in the future (2^50ns). The correct fix is to trigger
         * an interrupt immediately (since Linux in fact has pending work to
         * do in this situation). However, older guests also set a long timeout
         * when they have *no* pending timers at all: setting an immediate
         * timeout in this case can burn a lot of CPU. We therefore go for a
         * reasonable middleground of triggering a timer event in 100ms.
         */
        gdprintk(XENLOG_INFO,
                 "Warning: huge timeout set by vcpu %d: %"PRIx64"\n",
                 v->vcpu_id, (uint64_t)timeout);
        set_timer(&v->singleshot_timer, NOW() + MILLISECS(100));
    }
    else
    {
        if ( v->singleshot_timer.cpu != smp_processor_id() )
        {
            stop_timer(&v->singleshot_timer);
            v->singleshot_timer.cpu = smp_processor_id();
        }

        set_timer(&v->singleshot_timer, timeout);
    }

    return 0;
}
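
/*
 * For reference, the cutoff used above works out as: 2^50 ns ~= 1.13e15 ns
 * ~= 1.13e6 s ~= 13.0 days, hence the "about 13 days" figure quoted in the
 * comment.
 */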
/* sched_id - fetch ID of current scheduler */
int sched_id(void)
{
    return ops.sched_id;
}

/* Adjust scheduling parameter for a given domain. */
long sched_adjust(struct domain *d, struct xen_domctl_scheduler_op *op)
{
    struct vcpu *v;
    long ret;

    if ( (op->sched_id != ops.sched_id) ||
         ((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
          (op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
        return -EINVAL;

    /*
     * Most VCPUs we can simply pause. If we are adjusting this VCPU then
     * we acquire the local schedule_lock to guard against concurrent updates.
     *
     * We only acquire the local schedule lock after we have paused all other
     * VCPUs in this domain. There are two reasons for this:
     * 1- We don't want to hold up interrupts as pausing a VCPU can
     *    trigger a tlb shootdown.
     * 2- Pausing other VCPUs involves briefly locking the schedule
     *    lock of the CPU they are running on. This CPU could be the
     *    same as ours.
     */

    for_each_vcpu ( d, v )
    {
        if ( v != current )
            vcpu_pause(v);
    }

    if ( d == current->domain )
        vcpu_schedule_lock_irq(current);

    if ( (ret = SCHED_OP(adjust, d, op)) == 0 )
        TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);

    if ( d == current->domain )
        vcpu_schedule_unlock_irq(current);

    for_each_vcpu ( d, v )
    {
        if ( v != current )
            vcpu_unpause(v);
    }

    return ret;
}
static void vcpu_periodic_timer_work(struct vcpu *v)
{
    s_time_t now = NOW();
    uint64_t periodic_next_event;

    ASSERT(!active_timer(&v->periodic_timer));

    if ( v->periodic_period == 0 )
        return;

    periodic_next_event = v->periodic_last_event + v->periodic_period;

    /* The timer subsystem may call us up to TIME_SLOP ahead of deadline. */
    if ( (now + TIME_SLOP) > periodic_next_event )
    {
        send_timer_event(v);
        v->periodic_last_event = now;
        periodic_next_event = now + v->periodic_period;
    }

    v->periodic_timer.cpu = smp_processor_id();
    set_timer(&v->periodic_timer, periodic_next_event);
}
/*
 * The main function
 * - deschedule the current domain (scheduler independent).
 * - pick a new domain (scheduler dependent).
 */
static void schedule(void)
{
    struct vcpu          *prev = current, *next = NULL;
    s_time_t              now = NOW();
    struct schedule_data *sd;
    struct task_slice     next_slice;
    s32                   r_time;     /* time for new dom to run */

    ASSERT(!in_irq());
    ASSERT(this_cpu(mc_state).flags == 0);

    perfc_incr(sched_run);

    sd = &this_cpu(schedule_data);

    spin_lock_irq(&sd->schedule_lock);

    stop_timer(&sd->s_timer);

    /* get policy-specific decision on scheduling... */
    next_slice = ops.do_schedule(now);

    r_time = next_slice.time;
    next = next_slice.task;

    sd->curr = next;

    set_timer(&sd->s_timer, now + r_time);

    if ( unlikely(prev == next) )
    {
        spin_unlock_irq(&sd->schedule_lock);
        return continue_running(prev);
    }

    TRACE_2D(TRC_SCHED_SWITCH_INFPREV,
             prev->domain->domain_id,
             now - prev->runstate.state_entry_time);
    TRACE_3D(TRC_SCHED_SWITCH_INFNEXT,
             next->domain->domain_id,
             (next->runstate.state == RUNSTATE_runnable) ?
             (now - next->runstate.state_entry_time) : 0,
             r_time);

    ASSERT(prev->runstate.state == RUNSTATE_running);
    vcpu_runstate_change(
        prev,
        (test_bit(_VPF_blocked, &prev->pause_flags) ? RUNSTATE_blocked :
         (vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
        now);

    ASSERT(next->runstate.state != RUNSTATE_running);
    vcpu_runstate_change(next, RUNSTATE_running, now);

    ASSERT(!next->is_running);
    next->is_running = 1;

    spin_unlock_irq(&sd->schedule_lock);

    perfc_incr(sched_ctx);

    stop_timer(&prev->periodic_timer);

    /* Ensure that the domain has an up-to-date time base. */
    update_vcpu_system_time(next);
    vcpu_periodic_timer_work(next);

    TRACE_4D(TRC_SCHED_SWITCH,
             prev->domain->domain_id, prev->vcpu_id,
             next->domain->domain_id, next->vcpu_id);

    context_switch(prev, next);
}

void context_saved(struct vcpu *prev)
{
    /* Clear running flag /after/ writing context to memory. */
    smp_wmb();

    prev->is_running = 0;

    /* Check for migration request /after/ clearing running flag. */
    smp_mb();

    if ( unlikely(test_bit(_VPF_migrating, &prev->pause_flags)) )
        vcpu_migrate(prev);
}
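
/*
 * Scheduling flow: do_yield(), do_block(), do_poll() and the per-CPU
 * scheduler timer (s_timer_fn below) raise SCHEDULE_SOFTIRQ; schedule() is
 * the softirq handler (registered in scheduler_init()). It asks the policy
 * (ops.do_schedule) for the next VCPU and timeslice, re-arms s_timer, and
 * context-switches. context_saved() then runs for the outgoing VCPU, clears
 * is_running, and completes any migration requested in the meantime.
 */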
/* The scheduler timer: force a run through the scheduler */
static void s_timer_fn(void *unused)
{
    raise_softirq(SCHEDULE_SOFTIRQ);
    perfc_incr(sched_irq);
}

/* Per-VCPU periodic timer function: sends a virtual timer interrupt. */
static void vcpu_periodic_timer_fn(void *data)
{
    struct vcpu *v = data;
    vcpu_periodic_timer_work(v);
}

/* Per-VCPU single-shot timer function: sends a virtual timer interrupt. */
static void vcpu_singleshot_timer_fn(void *data)
{
    struct vcpu *v = data;
    send_timer_event(v);
}

/* SCHEDOP_poll timeout callback. */
static void poll_timer_fn(void *data)
{
    struct vcpu *v = data;

    if ( test_and_clear_bit(v->vcpu_id, v->domain->poll_mask) )
        vcpu_unblock(v);
}
/* Initialise the data structures. */
void __init scheduler_init(void)
{
    int i;

    open_softirq(SCHEDULE_SOFTIRQ, schedule);

    for_each_cpu ( i )
    {
        spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
        init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
    }

    for ( i = 0; schedulers[i] != NULL; i++ )
    {
        ops = *schedulers[i];
        if ( strcmp(ops.opt_name, opt_sched) == 0 )
            break;
    }

    if ( schedulers[i] == NULL )
        printk("Could not find scheduler: %s\n", opt_sched);

    printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
    SCHED_OP(init);
}
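
/*
 * Note on scheduler selection: 'ops' is assigned before the name comparison,
 * so if the "sched=" option names no known scheduler the loop exits with the
 * last entry of schedulers[] still installed, after printing the "Could not
 * find scheduler" message.
 */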
void dump_runq(unsigned char key)
{
    s_time_t      now = NOW();
    int           i;
    unsigned long flags;

    local_irq_save(flags);

    printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
    SCHED_OP(dump_settings);
    printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);

    for_each_online_cpu ( i )
    {
        spin_lock(&per_cpu(schedule_data, i).schedule_lock);
        printk("CPU[%02d] ", i);
        SCHED_OP(dump_cpu_state, i);
        spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
    }

    local_irq_restore(flags);
}
#ifdef CONFIG_COMPAT
#include "compat/schedule.c"
#endif

#endif /* !COMPAT */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */