ia64/xen-unstable: xen/include/xen/sched.h @ 19313:cfacba42091c

Improve vcpu_migration_delay handling.

Signed-off-by: Xiaowei Yang <xiaowei.yang@intel.com>

author    Keir Fraser <keir.fraser@citrix.com>
date      Wed Mar 11 10:12:14 2009 +0000
parents   71af89e70fee
children  2dcdd2fcb945
#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <xen/shared.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/vcpu.h>
#include <public/xsm/acm.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/grant_table.h>
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
#include <xen/rcupdate.h>
#include <xen/irq.h>
#include <xen/mm.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif

/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;

#ifndef CONFIG_COMPAT
#define BITS_PER_EVTCHN_WORD(d) BITS_PER_LONG
#else
#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_LONG)
#endif
#define MAX_EVTCHNS(d) (BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d))
#define EVTCHNS_PER_BUCKET 128
#define NR_EVTCHN_BUCKETS  (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)

struct evtchn
{
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  consumer_is_xen;   /* Consumed by Xen or by guest? */
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            u16            remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        u16 pirq;      /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
#ifdef FLASK_ENABLE
    void *ssid;
#endif
};

int  evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);

struct vcpu
{
    int              vcpu_id;

    int              processor;

    vcpu_info_t     *vcpu_info;

    struct domain   *domain;

    struct vcpu     *next_in_list;

    uint64_t         periodic_period;
    uint64_t         periodic_last_event;
    struct timer     periodic_timer;
    struct timer     singleshot_timer;

    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */

    void            *sched_priv;    /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* Last time the VCPU was scheduled out. */
    uint64_t         last_run_time;

    /* Has the FPU been initialised? */
    bool_t           fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool_t           fpu_dirtied;
    /* Initialization completed for this VCPU? */
    bool_t           is_initialised;
    /* Currently running on a CPU? */
    bool_t           is_running;
    /* MCE callback pending for this VCPU? */
    bool_t           mce_pending;
    /* NMI callback pending for this VCPU? */
    bool_t           nmi_pending;

    /*
     * Higher-priority traps may interrupt lower-priority traps; a
     * lower-priority trap waits until any higher-priority trap has finished.
     * Note: this concept is known as the "system priority level" (spl) in
     * the UNIX world.
     */
    uint16_t         old_trap_priority;
    uint16_t         trap_priority;
#define VCPU_TRAP_NONE    0
#define VCPU_TRAP_NMI     1
#define VCPU_TRAP_MCE     2

    /* Require shutdown to be deferred for some asynchronous operation? */
    bool_t           defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool_t           paused_for_shutdown;
    /* VCPU affinity is temporarily locked from controller changes? */
    bool_t           affinity_locked;

    /*
     * > 0: a single port is being polled;
     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
     * < 0: multiple ports may be being polled.
     */
    int              poll_evtchn;

    unsigned long    pause_flags;
    atomic_t         pause_count;

    /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
    u16              virq_to_evtchn[NR_VIRQS];
    spinlock_t       virq_lock;

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_t        cpu_affinity;
    /* Used to change affinity temporarily. */
    cpumask_t        cpu_affinity_tmp;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_t        vcpu_dirty_cpumask;

    struct arch_vcpu arch;
};

/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d) spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d) spin_unlock_recursive(&(d)->domain_lock)
#define domain_is_locked(d) spin_is_locked(&(d)->domain_lock)

struct domain
{
    domid_t          domain_id;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       domain_lock;

    spinlock_t       page_alloc_lock; /* protects all the following fields  */
    struct page_list_head page_list;  /* linked list, of size tot_pages     */
    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
    unsigned int     tot_pages;       /* number of pages currently possessed */
    unsigned int     max_pages;       /* maximum value for tot_pages        */
    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */

    /* Scheduling. */
    void            *sched_priv;      /* scheduler-specific data */

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn[NR_EVTCHN_BUCKETS];
    spinlock_t       event_lock;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings. Updates should be protected by the
     * domain's event-channel spinlock. Read accesses can also synchronise on
     * the lock, but races don't usually matter.
     */
    u16              pirq_to_evtchn[NR_IRQS];
    DECLARE_BITMAP(pirq_mask, NR_IRQS);

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Is this an HVM guest? */
    bool_t           is_hvm;
    /* Does this guest need iommu mappings? */
    bool_t           need_iommu;
    /* Is this guest fully privileged (aka dom0)? */
    bool_t           is_privileged;
    /* Which guest this guest has privileges on */
    struct domain   *target;
    /* Is this guest being debugged by dom0? */
    bool_t           debugger_attached;
    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
    /* Domain is paused by controller software? */
    bool_t           is_paused_by_controller;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool_t           is_pinned;

    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);

    /* Guest has shut down (inc. reason code)? */
    spinlock_t       shutdown_lock;
    bool_t           is_shutting_down; /* in process of shutting down? */
    bool_t           is_shut_down;     /* fully shut down? */
    int              shutdown_code;

    /* If this is not 0, send suspend notification here instead of
     * raising DOM_EXC. */
    int              suspend_evtchn;

    atomic_t         pause_count;

    unsigned long    vm_assist;

    atomic_t         refcnt;

    struct vcpu     *vcpu[MAX_VIRT_CPUS];

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_t        domain_dirty_cpumask;

    struct arch_domain arch;

    void            *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* OProfile support. */
    struct xenoprof *xenoprof;
    int32_t          time_offset_seconds;

    struct rcu_head  rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t       hypercall_deadlock_mutex;

    /* VRAM dirty support. */
    struct sh_dirty_vram *dirty_vram;
};

struct domain_setup_info
{
    /* Initialised by caller. */
    unsigned long image_addr;
    unsigned long image_len;
    /* Initialised by loader: Public. */
    unsigned long v_start;
    unsigned long v_end;
    unsigned long v_kernstart;
    unsigned long v_kernend;
    unsigned long v_kernentry;
#define PAEKERN_no           0
#define PAEKERN_yes          1
#define PAEKERN_extended_cr3 2
#define PAEKERN_bimodal      3
    unsigned int  pae_kernel;
    /* Initialised by loader: Private. */
    unsigned long elf_paddr_offset;
    unsigned int  load_symtab;
    unsigned long symtab_addr;
    unsigned long symtab_len;
};

extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID    (0x7FFFU)
#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    atomic_t old, new, seen = d->refcnt;
    do
    {
        old = seen;
        if ( unlikely(_atomic_read(old) & DOMAIN_DESTROYED) )
            return 0;
        _atomic_set(new, _atomic_read(old) + 1);
        seen = atomic_compareandswap(old, new, &d->refcnt);
    }
    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
    return 1;
}
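
/*
 * Illustrative usage sketch (not part of the original header): code that
 * needs to hold a domain pointer beyond the scope in which it was looked up
 * takes and drops a reference explicitly.  The error value here is just an
 * example:
 *
 *     if ( !get_domain(d) )
 *         return -EINVAL;   // @d is already being destroyed
 *     ... use @d, possibly across blocking operations ...
 *     put_domain(d);
 */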

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}

/* Obtain a reference to the currently-running domain. */
static inline struct domain *get_current_domain(void)
{
    struct domain *d = current->domain;
    get_knownalive_domain(d);
    return d;
}

struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm   0
#define DOMCRF_hvm    (1U<<_DOMCRF_hvm)
/* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap   1
#define DOMCRF_hap    (1U<<_DOMCRF_hap)
/* DOMCRF_s3_integrity: Create a domain with tboot S3 memory integrity
 * protection. */
#define _DOMCRF_s3_integrity 2
#define DOMCRF_s3_integrity  (1U<<_DOMCRF_s3_integrity)
/* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
#define _DOMCRF_dummy 3
#define DOMCRF_dummy  (1U<<_DOMCRF_dummy)

/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/*
 * As above function, but accounts for current domain context:
 * - Translates target DOMID_SELF into caller's domain id; and
 * - Checks that caller has permission to act on the target domain.
 */
int rcu_lock_target_domain_by_id(domid_t dom, struct domain **d);

/* Finish a RCU critical region started by rcu_lock_domain_by_id(). */
static inline void rcu_unlock_domain(struct domain *d)
{
    rcu_read_unlock(&domlist_read_lock);
}

static inline struct domain *rcu_lock_domain(struct domain *d)
{
    rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return rcu_lock_domain(current->domain);
}
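
/*
 * Illustrative usage sketch (not part of the original header): the
 * short-lived reference pattern described above.  -ESRCH is just an example
 * error value:
 *
 *     struct domain *d = rcu_lock_domain_by_id(domid);
 *     if ( d == NULL )
 *         return -ESRCH;
 *     ... act on @d; do not keep the reference across a softirq ...
 *     rcu_unlock_domain(d);
 */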

struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int  domain_kill(struct domain *d);
void domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int  vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);      \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void __domain_crash_synchronous(void) __attribute__((noreturn));
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)

#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int  sched_id(void);
void vcpu_wake(struct vcpu *d);
void vcpu_sleep_nosync(struct vcpu *d);
void vcpu_sleep_sync(struct vcpu *d);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);
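
/*
 * Illustrative sketch (not part of the original header) of the contract
 * described above; save_state() and run() stand in for hypothetical arch
 * helpers:
 *
 *     void context_switch(struct vcpu *prev, struct vcpu *next)
 *     {
 *         save_state(prev);     // commit @prev's state to memory
 *         context_saved(prev);  // @prev may now be re-scheduled elsewhere
 *         run(next);            // continue in @next's context
 *     }
 */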

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))
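
/*
 * Illustrative usage sketch (not part of the original header): a
 * long-running hypercall periodically polls hypercall_preempt_check() and,
 * when preemption is needed, arranges to be restarted where it left off.
 * The op number and arguments below are hypothetical; "lh" declares one
 * [unsigned] long argument followed by one guest handle:
 *
 *     if ( hypercall_preempt_check() )
 *         return hypercall_create_continuation(
 *             __HYPERVISOR_example_op, "lh", nr_done, guest_handle);
 */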

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
#define for_each_domain(_d)                     \
 for ( (_d) = rcu_dereference(domain_list);     \
       (_d) != NULL;                            \
       (_d) = rcu_dereference((_d)->next_in_list )) \

#define for_each_vcpu(_d,_v)                    \
 for ( (_v) = (_d)->vcpu[0];                    \
       (_v) != NULL;                            \
       (_v) = (_v)->next_in_list )
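
/*
 * Illustrative usage sketch (not part of the original header): walking every
 * VCPU of every domain under the RCU read lock, e.g. to count runnable VCPUs:
 *
 *     struct domain *d;
 *     struct vcpu   *v;
 *     unsigned int   nr_runnable = 0;
 *
 *     rcu_read_lock(&domlist_read_lock);
 *     for_each_domain ( d )
 *         for_each_vcpu ( d, v )
 *             if ( vcpu_runnable(v) )
 *                 nr_runnable++;
 *     rcu_read_unlock(&domlist_read_lock);
 */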

/*
 * Per-VCPU pause flags.
 */
/* Domain is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
/* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
/* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}

void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void domain_pause(struct domain *d);
void vcpu_unpause(struct vcpu *v);
void domain_unpause(struct domain *d);
void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);

void vcpu_force_reschedule(struct vcpu *v);
void cpu_disable_scheduler(void);
int  vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);

#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))

#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))

#define is_hvm_domain(d) ((d)->is_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
#define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */