ia64/xen-unstable

view xen/include/xen/sched.h @ 17965:14fd83fe71c3

Add facility to get notification of domain suspend by event channel.
This event channel will be notified when the domain transitions to the
suspended state, which can be much faster than raising VIRQ_DOM_EXC
and waiting for the notification to be propagated via xenstore.

No attempt is made here to prevent multiple subscribers (last one
wins), or to detect that the subscriber has gone away. Userspace tools
should take care.

Signed-off-by: Brendan Cully <brendan@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jul 04 12:00:24 2008 +0100 (2008-07-04)
parents 926a366ca82f
children a49673cd23d2
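
A minimal sketch of how the new d->suspend_evtchn field is intended to be consumed on the hypervisor side, presumably from the domain shutdown path in common/domain.c; the function name here is hypothetical, while evtchn_send() and send_guest_global_virq() are the existing event-channel helpers:

/*
 * Illustrative sketch only, not the verbatim patch: when a domain finishes
 * suspending, notify the subscriber directly if a port was registered in
 * d->suspend_evtchn, otherwise fall back to the slower VIRQ_DOM_EXC path
 * that tools observe via xenstore.
 */
static void example_notify_suspend(struct domain *d)
{
    if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
        evtchn_send(d, d->suspend_evtchn);   /* fast path: direct notification */
    else
        send_guest_global_virq(dom0, VIRQ_DOM_EXC);  /* legacy path via dom0 */
}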
#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <xen/shared.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/vcpu.h>
#include <public/xsm/acm.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/grant_table.h>
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
#include <xen/rcupdate.h>
#include <xen/irq.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif

/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;

#ifndef CONFIG_COMPAT
#define MAX_EVTCHNS(d) NR_EVENT_CHANNELS
#else
#define MAX_EVTCHNS(d) (!IS_COMPAT(d) ?      \
                        NR_EVENT_CHANNELS :  \
                        sizeof(unsigned int) * sizeof(unsigned int) * 64)
#endif
#define EVTCHNS_PER_BUCKET 128
#define NR_EVTCHN_BUCKETS  (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)
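
For orientation, the sizing these constants imply, assuming NR_EVENT_CHANNELS expands to sizeof(unsigned long) * sizeof(unsigned long) * 64 as in the public event-channel header of this era, and a 64-bit hypervisor:

/*
 * Illustrative arithmetic only (values assume a 64-bit build):
 *   native guest:  NR_EVENT_CHANNELS = 8 * 8 * 64 = 4096 channels
 *   compat guest:  MAX_EVTCHNS(d)    = 4 * 4 * 64 = 1024 channels
 *   bucket count:  NR_EVTCHN_BUCKETS = 4096 / 128 =   32 buckets
 */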
struct evtchn
{
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  consumer_is_xen;   /* Consumed by Xen or by guest? */
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            u16 remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        u16 pirq;      /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
#ifdef FLASK_ENABLE
    void *ssid;
#endif
};

int  evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);
struct vcpu
{
    int vcpu_id;

    int processor;

    vcpu_info_t *vcpu_info;

    struct domain *domain;

    struct vcpu *next_in_list;

    uint64_t     periodic_period;
    uint64_t     periodic_last_event;
    struct timer periodic_timer;
    struct timer singleshot_timer;

    struct timer poll_timer;    /* timeout for SCHEDOP_poll */

    void *sched_priv;           /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* Has the FPU been initialised? */
    bool_t fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool_t fpu_dirtied;
    /* Is this VCPU polling any event channels (SCHEDOP_poll)? */
    bool_t is_polling;
    /* Initialization completed for this VCPU? */
    bool_t is_initialised;
    /* Currently running on a CPU? */
    bool_t is_running;
    /* NMI callback pending for this VCPU? */
    bool_t nmi_pending;
    /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
    bool_t nmi_masked;
    /* Require shutdown to be deferred for some asynchronous operation? */
    bool_t defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool_t paused_for_shutdown;
    /* VCPU affinity is temporarily locked from controller changes? */
    bool_t affinity_locked;

    unsigned long pause_flags;
    atomic_t      pause_count;

    u16 virq_to_evtchn[NR_VIRQS];

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_t cpu_affinity;
    /* Used to change affinity temporarily. */
    cpumask_t cpu_affinity_tmp;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_t vcpu_dirty_cpumask;

    struct arch_vcpu arch;
};
/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d)      spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d)    spin_unlock_recursive(&(d)->domain_lock)
#define domain_is_locked(d) spin_is_locked(&(d)->domain_lock)
struct domain
{
    domid_t domain_id;

    shared_info_t *shared_info;     /* shared data area */

    spinlock_t domain_lock;

    spinlock_t       page_alloc_lock; /* protects all the following fields  */
    struct list_head page_list;       /* linked list, of size tot_pages     */
    struct list_head xenpage_list;    /* linked list, of size xenheap_pages */
    unsigned int     tot_pages;       /* number of pages currently possessed */
    unsigned int     max_pages;       /* maximum value for tot_pages        */
    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */

    /* Scheduling. */
    void *sched_priv;               /* scheduler-specific data */

    struct domain *next_in_list;
    struct domain *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn *evtchn[NR_EVTCHN_BUCKETS];
    spinlock_t     evtchn_lock;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings. Updates should be protected by the
     * domain's event-channel spinlock. Read accesses can also synchronise on
     * the lock, but races don't usually matter.
     */
    u16 pirq_to_evtchn[NR_IRQS];
    DECLARE_BITMAP(pirq_mask, NR_IRQS);

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Is this an HVM guest? */
    bool_t is_hvm;
    /* Does this guest need iommu mappings? */
    bool_t need_iommu;
    /* Is this guest fully privileged (aka dom0)? */
    bool_t is_privileged;
    /* Which guest this guest has privileges on. */
    struct domain *target;
    /* Is this guest being debugged by dom0? */
    bool_t debugger_attached;
    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
    bool_t is_polling;
    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
    /* Domain is paused by controller software? */
    bool_t is_paused_by_controller;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool_t is_pinned;

    /* Guest has shut down (inc. reason code)? */
    spinlock_t shutdown_lock;
    bool_t     is_shutting_down; /* in process of shutting down? */
    bool_t     is_shut_down;     /* fully shut down? */
    int        shutdown_code;

    /*
     * If this is not 0, send suspend notification to this event channel
     * instead of raising VIRQ_DOM_EXC.
     */
    int suspend_evtchn;

    atomic_t pause_count;

    unsigned long vm_assist;

    atomic_t refcnt;

    struct vcpu *vcpu[MAX_VIRT_CPUS];

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_t domain_dirty_cpumask;

    struct arch_domain arch;

    void *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* OProfile support. */
    struct xenoprof *xenoprof;
    int32_t time_offset_seconds;

    struct rcu_head rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t hypercall_deadlock_mutex;

    /* VRAM dirty support. */
    struct sh_dirty_vram *dirty_vram;
};
struct domain_setup_info
{
    /* Initialised by caller. */
    unsigned long image_addr;
    unsigned long image_len;
    /* Initialised by loader: Public. */
    unsigned long v_start;
    unsigned long v_end;
    unsigned long v_kernstart;
    unsigned long v_kernend;
    unsigned long v_kernentry;
#define PAEKERN_no           0
#define PAEKERN_yes          1
#define PAEKERN_extended_cr3 2
#define PAEKERN_bimodal      3
    unsigned int  pae_kernel;
    /* Initialised by loader: Private. */
    unsigned long elf_paddr_offset;
    unsigned int  load_symtab;
    unsigned long symtab_addr;
    unsigned long symtab_len;
};
extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID    (0x7FFFU)
#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
    if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    atomic_t old, new, seen = d->refcnt;
    do
    {
        old = seen;
        if ( unlikely(_atomic_read(old) & DOMAIN_DESTROYED) )
            return 0;
        _atomic_set(new, _atomic_read(old) + 1);
        seen = atomic_compareandswap(old, new, &d->refcnt);
    }
    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
    return 1;
}
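
A hedged usage sketch of the reference-counting pair: get_domain_by_id() (declared further down in this header) takes a reference on success, and put_domain() drops it. The surrounding function is hypothetical.

/* Illustrative usage sketch, not taken from this file. */
static int example_use_domain(domid_t domid)
{
    struct domain *d = get_domain_by_id(domid); /* takes a reference */

    if ( d == NULL )
        return -ESRCH;

    /* ... operate on d while holding the reference ... */

    put_domain(d); /* drop the reference; may destroy the domain */
    return 0;
}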
/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}

/* Obtain a reference to the currently-running domain. */
static inline struct domain *get_current_domain(void)
{
    struct domain *d = current->domain;
    get_knownalive_domain(d);
    return d;
}

struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm   0
#define DOMCRF_hvm    (1U<<_DOMCRF_hvm)
/* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap   1
#define DOMCRF_hap    (1U<<_DOMCRF_hap)
/* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
#define _DOMCRF_dummy 2
#define DOMCRF_dummy  (1U<<_DOMCRF_dummy)

int construct_dom0(
    struct domain *d,
    unsigned long image_start, unsigned long image_len,
    unsigned long initrd_start, unsigned long initrd_len,
    char *cmdline);

/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/* Finish a RCU critical region started by rcu_lock_domain_by_id(). */
static inline void rcu_unlock_domain(struct domain *d)
{
    rcu_read_unlock(&domlist_read_lock);
}
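
The typical short-lived lookup pattern would look roughly like this; the function name is hypothetical and only illustrates the lock/use/unlock pairing described in the comment above.

/* Illustrative usage sketch, not taken from this file. */
static int example_query_domain(domid_t domid)
{
    struct domain *d = rcu_lock_domain_by_id(domid);

    if ( d == NULL )
        return -ESRCH;

    /* ... short-lived, read-mostly access to d ... */

    rcu_unlock_domain(d); /* ends the RCU read-side critical section */
    return 0;
}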
static inline struct domain *rcu_lock_domain(struct domain *d)
{
    rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return rcu_lock_domain(current->domain);
}

struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int  domain_kill(struct domain *d);
void domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int  vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);      \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void __domain_crash_synchronous(void) __attribute__((noreturn));
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__); \
    __domain_crash_synchronous();                                         \
} while (0)

#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int  sched_id(void);
void vcpu_wake(struct vcpu *d);
void vcpu_sleep_nosync(struct vcpu *d);
void vcpu_sleep_sync(struct vcpu *d);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))
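
A hedged sketch of how a long-running handler would typically combine these two facilities: poll hypercall_preempt_check() between units of work and, when preemption is needed, return a continuation that re-enters the hypercall with an updated cursor. The hypercall number, function name and arguments below are illustrative placeholders, not taken from this header.

/* Illustrative sketch only: restart a long-running operation where it left off. */
static long example_long_op(XEN_GUEST_HANDLE(void) arg, unsigned long start)
{
    unsigned long i;

    for ( i = start; i < 1000000; i++ )
    {
        /* ... one unit of work against @arg ... */

        if ( hypercall_preempt_check() )
            /* 'h' = guest handle, 'l' = unsigned long, matching the format
             * characters documented above. */
            return hypercall_create_continuation(
                __HYPERVISOR_example_op, "hl", arg, i);
    }

    return 0;
}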
/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
#define for_each_domain(_d)                      \
    for ( (_d) = rcu_dereference(domain_list);   \
          (_d) != NULL;                          \
          (_d) = rcu_dereference((_d)->next_in_list) )

#define for_each_vcpu(_d,_v)                     \
    for ( (_v) = (_d)->vcpu[0];                  \
          (_v) != NULL;                          \
          (_v) = (_v)->next_in_list )
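
A hedged usage sketch of the two iterators, honouring the locking rule stated above; the counting helper is hypothetical, and vcpu_runnable() is the inline defined just below.

/* Illustrative sketch only: count runnable VCPUs across all domains. */
static unsigned int example_count_runnable_vcpus(void)
{
    struct domain *d;
    struct vcpu *v;
    unsigned int count = 0;

    rcu_read_lock(&domlist_read_lock);   /* required by for_each_domain() */
    for_each_domain ( d )
        for_each_vcpu ( d, v )
            if ( vcpu_runnable(v) )
                count++;
    rcu_read_unlock(&domlist_read_lock);

    return count;
}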
/*
 * Per-VCPU pause flags.
 */
/* Domain is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
/* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
/* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}

void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void domain_pause(struct domain *d);
void vcpu_unpause(struct vcpu *v);
void domain_unpause(struct domain *d);
void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);

void vcpu_force_reschedule(struct vcpu *v);
int  vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);

static inline void vcpu_unblock(struct vcpu *v)
{
    if ( test_and_clear_bit(_VPF_blocked, &v->pause_flags) )
        vcpu_wake(v);
}

#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))

#ifndef IS_COMPAT
#define IS_COMPAT(d) 0
#endif

#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))

#define is_hvm_domain(d) ((d)->is_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
#define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel
} cpufreq_controller;

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */