ia64/xen-unstable
xen/include/xen/sched.h @ 19738:8dd5c3cae086

x86 hvm: move dirty_vram into struct hvm_domain

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Author: Keir Fraser <keir.fraser@citrix.com>
Date:   Fri Jun 05 14:04:03 2009 +0100

#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <xen/shared.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/vcpu.h>
#include <public/xsm/acm.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/grant_table.h>
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
#include <xen/rcupdate.h>
#include <xen/irq.h>
#include <xen/mm.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif

/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;

#ifndef CONFIG_COMPAT
#define BITS_PER_EVTCHN_WORD(d) BITS_PER_LONG
#else
#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_LONG)
#endif
#define MAX_EVTCHNS(d)      (BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d))
#define EVTCHNS_PER_BUCKET  128
#define NR_EVTCHN_BUCKETS   (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)
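
/*
 * Worked example (illustrative, assuming a 64-bit hypervisor where
 * NR_EVENT_CHANNELS is 4096): for a 64-bit guest BITS_PER_EVTCHN_WORD(d)
 * is 64, so MAX_EVTCHNS(d) = 64 * 64 = 4096; for a 32-bit guest under
 * CONFIG_COMPAT it is 32, giving MAX_EVTCHNS(d) = 1024. With
 * EVTCHNS_PER_BUCKET = 128, NR_EVTCHN_BUCKETS = 4096 / 128 = 32, i.e. 32
 * entries in the d->evtchn[] bucket array declared in struct domain below.
 */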

struct evtchn
{
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  consumer_is_xen;   /* Consumed by Xen or by guest? */
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            u16            remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        u16 pirq;      /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
#ifdef FLASK_ENABLE
    void *ssid;
#endif
};

int  evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);
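
/*
 * Illustrative sketch (not part of the original header): 'state' selects
 * which arm of the union is meaningful, e.g. for a bound interdomain
 * channel the peer end is found via u.interdomain. evtchn_from_port() is
 * a lookup helper assumed here; it is not declared in this header.
 *
 *     struct evtchn *chn = evtchn_from_port(d, port);
 *     if ( chn->state == ECS_INTERDOMAIN )
 *     {
 *         struct domain *rd    = chn->u.interdomain.remote_dom;
 *         u16            rport = chn->u.interdomain.remote_port;
 *         ... notify (rd, rport) ...
 *     }
 */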

struct vcpu
{
    int              vcpu_id;

    int              processor;

    vcpu_info_t     *vcpu_info;

    struct domain   *domain;

    struct vcpu     *next_in_list;

    uint64_t         periodic_period;
    uint64_t         periodic_last_event;
    struct timer     periodic_timer;
    struct timer     singleshot_timer;

    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */

    void            *sched_priv;    /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* Last time this VCPU was scheduled out. */
    uint64_t         last_run_time;

    /* Has the FPU been initialised? */
    bool_t           fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool_t           fpu_dirtied;
    /* Initialization completed for this VCPU? */
    bool_t           is_initialised;
    /* Currently running on a CPU? */
    bool_t           is_running;
    /* MCE callback pending for this VCPU? */
    bool_t           mce_pending;
    /* NMI callback pending for this VCPU? */
    bool_t           nmi_pending;

    /* Higher-priority traps may interrupt lower-priority traps;
     * lower-priority traps wait until higher-priority traps have finished.
     * Note: This concept is known as "system priority level" (spl)
     * in the UNIX world. */
    uint16_t         old_trap_priority;
    uint16_t         trap_priority;
#define VCPU_TRAP_NONE    0
#define VCPU_TRAP_NMI     1
#define VCPU_TRAP_MCE     2

    /* Require shutdown to be deferred for some asynchronous operation? */
    bool_t           defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool_t           paused_for_shutdown;
    /* VCPU affinity is temporarily locked from controller changes? */
    bool_t           affinity_locked;

    /*
     * > 0: a single port is being polled;
     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
     * < 0: multiple ports may be being polled.
     */
    int              poll_evtchn;

    unsigned long    pause_flags;
    atomic_t         pause_count;

    /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
    u16              virq_to_evtchn[NR_VIRQS];
    spinlock_t       virq_lock;

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_t        cpu_affinity;
    /* Used to change affinity temporarily. */
    cpumask_t        cpu_affinity_tmp;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_t        vcpu_dirty_cpumask;

    struct arch_vcpu arch;
};

/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d)      spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d)    spin_unlock_recursive(&(d)->domain_lock)
#define domain_is_locked(d) spin_is_locked(&(d)->domain_lock)
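
/*
 * A minimal usage sketch (illustrative): because the lock is recursive,
 * a path that already holds it, e.g. a fault handler, may safely call
 * helpers that take it again:
 *
 *     domain_lock(d);
 *     ... work that may itself nest domain_lock(d)/domain_unlock(d) ...
 *     domain_unlock(d);
 */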

struct domain
{
    domid_t          domain_id;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       domain_lock;

    spinlock_t       page_alloc_lock;   /* protects all the following fields  */
    struct page_list_head page_list;    /* linked list, of size tot_pages     */
    struct page_list_head xenpage_list; /* linked list (size xenheap_pages)   */
    unsigned int     tot_pages;         /* number of pages currently possessed */
    unsigned int     max_pages;         /* maximum value for tot_pages        */
    unsigned int     xenheap_pages;     /* # pages allocated from Xen heap    */

    /* Scheduling. */
    void            *sched_priv;    /* scheduler-specific data */

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn[NR_EVTCHN_BUCKETS];
    spinlock_t       event_lock;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings. Updates should be protected by the
     * domain's event-channel spinlock. Read accesses can also synchronise on
     * the lock, but races don't usually matter.
     */
    unsigned int     nr_pirqs;
    u16             *pirq_to_evtchn;
    unsigned long   *pirq_mask;

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Is this an HVM guest? */
    bool_t           is_hvm;
    /* Does this guest need iommu mappings? */
    bool_t           need_iommu;
    /* Is this guest fully privileged (aka dom0)? */
    bool_t           is_privileged;
    /* Which guest this guest has privileges on. */
    struct domain   *target;
    /* Is this guest being debugged by dom0? */
    bool_t           debugger_attached;
    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
    /* Domain is paused by controller software? */
    bool_t           is_paused_by_controller;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool_t           is_pinned;

    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);

    /* Guest has shut down (inc. reason code)? */
    spinlock_t       shutdown_lock;
    bool_t           is_shutting_down; /* in process of shutting down? */
    bool_t           is_shut_down;     /* fully shut down? */
    int              shutdown_code;

    /* If this is not 0, send suspend notification here instead of
     * raising DOM_EXC. */
    int              suspend_evtchn;

    atomic_t         pause_count;

    unsigned long    vm_assist;

    atomic_t         refcnt;

    struct vcpu     *vcpu[MAX_VIRT_CPUS];

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_t        domain_dirty_cpumask;

    struct arch_domain arch;

    void            *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* OProfile support. */
    struct xenoprof *xenoprof;
    int32_t          time_offset_seconds;

    struct rcu_head  rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t       hypercall_deadlock_mutex;

    /* Transcendent memory, auto-allocated on first tmem op by each domain. */
    void            *tmem;
};

struct domain_setup_info
{
    /* Initialised by caller. */
    unsigned long image_addr;
    unsigned long image_len;
    /* Initialised by loader: Public. */
    unsigned long v_start;
    unsigned long v_end;
    unsigned long v_kernstart;
    unsigned long v_kernend;
    unsigned long v_kernentry;
#define PAEKERN_no           0
#define PAEKERN_yes          1
#define PAEKERN_extended_cr3 2
#define PAEKERN_bimodal      3
    unsigned int  pae_kernel;
    /* Initialised by loader: Private. */
    unsigned long elf_paddr_offset;
    unsigned int  load_symtab;
    unsigned long symtab_addr;
    unsigned long symtab_len;
};

extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID    (0x7FFFU)
#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    atomic_t old, new, seen = d->refcnt;
    do
    {
        old = seen;
        if ( unlikely(_atomic_read(old) & DOMAIN_DESTROYED) )
            return 0;
        _atomic_set(new, _atomic_read(old) + 1);
        seen = atomic_compareandswap(old, new, &d->refcnt);
    }
    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
    return 1;
}
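
/*
 * A minimal reference-counting sketch (illustrative): get_domain_by_id(),
 * declared below, takes a reference on success, which is dropped with
 * put_domain(); get_domain() itself is for taking an extra reference via a
 * pointer whose existing reference is owned elsewhere.
 *
 *     struct domain *d = get_domain_by_id(domid);
 *     if ( d != NULL )
 *     {
 *         ... d cannot be fully destroyed while the reference is held ...
 *         put_domain(d);
 *     }
 */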

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}

/* Obtain a reference to the currently-running domain. */
static inline struct domain *get_current_domain(void)
{
    struct domain *d = current->domain;
    get_knownalive_domain(d);
    return d;
}

struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm           0
#define DOMCRF_hvm            (1U<<_DOMCRF_hvm)
/* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap           1
#define DOMCRF_hap            (1U<<_DOMCRF_hap)
/* DOMCRF_s3_integrity: Create a domain with tboot-based memory integrity
   protection. */
#define _DOMCRF_s3_integrity  2
#define DOMCRF_s3_integrity   (1U<<_DOMCRF_s3_integrity)
/* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list). */
#define _DOMCRF_dummy         3
#define DOMCRF_dummy          (1U<<_DOMCRF_dummy)
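
/*
 * Illustrative call (values are hypothetical): create an HVM domain that
 * uses hardware-assisted paging; domain_create() yields NULL on failure.
 *
 *     struct domain *d = domain_create(domid, DOMCRF_hvm | DOMCRF_hap, ssidref);
 *     if ( d == NULL )
 *         return -ENOMEM;
 */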

/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short-lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/*
 * As the function above, but accounts for current domain context:
 * - Translates target DOMID_SELF into caller's domain id; and
 * - Checks that caller has permission to act on the target domain.
 */
int rcu_lock_target_domain_by_id(domid_t dom, struct domain **d);

/* Finish an RCU critical region started by rcu_lock_domain_by_id(). */
static inline void rcu_unlock_domain(struct domain *d)
{
    rcu_read_unlock(&domlist_read_lock);
}

static inline struct domain *rcu_lock_domain(struct domain *d)
{
    rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return rcu_lock_domain(current->domain);
}
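
/*
 * Typical short-lived lookup (illustrative sketch):
 *
 *     struct domain *d = rcu_lock_domain_by_id(domid);
 *     if ( d == NULL )
 *         return -ESRCH;
 *     ... brief work on d, without blocking ...
 *     rcu_unlock_domain(d);
 */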

struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int  domain_kill(struct domain *d);
void domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int  vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);       \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void __domain_crash_synchronous(void) __attribute__((noreturn));
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)

#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int  sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
void vcpu_wake(struct vcpu *d);
void vcpu_sleep_nosync(struct vcpu *d);
void vcpu_sleep_sync(struct vcpu *d);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);
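
/*
 * Shape of the contract (illustrative sketch; the real implementation is
 * architecture-specific and not shown here):
 *
 *     void context_switch(struct vcpu *prev, struct vcpu *next)
 *     {
 *         ... save prev's register state and switch to next's ...
 *         context_saved(prev);   // prev may now be rescheduled elsewhere
 *         ... resume execution in next's context ...
 *     }
 */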

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))
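
/*
 * A minimal preemptible-hypercall sketch (illustrative; do_foo_op,
 * __HYPERVISOR_foo_op, nr_items and process_item() are hypothetical):
 *
 *     static long do_foo_op(unsigned int cmd, XEN_GUEST_HANDLE(void) arg,
 *                           unsigned long start)
 *     {
 *         unsigned long i;
 *         for ( i = start; i < nr_items; i++ )
 *         {
 *             process_item(i);
 *             if ( hypercall_preempt_check() )
 *                 return hypercall_create_continuation(
 *                     __HYPERVISOR_foo_op, "ihl", cmd, arg, i + 1);
 *         }
 *         return 0;
 *     }
 */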

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
#define for_each_domain(_d)                         \
 for ( (_d) = rcu_dereference(domain_list);         \
       (_d) != NULL;                                \
       (_d) = rcu_dereference((_d)->next_in_list) )

#define for_each_vcpu(_d,_v)            \
 for ( (_v) = (_d)->vcpu[0];            \
       (_v) != NULL;                    \
       (_v) = (_v)->next_in_list )
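
/*
 * Illustrative walk over all domains and their VCPUs (sketch), taking the
 * read-side lock named in the comment above:
 *
 *     struct domain *d;
 *     struct vcpu *v;
 *
 *     rcu_read_lock(&domlist_read_lock);
 *     for_each_domain ( d )
 *         for_each_vcpu ( d, v )
 *             ... inspect v ...
 *     rcu_read_unlock(&domlist_read_lock);
 */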

/*
 * Per-VCPU pause flags.
 */
/* Domain is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
/* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
/* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}

void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void domain_pause(struct domain *d);
void vcpu_unpause(struct vcpu *v);
void domain_unpause(struct domain *d);
void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);

void vcpu_force_reschedule(struct vcpu *v);
void cpu_disable_scheduler(void);
int  vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);

#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))

#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))

#define is_hvm_domain(d) ((d)->is_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
#define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)

void set_vcpu_migration_delay(unsigned int delay);
unsigned int get_vcpu_migration_delay(void);

extern int sched_smt_power_savings;

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */