ia64/xen-unstable

xen/include/xen/sched.h @ 19800:78962f85c562

IOMMU: Add two generic functions to vendor neutral interface

Add two generic functions to the vendor-neutral iommu interface. The
reason is that, as of changeset 19732, a single global flag
"iommu_enabled" controls iommu enablement for both VT-d and AMD
systems, so we need different code paths for VT-d and AMD iommu systems
once this flag is turned on. Also, the early check of "iommu_enabled"
in iommu_setup() is removed to prevent iommu functionality from being
disabled on AMD systems.

Signed-off-by: Wei Wang <wei.wang2@amd.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 19 08:41:50 2009 +0100 (2009-06-19)
parents 2f9e1348aa98
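For context, the kind of vendor-neutral wrapper the description refers to can be sketched as a generic entry point that checks the single "iommu_enabled" flag and then dispatches to the active vendor's ops table. This is a minimal, hypothetical sketch: the names vendor_iommu_ops, iommu_vendor and the suspend/resume hooks are assumptions for illustration, not quoted from the patch.

    /* Illustrative sketch only -- the names below are hypothetical. */
    struct vendor_iommu_ops {
        void (*suspend)(void);
        void (*resume)(void);
    };

    extern bool_t iommu_enabled;                        /* single global flag  */
    extern const struct vendor_iommu_ops *iommu_vendor; /* VT-d or AMD backend */

    void iommu_suspend(void)
    {
        /* One flag now covers both VT-d and AMD, so the vendor split is
         * handled here rather than in every caller. */
        if ( iommu_enabled && iommu_vendor && iommu_vendor->suspend )
            iommu_vendor->suspend();
    }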
#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <xen/shared.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/vcpu.h>
#include <public/xsm/acm.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/grant_table.h>
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
#include <xen/rcupdate.h>
#include <xen/irq.h>
#include <xen/mm.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif
/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;

#ifndef CONFIG_COMPAT
#define BITS_PER_EVTCHN_WORD(d) BITS_PER_LONG
#else
#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_LONG)
#endif
#define MAX_EVTCHNS(d) (BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d))
#define EVTCHNS_PER_BUCKET 128
#define NR_EVTCHN_BUCKETS  (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)
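/*
 * Worked example (illustrative, not part of sched.h): on a 64-bit build,
 * assuming BITS_PER_LONG is 64 and NR_EVENT_CHANNELS is 4096 (check the
 * public headers for the authoritative values):
 *   BITS_PER_EVTCHN_WORD(d) = 64                   (non-compat guest)
 *   MAX_EVTCHNS(d)          = 64 * 64   = 4096 event channels
 *   NR_EVTCHN_BUCKETS       = 4096 / 128 = 32 buckets of EVTCHNS_PER_BUCKET
 * A 32-bit compat guest sees BITS_PER_EVTCHN_WORD(d) = 32, so
 *   MAX_EVTCHNS(d)          = 32 * 32   = 1024 usable event channels.
 */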
struct evtchn
{
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  consumer_is_xen;   /* Consumed by Xen or by guest? */
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            u16            remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        u16 pirq;      /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
#ifdef FLASK_ENABLE
    void *ssid;
#endif
};

int  evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);
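/*
 * Usage note (illustrative, not part of sched.h): the union 'u' above is
 * keyed by 'state' -- only the member matching the current ECS_* value is
 * meaningful. A minimal sketch of decoding a channel; the function name and
 * printk formats are hypothetical.
 */
static inline void dump_evtchn(const struct evtchn *chn)
{
    switch ( chn->state )
    {
    case ECS_UNBOUND:
        printk("unbound, waiting for domain %d\n",
               chn->u.unbound.remote_domid);
        break;
    case ECS_INTERDOMAIN:
        printk("interdomain, remote port %d (peer in u.interdomain.remote_dom)\n",
               chn->u.interdomain.remote_port);
        break;
    case ECS_PIRQ:
        printk("bound to physical IRQ %d\n", chn->u.pirq);
        break;
    case ECS_VIRQ:
        printk("bound to virtual IRQ %d\n", chn->u.virq);
        break;
    default:
        printk("state %d\n", chn->state);
        break;
    }
}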
struct vcpu
{
    int              vcpu_id;

    int              processor;

    vcpu_info_t     *vcpu_info;

    struct domain   *domain;

    struct vcpu     *next_in_list;

    uint64_t         periodic_period;
    uint64_t         periodic_last_event;
    struct timer     periodic_timer;
    struct timer     singleshot_timer;

    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */

    void            *sched_priv;    /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* last time when vCPU is scheduled out */
    uint64_t last_run_time;

    /* Has the FPU been initialised? */
    bool_t           fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool_t           fpu_dirtied;
    /* Initialization completed for this VCPU? */
    bool_t           is_initialised;
    /* Currently running on a CPU? */
    bool_t           is_running;
    /* MCE callback pending for this VCPU? */
    bool_t           mce_pending;
    /* NMI callback pending for this VCPU? */
    bool_t           nmi_pending;

    /* Higher-priority traps may interrupt lower-priority traps;
     * lower-priority traps wait until higher-priority traps have finished.
     * Note: this concept is known as "system priority level" (spl)
     * in the UNIX world. */
    uint16_t         old_trap_priority;
    uint16_t         trap_priority;
#define VCPU_TRAP_NONE    0
#define VCPU_TRAP_NMI     1
#define VCPU_TRAP_MCE     2

    /* Require shutdown to be deferred for some asynchronous operation? */
    bool_t           defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool_t           paused_for_shutdown;
    /* VCPU affinity is temporarily locked from controller changes? */
    bool_t           affinity_locked;

    /*
     * > 0: a single port is being polled;
     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
     * < 0: multiple ports may be being polled.
     */
    int              poll_evtchn;

    unsigned long    pause_flags;
    atomic_t         pause_count;

    /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
    u16              virq_to_evtchn[NR_VIRQS];
    spinlock_t       virq_lock;

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_t        cpu_affinity;
    /* Used to change affinity temporarily. */
    cpumask_t        cpu_affinity_tmp;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_t        vcpu_dirty_cpumask;

    struct arch_vcpu arch;
};
/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d) spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d) spin_unlock_recursive(&(d)->domain_lock)
#define domain_is_locked(d) spin_is_locked(&(d)->domain_lock)
struct domain
{
    domid_t          domain_id;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       domain_lock;

    spinlock_t       page_alloc_lock; /* protects all the following fields   */
    struct page_list_head page_list;  /* linked list, of size tot_pages      */
    struct page_list_head xenpage_list; /* linked list (size xenheap_pages)  */
    unsigned int     tot_pages;       /* number of pages currently possessed */
    unsigned int     max_pages;       /* maximum value for tot_pages         */
    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap     */

    unsigned int     max_vcpus;

    /* Scheduling. */
    void            *sched_priv;    /* scheduler-specific data */

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn[NR_EVTCHN_BUCKETS];
    spinlock_t       event_lock;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings. Updates should be protected by the
     * domain's event-channel spinlock. Read accesses can also synchronise on
     * the lock, but races don't usually matter.
     */
    unsigned int     nr_pirqs;
    u16             *pirq_to_evtchn;
    unsigned long   *pirq_mask;

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Is this an HVM guest? */
    bool_t           is_hvm;
    /* Does this guest need iommu mappings? */
    bool_t           need_iommu;
    /* Is this guest fully privileged (aka dom0)? */
    bool_t           is_privileged;
    /* Which guest this guest has privileges on */
    struct domain   *target;
    /* Is this guest being debugged by dom0? */
    bool_t           debugger_attached;
    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
    /* Domain is paused by controller software? */
    bool_t           is_paused_by_controller;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool_t           is_pinned;

    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
#if MAX_VIRT_CPUS <= BITS_PER_LONG
    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
#else
    unsigned long   *poll_mask;
#endif

    /* Guest has shut down (inc. reason code)? */
    spinlock_t       shutdown_lock;
    bool_t           is_shutting_down; /* in process of shutting down? */
    bool_t           is_shut_down;     /* fully shut down? */
    int              shutdown_code;

    /* If this is not 0, send suspend notification here instead of
     * raising DOM_EXC */
    int              suspend_evtchn;

    atomic_t         pause_count;

    unsigned long    vm_assist;

    atomic_t         refcnt;

    struct vcpu    **vcpu;

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_t        domain_dirty_cpumask;

    struct arch_domain arch;

    void            *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* OProfile support. */
    struct xenoprof *xenoprof;
    int32_t          time_offset_seconds;

    struct rcu_head  rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t       hypercall_deadlock_mutex;

    /* transcendent memory, auto-allocated on first tmem op by each domain */
    void            *tmem;
};
struct domain_setup_info
{
    /* Initialised by caller. */
    unsigned long image_addr;
    unsigned long image_len;
    /* Initialised by loader: Public. */
    unsigned long v_start;
    unsigned long v_end;
    unsigned long v_kernstart;
    unsigned long v_kernend;
    unsigned long v_kernentry;
#define PAEKERN_no           0
#define PAEKERN_yes          1
#define PAEKERN_extended_cr3 2
#define PAEKERN_bimodal      3
    unsigned int  pae_kernel;
    /* Initialised by loader: Private. */
    unsigned long elf_paddr_offset;
    unsigned int  load_symtab;
    unsigned long symtab_addr;
    unsigned long symtab_len;
};
extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID    (0x7FFFU)
#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)
/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    atomic_t old, new, seen = d->refcnt;
    do
    {
        old = seen;
        if ( unlikely(_atomic_read(old) & DOMAIN_DESTROYED) )
            return 0;
        _atomic_set(new, _atomic_read(old) + 1);
        seen = atomic_compareandswap(old, new, &d->refcnt);
    }
    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
    return 1;
}
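/*
 * Usage note (illustrative, not part of sched.h): a caller without an
 * existing reference must check get_domain()'s return value and balance it
 * with put_domain(). The surrounding function is hypothetical.
 */
static inline int use_domain_example(struct domain *d)
{
    if ( !get_domain(d) )
        return -EINVAL;   /* @d already has DOMAIN_DESTROYED set */

    /* ... @d cannot be destroyed while we hold the reference ... */

    put_domain(d);        /* may invoke domain_destroy(@d) on the last ref */
    return 0;
}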
/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}

/* Obtain a reference to the currently-running domain. */
static inline struct domain *get_current_domain(void)
{
    struct domain *d = current->domain;
    get_knownalive_domain(d);
    return d;
}
struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
 /* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm           0
#define DOMCRF_hvm            (1U<<_DOMCRF_hvm)
 /* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap           1
#define DOMCRF_hap            (1U<<_DOMCRF_hap)
 /* DOMCRF_s3_integrity: Create a domain whose memory integrity across S3 is
    protected by tboot. */
#define _DOMCRF_s3_integrity  2
#define DOMCRF_s3_integrity   (1U<<_DOMCRF_s3_integrity)
 /* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
#define _DOMCRF_dummy         3
#define DOMCRF_dummy          (1U<<_DOMCRF_dummy)
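/*
 * Usage note (illustrative, not part of sched.h): DOMCRF_* bits are OR'd
 * into domcr_flags. The domid and ssidref below are placeholder values, and
 * the wrapper function is hypothetical.
 */
static inline struct domain *create_hvm_hap_domain_example(void)
{
    /* An HVM guest using hardware-assisted paging. */
    return domain_create(1, DOMCRF_hvm | DOMCRF_hap, 0);
    /* A NULL return indicates failure (e.g. no memory or duplicate domid). */
}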
/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/*
 * As above function, but accounts for current domain context:
 * - Translates target DOMID_SELF into caller's domain id; and
 * - Checks that caller has permission to act on the target domain.
 */
int rcu_lock_target_domain_by_id(domid_t dom, struct domain **d);

/* Finish a RCU critical region started by rcu_lock_domain_by_id(). */
static inline void rcu_unlock_domain(struct domain *d)
{
    rcu_read_unlock(&domlist_read_lock);
}
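/*
 * Usage note (illustrative, not part of sched.h): the short-lived
 * lookup/unlock pairing described above. The function is hypothetical.
 */
static inline int poke_domain_example(domid_t domid)
{
    struct domain *d = rcu_lock_domain_by_id(domid);

    if ( d == NULL )
        return -ESRCH;    /* no such domain */

    /* ... use @d here; do not stash it beyond this scope ... */

    rcu_unlock_domain(d);
    return 0;
}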
static inline struct domain *rcu_lock_domain(struct domain *d)
{
    rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return rcu_lock_domain(current->domain);
}
struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int  domain_kill(struct domain *d);
void domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int  vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);       \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void __domain_crash_synchronous(void) __attribute__((noreturn));
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)
#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int  sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
void vcpu_wake(struct vcpu *d);
void vcpu_sleep_nosync(struct vcpu *d);
void vcpu_sleep_sync(struct vcpu *d);
/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);
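/*
 * Usage note (illustrative, not part of sched.h): the shape of a non-lazy
 * context_switch() implementation, to make the contract above concrete.
 * Real implementations live in arch code; the function and helper names
 * below (example_context_switch, save_state, load_state) are hypothetical.
 */
static inline void example_context_switch(struct vcpu *prev, struct vcpu *next)
{
    save_state(prev);        /* commit @prev's register state to memory   */
    load_state(next);        /* this CPU now runs in @next's context      */

    context_saved(prev);     /* only now is @prev's context fully saved   */

    /* ... resume @next; control does not return to the scheduler ... */
}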
/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))
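/*
 * Usage note (illustrative, not part of sched.h): a long-running hypercall
 * periodically polls hypercall_preempt_check() and, if work is pending,
 * packages its remaining arguments into a continuation and returns. The
 * hypercall number and arguments below are hypothetical; the format string
 * "llh" matches two unsigned longs and a guest handle, per the table above.
 */
static inline long example_long_op(
    unsigned long start, unsigned long nr, XEN_GUEST_HANDLE(void) buf)
{
    unsigned long i;

    for ( i = start; i < nr; i++ )
    {
        if ( hypercall_preempt_check() )
            return hypercall_create_continuation(
                __HYPERVISOR_example_long_op, "llh", i, nr, buf);

        /* ... process item @i ... */
    }

    return 0;
}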
/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
#define for_each_domain(_d)                          \
 for ( (_d) = rcu_dereference(domain_list);          \
       (_d) != NULL;                                 \
       (_d) = rcu_dereference((_d)->next_in_list )) \

#define for_each_vcpu(_d,_v)                         \
 for ( (_v) = (_d)->vcpu ? (_d)->vcpu[0] : NULL;     \
       (_v) != NULL;                                 \
       (_v) = (_v)->next_in_list )
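/*
 * Usage note (illustrative, not part of sched.h): system-wide iteration must
 * hold domlist_read_lock (or the update lock), per the comment above. The
 * function name is hypothetical.
 */
static inline unsigned int count_vcpus_example(void)
{
    struct domain *d;
    struct vcpu   *v;
    unsigned int   n = 0;

    rcu_read_lock(&domlist_read_lock);
    for_each_domain ( d )
        for_each_vcpu ( d, v )
            n++;
    rcu_read_unlock(&domlist_read_lock);

    return n;
}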
/*
 * Per-VCPU pause flags.
 */
 /* Domain is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
 /* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
 /* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
 /* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}
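/*
 * Usage note (illustrative, not part of sched.h): vcpu_runnable() ORs all
 * three sources together, so any set VPF_* bit, a non-zero per-VCPU pause
 * count, or a non-zero per-domain pause count makes the VCPU non-runnable.
 * vcpu_pause()/vcpu_unpause() are declared just below; the wrapper here is
 * hypothetical.
 */
static inline void inspect_vcpu_example(struct vcpu *v)
{
    vcpu_pause(v);               /* raises v->pause_count; syncs if running */
    ASSERT(!vcpu_runnable(v));
    /* ... @v's state can be examined or modified safely here ... */
    vcpu_unpause(v);             /* drops the count; @v may run again */
}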
void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void domain_pause(struct domain *d);
void vcpu_unpause(struct vcpu *v);
void domain_unpause(struct domain *d);
void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);

void vcpu_force_reschedule(struct vcpu *v);
void cpu_disable_scheduler(void);
int  vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);
#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))

#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))

#define is_hvm_domain(d) ((d)->is_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
#define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)

void set_vcpu_migration_delay(unsigned int delay);
unsigned int get_vcpu_migration_delay(void);

extern int sched_smt_power_savings;

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;

#endif /* __SCHED_H__ */
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */