ia64/xen-unstable

view xen/include/xen/sched.h @ 19650:6705898f768d

x86: eliminate hard-coded NR_IRQS

... splitting it into a global nr_irqs (determined at boot time) and a
per-domain nr_pirqs (derived from nr_irqs and an optional command-line
value, which should probably later become a per-domain config setting).
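
For illustration, a minimal standalone sketch of the derivation (the name
extra_guest_irqs and the concrete numbers are assumptions for this sketch,
not taken from the patch):

    /* Standalone sketch: per-domain PIRQ space derived from the boot-time
     * IRQ count plus command-line slack.  All names are illustrative. */
    #include <stdio.h>

    static unsigned int nr_irqs = 256;          /* determined at boot time */
    static unsigned int extra_guest_irqs = 32;  /* hypothetical command-line value */

    static unsigned int calc_nr_pirqs(void)
    {
        return nr_irqs + extra_guest_irqs;
    }

    int main(void)
    {
        printf("nr_pirqs = %u\n", calc_nr_pirqs());
        return 0;
    }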

This has the (desirable, IMO) side effect of reducing the size of
struct hvm_irq_dpci from requiring an order-3 page to an order-2 one
(on x86-64), which is nevertheless still too large.

However, there is now a variable-size bit array on the stack in
pt_irq_time_out(). While this is probably okay for the moment, it
certainly doesn't look nice. Replacing it with a static
(pre-)allocation seems less than ideal as well, because that would
require at least min(d->nr_pirqs, NR_VECTORS) bit arrays of
d->nr_pirqs bits each, since the bit array is used outside of the
serialized code region in that function, and keeping the domain's
event lock held across pirq_guest_eoi() doesn't look like a good
idea either.
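
The pattern in question looks roughly like the following standalone sketch
(a C99 variable-length array used as a bitmap on the stack; the helper,
names and values are illustrative, not the actual pt_irq_time_out() code):

    #include <limits.h>
    #include <stdio.h>
    #include <string.h>

    #define BITS_PER_LONG     (CHAR_BIT * sizeof(unsigned long))
    #define BITS_TO_LONGS(n)  (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    static void set_bit_in(unsigned long *bits, unsigned int nr)
    {
        bits[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
    }

    static void handle_timeout(unsigned int nr_pirqs)
    {
        /* Variable-size bit array on the stack, dimensioned at run time. */
        unsigned long irq_count[BITS_TO_LONGS(nr_pirqs)];

        memset(irq_count, 0, sizeof(irq_count));
        set_bit_in(irq_count, 5);
        printf("first word: %#lx\n", irq_count[0]);
    }

    int main(void)
    {
        handle_timeout(96);
        return 0;
    }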

The IRQ- and vector-indexed arrays hanging off struct hvm_irq_dpci
could in fact be changed further to dynamically use the smaller of the
two ranges for indexing, since there are other assumptions about a
one-to-one relationship between IRQs and vectors here and elsewhere.
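
As a sketch of that indexing change (NR_VECTORS and the concrete numbers
below are placeholders; this is not the hvm_irq_dpci code itself):

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_VECTORS 256   /* illustrative vector-range size */

    int main(void)
    {
        unsigned int nr_pirqs = 320;  /* hypothetical per-domain value */
        /* Index by the smaller of the two ranges, relying on the assumed
         * one-to-one IRQ <-> vector relationship. */
        unsigned int nr_slots = nr_pirqs < NR_VECTORS ? nr_pirqs : NR_VECTORS;
        unsigned long *map = calloc(nr_slots, sizeof(*map));

        if ( map == NULL )
            return 1;
        printf("allocated %u slots instead of %u\n", nr_slots, nr_pirqs);
        free(map);
        return 0;
    }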

Additionally, it seems to me that struct hvm_mirq_dpci_mapping's
digl_list and gmsi fields could really be overlaid, which would yield
significant savings since this structure is always instantiated as
arrays of d->nr_pirqs elements (which, as per the above, could also be
the smaller of that and NR_VECTORS).
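
A sketch of the suggested overlay (the field names digl_list and gmsi follow
the text above; the surrounding types are simplified placeholders, not the
real struct hvm_mirq_dpci_mapping layout):

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };
    struct msi_data  { unsigned int gvec, gflags; };  /* stand-in for the gmsi fields */

    struct mirq_mapping_sketch {
        unsigned long flags;
        union {                           /* an entry uses one or the other, never both */
            struct list_head digl_list;   /* guest IRQ list (non-MSI case) */
            struct msi_data  gmsi;        /* guest MSI information (MSI case) */
        } u;
    };

    int main(void)
    {
        printf("sizeof(struct mirq_mapping_sketch) = %zu\n",
               sizeof(struct mirq_mapping_sketch));
        return 0;
    }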

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed May 27 10:38:51 2009 +0100 (2009-05-27)
parents f210a633571c
children 8dd5c3cae086
line source
#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <xen/shared.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/vcpu.h>
#include <public/xsm/acm.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/grant_table.h>
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
#include <xen/rcupdate.h>
#include <xen/irq.h>
#include <xen/mm.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif
/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;

#ifndef CONFIG_COMPAT
#define BITS_PER_EVTCHN_WORD(d) BITS_PER_LONG
#else
#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_LONG)
#endif
#define MAX_EVTCHNS(d) (BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d))
#define EVTCHNS_PER_BUCKET 128
#define NR_EVTCHN_BUCKETS  (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)
struct evtchn
{
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  consumer_is_xen;   /* Consumed by Xen or by guest? */
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            u16            remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        u16 pirq;      /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
#ifdef FLASK_ENABLE
    void *ssid;
#endif
};

int  evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);
struct vcpu
{
    int vcpu_id;

    int processor;

    vcpu_info_t *vcpu_info;

    struct domain *domain;

    struct vcpu *next_in_list;

    uint64_t     periodic_period;
    uint64_t     periodic_last_event;
    struct timer periodic_timer;
    struct timer singleshot_timer;

    struct timer poll_timer;    /* timeout for SCHEDOP_poll */

    void *sched_priv;           /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* Last time this VCPU was scheduled out. */
    uint64_t last_run_time;

    /* Has the FPU been initialised? */
    bool_t fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool_t fpu_dirtied;
    /* Initialization completed for this VCPU? */
    bool_t is_initialised;
    /* Currently running on a CPU? */
    bool_t is_running;
    /* MCE callback pending for this VCPU? */
    bool_t mce_pending;
    /* NMI callback pending for this VCPU? */
    bool_t nmi_pending;

    /* Higher-priority traps may interrupt lower-priority traps;
     * lower-priority traps wait until higher-priority traps have finished.
     * Note: This concept is known as "system priority level" (spl)
     * in the UNIX world. */
    uint16_t old_trap_priority;
    uint16_t trap_priority;
#define VCPU_TRAP_NONE    0
#define VCPU_TRAP_NMI     1
#define VCPU_TRAP_MCE     2

    /* Require shutdown to be deferred for some asynchronous operation? */
    bool_t defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool_t paused_for_shutdown;
    /* VCPU affinity is temporarily locked from controller changes? */
    bool_t affinity_locked;

    /*
     * > 0: a single port is being polled;
     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
     * < 0: multiple ports may be being polled.
     */
    int poll_evtchn;

    unsigned long pause_flags;
    atomic_t      pause_count;

    /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
    u16           virq_to_evtchn[NR_VIRQS];
    spinlock_t    virq_lock;

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_t     cpu_affinity;
    /* Used to change affinity temporarily. */
    cpumask_t     cpu_affinity_tmp;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_t     vcpu_dirty_cpumask;

    struct arch_vcpu arch;
};
/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d) spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d) spin_unlock_recursive(&(d)->domain_lock)
#define domain_is_locked(d) spin_is_locked(&(d)->domain_lock)
struct domain
{
    domid_t          domain_id;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       domain_lock;

    spinlock_t       page_alloc_lock; /* protects all the following fields  */
    struct page_list_head page_list;  /* linked list, of size tot_pages     */
    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
    unsigned int     tot_pages;       /* number of pages currently possessed */
    unsigned int     max_pages;       /* maximum value for tot_pages        */
    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */

    /* Scheduling. */
    void            *sched_priv;      /* scheduler-specific data */

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn[NR_EVTCHN_BUCKETS];
    spinlock_t       event_lock;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings. Updates should be protected by the
     * domain's event-channel spinlock. Read accesses can also synchronise on
     * the lock, but races don't usually matter.
     */
    unsigned int     nr_pirqs;
    u16             *pirq_to_evtchn;
    unsigned long   *pirq_mask;

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Is this an HVM guest? */
    bool_t           is_hvm;
    /* Does this guest need iommu mappings? */
    bool_t           need_iommu;
    /* Is this guest fully privileged (aka dom0)? */
    bool_t           is_privileged;
    /* Which guest this guest has privileges on */
    struct domain   *target;
    /* Is this guest being debugged by dom0? */
    bool_t           debugger_attached;
    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;
    /* Domain is paused by controller software? */
    bool_t           is_paused_by_controller;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool_t           is_pinned;

    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);

    /* Guest has shut down (inc. reason code)? */
    spinlock_t       shutdown_lock;
    bool_t           is_shutting_down; /* in process of shutting down? */
    bool_t           is_shut_down;     /* fully shut down? */
    int              shutdown_code;

    /* If this is not 0, send suspend notification here instead of
     * raising DOM_EXC */
    int              suspend_evtchn;

    atomic_t         pause_count;

    unsigned long    vm_assist;

    atomic_t         refcnt;

    struct vcpu     *vcpu[MAX_VIRT_CPUS];

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_t        domain_dirty_cpumask;

    struct arch_domain arch;

    void *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* OProfile support. */
    struct xenoprof *xenoprof;
    int32_t time_offset_seconds;

    struct rcu_head rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t hypercall_deadlock_mutex;

    /* VRAM dirty support. */
    struct sh_dirty_vram *dirty_vram;

    /* transcendent memory, auto-allocated on first tmem op by each domain */
    void *tmem;
};
struct domain_setup_info
{
    /* Initialised by caller. */
    unsigned long image_addr;
    unsigned long image_len;
    /* Initialised by loader: Public. */
    unsigned long v_start;
    unsigned long v_end;
    unsigned long v_kernstart;
    unsigned long v_kernend;
    unsigned long v_kernentry;
#define PAEKERN_no           0
#define PAEKERN_yes          1
#define PAEKERN_extended_cr3 2
#define PAEKERN_bimodal      3
    unsigned int  pae_kernel;
    /* Initialised by loader: Private. */
    unsigned long elf_paddr_offset;
    unsigned int  load_symtab;
    unsigned long symtab_addr;
    unsigned long symtab_len;
};
extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID    (0x7FFFU)
#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    atomic_t old, new, seen = d->refcnt;
    do
    {
        old = seen;
        if ( unlikely(_atomic_read(old) & DOMAIN_DESTROYED) )
            return 0;
        _atomic_set(new, _atomic_read(old) + 1);
        seen = atomic_compareandswap(old, new, &d->refcnt);
    }
    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
    return 1;
}

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}

/* Obtain a reference to the currently-running domain. */
static inline struct domain *get_current_domain(void)
{
    struct domain *d = current->domain;
    get_knownalive_domain(d);
    return d;
}
struct domain *domain_create(
    domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
 /* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm           0
#define DOMCRF_hvm            (1U<<_DOMCRF_hvm)
 /* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap           1
#define DOMCRF_hap            (1U<<_DOMCRF_hap)
 /* DOMCRF_s3_integrity: Create a domain with tboot memory integrity protection
                         by tboot */
#define _DOMCRF_s3_integrity  2
#define DOMCRF_s3_integrity   (1U<<_DOMCRF_s3_integrity)
 /* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
#define _DOMCRF_dummy         3
#define DOMCRF_dummy          (1U<<_DOMCRF_dummy)

/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/*
 * As above function, but accounts for current domain context:
 * - Translates target DOMID_SELF into caller's domain id; and
 * - Checks that caller has permission to act on the target domain.
 */
int rcu_lock_target_domain_by_id(domid_t dom, struct domain **d);

/* Finish a RCU critical region started by rcu_lock_domain_by_id(). */
static inline void rcu_unlock_domain(struct domain *d)
{
    rcu_read_unlock(&domlist_read_lock);
}
static inline struct domain *rcu_lock_domain(struct domain *d)
{
    rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return rcu_lock_domain(current->domain);
}

struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int  domain_kill(struct domain *d);
void domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int  vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);      \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void __domain_crash_synchronous(void) __attribute__((noreturn));
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)
#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int  sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
void vcpu_wake(struct vcpu *d);
void vcpu_sleep_nosync(struct vcpu *d);
void vcpu_sleep_sync(struct vcpu *d);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))
/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
#define for_each_domain(_d)                     \
 for ( (_d) = rcu_dereference(domain_list);     \
       (_d) != NULL;                            \
       (_d) = rcu_dereference((_d)->next_in_list) )

#define for_each_vcpu(_d,_v)                    \
 for ( (_v) = (_d)->vcpu[0];                    \
       (_v) != NULL;                            \
       (_v) = (_v)->next_in_list )

/*
 * Per-VCPU pause flags.
 */
 /* Domain is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
 /* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
 /* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
 /* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}
void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void domain_pause(struct domain *d);
void vcpu_unpause(struct vcpu *v);
void domain_unpause(struct domain *d);
void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);

void vcpu_force_reschedule(struct vcpu *v);
void cpu_disable_scheduler(void);
int  vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
int  vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);

#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))

#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))

#define is_hvm_domain(d) ((d)->is_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
#define need_iommu(d)    ((d)->need_iommu && !(d)->is_hvm)

void set_vcpu_migration_delay(unsigned int delay);
unsigned int get_vcpu_migration_delay(void);

extern int sched_smt_power_savings;

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */