ia64/xen-unstable

xen/include/xen/sched.h @ 13011:360eb996fa38

[XEN] Improve scheduler cap mechanism
Somewhat unbastardize the scheduler cap mechanism. We now cleanly
pause and unpause running VCPUs of capped out domains instead of
using sub-idle priorities. This also improves the precision of
caps a bit.
Signed-off-by: Emmanuel Ackaouy <ack@xensource.com>
author Emmanuel Ackaouy <ack@xensource.com>
date Wed Dec 13 16:13:26 2006 +0000 (2006-12-13)
parents 3fa6635d04b9
children ac51e8f37108
#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/config.h>
#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/smp.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/vcpu.h>
#include <xen/time.h>
#include <xen/timer.h>
#include <xen/grant_table.h>
#include <xen/rangeset.h>
#include <asm/domain.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>

extern unsigned long volatile jiffies;
extern rwlock_t domlist_lock;

/* A global pointer to the initial domain (DOM0). */
extern struct domain *dom0;

#define MAX_EVTCHNS        NR_EVENT_CHANNELS
#define EVTCHNS_PER_BUCKET 128
#define NR_EVTCHN_BUCKETS  (MAX_EVTCHNS / EVTCHNS_PER_BUCKET)
struct evtchn
{
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  consumer_is_xen;   /* Consumed by Xen or by guest? */
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            u16            remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        u16 pirq;      /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
};
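
/*
 * Usage sketch (illustrative, not part of the original header): the ECS_*
 * value in 'state' discriminates the union above, so a member of 'u' should
 * only be read when 'state' matches. A hypothetical helper might look like:
 *
 *     static inline domid_t evtchn_remote_domid(const struct evtchn *chn)
 *     {
 *         ASSERT(chn->state == ECS_UNBOUND);
 *         return chn->u.unbound.remote_domid;
 *     }
 */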
int  evtchn_init(struct domain *d);
void evtchn_destroy(struct domain *d);

struct vcpu
{
    int              vcpu_id;

    int              processor;

    vcpu_info_t     *vcpu_info;

    struct domain   *domain;

    struct vcpu     *next_in_list;

    struct timer     timer;         /* one-shot timer for timeout values */
    unsigned long    sleep_tick;    /* tick at which this vcpu started sleep */

    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */

    void            *sched_priv;    /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */

    unsigned long    vcpu_flags;

    spinlock_t       pause_lock;
    unsigned int     pause_count;

    u16              virq_to_evtchn[NR_VIRQS];

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_t        cpu_affinity;

    unsigned long    nmi_addr;      /* NMI callback address. */

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_t        vcpu_dirty_cpumask;

    struct arch_vcpu arch;
};

/* Per-domain lock can be recursively acquired in fault handlers. */
#define LOCK_BIGLOCK(_d)   spin_lock_recursive(&(_d)->big_lock)
#define UNLOCK_BIGLOCK(_d) spin_unlock_recursive(&(_d)->big_lock)
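
/*
 * Usage sketch (illustrative, not part of the original header): the big
 * lock is recursive, so a fault handler may re-acquire it even when the
 * current hypercall path already holds it.
 *
 *     LOCK_BIGLOCK(d);
 *     ... operate on domain state; a nested LOCK_BIGLOCK(d) is safe ...
 *     UNLOCK_BIGLOCK(d);
 */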
struct domain
{
    domid_t          domain_id;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       big_lock;

    spinlock_t       page_alloc_lock; /* protects all the following fields   */
    struct list_head page_list;       /* linked list, of size tot_pages      */
    struct list_head xenpage_list;    /* linked list, of size xenheap_pages  */
    unsigned int     tot_pages;       /* number of pages currently possessed */
    unsigned int     max_pages;       /* maximum value for tot_pages         */
    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap     */

    /* Scheduling. */
    int              shutdown_code;   /* code value from OS (if DOMF_shutdown) */
    void            *sched_priv;      /* scheduler-specific data */

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn[NR_EVTCHN_BUCKETS];
    spinlock_t       evtchn_lock;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings. Updates should be protected by the
     * domain's event-channel spinlock. Read accesses can also synchronise on
     * the lock, but races don't usually matter.
     */
    u16              pirq_to_evtchn[NR_IRQS];
    DECLARE_BITMAP(pirq_mask, NR_IRQS);

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    unsigned long    domain_flags;

    /* Boolean: Is this an HVM guest? */
    char             is_hvm;

    /* Boolean: Is this guest fully privileged (aka dom0)? */
    char             is_privileged;

    spinlock_t       pause_lock;
    unsigned int     pause_count;

    unsigned long    vm_assist;

    atomic_t         refcnt;

    struct vcpu     *vcpu[MAX_VIRT_CPUS];

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_t        domain_dirty_cpumask;

    struct arch_domain arch;

    void *ssid;      /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* OProfile support. */
    struct xenoprof *xenoprof;
    int32_t          time_offset_seconds;
};
struct domain_setup_info
{
    /* Initialised by caller. */
    unsigned long image_addr;
    unsigned long image_len;
    /* Initialised by loader: Public. */
    unsigned long v_start;
    unsigned long v_end;
    unsigned long v_kernstart;
    unsigned long v_kernend;
    unsigned long v_kernentry;
#define PAEKERN_no           0
#define PAEKERN_yes          1
#define PAEKERN_extended_cr3 2
    unsigned int  pae_kernel;
    /* Initialised by loader: Private. */
    unsigned long elf_paddr_offset;
    unsigned int  load_symtab;
    unsigned long symtab_addr;
    unsigned long symtab_len;
    /*
     * Only one of __elfnote_* or __xen_guest_string will be
     * non-NULL.
     *
     * You should use the xen_elfnote_* accessors below in order to
     * pick up the correct one and retain backwards compatibility.
     */
    void *__elfnote_section, *__elfnote_section_end;
    char *__xen_guest_string;
};
extern struct vcpu *idle_vcpu[NR_CPUS];
#define IDLE_DOMAIN_ID    (0x7FFFU)
#define is_idle_domain(d) ((d)->domain_id == IDLE_DOMAIN_ID)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1<<31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    atomic_t old, new, seen = d->refcnt;
    do
    {
        old = seen;
        if ( unlikely(_atomic_read(old) & DOMAIN_DESTROYED) )
            return 0;
        _atomic_set(new, _atomic_read(old) + 1);
        seen = atomic_compareandswap(old, new, &d->refcnt);
    }
    while ( unlikely(_atomic_read(seen) != _atomic_read(old)) );
    return 1;
}

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}
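
/*
 * Usage sketch (illustrative, not part of the original header): a caller
 * that does not already hold a reference must check get_domain() for
 * failure, and balance every successful get with a put_domain().
 *
 *     if ( get_domain(d) )          /* fails if @d is being destroyed */
 *     {
 *         ... safely use @d here ...
 *         put_domain(d);
 *     }
 */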
struct domain *domain_create(domid_t domid, unsigned int domcr_flags);
 /* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm 0
#define DOMCRF_hvm  (1U<<_DOMCRF_hvm)

int construct_dom0(
    struct domain *d,
    unsigned long image_start, unsigned long image_len,
    unsigned long initrd_start, unsigned long initrd_len,
    char *cmdline);
int set_info_guest(struct domain *d, xen_domctl_vcpucontext_t *);

struct domain *find_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
void domain_kill(struct domain *d);
void domain_shutdown(struct domain *d, u8 reason);
void domain_pause_for_debugger(void);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);       \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void __domain_crash_synchronous(void) __attribute__((noreturn));
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)
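
/*
 * Usage sketch (illustrative, not from the original file): an error path
 * acting on another domain can call domain_crash(d) and then return
 * normally, whereas code that must not keep running in the current
 * domain's context uses domain_crash_synchronous(), which never returns.
 *
 *     if ( unlikely(bad_guest_state) )  /* 'bad_guest_state' is hypothetical */
 *     {
 *         domain_crash(d);
 *         return -EINVAL;
 *     }
 */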
#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
void schedulers_start(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int  sched_id(void);
void vcpu_wake(struct vcpu *d);
void vcpu_sleep_nosync(struct vcpu *d);
void vcpu_sleep_sync(struct vcpu *d);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))
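
/*
 * Usage sketch (illustrative, not part of the original header): a long-
 * running hypercall handler periodically checks for preemption and, if
 * needed, arranges to be restarted where it left off. '__HYPERVISOR_foo_op',
 * 'cmd' and 'arg' are hypothetical names; "ih" says the continuation takes
 * an unsigned int followed by a guest handle.
 *
 *     if ( hypercall_preempt_check() )
 *         return hypercall_create_continuation(
 *             __HYPERVISOR_foo_op, "ih", cmd, arg);
 */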
/* domain_hash and domain_list are protected by the domlist_lock. */
#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
extern struct domain *domain_hash[DOMAIN_HASH_SIZE];
extern struct domain *domain_list;

#define for_each_domain(_d)                     \
 for ( (_d) = domain_list;                      \
       (_d) != NULL;                            \
       (_d) = (_d)->next_in_list )

#define for_each_vcpu(_d,_v)                    \
 for ( (_v) = (_d)->vcpu[0];                    \
       (_v) != NULL;                            \
       (_v) = (_v)->next_in_list )
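
/*
 * Usage sketch (illustrative, not part of the original header): walking the
 * domain list and each domain's VCPUs. Traversal of domain_list is assumed
 * to be done with domlist_lock (declared above) held for reading.
 *
 *     struct domain *d;
 *     struct vcpu   *v;
 *
 *     read_lock(&domlist_lock);
 *     for_each_domain ( d )
 *         for_each_vcpu ( d, v )
 *             ... inspect v ...
 *     read_unlock(&domlist_lock);
 */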
/*
 * Per-VCPU flags (vcpu_flags).
 */
/* Has the FPU been initialised? */
#define _VCPUF_fpu_initialised 0
#define VCPUF_fpu_initialised  (1UL<<_VCPUF_fpu_initialised)
/* Has the FPU been used since it was last saved? */
#define _VCPUF_fpu_dirtied     1
#define VCPUF_fpu_dirtied      (1UL<<_VCPUF_fpu_dirtied)
/* VCPU is blocked waiting for an event. */
#define _VCPUF_blocked         2
#define VCPUF_blocked          (1UL<<_VCPUF_blocked)
/* Currently running on a CPU? */
#define _VCPUF_running         3
#define VCPUF_running          (1UL<<_VCPUF_running)
/* Initialization completed. */
#define _VCPUF_initialised     4
#define VCPUF_initialised      (1UL<<_VCPUF_initialised)
/* VCPU is offline. */
#define _VCPUF_down            5
#define VCPUF_down             (1UL<<_VCPUF_down)
/* NMI callback pending for this VCPU? */
#define _VCPUF_nmi_pending     8
#define VCPUF_nmi_pending      (1UL<<_VCPUF_nmi_pending)
/* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
#define _VCPUF_nmi_masked      9
#define VCPUF_nmi_masked       (1UL<<_VCPUF_nmi_masked)
/* VCPU is polling a set of event channels (SCHEDOP_poll). */
#define _VCPUF_polling         10
#define VCPUF_polling          (1UL<<_VCPUF_polling)
/* VCPU is paused by the hypervisor? */
#define _VCPUF_paused          11
#define VCPUF_paused           (1UL<<_VCPUF_paused)
/* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VCPUF_blocked_in_xen  12
#define VCPUF_blocked_in_xen   (1UL<<_VCPUF_blocked_in_xen)
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VCPUF_migrating       13
#define VCPUF_migrating        (1UL<<_VCPUF_migrating)

/*
 * Per-domain flags (domain_flags).
 */
/* Guest shut itself down for some reason. */
#define _DOMF_shutdown         0
#define DOMF_shutdown          (1UL<<_DOMF_shutdown)
/* Death rattle. */
#define _DOMF_dying            1
#define DOMF_dying             (1UL<<_DOMF_dying)
/* Domain is paused by controller software. */
#define _DOMF_ctrl_pause       2
#define DOMF_ctrl_pause        (1UL<<_DOMF_ctrl_pause)
/* Domain is being debugged by controller software. */
#define _DOMF_debugging        3
#define DOMF_debugging         (1UL<<_DOMF_debugging)
/* Are any VCPUs polling event channels (SCHEDOP_poll)? */
#define _DOMF_polling          4
#define DOMF_polling           (1UL<<_DOMF_polling)
/* Domain is paused by the hypervisor? */
#define _DOMF_paused           5
#define DOMF_paused            (1UL<<_DOMF_paused)

static inline int vcpu_runnable(struct vcpu *v)
{
    return ( !(v->vcpu_flags &
               ( VCPUF_blocked |
                 VCPUF_down |
                 VCPUF_paused |
                 VCPUF_blocked_in_xen |
                 VCPUF_migrating )) &&
             !(v->domain->domain_flags &
               ( DOMF_shutdown |
                 DOMF_ctrl_pause |
                 DOMF_paused )));
}
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void domain_pause(struct domain *d);
void vcpu_unpause(struct vcpu *v);
void domain_unpause(struct domain *d);
void domain_pause_by_systemcontroller(struct domain *d);
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);

int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);

static inline void vcpu_unblock(struct vcpu *v)
{
    if ( test_and_clear_bit(_VCPUF_blocked, &v->vcpu_flags) )
        vcpu_wake(v);
}

#define IS_PRIV(_d) ((_d)->is_privileged)

#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))

#define is_hvm_domain(d) ((d)->is_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */