ia64/xen-unstable

view xen/common/sched_sedf.c @ 19835:edfdeb150f27

Fix buildsystem to detect udev > version 124

udev removed the udevinfo symlink from versions higher than 123 and
xen's build-system could not detect if udev is in place and has the
required version.

Signed-off-by: Marc-A. Dahlhaus <mad@wol.de>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jun 25 13:02:37 2009 +0100 (2009-06-25)
parents 2f9e1348aa98
children
line source
1 /******************************************************************************
2 * Simple EDF scheduler for xen
3 *
4 * by Stephan Diestelhorst (C) 2004 Cambridge University
5 * based on code by Mark Williamson (C) 2004 Intel Research Cambridge
6 */
8 #include <xen/lib.h>
9 #include <xen/sched.h>
10 #include <xen/sched-if.h>
11 #include <xen/timer.h>
12 #include <xen/softirq.h>
13 #include <xen/time.h>
14 #include <xen/errno.h>
16 /*verbosity settings*/
17 #define SEDFLEVEL 0
18 #define PRINT(_f, _a...) \
19 do { \
20 if ( (_f) <= SEDFLEVEL ) \
21 printk(_a ); \
22 } while ( 0 )
24 #ifndef NDEBUG
25 #define SEDF_STATS
26 #define CHECK(_p) \
27 do { \
28 if ( !(_p) ) \
29 printk("Check '%s' failed, line %d, file %s\n", \
30 #_p , __LINE__, __FILE__); \
31 } while ( 0 )
32 #else
33 #define CHECK(_p) ((void)0)
34 #endif
36 #define EXTRA_NONE (0)
37 #define EXTRA_AWARE (1)
38 #define EXTRA_RUN_PEN (2)
39 #define EXTRA_RUN_UTIL (4)
40 #define EXTRA_WANT_PEN_Q (8)
41 #define EXTRA_PEN_Q (0)
42 #define EXTRA_UTIL_Q (1)
43 #define SEDF_ASLEEP (16)
45 #define EXTRA_QUANTUM (MICROSECS(500))
46 #define WEIGHT_PERIOD (MILLISECS(100))
47 #define WEIGHT_SAFETY (MILLISECS(5))
49 #define PERIOD_MAX MILLISECS(10000) /* 10s */
50 #define PERIOD_MIN (MICROSECS(10)) /* 10us */
51 #define SLICE_MIN (MICROSECS(5)) /* 5us */
53 #define IMPLY(a, b) (!(a) || (b))
54 #define EQ(a, b) ((!!(a)) == (!!(b)))
/* Per-domain scheduler state; the real bookkeeping lives per-vcpu. */
struct sedf_dom_info {
    struct domain *domain;
};
/* Per-VCPU scheduler state. */
struct sedf_vcpu_info {
    struct vcpu *vcpu;
    struct list_head list;          /* runq/waitq linkage */
    struct list_head extralist[2];  /* extraq linkage: [EXTRA_PEN_Q], [EXTRA_UTIL_Q] */

    /* Parameters for EDF. */
    s_time_t period;  /* = relative deadline */
    s_time_t slice;   /* = worst case execution time */

    /* Advanced parameters: latency scaling. */
    s_time_t period_orig;
    s_time_t slice_orig;
    s_time_t latency;

    /* Status of domain (EXTRA_* / SEDF_ASLEEP flag bits). */
    int status;
    /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
    short weight;
    short extraweight;
    /* Bookkeeping. */
    s_time_t deadl_abs;        /* absolute deadline */
    s_time_t sched_start_abs;  /* time this vcpu was last scheduled in */
    s_time_t cputime;          /* CPU time consumed in the current period */
    /* Times the domain un-/blocked. */
    s_time_t block_abs;
    s_time_t unblock_abs;

    /* Scores for {util, block penalty}-weighted extratime distribution. */
    int score[2];
    s_time_t short_block_lost_tot;

    /* Statistics. */
    s_time_t extra_time_tot;

#ifdef SEDF_STATS
    s_time_t block_time_tot;
    s_time_t penalty_time_tot;
    int block_tot;
    int short_block_tot;
    int long_block_tot;
    int short_cont;
    int pen_extra_blocks;
    int pen_extra_slices;
#endif
};
/* Per-physical-CPU scheduler state. */
struct sedf_cpu_info {
    struct list_head runnableq;      /* EDF runqueue, sorted by absolute deadline */
    struct list_head waitq;          /* vcpus waiting for their next period, sorted by period begin */
    struct list_head extraq[2];      /* extratime queues: [EXTRA_PEN_Q], [EXTRA_UTIL_Q] */
    s_time_t current_slice_expires;  /* absolute end of the slice granted by the last schedule */
};
115 #define EDOM_INFO(d) ((struct sedf_vcpu_info *)((d)->sched_priv))
116 #define CPU_INFO(cpu) \
117 ((struct sedf_cpu_info *)per_cpu(schedule_data, cpu).sched_priv)
118 #define LIST(d) (&EDOM_INFO(d)->list)
119 #define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
120 #define RUNQ(cpu) (&CPU_INFO(cpu)->runnableq)
121 #define WAITQ(cpu) (&CPU_INFO(cpu)->waitq)
122 #define EXTRAQ(cpu,i) (&(CPU_INFO(cpu)->extraq[i]))
123 #define IDLETASK(cpu) ((struct vcpu *)per_cpu(schedule_data, cpu).idle)
125 #define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)
127 #define MIN(x,y) (((x)<(y))?(x):(y))
128 #define DIV_UP(x,y) (((x) + (y) - 1) / y)
130 #define extra_runs(inf) ((inf->status) & 6)
131 #define extra_get_cur_q(inf) (((inf->status & 6) >> 1)-1)
132 #define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
135 static void sedf_dump_cpu_state(int i);
137 static inline int extraq_on(struct vcpu *d, int i)
138 {
139 return ((EXTRALIST(d,i)->next != NULL) &&
140 (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
141 }
143 static inline void extraq_add_head(struct vcpu *d, int i)
144 {
145 list_add(EXTRALIST(d,i), EXTRAQ(d->processor,i));
146 ASSERT(extraq_on(d, i));
147 }
149 static inline void extraq_add_tail(struct vcpu *d, int i)
150 {
151 list_add_tail(EXTRALIST(d,i), EXTRAQ(d->processor,i));
152 ASSERT(extraq_on(d, i));
153 }
/*
 * Unlink vcpu d from extra queue i. The entry's next pointer is reset
 * to NULL so that extraq_on() reports the vcpu as off-queue afterwards.
 */
static inline void extraq_del(struct vcpu *d, int i)
{
    struct list_head *list = EXTRALIST(d,i);
    ASSERT(extraq_on(d,i));
    PRINT(3, "Removing domain %i.%i from L%i extraq\n",
          d->domain->domain_id, d->vcpu_id, i);
    list_del(list);
    list->next = NULL; /* mark as off-queue for extraq_on() */
    ASSERT(!extraq_on(d, i));
}
/*
 * Adds a domain to the queue of processes which are aware of extra time.
 * The list is sorted by score, where a lower score means higher priority
 * for an extra slice. It also updates the other entries' scores, by
 * simply subtracting a fixed value ('sub') from each, in order to avoid
 * overflow. The algorithm works by simply charging each domain that
 * received extratime with an inverse of its weight.
 */
static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub)
{
    struct list_head *cur;
    struct sedf_vcpu_info *curinf;

    ASSERT(!extraq_on(d,i));

    PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
          " to L%i extraq\n",
          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
          EDOM_INFO(d)->short_block_lost_tot, i);

    /*
     * Iterate through all elements to find our "hole" and on our way
     * update all the other scores.
     */
    list_for_each ( cur, EXTRAQ(d->processor, i) )
    {
        curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]);
        curinf->score[i] -= sub;
        if ( EDOM_INFO(d)->score[i] < curinf->score[i] )
            break;
        PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
              curinf->vcpu->domain->domain_id,
              curinf->vcpu->vcpu_id, curinf->score[i]);
    }

    /* cur now contains the element, before which we'll enqueue. */
    PRINT(3, "\tlist_add to %p\n", cur->prev);
    list_add(EXTRALIST(d,i),cur->prev);

    /* Continue updating the rest of the extraq (scores only). */
    if ( (cur != EXTRAQ(d->processor,i)) && sub )
    {
        for ( cur = cur->next; cur != EXTRAQ(d->processor,i); cur = cur->next )
        {
            curinf = list_entry(cur,struct sedf_vcpu_info, extralist[i]);
            curinf->score[i] -= sub;
            PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id, curinf->score[i]);
        }
    }

    ASSERT(extraq_on(d,i));
}
218 static inline void extraq_check(struct vcpu *d)
219 {
220 if ( extraq_on(d, EXTRA_UTIL_Q) )
221 {
222 PRINT(2,"Dom %i.%i is on L1 extraQ\n",
223 d->domain->domain_id, d->vcpu_id);
225 if ( !(EDOM_INFO(d)->status & EXTRA_AWARE) &&
226 !extra_runs(EDOM_INFO(d)) )
227 {
228 extraq_del(d, EXTRA_UTIL_Q);
229 PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
230 d->domain->domain_id, d->vcpu_id);
231 }
232 }
233 else
234 {
235 PRINT(2, "Dom %i.%i is NOT on L1 extraQ\n",
236 d->domain->domain_id,
237 d->vcpu_id);
239 if ( (EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d) )
240 {
241 extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
242 PRINT(2,"Added dom %i.%i to L1 extraQ\n",
243 d->domain->domain_id, d->vcpu_id);
244 }
245 }
246 }
/*
 * Called on unblock: (re-)insert vcpu d on the weighted L1 extra queue
 * if it is extratime-aware; scores are left untouched (sub == 0).
 * NOTE(review): the 'priority' parameter is not used in this body.
 */
static inline void extraq_check_add_unblocked(struct vcpu *d, int priority)
{
    struct sedf_vcpu_info *inf = EDOM_INFO(d);
    if ( inf->status & EXTRA_AWARE )
        /* Put on the weighted extraq without updating any scores. */
        extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
}
257 static inline int __task_on_queue(struct vcpu *d)
258 {
259 return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
260 }
/*
 * Unlink vcpu d from the runq/waitq it is on; next is reset to NULL so
 * __task_on_queue() reports it off-queue afterwards.
 */
static inline void __del_from_queue(struct vcpu *d)
{
    struct list_head *list = LIST(d);
    ASSERT(__task_on_queue(d));
    PRINT(3,"Removing domain %i.%i (bop= %"PRIu64") from runq/waitq\n",
          d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
    list_del(list);
    list->next = NULL; /* mark as off-queue */
    ASSERT(!__task_on_queue(d));
}
273 typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
275 static inline void list_insert_sort(
276 struct list_head *list, struct list_head *element, list_comparer comp)
277 {
278 struct list_head *cur;
280 /* Iterate through all elements to find our "hole". */
281 list_for_each( cur, list )
282 if ( comp(element, cur) < 0 )
283 break;
285 /* cur now contains the element, before which we'll enqueue. */
286 PRINT(3,"\tlist_add to %p\n",cur->prev);
287 list_add(element, cur->prev);
288 }
/*
 * Generates a comparison function 'name##_comp' for list_insert_sort():
 * it extracts the sedf_vcpu_info from each list element via 'field' and
 * compares the expressions comp1 (evaluated on d1) and comp2 (on d2),
 * returning -1 / 0 / 1.
 */
#define DOMAIN_COMPARER(name, field, comp1, comp2) \
static int name##_comp(struct list_head* el1, struct list_head* el2) \
{ \
    struct sedf_vcpu_info *d1, *d2; \
    d1 = list_entry(el1,struct sedf_vcpu_info, field); \
    d2 = list_entry(el2,struct sedf_vcpu_info, field); \
    if ( (comp1) == (comp2) ) \
        return 0; \
    if ( (comp1) < (comp2) ) \
        return -1; \
    else \
        return 1; \
}
/*
 * Adds a domain to the queue of processes which wait for the beginning
 * of the next period; this list is therefore sorted by this time, which
 * is simply absolute deadline - period.
 */
DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2));
static inline void __add_to_waitqueue_sort(struct vcpu *v)
{
    ASSERT(!__task_on_queue(v));
    PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
          v->domain->domain_id, v->vcpu_id, PERIOD_BEGIN(EDOM_INFO(v)));
    list_insert_sort(WAITQ(v->processor), LIST(v), waitq_comp);
    ASSERT(__task_on_queue(v));
}
/*
 * Adds a domain to the queue of processes which have started their
 * current period and are runnable (i.e. not blocked, dying,...). The
 * first element on this list is running on the processor; if the list
 * is empty the idle task will run. As we are implementing EDF, this
 * list is sorted by deadlines.
 */
DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs);
static inline void __add_to_runqueue_sort(struct vcpu *v)
{
    PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
          v->domain->domain_id, v->vcpu_id, EDOM_INFO(v)->deadl_abs);
    list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp);
}
332 static int sedf_init_vcpu(struct vcpu *v)
333 {
334 struct sedf_vcpu_info *inf;
336 if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
337 return -1;
338 memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
340 inf = EDOM_INFO(v);
341 inf->vcpu = v;
343 /* Allocate per-CPU context if this is the first domain to be added. */
344 if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
345 {
346 per_cpu(schedule_data, v->processor).sched_priv =
347 xmalloc(struct sedf_cpu_info);
348 BUG_ON(per_cpu(schedule_data, v->processor).sched_priv == NULL);
349 memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor)));
350 INIT_LIST_HEAD(WAITQ(v->processor));
351 INIT_LIST_HEAD(RUNQ(v->processor));
352 INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_PEN_Q));
353 INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
354 }
356 /* Every VCPU gets an equal share of extratime by default. */
357 inf->deadl_abs = 0;
358 inf->latency = 0;
359 inf->status = EXTRA_AWARE | SEDF_ASLEEP;
360 inf->extraweight = 1;
362 if ( v->domain->domain_id == 0 )
363 {
364 /* Domain0 gets 75% guaranteed (15ms every 20ms). */
365 inf->period = MILLISECS(20);
366 inf->slice = MILLISECS(15);
367 }
368 else
369 {
370 /* Best-effort extratime only. */
371 inf->period = WEIGHT_PERIOD;
372 inf->slice = 0;
373 }
375 inf->period_orig = inf->period; inf->slice_orig = inf->slice;
376 INIT_LIST_HEAD(&(inf->list));
377 INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
378 INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));
380 if ( !is_idle_vcpu(v) )
381 {
382 extraq_check(v);
383 }
384 else
385 {
386 EDOM_INFO(v)->deadl_abs = 0;
387 EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
388 }
390 return 0;
391 }
/* Free the per-VCPU scheduler state allocated in sedf_init_vcpu(). */
static void sedf_destroy_vcpu(struct vcpu *v)
{
    xfree(v->sched_priv);
}
398 static int sedf_init_domain(struct domain *d)
399 {
400 d->sched_priv = xmalloc(struct sedf_dom_info);
401 if ( d->sched_priv == NULL )
402 return -ENOMEM;
404 memset(d->sched_priv, 0, sizeof(struct sedf_dom_info));
406 return 0;
407 }
/* Free the per-domain scheduler state allocated in sedf_init_domain(). */
static void sedf_destroy_domain(struct domain *d)
{
    xfree(d->sched_priv);
}
/* Pick a CPU for vcpu v: the first online CPU in its affinity mask. */
static int sedf_pick_cpu(struct vcpu *v)
{
    cpumask_t online_affinity;

    cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
    return first_cpu(online_affinity);
}
/*
 * Handles the rescheduling & bookkeeping of domains running in their
 * guaranteed timeslice.
 */
static void desched_edf_dom(s_time_t now, struct vcpu* d)
{
    struct sedf_vcpu_info* inf = EDOM_INFO(d);

    /* Current domain is running in real time mode. */
    ASSERT(__task_on_queue(d));

    /* Update the domain's cputime. */
    inf->cputime += now - inf->sched_start_abs;

    /*
     * Scheduling decisions which don't remove the running domain from
     * the runq: slice not yet used up and domain still runnable.
     */
    if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
        return;

    __del_from_queue(d);

    /*
     * Manage bookkeeping (i.e. calculate next deadline, memorise
     * overrun-time of slice) of finished domains.
     */
    if ( inf->cputime >= inf->slice )
    {
        /* Carry any overrun forward as a debit against the next slice. */
        inf->cputime -= inf->slice;

        if ( inf->period < inf->period_orig )
        {
            /* This domain runs in latency scaling or burst mode. */
            inf->period *= 2;
            inf->slice *= 2;
            if ( (inf->period > inf->period_orig) ||
                 (inf->slice > inf->slice_orig) )
            {
                /* Reset slice and period to their original values. */
                inf->period = inf->period_orig;
                inf->slice = inf->slice_orig;
            }
        }

        /* Set next deadline. */
        inf->deadl_abs += inf->period;
    }

    /* Add a runnable domain to the waitqueue. */
    if ( sedf_runnable(d) )
    {
        __add_to_waitqueue_sort(d);
    }
    else
    {
        /* We have a blocked realtime task -> remove it from exqs too. */
        if ( extraq_on(d, EXTRA_PEN_Q) )
            extraq_del(d, EXTRA_PEN_Q);
        if ( extraq_on(d, EXTRA_UTIL_Q) )
            extraq_del(d, EXTRA_UTIL_Q);
    }

    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
                 sedf_runnable(d)));
}
/*
 * Update all elements on the queues: move vcpus whose next period has
 * begun from the waitq to the runq, and fix up runq entries whose slice
 * is exhausted or whose deadline has already passed.
 */
static void update_queues(
    s_time_t now, struct list_head *runq, struct list_head *waitq)
{
    struct list_head *cur, *tmp;
    struct sedf_vcpu_info *curinf;

    PRINT(3,"Updating waitq..\n");

    /*
     * Check for the first elements of the waitqueue, whether their
     * next period has already started.
     */
    list_for_each_safe ( cur, tmp, waitq )
    {
        curinf = list_entry(cur, struct sedf_vcpu_info, list);
        PRINT(4,"\tLooking @ dom %i.%i\n",
              curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
        if ( PERIOD_BEGIN(curinf) > now )
            break; /* waitq is sorted by period begin: rest is later */
        __del_from_queue(curinf->vcpu);
        __add_to_runqueue_sort(curinf->vcpu);
    }

    PRINT(3,"Updating runq..\n");

    /* Process the runq, find domains that are on the runq that shouldn't. */
    list_for_each_safe ( cur, tmp, runq )
    {
        curinf = list_entry(cur,struct sedf_vcpu_info,list);
        PRINT(4,"\tLooking @ dom %i.%i\n",
              curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);

        if ( unlikely(curinf->slice == 0) )
        {
            /* Ignore domains with empty slice. */
            PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id);
            __del_from_queue(curinf->vcpu);

            /* Move them to their next period. */
            curinf->deadl_abs += curinf->period;

            /* Ensure that the start of the next period is in the future. */
            if ( unlikely(PERIOD_BEGIN(curinf) < now) )
                curinf->deadl_abs +=
                    (DIV_UP(now - PERIOD_BEGIN(curinf),
                            curinf->period)) * curinf->period;

            /* Put them back into the queue. */
            __add_to_waitqueue_sort(curinf->vcpu);
        }
        else if ( unlikely((curinf->deadl_abs < now) ||
                           (curinf->cputime > curinf->slice)) )
        {
            /*
             * We missed the deadline or the slice was already finished.
             * Might happen because of dom_adj.
             */
            PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
                  "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
                  " cputime: %"PRIu64"\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id,
                  curinf->deadl_abs, curinf->slice, now,
                  curinf->cputime);
            __del_from_queue(curinf->vcpu);

            /* Common case: we miss one period. */
            curinf->deadl_abs += curinf->period;

            /*
             * If we are still behind: modulo arithmetic, force deadline
             * to be in future and aligned to period borders.
             */
            if ( unlikely(curinf->deadl_abs < now) )
                curinf->deadl_abs +=
                    DIV_UP(now - curinf->deadl_abs,
                           curinf->period) * curinf->period;
            ASSERT(curinf->deadl_abs >= now);

            /* Give a fresh slice. */
            curinf->cputime = 0;
            if ( PERIOD_BEGIN(curinf) > now )
                __add_to_waitqueue_sort(curinf->vcpu);
            else
                __add_to_runqueue_sort(curinf->vcpu);
        }
        else
            break; /* runq is EDF-sorted: the rest is still in order */
    }

    PRINT(3,"done updating the queues\n");
}
/*
 * Removes a domain from the head of the according extraQ and requeues
 * it at a specified position:
 *   round-robin extratime: end of extraQ
 *   weighted ext.:         insert in sorted list by score
 * If the domain is blocked / has regained its short-block-loss time it
 * is not put on any queue.
 */
static void desched_extra_dom(s_time_t now, struct vcpu *d)
{
    struct sedf_vcpu_info *inf = EDOM_INFO(d);
    int i = extra_get_cur_q(inf);
    unsigned long oldscore;

    ASSERT(extraq_on(d, i));

    /* Unset all running flags. */
    inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
    /* Fresh slice for the next run. */
    inf->cputime = 0;
    /* Accumulate total extratime. */
    inf->extra_time_tot += now - inf->sched_start_abs;
    /* Remove extradomain from head of the queue. */
    extraq_del(d, i);

    /* Update the score. */
    oldscore = inf->score[i];
    if ( i == EXTRA_PEN_Q )
    {
        /* Domain was running in the L0 (penalty) extraq. */
        /* Reduce block lost, probably more sophistication here! */
        /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
        inf->short_block_lost_tot -= now - inf->sched_start_abs;
        PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n",
              inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id,
              inf->short_block_lost_tot);
#if 0
        /*
         * KAF: If we don't exit short-blocking state at this point
         * domain0 can steal all CPU for up to 10 seconds before
         * scheduling settles down (when competing against another
         * CPU-bound domain). Doing this seems to make things behave
         * nicely. Noone gets starved by default.
         */
        if ( inf->short_block_lost_tot <= 0 )
#endif
        {
            /*
             * NOTE: with the 'if' above compiled out, this compound
             * statement is entered unconditionally.
             */
            PRINT(4,"Domain %i.%i compensated short block loss!\n",
                  inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id);
            /* We have (over-)compensated our block penalty. */
            inf->short_block_lost_tot = 0;
            /* We don't want a place on the penalty queue anymore! */
            inf->status &= ~EXTRA_WANT_PEN_Q;
            goto check_extra_queues;
        }

        /*
         * We have to go again for another try in the block-extraq; the
         * score is not used incrementally here, as this is already done
         * by recalculating the block_lost.
         */
        inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
            inf->short_block_lost_tot;
        oldscore = 0;
    }
    else
    {
        /*
         * Domain was running in the L1 (utilisation) extraq => score is
         * the inverse of utilisation and is used somewhat incrementally!
         */
        if ( !inf->extraweight )
            /* NB: use fixed point arithmetic with 10 bits. */
            inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
                inf->slice;
        else
            /*
             * Conversion between realtime utilisation and extraweight:
             * full (ie 100%) utilization is equivalent to 128 extraweight.
             */
            inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
    }

 check_extra_queues:
    /* Add a runnable domain to the right queue; remove blocked ones. */
    if ( sedf_runnable(d) )
    {
        /* Add according to score: weighted round robin. */
        if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
            ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
            extraq_add_sort_update(d, i, oldscore);
    }
    else
    {
        /* Remove this blocked domain from the waitq! */
        __del_from_queue(d);
        /* Make sure we remove a blocked domain from the other extraq too. */
        if ( i == EXTRA_PEN_Q )
        {
            if ( extraq_on(d, EXTRA_UTIL_Q) )
                extraq_del(d, EXTRA_UTIL_Q);
        }
        else
        {
            if ( extraq_on(d, EXTRA_PEN_Q) )
                extraq_del(d, EXTRA_PEN_Q);
        }
    }

    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
                 sedf_runnable(d)));
}
/*
 * Pick a vcpu to run on otherwise-idle ('extra') time on this CPU.
 * L0 (penalty) queue entries are served first, then L1 (utilisation)
 * entries; each winner gets a fixed EXTRA_QUANTUM. Falls back to the
 * idle task when no candidate exists or less than EXTRA_QUANTUM
 * remains until 'end_xt'.
 */
static struct task_slice sedf_do_extra_schedule(
    s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
{
    struct task_slice ret;
    struct sedf_vcpu_info *runinf;
    ASSERT(end_xt > now);

    /* Enough time left to use for extratime? */
    if ( end_xt - now < EXTRA_QUANTUM )
        goto return_idle;

    if ( !list_empty(extraq[EXTRA_PEN_Q]) )
    {
        /*
         * We still have elements on the level 0 extraq
         * => let those run first!
         */
        runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
                            struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
        runinf->status |= EXTRA_RUN_PEN;
        ret.task = runinf->vcpu;
        ret.time = EXTRA_QUANTUM;
#ifdef SEDF_STATS
        runinf->pen_extra_slices++;
#endif
    }
    else
    {
        if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
        {
            /* Use elements from the normal extraqueue. */
            runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
                                struct sedf_vcpu_info,
                                extralist[EXTRA_UTIL_Q]);
            runinf->status |= EXTRA_RUN_UTIL;
            ret.task = runinf->vcpu;
            ret.time = EXTRA_QUANTUM;
        }
        else
            goto return_idle;
    }

    ASSERT(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    return ret;

 return_idle:
    /* Nothing to schedule: grant the idle task the whole remainder. */
    ret.task = IDLETASK(cpu);
    ret.time = end_xt - now;
    ASSERT(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    return ret;
}
/*
 * Main scheduling function.
 * Reasons for calling this function are:
 *  - timeslice for the current period used up
 *  - domain on waitqueue has started its period
 *  - and various others ;) in general: determine which domain to run next
 */
static struct task_slice sedf_do_schedule(s_time_t now)
{
    int cpu = smp_processor_id();
    struct list_head *runq = RUNQ(cpu);
    struct list_head *waitq = WAITQ(cpu);
    struct sedf_vcpu_info *inf = EDOM_INFO(current);
    struct list_head *extraq[] = {
        EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)};
    struct sedf_vcpu_info *runinf, *waitinf;
    struct task_slice ret;

    /* Idle tasks don't need any of the following stuff. */
    if ( is_idle_vcpu(current) )
        goto check_waitq;

    /*
     * Create local state of the status of the domain, in order to avoid
     * inconsistent state during scheduling decisions, because data for
     * vcpu_runnable is not protected by the scheduling lock!
     */
    if ( !vcpu_runnable(current) )
        inf->status |= SEDF_ASLEEP;

    if ( inf->status & SEDF_ASLEEP )
        inf->block_abs = now;

    if ( unlikely(extra_runs(inf)) )
    {
        /* Special treatment of domains running in extra time. */
        desched_extra_dom(now, current);
    }
    else
    {
        desched_edf_dom(now, current);
    }
 check_waitq:
    update_queues(now, runq, waitq);

    /*
     * Now simply pick the first domain from the runqueue, which has the
     * earliest deadline, because the list is sorted.
     */
    if ( !list_empty(runq) )
    {
        runinf = list_entry(runq->next,struct sedf_vcpu_info,list);
        ret.task = runinf->vcpu;
        if ( !list_empty(waitq) )
        {
            waitinf = list_entry(waitq->next,
                                 struct sedf_vcpu_info,list);
            /*
             * Rerun scheduler, when scheduled domain reaches its end of
             * slice or the first domain from the waitqueue gets ready.
             */
            ret.time = MIN(now + runinf->slice - runinf->cputime,
                           PERIOD_BEGIN(waitinf)) - now;
        }
        else
        {
            ret.time = runinf->slice - runinf->cputime;
        }
        CHECK(ret.time > 0);
        goto sched_done;
    }

    if ( !list_empty(waitq) )
    {
        waitinf = list_entry(waitq->next,struct sedf_vcpu_info, list);
        /*
         * We could not find any suitable domain
         * => look for domains that are aware of extratime.
         */
        ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
                                     extraq, cpu);
        CHECK(ret.time > 0);
    }
    else
    {
        /*
         * This could probably never happen, but one never knows...
         * It can... imagine a second CPU, which is pure scifi ATM,
         * but one never knows ;)
         */
        ret.task = IDLETASK(cpu);
        ret.time = SECONDS(1);
    }

 sched_done:
    /*
     * TODO: Do something USEFUL when this happens and find out, why it
     * still can happen!!!
     */
    if ( ret.time < 0)
    {
        printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
               ret.time);
        ret.time = EXTRA_QUANTUM;
    }

    EDOM_INFO(ret.task)->sched_start_abs = now;
    CHECK(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    CPU_INFO(cpu)->current_slice_expires = now + ret.time;
    return ret;
}
851 static void sedf_sleep(struct vcpu *d)
852 {
853 PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
854 d->domain->domain_id, d->vcpu_id);
856 if ( is_idle_vcpu(d) )
857 return;
859 EDOM_INFO(d)->status |= SEDF_ASLEEP;
861 if ( per_cpu(schedule_data, d->processor).curr == d )
862 {
863 cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
864 }
865 else
866 {
867 if ( __task_on_queue(d) )
868 __del_from_queue(d);
869 if ( extraq_on(d, EXTRA_UTIL_Q) )
870 extraq_del(d, EXTRA_UTIL_Q);
871 if ( extraq_on(d, EXTRA_PEN_Q) )
872 extraq_del(d, EXTRA_PEN_Q);
873 }
874 }
877 /* This function wakes up a domain, i.e. moves them into the waitqueue
878 * things to mention are: admission control is taking place nowhere at
879 * the moment, so we can't be sure, whether it is safe to wake the domain
880 * up at all. Anyway, even if it is safe (total cpu usage <=100%) there are
881 * some considerations on when to allow the domain to wake up and have it's
882 * first deadline...
883 * I detected 3 cases, which could describe the possible behaviour of the
884 * scheduler,
885 * and I'll try to make them more clear:
886 *
887 * 1. Very conservative
888 * -when a blocked domain unblocks, it is allowed to start execution at
889 * the beginning of the next complete period
890 * (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up
891 *
892 * DRRB_____D__U_____DRRRRR___D________ ...
893 *
894 * -this causes the domain to miss a period (and a deadline)
895 * -doesn't disturb the schedule at all
896 * -deadlines keep occurring isochronously
897 *
898 * 2. Conservative Part 1: Short Unblocking
899 * -when a domain unblocks in the same period as it was blocked it
900 * unblocks and may consume the rest of it's original time-slice minus
901 * the time it was blocked
902 * (assume period=9, slice=5)
903 *
904 * DRB_UR___DRRRRR___D...
905 *
906 * -this also doesn't disturb scheduling, but might lead to the fact, that
907 * the domain can't finish it's workload in the period
908 * -in addition to that the domain can be treated prioritised when
909 * extratime is available
910 * -addition: experiments have shown that this may have a HUGE impact on
911 * performance of other domains, because it can lead to excessive context
912 * switches
913 *
914 * Part2: Long Unblocking
915 * Part 2a
916 * -it is obvious that such accounting of block time, applied when
917 * unblocking is happening in later periods, works fine as well
918 * -the domain is treated as if it would have been running since the start
919 * of its new period
920 *
921 * DRB______D___UR___D...
922 *
923 * Part 2b
924 * -if one needs the full slice in the next period, it is necessary to
925 * treat the unblocking time as the start of the new period, i.e. move
926 * the deadline further back (later)
927 * -this doesn't disturb scheduling as well, because for EDF periods can
928 * be treated as minimal inter-release times and scheduling stays
929 * correct, when deadlines are kept relative to the time the process
930 * unblocks
931 *
932 * DRB______D___URRRR___D...<prev [Thread] next>
933 * (D) <- old deadline was here
934 * -problem: deadlines don't occur isochronous anymore
935 * Part 2c (Improved Atropos design)
936 * -when a domain unblocks it is given a very short period (=latency hint)
937 * and slice length scaled accordingly
938 * -both rise again to the original value (e.g. get doubled every period)
939 *
940 * 3. Unconservative (i.e. incorrect)
941 * -to boost the performance of I/O dependent domains it would be possible
942 * to put the domain into the runnable queue immediately, and let it run
943 * for the remainder of the slice of the current period
944 * (or even worse: allocate a new full slice for the domain)
945 * -either behaviour can lead to missed deadlines in other domains as
946 * opposed to approaches 1,2a,2b
947 */
/*
 * Short-unblocking policy with extratime support: the vcpu forfeits the
 * realtime remainder of this period, and in compensation gets priority
 * in L0 (penalty) extratime distribution according to the time lost in
 * this slice due to blocking.
 */
static void unblock_short_extra_support(
    struct sedf_vcpu_info* inf, s_time_t now)
{
    s_time_t pen;

    /* No more realtime execution in this period! */
    inf->deadl_abs += inf->period;
    if ( likely(inf->block_abs) )
    {
        /* Treat blocked time as consumed by the domain. */
        /*inf->cputime += now - inf->block_abs;*/
        /*
         * Penalty is the time the domain would have had if it had
         * continued to run.
         */
        pen = (inf->slice - inf->cputime);
        if ( pen < 0 )
            pen = 0;
        /* Accumulate all penalties over the periods. */
        /*inf->short_block_lost_tot += pen;*/
        /* Set penalty to the current value. */
        inf->short_block_lost_tot = pen;
        /* Not sure which one is better.. but seems to work well... */

        if ( inf->short_block_lost_tot )
        {
            /* Score: fixed-point (10-bit) inverse of the lost time. */
            inf->score[0] = (inf->period << 10) /
                inf->short_block_lost_tot;
#ifdef SEDF_STATS
            inf->pen_extra_blocks++;
#endif
            if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
                /* Remove domain for possible resorting! */
                extraq_del(inf->vcpu, EXTRA_PEN_Q);
            else
                /*
                 * Remember that we want to be on the penalty q so that
                 * we can continue when we (un-)block in
                 * penalty-extratime.
                 */
                inf->status |= EXTRA_WANT_PEN_Q;

            /* (Re-)add domain to the penalty extraq. */
            extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
        }
    }

    /* Give it a fresh slice in the next period! */
    inf->cputime = 0;
}
999 static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
1001 /*Conservative 2b*/
1002 /*Treat the unblocking time as a start of a new period */
1003 inf->deadl_abs = now + inf->period;
1004 inf->cputime = 0;
1008 #define DOMAIN_EDF 1
1009 #define DOMAIN_EXTRA_PEN 2
1010 #define DOMAIN_EXTRA_UTIL 3
1011 #define DOMAIN_IDLE 4
1012 static inline int get_run_type(struct vcpu* d)
1014 struct sedf_vcpu_info* inf = EDOM_INFO(d);
1015 if (is_idle_vcpu(d))
1016 return DOMAIN_IDLE;
1017 if (inf->status & EXTRA_RUN_PEN)
1018 return DOMAIN_EXTRA_PEN;
1019 if (inf->status & EXTRA_RUN_UTIL)
1020 return DOMAIN_EXTRA_UTIL;
1021 return DOMAIN_EDF;
1025 /*Compares two domains in the relation of whether the one is allowed to
1026 interrupt the others execution.
1027 It returns true (!=0) if a switch to the other domain is good.
1028 Current Priority scheme is as follows:
1029 EDF > L0 (penalty based) extra-time >
1030 L1 (utilization) extra-time > idle-domain
1031 In the same class priorities are assigned as following:
1032 EDF: early deadline > late deadline
1033 L0 extra-time: lower score > higher score*/
1034 static inline int should_switch(struct vcpu *cur,
1035 struct vcpu *other,
1036 s_time_t now)
1038 struct sedf_vcpu_info *cur_inf, *other_inf;
1039 cur_inf = EDOM_INFO(cur);
1040 other_inf = EDOM_INFO(other);
1042 /* Check whether we need to make an earlier scheduling decision. */
1043 if ( PERIOD_BEGIN(other_inf) <
1044 CPU_INFO(other->processor)->current_slice_expires )
1045 return 1;
1047 /* No timing-based switches need to be taken into account here. */
1048 switch ( get_run_type(cur) )
1050 case DOMAIN_EDF:
1051 /* Do not interrupt a running EDF domain. */
1052 return 0;
1053 case DOMAIN_EXTRA_PEN:
1054 /* Check whether we also want the L0 ex-q with lower score. */
1055 return ((other_inf->status & EXTRA_WANT_PEN_Q) &&
1056 (other_inf->score[EXTRA_PEN_Q] <
1057 cur_inf->score[EXTRA_PEN_Q]));
1058 case DOMAIN_EXTRA_UTIL:
1059 /* Check whether we want the L0 extraq. Don't
1060 * switch if both domains want L1 extraq.
1061 */
1062 return !!(other_inf->status & EXTRA_WANT_PEN_Q);
1063 case DOMAIN_IDLE:
1064 return 1;
1067 return 1;
1070 static void sedf_wake(struct vcpu *d)
1072 s_time_t now = NOW();
1073 struct sedf_vcpu_info* inf = EDOM_INFO(d);
1075 PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
1076 d->vcpu_id);
1078 if ( unlikely(is_idle_vcpu(d)) )
1079 return;
1081 if ( unlikely(__task_on_queue(d)) )
1083 PRINT(3,"\tdomain %i.%i is already in some queue\n",
1084 d->domain->domain_id, d->vcpu_id);
1085 return;
1088 ASSERT(!sedf_runnable(d));
1089 inf->status &= ~SEDF_ASLEEP;
1090 ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
1091 ASSERT(!extraq_on(d, EXTRA_PEN_Q));
1093 if ( unlikely(inf->deadl_abs == 0) )
1095 /*initial setup of the deadline*/
1096 inf->deadl_abs = now + inf->slice;
1099 PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
1100 "now= %"PRIu64")\n",
1101 d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);
1103 #ifdef SEDF_STATS
1104 inf->block_tot++;
1105 #endif
1107 if ( unlikely(now < PERIOD_BEGIN(inf)) )
1109 PRINT(4,"extratime unblock\n");
1110 /* unblocking in extra-time! */
1111 if ( inf->status & EXTRA_WANT_PEN_Q )
1113 /*we have a domain that wants compensation
1114 for block penalty and did just block in
1115 its compensation time. Give it another
1116 chance!*/
1117 extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
1119 extraq_check_add_unblocked(d, 0);
1121 else
1123 if ( now < inf->deadl_abs )
1125 PRINT(4,"short unblocking\n");
1126 /*short blocking*/
1127 #ifdef SEDF_STATS
1128 inf->short_block_tot++;
1129 #endif
1130 unblock_short_extra_support(inf, now);
1132 extraq_check_add_unblocked(d, 1);
1134 else
1136 PRINT(4,"long unblocking\n");
1137 /*long unblocking*/
1138 #ifdef SEDF_STATS
1139 inf->long_block_tot++;
1140 #endif
1141 unblock_long_cons_b(inf, now);
1143 extraq_check_add_unblocked(d, 1);
1147 PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
1148 "now= %"PRIu64")\n",
1149 d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
1150 inf->period, now);
1152 if ( PERIOD_BEGIN(inf) > now )
1154 __add_to_waitqueue_sort(d);
1155 PRINT(3,"added to waitq\n");
1157 else
1159 __add_to_runqueue_sort(d);
1160 PRINT(3,"added to runq\n");
1163 #ifdef SEDF_STATS
1164 /*do some statistics here...*/
1165 if ( inf->block_abs != 0 )
1167 inf->block_time_tot += now - inf->block_abs;
1168 inf->penalty_time_tot +=
1169 PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
1171 #endif
1173 /*sanity check: make sure each extra-aware domain IS on the util-q!*/
1174 ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
1175 ASSERT(__task_on_queue(d));
1176 /*check whether the awakened task needs to invoke the do_schedule
1177 routine. Try to avoid unnecessary runs but:
1178 Save approximation: Always switch to scheduler!*/
1179 ASSERT(d->processor >= 0);
1180 ASSERT(d->processor < NR_CPUS);
1181 ASSERT(per_cpu(schedule_data, d->processor).curr);
1183 if ( should_switch(per_cpu(schedule_data, d->processor).curr, d, now) )
1184 cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
1188 /* Print a lot of useful information about a domains in the system */
1189 static void sedf_dump_domain(struct vcpu *d)
1191 printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
1192 d->is_running ? 'T':'F');
1193 printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
1194 " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
1195 EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
1196 EDOM_INFO(d)->weight,
1197 EDOM_INFO(d)->score[EXTRA_UTIL_Q],
1198 (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
1199 EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
1201 #ifdef SEDF_STATS
1202 if ( EDOM_INFO(d)->block_time_tot != 0 )
1203 printk(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
1204 EDOM_INFO(d)->block_time_tot);
1205 if ( EDOM_INFO(d)->block_tot != 0 )
1206 printk("\n blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
1207 "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
1208 EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
1209 (EDOM_INFO(d)->short_block_tot * 100)
1210 / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
1211 (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
1212 EDOM_INFO(d)->pen_extra_blocks,
1213 EDOM_INFO(d)->pen_extra_slices,
1214 EDOM_INFO(d)->long_block_tot,
1215 (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
1216 (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
1217 (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
1218 #endif
1219 printk("\n");
1223 /* dumps all domains on hte specified cpu */
1224 static void sedf_dump_cpu_state(int i)
1226 struct list_head *list, *queue, *tmp;
1227 struct sedf_vcpu_info *d_inf;
1228 struct domain *d;
1229 struct vcpu *ed;
1230 int loop = 0;
1232 printk("now=%"PRIu64"\n",NOW());
1233 queue = RUNQ(i);
1234 printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
1235 (unsigned long) queue->next, (unsigned long) queue->prev);
1236 list_for_each_safe ( list, tmp, queue )
1238 printk("%3d: ",loop++);
1239 d_inf = list_entry(list, struct sedf_vcpu_info, list);
1240 sedf_dump_domain(d_inf->vcpu);
1243 queue = WAITQ(i); loop = 0;
1244 printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
1245 (unsigned long) queue->next, (unsigned long) queue->prev);
1246 list_for_each_safe ( list, tmp, queue )
1248 printk("%3d: ",loop++);
1249 d_inf = list_entry(list, struct sedf_vcpu_info, list);
1250 sedf_dump_domain(d_inf->vcpu);
1253 queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
1254 printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
1255 (unsigned long)queue, (unsigned long) queue->next,
1256 (unsigned long) queue->prev);
1257 list_for_each_safe ( list, tmp, queue )
1259 d_inf = list_entry(list, struct sedf_vcpu_info,
1260 extralist[EXTRA_PEN_Q]);
1261 printk("%3d: ",loop++);
1262 sedf_dump_domain(d_inf->vcpu);
1265 queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
1266 printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
1267 (unsigned long)queue, (unsigned long) queue->next,
1268 (unsigned long) queue->prev);
1269 list_for_each_safe ( list, tmp, queue )
1271 d_inf = list_entry(list, struct sedf_vcpu_info,
1272 extralist[EXTRA_UTIL_Q]);
1273 printk("%3d: ",loop++);
1274 sedf_dump_domain(d_inf->vcpu);
1277 loop = 0;
1278 printk("\nnot on Q\n");
1280 rcu_read_lock(&domlist_read_lock);
1281 for_each_domain ( d )
1283 for_each_vcpu(d, ed)
1285 if ( !__task_on_queue(ed) && (ed->processor == i) )
1287 printk("%3d: ",loop++);
1288 sedf_dump_domain(ed);
1292 rcu_read_unlock(&domlist_read_lock);
1296 /* Adjusts periods and slices of the domains accordingly to their weights. */
1297 static int sedf_adjust_weights(struct xen_domctl_scheduler_op *cmd)
1299 struct vcpu *p;
1300 struct domain *d;
1301 unsigned int nr_cpus = last_cpu(cpu_possible_map) + 1;
1302 int *sumw = xmalloc_array(int, nr_cpus);
1303 s_time_t *sumt = xmalloc_array(s_time_t, nr_cpus);
1305 if ( !sumw || !sumt )
1307 xfree(sumt);
1308 xfree(sumw);
1309 return -ENOMEM;
1311 memset(sumw, 0, nr_cpus * sizeof(*sumw));
1312 memset(sumt, 0, nr_cpus * sizeof(*sumt));
1314 /* Sum across all weights. */
1315 rcu_read_lock(&domlist_read_lock);
1316 for_each_domain( d )
1318 for_each_vcpu( d, p )
1320 if ( EDOM_INFO(p)->weight )
1322 sumw[p->processor] += EDOM_INFO(p)->weight;
1324 else
1326 /*don't modify domains who don't have a weight, but sum
1327 up the time they need, projected to a WEIGHT_PERIOD,
1328 so that this time is not given to the weight-driven
1329 domains*/
1330 /*check for overflows*/
1331 ASSERT((WEIGHT_PERIOD < ULONG_MAX)
1332 && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
1333 sumt[p->processor] +=
1334 (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
1335 EDOM_INFO(p)->period_orig;
1339 rcu_read_unlock(&domlist_read_lock);
1341 /* Adjust all slices (and periods) to the new weight. */
1342 rcu_read_lock(&domlist_read_lock);
1343 for_each_domain( d )
1345 for_each_vcpu ( d, p )
1347 if ( EDOM_INFO(p)->weight )
1349 EDOM_INFO(p)->period_orig =
1350 EDOM_INFO(p)->period = WEIGHT_PERIOD;
1351 EDOM_INFO(p)->slice_orig =
1352 EDOM_INFO(p)->slice =
1353 (EDOM_INFO(p)->weight *
1354 (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
1355 sumw[p->processor];
1359 rcu_read_unlock(&domlist_read_lock);
1361 xfree(sumt);
1362 xfree(sumw);
1364 return 0;
1368 /* set or fetch domain scheduling parameters */
1369 static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
1371 struct vcpu *v;
1372 int rc;
1374 PRINT(2,"sedf_adjust was called, domain-id %i new period %"PRIu64" "
1375 "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
1376 p->domain_id, op->u.sedf.period, op->u.sedf.slice,
1377 op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");
1379 if ( !p->vcpu )
1380 return -EINVAL;
1382 if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
1384 /* Check for sane parameters. */
1385 if ( !op->u.sedf.period && !op->u.sedf.weight )
1386 return -EINVAL;
1387 if ( op->u.sedf.weight )
1389 if ( (op->u.sedf.extratime & EXTRA_AWARE) &&
1390 (!op->u.sedf.period) )
1392 /* Weight-driven domains with extratime only. */
1393 for_each_vcpu ( p, v )
1395 EDOM_INFO(v)->extraweight = op->u.sedf.weight;
1396 EDOM_INFO(v)->weight = 0;
1397 EDOM_INFO(v)->slice = 0;
1398 EDOM_INFO(v)->period = WEIGHT_PERIOD;
1401 else
1403 /* Weight-driven domains with real-time execution. */
1404 for_each_vcpu ( p, v )
1405 EDOM_INFO(v)->weight = op->u.sedf.weight;
1408 else
1410 /* Time-driven domains. */
1411 for_each_vcpu ( p, v )
1413 /*
1414 * Sanity checking: note that disabling extra weight requires
1415 * that we set a non-zero slice.
1416 */
1417 if ( (op->u.sedf.period > PERIOD_MAX) ||
1418 (op->u.sedf.period < PERIOD_MIN) ||
1419 (op->u.sedf.slice > op->u.sedf.period) ||
1420 (op->u.sedf.slice < SLICE_MIN) )
1421 return -EINVAL;
1422 EDOM_INFO(v)->weight = 0;
1423 EDOM_INFO(v)->extraweight = 0;
1424 EDOM_INFO(v)->period_orig =
1425 EDOM_INFO(v)->period = op->u.sedf.period;
1426 EDOM_INFO(v)->slice_orig =
1427 EDOM_INFO(v)->slice = op->u.sedf.slice;
1431 rc = sedf_adjust_weights(op);
1432 if ( rc )
1433 return rc;
1435 for_each_vcpu ( p, v )
1437 EDOM_INFO(v)->status =
1438 (EDOM_INFO(v)->status &
1439 ~EXTRA_AWARE) | (op->u.sedf.extratime & EXTRA_AWARE);
1440 EDOM_INFO(v)->latency = op->u.sedf.latency;
1441 extraq_check(v);
1444 else if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
1446 if ( p->vcpu[0] == NULL )
1447 return -EINVAL;
1448 op->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
1449 op->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
1450 op->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
1451 op->u.sedf.latency = EDOM_INFO(p->vcpu[0])->latency;
1452 op->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
1455 PRINT(2,"sedf_adjust_finished\n");
1456 return 0;
/* SEDF scheduler operations table, hooked into Xen's generic scheduler. */
struct scheduler sched_sedf_def = {
    .name = "Simple EDF Scheduler",
    .opt_name = "sedf",
    .sched_id = XEN_SCHEDULER_SEDF,

    .init_domain = sedf_init_domain,
    .destroy_domain = sedf_destroy_domain,

    .init_vcpu = sedf_init_vcpu,
    .destroy_vcpu = sedf_destroy_vcpu,

    .do_schedule = sedf_do_schedule,
    .pick_cpu = sedf_pick_cpu,
    .dump_cpu_state = sedf_dump_cpu_state,
    .sleep = sedf_sleep,
    .wake = sedf_wake,
    .adjust = sedf_adjust,
};
1478 /*
1479 * Local variables:
1480 * mode: C
1481 * c-set-style: "BSD"
1482 * c-basic-offset: 4
1483 * tab-width: 4
1484 * indent-tabs-mode: nil
1485 * End:
1486 */