ia64/xen-unstable

view xen/common/sched_sedf.c @ 8609:85d693e6f61a

Arch-specific per-vcpu info should be initialised to zero
when allocating a new vcpu structure, not copied from
CPU0's idle VCPU. Especially now that the idle VCPU itself
is dynamically allocated.

This should fix assertions people have been seeing in
getdomain_info_ctxt() relating to IOPL in eflags.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Sat Jan 14 21:26:40 2006 +0100 (2006-01-14)
parents c055d76ec559
children 5bf4d9a9694f
line source
/******************************************************************************
 * Simple EDF scheduler for xen
 *
 * by Stephan Diestelhorst (C) 2004 Cambridge University
 * based on code by Mark Williamson (C) 2004 Intel Research Cambridge
 */

#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/sched-if.h>
#include <public/sched_ctl.h>
#include <xen/timer.h>
#include <xen/softirq.h>
#include <xen/time.h>

/*verbosity settings*/
#define SEDFLEVEL 0
#define PRINT(_f, _a...) \
    if ((_f)<=SEDFLEVEL) printk(_a );

#ifndef NDEBUG
#define SEDF_STATS
#define CHECK(_p) if ( !(_p) ) \
    { printk("Check '%s' failed, line %d, file %s\n", #_p , __LINE__, \
             __FILE__); }
#else
#define CHECK(_p) ((void)0)
#endif

/*various ways of unblocking domains*/
#define UNBLOCK_ISOCHRONOUS_EDF 1
#define UNBLOCK_EDF 2
#define UNBLOCK_ATROPOS 3
#define UNBLOCK_SHORT_RESUME 4
#define UNBLOCK_BURST 5
#define UNBLOCK_EXTRA_SUPPORT 6
#define UNBLOCK UNBLOCK_EXTRA_SUPPORT

/*various ways of treating extra-time*/
#define EXTRA_OFF 1
#define EXTRA_ROUNDR 2
#define EXTRA_SLICE_WEIGHT 3
#define EXTRA_BLOCK_WEIGHT 4

#define EXTRA EXTRA_BLOCK_WEIGHT

#define EXTRA_NONE (0)
#define EXTRA_AWARE (1)
#define EXTRA_RUN_PEN (2)
#define EXTRA_RUN_UTIL (4)
#define EXTRA_WANT_PEN_Q (8)
#define EXTRA_PEN_Q (0)
#define EXTRA_UTIL_Q (1)
#define SEDF_ASLEEP (16)

#define EXTRA_QUANTUM (MICROSECS(500))
#define WEIGHT_PERIOD (MILLISECS(100))
#define WEIGHT_SAFETY (MILLISECS(5))

#define IMPLY(a, b) (!(a) || (b))
#define EQ(a, b) ((!!(a)) == (!!(b)))


struct sedf_dom_info {
    struct domain *domain;
};

struct sedf_vcpu_info
{
    struct vcpu *vcpu;
    struct list_head list;
    struct list_head extralist[2];

    /*Parameters for EDF*/
    s_time_t period;  /*=(relative deadline)*/
    s_time_t slice;   /*=worst case execution time*/

    /*Advanced Parameters*/
    /*Latency Scaling*/
    s_time_t period_orig;
    s_time_t slice_orig;
    s_time_t latency;

    /*status of domain*/
    int status;
    /*weights for "Scheduling for beginners/ lazy/ etc." ;)*/
    short weight;
    short extraweight;
    /*Bookkeeping*/
    s_time_t deadl_abs;
    s_time_t sched_start_abs;
    s_time_t cputime;
    /* times the domain un-/blocked */
    s_time_t block_abs;
    s_time_t unblock_abs;

    /*scores for {util, block penalty}-weighted extratime distribution*/
    int score[2];
    s_time_t short_block_lost_tot;

    /*Statistics*/
    s_time_t extra_time_tot;

#ifdef SEDF_STATS
    s_time_t block_time_tot;
    s_time_t penalty_time_tot;
    int block_tot;
    int short_block_tot;
    int long_block_tot;
    int short_cont;
    int pen_extra_blocks;
    int pen_extra_slices;
#endif
};

struct sedf_cpu_info {
    struct list_head runnableq;
    struct list_head waitq;
    struct list_head extraq[2];
    s_time_t         current_slice_expires;
};

#define EDOM_INFO(d)   ((struct sedf_vcpu_info *)((d)->sched_priv))
#define CPU_INFO(cpu)  ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
#define LIST(d)        (&EDOM_INFO(d)->list)
#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
#define RUNQ(cpu)      (&CPU_INFO(cpu)->runnableq)
#define WAITQ(cpu)     (&CPU_INFO(cpu)->waitq)
#define EXTRAQ(cpu,i)  (&(CPU_INFO(cpu)->extraq[i]))
#define IDLETASK(cpu)  ((struct vcpu *)schedule_data[cpu].idle)

#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)

#define MIN(x,y) (((x)<(y))?(x):(y))
#define DIV_UP(x,y) (((x) + (y) - 1) / (y))
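/* Editor's illustrative sketch (not part of the original file): DIV_UP
   rounds up, which the queue-update code below relies on to push a missed
   deadline forward to the first period boundary after 'now'.  All numbers
   here are made up for the example. */
#if 0
static void div_up_example(void)
{
    s_time_t deadl_abs = 100, period = 30, now = 195;
    /* ceil((195 - 100) / 30) = 4, so the deadline jumps four periods */
    deadl_abs += DIV_UP(now - deadl_abs, period) * period; /* 100 + 120 = 220 */
    ASSERT(deadl_abs > now);
}
#endif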
#define extra_runs(inf)      ((inf->status) & 6)
#define extra_get_cur_q(inf) (((inf->status & 6) >> 1)-1)
#define sedf_runnable(edom)  (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))


static void sedf_dump_cpu_state(int i);

static inline int extraq_on(struct vcpu *d, int i) {
    return ((EXTRALIST(d,i)->next != NULL) &&
            (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
}

static inline void extraq_add_head(struct vcpu *d, int i)
{
    list_add(EXTRALIST(d,i), EXTRAQ(d->processor,i));
    ASSERT(extraq_on(d, i));
}

static inline void extraq_add_tail(struct vcpu *d, int i)
{
    list_add_tail(EXTRALIST(d,i), EXTRAQ(d->processor,i));
    ASSERT(extraq_on(d, i));
}

static inline void extraq_del(struct vcpu *d, int i)
{
    struct list_head *list = EXTRALIST(d,i);
    ASSERT(extraq_on(d,i));
    PRINT(3, "Removing domain %i.%i from L%i extraq\n",
          d->domain->domain_id, d->vcpu_id, i);
    list_del(list);
    list->next = NULL;
    ASSERT(!extraq_on(d, i));
}
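/* Editor's note (hedged sketch, not in the original): queue membership is
   encoded in the node itself -- a NULL 'next' pointer means "not queued",
   which is why extraq_del() resets list->next after list_del().  The same
   convention is used for the run/wait queues via __task_on_queue(). */
#if 0
static void extraq_sentinel_example(struct vcpu *d)
{
    ASSERT(!extraq_on(d, EXTRA_UTIL_Q)); /* next == NULL: off-queue */
    extraq_add_tail(d, EXTRA_UTIL_Q);    /* linked into the CPU's extraq */
    extraq_del(d, EXTRA_UTIL_Q);         /* next reset to NULL again */
    ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
}
#endif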
/* adds a domain to the queue of processes which are aware of extra time. List
   is sorted by score, where a lower score means higher priority for an extra
   slice. It also updates the score, by simply subtracting a fixed value from
   each entry, in order to avoid overflow. The algorithm works by simply
   charging each domain that received extratime with an inverse of its weight.
 */
static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub) {
    struct list_head      *cur;
    struct sedf_vcpu_info *curinf;

    ASSERT(!extraq_on(d,i));
    PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
          " to L%i extraq\n",
          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
          EDOM_INFO(d)->short_block_lost_tot, i);
    /*iterate through all elements to find our "hole" and on our way
      update all the other scores*/
    list_for_each(cur,EXTRAQ(d->processor,i)){
        curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]);
        curinf->score[i] -= sub;
        if (EDOM_INFO(d)->score[i] < curinf->score[i])
            break;
        else
            PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id, curinf->score[i]);
    }
    /*cur now contains the element, before which we'll enqueue*/
    PRINT(3, "\tlist_add to %p\n", cur->prev);
    list_add(EXTRALIST(d,i),cur->prev);

    /*continue updating the extraq*/
    if ((cur != EXTRAQ(d->processor,i)) && sub)
        for (cur = cur->next; cur != EXTRAQ(d->processor,i);
             cur = cur->next) {
            curinf = list_entry(cur,struct sedf_vcpu_info,
                                extralist[i]);
            curinf->score[i] -= sub;
            PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id, curinf->score[i]);
        }
    ASSERT(extraq_on(d,i));
}
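/* Worked example (editor's addition; the numbers are assumptions): take a
   queue with scores {5, 9, 14}, a new domain with score 8, and sub = 2.
   The scan decrements as it goes: 5->3 (8 !< 3), 9->7 (8 !< 7), 14->12
   (8 < 12, stop).  The new node is linked in front of the 12, and the
   trailing loop decrements any entries beyond that point, so the queue
   ends up as {3, 7, 8, 12}. */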
static inline void extraq_check(struct vcpu *d) {
    if (extraq_on(d, EXTRA_UTIL_Q)) {
        PRINT(2,"Dom %i.%i is on L1 extraQ\n",d->domain->domain_id, d->vcpu_id);
        if (!(EDOM_INFO(d)->status & EXTRA_AWARE) &&
            !extra_runs(EDOM_INFO(d))) {
            extraq_del(d, EXTRA_UTIL_Q);
            PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
                  d->domain->domain_id, d->vcpu_id);
        }
    } else {
        PRINT(2,"Dom %i.%i is NOT on L1 extraQ\n",d->domain->domain_id,
              d->vcpu_id);
        if ((EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d))
        {
#if (EXTRA == EXTRA_ROUNDR)
            extraq_add_tail(d, EXTRA_UTIL_Q);
#elif (EXTRA == EXTRA_SLICE_WEIGHT || \
       EXTRA == EXTRA_BLOCK_WEIGHT)
            extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
#else
            ;
#endif
            PRINT(2,"Added dom %i.%i to L1 extraQ\n",d->domain->domain_id,
                  d->vcpu_id);
        }
    }
}

static inline void extraq_check_add_unblocked(struct vcpu *d,
                                              int priority) {
    struct sedf_vcpu_info *inf = EDOM_INFO(d);
    if (inf->status & EXTRA_AWARE)
#if (EXTRA == EXTRA_ROUNDR)
        if (priority)
            extraq_add_head(d,EXTRA_UTIL_Q);
        else
            extraq_add_tail(d,EXTRA_UTIL_Q);
#elif (EXTRA == EXTRA_SLICE_WEIGHT \
     || EXTRA == EXTRA_BLOCK_WEIGHT)
        /*put it on the weighted extraq,
          without updating any scores*/
        extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
#else
        ;
#endif
}

static inline int __task_on_queue(struct vcpu *d) {
    return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
}

static inline void __del_from_queue(struct vcpu *d)
{
    struct list_head *list = LIST(d);
    ASSERT(__task_on_queue(d));
    PRINT(3,"Removing domain %i.%i (bop= %"PRIu64") from runq/waitq\n",
          d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
    list_del(list);
    list->next = NULL;
    ASSERT(!__task_on_queue(d));
}

typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);

static inline void list_insert_sort(struct list_head *list,
                                    struct list_head *element,
                                    list_comparer comp) {
    struct list_head *cur;
    /*iterate through all elements to find our "hole"*/
    list_for_each(cur,list){
        if (comp(element, cur) < 0)
            break;
    }
    /*cur now contains the element, before which we'll enqueue*/
    PRINT(3,"\tlist_add to %p\n",cur->prev);
    list_add(element, cur->prev);
}

#define DOMAIN_COMPARER(name, field, comp1, comp2)            \
int name##_comp(struct list_head* el1, struct list_head* el2) \
{                                                             \
    struct sedf_vcpu_info *d1, *d2;                           \
    d1 = list_entry(el1,struct sedf_vcpu_info, field);        \
    d2 = list_entry(el2,struct sedf_vcpu_info, field);        \
    if ((comp1) == (comp2))                                   \
        return 0;                                             \
    if ((comp1) < (comp2))                                    \
        return -1;                                            \
    else                                                      \
        return 1;                                             \
}
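/* Editor's sketch: the DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1),
   PERIOD_BEGIN(d2)) instantiation below expands to roughly the following
   comparator (hand-expanded here for clarity only): */
#if 0
int waitq_comp_expanded(struct list_head* el1, struct list_head* el2)
{
    struct sedf_vcpu_info *d1, *d2;
    d1 = list_entry(el1, struct sedf_vcpu_info, list);
    d2 = list_entry(el2, struct sedf_vcpu_info, list);
    if ( PERIOD_BEGIN(d1) == PERIOD_BEGIN(d2) )
        return 0;
    return (PERIOD_BEGIN(d1) < PERIOD_BEGIN(d2)) ? -1 : 1;
}
#endif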
/* adds a domain to the queue of processes which wait for the beginning of the
   next period; this list is therefore sorted by this time, which is simply
   absolute deadline - period
 */
DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2))
static inline void __add_to_waitqueue_sort(struct vcpu *d) {
    ASSERT(!__task_on_queue(d));
    PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
          d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
    list_insert_sort(WAITQ(d->processor), LIST(d), waitq_comp);
    ASSERT(__task_on_queue(d));
}

/* adds a domain to the queue of processes which have started their current
   period and are runnable (i.e. not blocked, dying,...). The first element
   on this list is running on the processor, if the list is empty the idle
   task will run. As we are implementing EDF, this list is sorted by deadlines.
 */
DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs)
static inline void __add_to_runqueue_sort(struct vcpu *d) {
    PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->deadl_abs);
    list_insert_sort(RUNQ(d->processor), LIST(d), runq_comp);
}
/* Allocates memory for per domain private scheduling data*/
static int sedf_alloc_task(struct vcpu *d)
{
    PRINT(2, "sedf_alloc_task was called, domain-id %i.%i\n",
          d->domain->domain_id, d->vcpu_id);

    if ( d->domain->sched_priv == NULL )
    {
        d->domain->sched_priv = xmalloc(struct sedf_dom_info);
        if ( d->domain->sched_priv == NULL )
            return -1;
        memset(d->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
    }

    if ( (d->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
        return -1;

    memset(d->sched_priv, 0, sizeof(struct sedf_vcpu_info));

    return 0;
}


/* Setup the sedf_dom_info */
static void sedf_add_task(struct vcpu *d)
{
    struct sedf_vcpu_info *inf = EDOM_INFO(d);
    inf->vcpu = d;

    PRINT(2,"sedf_add_task was called, domain-id %i.%i\n",
          d->domain->domain_id, d->vcpu_id);

    /* Allocate per-CPU context if this is the first domain to be added. */
    if ( unlikely(schedule_data[d->processor].sched_priv == NULL) )
    {
        schedule_data[d->processor].sched_priv =
            xmalloc(struct sedf_cpu_info);
        BUG_ON(schedule_data[d->processor].sched_priv == NULL);
        memset(CPU_INFO(d->processor), 0, sizeof(*CPU_INFO(d->processor)));
        INIT_LIST_HEAD(WAITQ(d->processor));
        INIT_LIST_HEAD(RUNQ(d->processor));
        INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_PEN_Q));
        INIT_LIST_HEAD(EXTRAQ(d->processor,EXTRA_UTIL_Q));
    }

    if ( d->domain->domain_id == 0 )
    {
        /*set dom0 to something useful to boot the machine*/
        inf->period    = MILLISECS(20);
        inf->slice     = MILLISECS(15);
        inf->latency   = 0;
        inf->deadl_abs = 0;
        inf->status    = EXTRA_AWARE | SEDF_ASLEEP;
    }
    else
    {
        /*other domains run in best effort mode*/
        inf->period    = WEIGHT_PERIOD;
        inf->slice     = 0;
        inf->deadl_abs = 0;
        inf->latency   = 0;
        inf->status    = EXTRA_AWARE | SEDF_ASLEEP;
        inf->extraweight = 1;
    }

    inf->period_orig = inf->period; inf->slice_orig = inf->slice;
    INIT_LIST_HEAD(&(inf->list));
    INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
    INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));

    if ( !is_idle_vcpu(d) )
    {
        extraq_check(d);
    }
    else
    {
        EDOM_INFO(d)->deadl_abs = 0;
        EDOM_INFO(d)->status &= ~SEDF_ASLEEP;
    }
}

/* Frees memory used by domain info */
static void sedf_free_task(struct domain *d)
{
    int i;

    PRINT(2,"sedf_free_task was called, domain-id %i\n",d->domain_id);

    ASSERT(d->sched_priv != NULL);
    xfree(d->sched_priv);

    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
    {
        if ( d->vcpu[i] )
        {
            ASSERT(d->vcpu[i]->sched_priv != NULL);
            xfree(d->vcpu[i]->sched_priv);
        }
    }
}
/*
 * Handles the rescheduling & bookkeeping of domains running in their
 * guaranteed timeslice.
 */
static void desched_edf_dom(s_time_t now, struct vcpu* d)
{
    struct sedf_vcpu_info* inf = EDOM_INFO(d);
    /*current domain is running in real time mode*/

    ASSERT(__task_on_queue(d));
    /*update the domain's cputime*/
    inf->cputime += now - inf->sched_start_abs;

    /*scheduling decisions, which don't remove the running domain
      from the runq*/
    if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
        return;

    __del_from_queue(d);

    /*manage bookkeeping (i.e. calculate next deadline,
      memorize overrun-time of slice) of finished domains*/
    if ( inf->cputime >= inf->slice )
    {
        inf->cputime -= inf->slice;

        if ( inf->period < inf->period_orig )
        {
            /*this domain runs in latency scaling or burst mode*/
#if (UNBLOCK == UNBLOCK_BURST)
            /*if we are running in burst scaling wait for two periods
              before scaling periods up again*/
            if ( (now - inf->unblock_abs) >= (2 * inf->period) )
#endif
            {
                inf->period *= 2; inf->slice *= 2;
                if ( (inf->period > inf->period_orig) ||
                     (inf->slice > inf->slice_orig) )
                {
                    /*reset slice & period*/
                    inf->period = inf->period_orig;
                    inf->slice = inf->slice_orig;
                }
            }
        }
        /*set next deadline*/
        inf->deadl_abs += inf->period;
    }

    /*add a runnable domain to the waitqueue*/
    if ( sedf_runnable(d) )
    {
        __add_to_waitqueue_sort(d);
    }
    else
    {
        /*we have a blocked realtime task -> remove it from exqs too*/
#if (EXTRA > EXTRA_OFF)
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
        if ( extraq_on(d, EXTRA_PEN_Q) )
            extraq_del(d, EXTRA_PEN_Q);
#endif
        if ( extraq_on(d, EXTRA_UTIL_Q) )
            extraq_del(d, EXTRA_UTIL_Q);
#endif
    }

    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
                 sedf_runnable(d)));
}
/* Update all elements on the queues */
static void update_queues(
    s_time_t now, struct list_head *runq, struct list_head *waitq)
{
    struct list_head      *cur, *tmp;
    struct sedf_vcpu_info *curinf;

    PRINT(3,"Updating waitq..\n");

    /*check for the first elements of the waitqueue, whether their
      next period has already started*/
    list_for_each_safe(cur, tmp, waitq) {
        curinf = list_entry(cur, struct sedf_vcpu_info, list);
        PRINT(4,"\tLooking @ dom %i.%i\n",
              curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
        if ( PERIOD_BEGIN(curinf) <= now )
        {
            __del_from_queue(curinf->vcpu);
            __add_to_runqueue_sort(curinf->vcpu);
        }
        else
            break;
    }

    PRINT(3,"Updating runq..\n");

    /*process the runq, find domains that are on
      the runqueue which shouldn't be there*/
    list_for_each_safe(cur, tmp, runq) {
        curinf = list_entry(cur,struct sedf_vcpu_info,list);
        PRINT(4,"\tLooking @ dom %i.%i\n",
              curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);

        if ( unlikely(curinf->slice == 0) )
        {
            /*ignore domains with empty slice*/
            PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id);
            __del_from_queue(curinf->vcpu);

            /*move them to their next period*/
            curinf->deadl_abs += curinf->period;
            /*ensure that the start of the next period is in the future*/
            if ( unlikely(PERIOD_BEGIN(curinf) < now) )
            {
                curinf->deadl_abs +=
                    (DIV_UP(now - PERIOD_BEGIN(curinf),
                            curinf->period)) * curinf->period;
            }
            /*and put them back into the queue*/
            __add_to_waitqueue_sort(curinf->vcpu);
            continue;
        }

        if ( unlikely((curinf->deadl_abs < now) ||
                      (curinf->cputime > curinf->slice)) )
        {
            /*we missed the deadline or the slice was
              already finished... might happen because
              of dom_adj.*/
            PRINT(4,"\tDomain %i.%i exceeded its deadline/"
                  "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
                  " cputime: %"PRIu64"\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id,
                  curinf->deadl_abs, curinf->slice, now,
                  curinf->cputime);
            __del_from_queue(curinf->vcpu);
            /*common case: we miss one period!*/
            curinf->deadl_abs += curinf->period;

            /*if we are still behind: modulo arithmetic,
              force deadline to be in future and
              aligned to period borders!*/
            if (unlikely(curinf->deadl_abs < now))
                curinf->deadl_abs +=
                    DIV_UP(now - curinf->deadl_abs,
                           curinf->period) * curinf->period;
            ASSERT(curinf->deadl_abs > now);
            /*give a fresh slice*/
            curinf->cputime = 0;
            if (PERIOD_BEGIN(curinf) > now)
                __add_to_waitqueue_sort(curinf->vcpu);
            else
                __add_to_runqueue_sort(curinf->vcpu);
        }
        else
            break;
    }
    PRINT(3,"done updating the queues\n");
}
#if (EXTRA > EXTRA_OFF)
/* removes a domain from the head of the according extraQ and
   requeues it at a specified position:
     round-robin extratime: end of extraQ
     weighted ext.: insert in sorted list by score
   if the domain is blocked / has regained its short-block-loss
   time it is not put on any queue */
static void desched_extra_dom(s_time_t now, struct vcpu* d)
{
    struct sedf_vcpu_info *inf = EDOM_INFO(d);
    int i = extra_get_cur_q(inf);

#if (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
    unsigned long oldscore;
#endif
    ASSERT(extraq_on(d, i));
    /*unset all running flags*/
    inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
    /*fresh slice for the next run*/
    inf->cputime = 0;
    /*accumulate total extratime*/
    inf->extra_time_tot += now - inf->sched_start_abs;
    /*remove extradomain from head of the queue*/
    extraq_del(d, i);

#if (EXTRA == EXTRA_ROUNDR)
    if ( sedf_runnable(d) && (inf->status & EXTRA_AWARE) )
        /*add to the tail if it is runnable => round-robin*/
        extraq_add_tail(d, EXTRA_UTIL_Q);
#elif (EXTRA == EXTRA_SLICE_WEIGHT || EXTRA == EXTRA_BLOCK_WEIGHT)
    /*update the score*/
    oldscore = inf->score[i];
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
    if ( i == EXTRA_PEN_Q )
    {
        /*domain was running in L0 extraq*/
        /*reduce block lost, probably more sophistication here!*/
        /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
        inf->short_block_lost_tot -= now - inf->sched_start_abs;
        PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n",
              inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id,
              inf->short_block_lost_tot);
        if (inf->short_block_lost_tot <= 0) {
            PRINT(4,"Domain %i.%i compensated short block loss!\n",
                  inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id);
            /*we have (over-)compensated our block penalty*/
            inf->short_block_lost_tot = 0;
            /*we don't want a place on the penalty queue anymore!*/
            inf->status &= ~EXTRA_WANT_PEN_Q;
            goto check_extra_queues;
        }
        /*we have to go again for another try in the block-extraq,
          the score is not used incrementally here, as this is
          already done by recalculating the block_lost*/
        inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
            inf->short_block_lost_tot;
        oldscore = 0;
    }
    else
#endif
    {
        /*domain was running in L1 extraq => score is inverse of
          utilization and is used somewhat incrementally!*/
        if ( !inf->extraweight )
            /*NB: use fixed point arithmetic with 10 bits*/
            inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
                inf->slice;
        else
            /*conversion between realtime utilisation and extraweight:
              full (ie 100%) utilization is equivalent to 128 extraweight*/
            inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
    }
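    /* Worked example (editor's addition): the scores above are inverse
       utilisation in 10-bit fixed point.  A domain with slice = 25ms of
       period = 100ms (25% utilisation) scores (100ms << 10) / 25ms = 4096;
       a pure extraweight domain with extraweight = 128 scores
       (1 << 17) / 128 = 1024, the same as 100% utilisation, matching the
       conversion comment above. */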
 check_extra_queues:
    /* Adding a runnable domain to the right queue and removing blocked ones*/
    if ( sedf_runnable(d) )
    {
        /*add according to score: weighted round robin*/
        if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
            ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
            extraq_add_sort_update(d, i, oldscore);
    }
    else
    {
        /*remove this blocked domain from the waitq!*/
        __del_from_queue(d);
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
        /*make sure that we remove a blocked domain from the other
          extraq too*/
        if ( i == EXTRA_PEN_Q )
        {
            if ( extraq_on(d, EXTRA_UTIL_Q) )
                extraq_del(d, EXTRA_UTIL_Q);
        }
        else
        {
            if ( extraq_on(d, EXTRA_PEN_Q) )
                extraq_del(d, EXTRA_PEN_Q);
        }
#endif
    }
#endif
    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
                 sedf_runnable(d)));
}
#endif
static struct task_slice sedf_do_extra_schedule(
    s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
{
    struct task_slice      ret;
    struct sedf_vcpu_info *runinf;
    ASSERT(end_xt > now);

    /* Enough time left to use for extratime? */
    if ( end_xt - now < EXTRA_QUANTUM )
        goto return_idle;

#if (EXTRA == EXTRA_BLOCK_WEIGHT)
    if ( !list_empty(extraq[EXTRA_PEN_Q]) )
    {
        /*we still have elements on the level 0 extraq
          => let those run first!*/
        runinf   = list_entry(extraq[EXTRA_PEN_Q]->next,
                              struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
        runinf->status |= EXTRA_RUN_PEN;
        ret.task = runinf->vcpu;
        ret.time = EXTRA_QUANTUM;
#ifdef SEDF_STATS
        runinf->pen_extra_slices++;
#endif
    }
    else
#endif
    {
        if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
        {
            /*use elements from the normal extraqueue*/
            runinf   = list_entry(extraq[EXTRA_UTIL_Q]->next,
                                  struct sedf_vcpu_info,
                                  extralist[EXTRA_UTIL_Q]);
            runinf->status |= EXTRA_RUN_UTIL;
            ret.task = runinf->vcpu;
            ret.time = EXTRA_QUANTUM;
        }
        else
            goto return_idle;
    }

    ASSERT(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    return ret;

 return_idle:
    ret.task = IDLETASK(cpu);
    ret.time = end_xt - now;
    ASSERT(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    return ret;
}
/* Main scheduling function
   Reasons for calling this function are:
   -timeslice for the current period used up
   -domain on waitqueue has started its period
   -and various others ;) in general: determine which domain to run next*/
static struct task_slice sedf_do_schedule(s_time_t now)
{
    int                    cpu   = smp_processor_id();
    struct list_head      *runq  = RUNQ(cpu);
    struct list_head      *waitq = WAITQ(cpu);
#if (EXTRA > EXTRA_OFF)
    struct sedf_vcpu_info *inf   = EDOM_INFO(current);
    struct list_head      *extraq[] = {
        EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)};
#endif
    struct sedf_vcpu_info *runinf, *waitinf;
    struct task_slice      ret;

    /*idle tasks don't need any of the following stuff*/
    if ( is_idle_vcpu(current) )
        goto check_waitq;

    /* create local state of the status of the domain, in order to avoid
       inconsistent state during scheduling decisions, because data for
       vcpu_runnable is not protected by the scheduling lock!*/
    if ( !vcpu_runnable(current) )
        inf->status |= SEDF_ASLEEP;

    if ( inf->status & SEDF_ASLEEP )
        inf->block_abs = now;

#if (EXTRA > EXTRA_OFF)
    if ( unlikely(extra_runs(inf)) )
    {
        /*special treatment of domains running in extra time*/
        desched_extra_dom(now, current);
    }
    else
#endif
    {
        desched_edf_dom(now, current);
    }
 check_waitq:
    update_queues(now, runq, waitq);

    /*now simply pick the first domain from the runqueue, which has the
      earliest deadline, because the list is sorted*/

    if ( !list_empty(runq) )
    {
        runinf   = list_entry(runq->next,struct sedf_vcpu_info,list);
        ret.task = runinf->vcpu;
        if ( !list_empty(waitq) )
        {
            waitinf  = list_entry(waitq->next,
                                  struct sedf_vcpu_info,list);
            /*rerun scheduler, when scheduled domain reaches its
              end of slice or the first domain from the waitqueue
              gets ready*/
            ret.time = MIN(now + runinf->slice - runinf->cputime,
                           PERIOD_BEGIN(waitinf)) - now;
        }
        else
        {
            ret.time = runinf->slice - runinf->cputime;
        }
        CHECK(ret.time > 0);
        goto sched_done;
    }

    if ( !list_empty(waitq) )
    {
        waitinf  = list_entry(waitq->next,struct sedf_vcpu_info, list);
        /*we could not find any suitable domain
          => look for domains that are aware of extratime*/
#if (EXTRA > EXTRA_OFF)
        ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
                                     extraq, cpu);
#else
        ret.task = IDLETASK(cpu);
        ret.time = PERIOD_BEGIN(waitinf) - now;
#endif
        CHECK(ret.time > 0);
    }
    else
    {
        /*this could probably never happen, but one never knows...*/
        /*it can... imagine a second CPU, which is pure scifi ATM,
          but one never knows ;)*/
        ret.task = IDLETASK(cpu);
        ret.time = SECONDS(1);
    }

 sched_done:
    /*TODO: Do something USEFUL when this happens and find out, why it
      still can happen!!!*/
    if ( ret.time < 0)
    {
        printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
               ret.time);
        ret.time = EXTRA_QUANTUM;
    }

    EDOM_INFO(ret.task)->sched_start_abs = now;
    CHECK(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    CPU_INFO(cpu)->current_slice_expires = now + ret.time;
    return ret;
}
static void sedf_sleep(struct vcpu *d)
{
    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
          d->domain->domain_id, d->vcpu_id);

    if ( is_idle_vcpu(d) )
        return;

    EDOM_INFO(d)->status |= SEDF_ASLEEP;

    if ( schedule_data[d->processor].curr == d )
    {
        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
    }
    else
    {
        if ( __task_on_queue(d) )
            __del_from_queue(d);
#if (EXTRA > EXTRA_OFF)
        if ( extraq_on(d, EXTRA_UTIL_Q) )
            extraq_del(d, EXTRA_UTIL_Q);
#endif
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
        if ( extraq_on(d, EXTRA_PEN_Q) )
            extraq_del(d, EXTRA_PEN_Q);
#endif
    }
}
/* This function wakes up a domain, i.e. moves it into the waitqueue
 * things to mention are: admission control is taking place nowhere at
 * the moment, so we can't be sure, whether it is safe to wake the domain
 * up at all. Anyway, even if it is safe (total cpu usage <=100%) there are
 * some considerations on when to allow the domain to wake up and have its
 * first deadline...
 * I detected 3 cases, which could describe the possible behaviour of the
 * scheduler, and I'll try to make them more clear:
 *
 * 1. Very conservative
 *     -when a blocked domain unblocks, it is allowed to start execution at
 *      the beginning of the next complete period
 *      (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up
 *
 *      DRRB_____D__U_____DRRRRR___D________ ...
 *
 *     -this causes the domain to miss a period (and a deadline)
 *     -doesn't disturb the schedule at all
 *     -deadlines keep occurring isochronously
 *
 * 2. Conservative Part 1: Short Unblocking
 *     -when a domain unblocks in the same period as it was blocked it
 *      unblocks and may consume the rest of its original time-slice minus
 *      the time it was blocked
 *      (assume period=9, slice=5)
 *
 *      DRB_UR___DRRRRR___D...
 *
 *     -this also doesn't disturb scheduling, but might lead to the fact, that
 *      the domain can't finish its workload in the period
 *     -in addition to that the domain can be treated prioritised when
 *      extratime is available
 *     -addition: experiments have shown that this may have a HUGE impact on
 *      performance of other domains, because it can lead to excessive context
 *      switches
 *
 *    Part 2: Long Unblocking
 *    Part 2a
 *     -it is obvious that such accounting of block time, applied when
 *      unblocking is happening in later periods, works fine as well
 *     -the domain is treated as if it would have been running since the start
 *      of its new period
 *
 *      DRB______D___UR___D...
 *
 *    Part 2b
 *     -if one needs the full slice in the next period, it is necessary to
 *      treat the unblocking time as the start of the new period, i.e. move
 *      the deadline further back (later)
 *     -this doesn't disturb scheduling as well, because for EDF periods can
 *      be treated as minimal inter-release times and scheduling stays
 *      correct, when deadlines are kept relative to the time the process
 *      unblocks
 *
 *      DRB______D___URRRR___D...
 *                       (D) <- old deadline was here
 *     -problem: deadlines don't occur isochronously anymore
 *    Part 2c (Improved Atropos design)
 *     -when a domain unblocks it is given a very short period (=latency hint)
 *      and slice length scaled accordingly
 *     -both rise again to the original value (e.g. get doubled every period)
 *
 * 3. Unconservative (i.e. incorrect)
 *     -to boost the performance of I/O dependent domains it would be possible
 *      to put the domain into the runnable queue immediately, and let it run
 *      for the remainder of the slice of the current period
 *      (or even worse: allocate a new full slice for the domain)
 *     -either behaviour can lead to missed deadlines in other domains as
 *      opposed to approaches 1,2a,2b
 */
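/* Editor's numeric sketch of case 2b (values assumed for illustration):
   period = 9, slice = 5, old deadline D = 18.  A domain that blocks at
   t = 14 and unblocks at t = 20 gets deadl_abs = 20 + 9 = 29 and a reset
   cputime, so a full 5-unit slice is available again -- at the price that
   deadlines now fall at 29, 38, ... instead of multiples of 9. */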
#if (UNBLOCK <= UNBLOCK_SHORT_RESUME)
static void unblock_short_vcons(struct sedf_vcpu_info* inf, s_time_t now)
{
    inf->deadl_abs += inf->period;
    inf->cputime = 0;
}
#endif

#if (UNBLOCK == UNBLOCK_SHORT_RESUME)
static void unblock_short_cons(struct sedf_vcpu_info* inf, s_time_t now)
{
    /*treat blocked time as consumed by the domain*/
    inf->cputime += now - inf->block_abs;
    if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
    {
        /*we don't have a reasonable amount of time in
          our slice left :( => start in next period!*/
        unblock_short_vcons(inf, now);
    }
#ifdef SEDF_STATS
    else
        inf->short_cont++;
#endif
}
#endif
static void unblock_short_extra_support(
    struct sedf_vcpu_info* inf, s_time_t now)
{
    /*this unblocking scheme tries to support the domain, by assigning it
      a priority in extratime distribution according to the loss of time
      in this slice due to blocking*/
    s_time_t pen;

    /*no more realtime execution in this period!*/
    inf->deadl_abs += inf->period;
    if ( likely(inf->block_abs) )
    {
        /*treat blocked time as consumed by the domain*/
        /*inf->cputime += now - inf->block_abs;*/
        /*penalty is time the domain would have
          had if it continued to run */
        pen = (inf->slice - inf->cputime);
        if ( pen < 0 )
            pen = 0;
        /*accumulate all penalties over the periods*/
        /*inf->short_block_lost_tot += pen;*/
        /*set penalty to the current value*/
        inf->short_block_lost_tot = pen;
        /*not sure which one is better.. but seems to work well...*/

        if ( inf->short_block_lost_tot )
        {
            inf->score[0] = (inf->period << 10) /
                inf->short_block_lost_tot;
#ifdef SEDF_STATS
            inf->pen_extra_blocks++;
#endif
            if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
                /*remove domain for possible resorting!*/
                extraq_del(inf->vcpu, EXTRA_PEN_Q);
            else
                /*remember that we want to be on the penalty q
                  so that we can continue when we (un-)block
                  in penalty-extratime*/
                inf->status |= EXTRA_WANT_PEN_Q;

            /*(re-)add domain to the penalty extraq*/
            extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
        }
    }

    /*give it a fresh slice in the next period!*/
    inf->cputime = 0;
}
#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
static void unblock_long_vcons(struct sedf_vcpu_info* inf, s_time_t now)
{
    /* align to next future period */
    inf->deadl_abs += (DIV_UP(now - inf->deadl_abs, inf->period) +1)
        * inf->period;
    inf->cputime = 0;
}
#endif


#if 0
static void unblock_long_cons_a (struct sedf_vcpu_info* inf, s_time_t now)
{
    /*treat the time the domain was blocked in the
      CURRENT period as consumed by the domain*/
    inf->cputime = (now - inf->deadl_abs) % inf->period;
    if ( (inf->cputime + EXTRA_QUANTUM) > inf->slice )
    {
        /*we don't have a reasonable amount of time in our slice
          left :( => start in next period!*/
        unblock_long_vcons(inf, now);
    }
}
#endif


static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
{
    /*Conservative 2b*/
    /*Treat the unblocking time as a start of a new period */
    inf->deadl_abs = now + inf->period;
    inf->cputime = 0;
}


#if (UNBLOCK == UNBLOCK_ATROPOS)
static void unblock_long_cons_c(struct sedf_vcpu_info* inf,s_time_t now)
{
    if ( likely(inf->latency) )
    {
        /*scale the slice and period according to the latency hint*/
        /*reduce period temporarily to the latency hint*/
        inf->period = inf->latency;
        /*this results in max. 4s slice/period length*/
        ASSERT((inf->period < ULONG_MAX)
               && (inf->slice_orig < ULONG_MAX));
        /*scale slice accordingly, so that utilisation stays the same*/
        inf->slice = (inf->period * inf->slice_orig)
            / inf->period_orig;
        inf->deadl_abs = now + inf->period;
        inf->cputime = 0;
    }
    else
    {
        /*we don't have a latency hint.. use some other technique*/
        unblock_long_cons_b(inf, now);
    }
}
#endif
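/* Worked example for the latency-hint scaling above (editor's addition,
   assumed numbers): period_orig = 100ms, slice_orig = 10ms (10%
   utilisation), latency = 20ms.  Then period becomes 20ms and
   slice = (20ms * 10ms) / 100ms = 2ms, so utilisation stays at 10% while
   the first deadline after waking moves much closer to 'now'. */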
#if (UNBLOCK == UNBLOCK_BURST)
/*a new idea of dealing with short blocks: burst period scaling*/
static void unblock_short_burst(struct sedf_vcpu_info* inf, s_time_t now)
{
    /*treat blocked time as consumed by the domain*/
    inf->cputime += now - inf->block_abs;

    if ( (inf->cputime + EXTRA_QUANTUM) <= inf->slice )
    {
        /*if we can still use some time in the current slice
          then use it!*/
#ifdef SEDF_STATS
        /*we let the domain run in the current period*/
        inf->short_cont++;
#endif
    }
    else
    {
        /*we don't have a reasonable amount of time in
          our slice left => switch to burst mode*/
        if ( likely(inf->unblock_abs) )
        {
            /*set the period-length to the current blocking
              interval, possible enhancements: average over last
              blocking intervals, user-specified minimum,...*/
            inf->period = now - inf->unblock_abs;
            /*check for overflow on multiplication*/
            ASSERT((inf->period < ULONG_MAX)
                   && (inf->slice_orig < ULONG_MAX));
            /*scale slice accordingly, so that utilisation
              stays the same*/
            inf->slice = (inf->period * inf->slice_orig)
                / inf->period_orig;
            /*set new (shorter) deadline*/
            inf->deadl_abs += inf->period;
        }
        else
        {
            /*in case we haven't unblocked before
              start in next period!*/
            inf->cputime=0;
            inf->deadl_abs += inf->period;
        }
    }
    inf->unblock_abs = now;
}


static void unblock_long_burst(struct sedf_vcpu_info* inf, s_time_t now)
{
    if ( unlikely(inf->latency && (inf->period > inf->latency)) )
    {
        /*scale the slice and period according to the latency hint*/
        inf->period = inf->latency;
        /*check for overflows on multiplication*/
        ASSERT((inf->period < ULONG_MAX)
               && (inf->slice_orig < ULONG_MAX));
        /*scale slice accordingly, so that utilisation stays the same*/
        inf->slice = (inf->period * inf->slice_orig)
            / inf->period_orig;
        inf->deadl_abs = now + inf->period;
        inf->cputime = 0;
    }
    else
    {
        /*we don't have a latency hint.. or we are currently in
          "burst mode": use some other technique
          NB: this should be in fact the normal way of operation,
          when we are in sync with the device!*/
        unblock_long_cons_b(inf, now);
    }
    inf->unblock_abs = now;
}
#endif /* UNBLOCK == UNBLOCK_BURST */
#define DOMAIN_EDF         1
#define DOMAIN_EXTRA_PEN   2
#define DOMAIN_EXTRA_UTIL  3
#define DOMAIN_IDLE        4
static inline int get_run_type(struct vcpu* d)
{
    struct sedf_vcpu_info* inf = EDOM_INFO(d);
    if (is_idle_vcpu(d))
        return DOMAIN_IDLE;
    if (inf->status & EXTRA_RUN_PEN)
        return DOMAIN_EXTRA_PEN;
    if (inf->status & EXTRA_RUN_UTIL)
        return DOMAIN_EXTRA_UTIL;
    return DOMAIN_EDF;
}


/*Compares two domains in the relation of whether the one is allowed to
  interrupt the other's execution.
  It returns true (!=0) if a switch to the other domain is good.
  Current Priority scheme is as follows:
   EDF > L0 (penalty based) extra-time >
   L1 (utilization) extra-time > idle-domain
  In the same class priorities are assigned as following:
   EDF: early deadline > late deadline
   L0 extra-time: lower score > higher score*/
static inline int should_switch(struct vcpu *cur,
                                struct vcpu *other,
                                s_time_t now)
{
    struct sedf_vcpu_info *cur_inf, *other_inf;
    cur_inf   = EDOM_INFO(cur);
    other_inf = EDOM_INFO(other);

    /*check whether we need to make an earlier sched-decision*/
    if (PERIOD_BEGIN(other_inf) <
        CPU_INFO(other->processor)->current_slice_expires)
        return 1;
    /*no timing-based switches need to be taken into account here*/
    switch (get_run_type(cur)) {
    case DOMAIN_EDF:
        /* do not interrupt a running EDF domain */
        return 0;
    case DOMAIN_EXTRA_PEN:
        /*check whether we also want
          the L0 ex-q with lower score*/
        if ((other_inf->status & EXTRA_WANT_PEN_Q)
            && (other_inf->score[EXTRA_PEN_Q] <
                cur_inf->score[EXTRA_PEN_Q]))
            return 1;
        else return 0;
    case DOMAIN_EXTRA_UTIL:
        /*check whether we want the L0 extraq, don't
          switch if both domains want L1 extraq */
        if (other_inf->status & EXTRA_WANT_PEN_Q)
            return 1;
        else return 0;
    case DOMAIN_IDLE:
        return 1;
    }
    return 1;
}
void sedf_wake(struct vcpu *d)
{
    s_time_t               now = NOW();
    struct sedf_vcpu_info *inf = EDOM_INFO(d);

    PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
          d->vcpu_id);

    if ( unlikely(is_idle_vcpu(d)) )
        return;

    if ( unlikely(__task_on_queue(d)) )
    {
        PRINT(3,"\tdomain %i.%i is already in some queue\n",
              d->domain->domain_id, d->vcpu_id);
        return;
    }

    ASSERT(!sedf_runnable(d));
    inf->status &= ~SEDF_ASLEEP;
    ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
    ASSERT(!extraq_on(d, EXTRA_PEN_Q));

    if ( unlikely(inf->deadl_abs == 0) )
    {
        /*initial setup of the deadline*/
        inf->deadl_abs = now + inf->slice;
    }

    PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
          "now= %"PRIu64")\n",
          d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);

#ifdef SEDF_STATS
    inf->block_tot++;
#endif

    if ( unlikely(now < PERIOD_BEGIN(inf)) )
    {
        PRINT(4,"extratime unblock\n");
        /* unblocking in extra-time! */
#if (EXTRA == EXTRA_BLOCK_WEIGHT)
        if ( inf->status & EXTRA_WANT_PEN_Q )
        {
            /*we have a domain that wants compensation
              for block penalty and did just block in
              its compensation time. Give it another
              chance!*/
            extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
        }
#endif
        extraq_check_add_unblocked(d, 0);
    }
    else
    {
        if ( now < inf->deadl_abs )
        {
            PRINT(4,"short unblocking\n");
            /*short blocking*/
#ifdef SEDF_STATS
            inf->short_block_tot++;
#endif
#if (UNBLOCK <= UNBLOCK_ATROPOS)
            unblock_short_vcons(inf, now);
#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
            unblock_short_cons(inf, now);
#elif (UNBLOCK == UNBLOCK_BURST)
            unblock_short_burst(inf, now);
#elif (UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
            unblock_short_extra_support(inf, now);
#endif

            extraq_check_add_unblocked(d, 1);
        }
        else
        {
            PRINT(4,"long unblocking\n");
            /*long unblocking*/
#ifdef SEDF_STATS
            inf->long_block_tot++;
#endif
#if (UNBLOCK == UNBLOCK_ISOCHRONOUS_EDF)
            unblock_long_vcons(inf, now);
#elif (UNBLOCK == UNBLOCK_EDF \
       || UNBLOCK == UNBLOCK_EXTRA_SUPPORT)
            unblock_long_cons_b(inf, now);
#elif (UNBLOCK == UNBLOCK_ATROPOS)
            unblock_long_cons_c(inf, now);
#elif (UNBLOCK == UNBLOCK_SHORT_RESUME)
            unblock_long_cons_b(inf, now);
#elif (UNBLOCK == UNBLOCK_BURST)
            unblock_long_burst(inf, now);
#endif

            extraq_check_add_unblocked(d, 1);
        }
    }

    PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
          "now= %"PRIu64")\n",
          d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
          inf->period, now);

    if ( PERIOD_BEGIN(inf) > now )
    {
        __add_to_waitqueue_sort(d);
        PRINT(3,"added to waitq\n");
    }
    else
    {
        __add_to_runqueue_sort(d);
        PRINT(3,"added to runq\n");
    }

#ifdef SEDF_STATS
    /*do some statistics here...*/
    if ( inf->block_abs != 0 )
    {
        inf->block_time_tot += now - inf->block_abs;
        inf->penalty_time_tot +=
            PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
    }
#endif

    /*sanity check: make sure each extra-aware domain IS on the util-q!*/
    ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
    ASSERT(__task_on_queue(d));
    /*check whether the awakened task needs to invoke the do_schedule
      routine. Try to avoid unnecessary runs but:
      Safe approximation: Always switch to scheduler!*/
    ASSERT(d->processor >= 0);
    ASSERT(d->processor < NR_CPUS);
    ASSERT(schedule_data[d->processor].curr);

    if ( should_switch(schedule_data[d->processor].curr, d, now) )
        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
}
static int sedf_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
    if ( v == current )
        return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;

    vcpu_pause(v);
    v->cpu_affinity = *affinity;
    v->processor = first_cpu(v->cpu_affinity);
    vcpu_unpause(v);

    return 0;
}
/* Print a lot of useful information about a domain in the system */
static void sedf_dump_domain(struct vcpu *d)
{
    printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
           test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
    printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu c=%"PRIu64
           " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
           EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
           EDOM_INFO(d)->weight, d->cpu_time,
           EDOM_INFO(d)->score[EXTRA_UTIL_Q],
           (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
           EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);

    if ( d->cpu_time != 0 )
        printf(" (%"PRIu64"%%)", (EDOM_INFO(d)->extra_time_tot * 100)
               / d->cpu_time);

#ifdef SEDF_STATS
    if ( EDOM_INFO(d)->block_time_tot != 0 )
        printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
               EDOM_INFO(d)->block_time_tot);
    if ( EDOM_INFO(d)->block_tot != 0 )
        printf("\n   blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "
               "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
               EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
               (EDOM_INFO(d)->short_block_tot * 100)
               / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
               (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
               EDOM_INFO(d)->pen_extra_blocks,
               EDOM_INFO(d)->pen_extra_slices,
               EDOM_INFO(d)->long_block_tot,
               (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
               (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
               (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
#endif
    printf("\n");
}
/* dumps all domains on the specified cpu */
static void sedf_dump_cpu_state(int i)
{
    struct list_head      *list, *queue, *tmp;
    struct sedf_vcpu_info *d_inf;
    struct domain         *d;
    struct vcpu           *ed;
    int loop = 0;

    printk("now=%"PRIu64"\n",NOW());
    queue = RUNQ(i);
    printk("RUNQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);
    list_for_each_safe ( list, tmp, queue )
    {
        printk("%3d: ",loop++);
        d_inf = list_entry(list, struct sedf_vcpu_info, list);
        sedf_dump_domain(d_inf->vcpu);
    }

    queue = WAITQ(i); loop = 0;
    printk("\nWAITQ rq %lx   n: %lx, p: %lx\n",  (unsigned long)queue,
           (unsigned long) queue->next, (unsigned long) queue->prev);
    list_for_each_safe ( list, tmp, queue )
    {
        printk("%3d: ",loop++);
        d_inf = list_entry(list, struct sedf_vcpu_info, list);
        sedf_dump_domain(d_inf->vcpu);
    }

    queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
    printk("\nEXTRAQ (penalty) rq %lx   n: %lx, p: %lx\n",
           (unsigned long)queue, (unsigned long) queue->next,
           (unsigned long) queue->prev);
    list_for_each_safe ( list, tmp, queue )
    {
        d_inf = list_entry(list, struct sedf_vcpu_info,
                           extralist[EXTRA_PEN_Q]);
        printk("%3d: ",loop++);
        sedf_dump_domain(d_inf->vcpu);
    }

    queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
    printk("\nEXTRAQ (utilization) rq %lx   n: %lx, p: %lx\n",
           (unsigned long)queue, (unsigned long) queue->next,
           (unsigned long) queue->prev);
    list_for_each_safe ( list, tmp, queue )
    {
        d_inf = list_entry(list, struct sedf_vcpu_info,
                           extralist[EXTRA_UTIL_Q]);
        printk("%3d: ",loop++);
        sedf_dump_domain(d_inf->vcpu);
    }

    loop = 0;
    printk("\nnot on Q\n");

    for_each_domain ( d )
    {
        for_each_vcpu(d, ed)
        {
            if ( !__task_on_queue(ed) && (ed->processor == i) )
            {
                printk("%3d: ",loop++);
                sedf_dump_domain(ed);
            }
        }
    }
}
/* Adjusts periods and slices of the domains according to their weights. */
static int sedf_adjust_weights(struct sched_adjdom_cmd *cmd)
{
    struct vcpu   *p;
    struct domain *d;
    int            sumw[NR_CPUS];
    s_time_t       sumt[NR_CPUS];
    int            cpu;

    for ( cpu = 0; cpu < NR_CPUS; cpu++ )
    {
        sumw[cpu] = 0;
        sumt[cpu] = 0;
    }

    /* sum up all weights */
    for_each_domain( d )
    {
        for_each_vcpu( d, p )
        {
            if ( EDOM_INFO(p)->weight )
            {
                sumw[p->processor] += EDOM_INFO(p)->weight;
            }
            else
            {
                /*don't modify domains who don't have a weight, but sum
                  up the time they need, projected to a WEIGHT_PERIOD,
                  so that this time is not given to the weight-driven
                  domains*/
                /*check for overflows*/
                ASSERT((WEIGHT_PERIOD < ULONG_MAX)
                       && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
                sumt[p->processor] +=
                    (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
                    EDOM_INFO(p)->period_orig;
            }
        }
    }

    /* adjust all slices (and periods) to the new weight */
    for_each_domain( d )
    {
        for_each_vcpu ( d, p )
        {
            if ( EDOM_INFO(p)->weight )
            {
                EDOM_INFO(p)->period_orig =
                    EDOM_INFO(p)->period = WEIGHT_PERIOD;
                EDOM_INFO(p)->slice_orig =
                    EDOM_INFO(p)->slice =
                    (EDOM_INFO(p)->weight *
                     (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
                    sumw[p->processor];
            }
        }
    }

    return 0;
}
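/* Hedged worked example (editor's addition): one CPU, two weight-driven
   vcpus with weights 1 and 3, and no time-driven vcpus (sumt = 0), so
   sumw = 4.  With WEIGHT_PERIOD = 100ms and WEIGHT_SAFETY = 5ms the slices
   become 1*95ms/4 = 23.75ms and 3*95ms/4 = 71.25ms inside a 100ms period. */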
/* set or fetch domain scheduling parameters */
static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
{
    struct vcpu *v;

    PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "
          "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
          p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
          cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");

    if ( cmd->direction == SCHED_INFO_PUT )
    {
        /*check for sane parameters*/
        if (!cmd->u.sedf.period && !cmd->u.sedf.weight)
            return -EINVAL;
        if (cmd->u.sedf.weight) {
            if ((cmd->u.sedf.extratime & EXTRA_AWARE) &&
                (! cmd->u.sedf.period)) {
                /*weight driven domains with xtime ONLY!*/
                for_each_vcpu(p, v) {
                    EDOM_INFO(v)->extraweight = cmd->u.sedf.weight;
                    EDOM_INFO(v)->weight = 0;
                    EDOM_INFO(v)->slice = 0;
                    EDOM_INFO(v)->period = WEIGHT_PERIOD;
                }
            } else {
                /*weight driven domains with real-time execution*/
                for_each_vcpu(p, v)
                    EDOM_INFO(v)->weight = cmd->u.sedf.weight;
            }
        }
        else {
            /*time driven domains*/
            for_each_vcpu(p, v) {
                /* sanity checking! */
                if(cmd->u.sedf.slice > cmd->u.sedf.period )
                    return -EINVAL;
                EDOM_INFO(v)->weight = 0;
                EDOM_INFO(v)->extraweight = 0;
                EDOM_INFO(v)->period_orig =
                    EDOM_INFO(v)->period = cmd->u.sedf.period;
                EDOM_INFO(v)->slice_orig =
                    EDOM_INFO(v)->slice = cmd->u.sedf.slice;
            }
        }
        if (sedf_adjust_weights(cmd))
            return -EINVAL;

        for_each_vcpu(p, v) {
            EDOM_INFO(v)->status =
                (EDOM_INFO(v)->status &
                 ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
            EDOM_INFO(v)->latency = cmd->u.sedf.latency;
            extraq_check(v);
        }
    }
    else if ( cmd->direction == SCHED_INFO_GET )
    {
        cmd->u.sedf.period    = EDOM_INFO(p->vcpu[0])->period;
        cmd->u.sedf.slice     = EDOM_INFO(p->vcpu[0])->slice;
        cmd->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status
            & EXTRA_AWARE;
        cmd->u.sedf.latency   = EDOM_INFO(p->vcpu[0])->latency;
        cmd->u.sedf.weight    = EDOM_INFO(p->vcpu[0])->weight;
    }
    PRINT(2,"sedf_adjdom_finished\n");
    return 0;
}
struct scheduler sched_sedf_def = {
    .name     = "Simple EDF Scheduler",
    .opt_name = "sedf",
    .sched_id = SCHED_SEDF,

    .alloc_task     = sedf_alloc_task,
    .add_task       = sedf_add_task,
    .free_task      = sedf_free_task,
    .do_schedule    = sedf_do_schedule,
    .dump_cpu_state = sedf_dump_cpu_state,
    .sleep          = sedf_sleep,
    .wake           = sedf_wake,
    .adjdom         = sedf_adjdom,
    .set_affinity   = sedf_set_affinity
};
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */