ia64/xen-unstable

view xen/common/sched_sedf.c @ 10570:8dc4af3f192c

[IA64] Implement and use DOM0_DOMAIN_STEUP.

DOM0_GETMEMLIST now reads ptes and use gpfn.
Domain builder reworked: calls DOMAIN_SETUP, setup start_info page.
SAL data are now in domain memory.
is_vti field added in domain.arch.
Many cleanups (indentation, static, warnings).

Signed-off-by: Tristan Gingold <tristan.gingold@bull.net>
author awilliam@xenbuild.aw
date Wed Jul 05 09:28:32 2006 -0600 (2006-07-05)
parents ef8cdd1dc836
children 462d6e4cb29a
line source
1 /******************************************************************************
2 * Simple EDF scheduler for xen
3 *
4 * by Stephan Diestelhorst (C) 2004 Cambridge University
5 * based on code by Mark Williamson (C) 2004 Intel Research Cambridge
6 */
8 #include <xen/lib.h>
9 #include <xen/sched.h>
10 #include <xen/sched-if.h>
11 #include <public/sched_ctl.h>
12 #include <xen/timer.h>
13 #include <xen/softirq.h>
14 #include <xen/time.h>
/* Verbosity settings: PRINT(level, ...) emits only if level <= SEDFLEVEL. */
#define SEDFLEVEL 0
#define PRINT(_f, _a...)            \
    do {                            \
        if ( (_f) <= SEDFLEVEL )    \
            printk(_a );            \
    } while ( 0 )

#ifndef NDEBUG
#define SEDF_STATS
/* Soft assertion: logs the failed condition but does not halt. */
#define CHECK(_p)                                               \
    do {                                                        \
        if ( !(_p) )                                            \
            printk("Check '%s' failed, line %d, file %s\n",     \
                   #_p , __LINE__, __FILE__);                   \
    } while ( 0 )
#else
#define CHECK(_p) ((void)0)
#endif
/* Flags kept in sedf_vcpu_info.status. */
#define EXTRA_NONE (0)        /* does not want extra time */
#define EXTRA_AWARE (1)       /* wants to receive extra time */
#define EXTRA_RUN_PEN (2)     /* currently running in L0 (penalty) extra time */
#define EXTRA_RUN_UTIL (4)    /* currently running in L1 (utilization) extra time */
#define EXTRA_WANT_PEN_Q (8)  /* wants a place on the penalty queue */
/* Indices into the extralist[]/extraq[] arrays. */
#define EXTRA_PEN_Q (0)       /* L0: block-penalty compensation queue */
#define EXTRA_UTIL_Q (1)      /* L1: utilization-weighted queue */
#define SEDF_ASLEEP (16)      /* domain is blocked/asleep */

#define EXTRA_QUANTUM (MICROSECS(500))  /* one extra-time slice */
#define WEIGHT_PERIOD (MILLISECS(100))
#define WEIGHT_SAFETY (MILLISECS(5))

/* Accepted bounds for the period/slice parameters. */
#define PERIOD_MAX MILLISECS(10000) /* 10s  */
#define PERIOD_MIN (MICROSECS(10))  /* 10us */
#define SLICE_MIN (MICROSECS(5))    /*  5us */

/* Logical helpers, used mainly inside ASSERT conditions. */
#define IMPLY(a, b) (!(a) || (b))
#define EQ(a, b) ((!!(a)) == (!!(b)))
/* Per-domain scheduler bookkeeping (one per struct domain). */
struct sedf_dom_info {
    struct domain *domain;  /* back-pointer to the owning domain */
};
/* Per-VCPU scheduler state. */
struct sedf_vcpu_info {
    struct vcpu *vcpu;              /* back-pointer to the owning VCPU */
    struct list_head list;          /* linkage on runq or waitq */
    struct list_head extralist[2];  /* linkage on the L0/L1 extra queues */

    /* Parameters for EDF. */
    s_time_t period;   /* = relative deadline */
    s_time_t slice;    /* = worst case execution time per period */

    /* Advanced parameters: latency scaling. */
    s_time_t period_orig;
    s_time_t slice_orig;
    s_time_t latency;

    /* Status of domain (EXTRA_* and SEDF_ASLEEP flags). */
    int status;
    /* Weights for "Scheduling for beginners/ lazy/ etc." ;) */
    short weight;
    short extraweight;

    /* Bookkeeping. */
    s_time_t deadl_abs;        /* absolute deadline of the current period */
    s_time_t sched_start_abs;  /* time this VCPU was last scheduled in */
    s_time_t cputime;          /* CPU time consumed in the current period */

    /* Times the domain un-/blocked. */
    s_time_t block_abs;
    s_time_t unblock_abs;

    /* Scores for {util, block penalty}-weighted extratime distribution;
     * lower score == higher priority for an extra slice. */
    int score[2];
    s_time_t short_block_lost_tot;

    /* Statistics. */
    s_time_t extra_time_tot;

#ifdef SEDF_STATS
    s_time_t block_time_tot;
    s_time_t penalty_time_tot;
    int block_tot;
    int short_block_tot;
    int long_block_tot;
    int short_cont;
    int pen_extra_blocks;
    int pen_extra_slices;
#endif
};
/* Per-physical-CPU scheduler state. */
struct sedf_cpu_info {
    struct list_head runnableq;      /* EDF runqueue, sorted by deadline */
    struct list_head waitq;          /* domains waiting for their next period */
    struct list_head extraq[2];      /* L0 (penalty) and L1 (utilization) queues */
    s_time_t current_slice_expires;  /* end of the currently scheduled slice */
};
/* Accessors for the scheduler-private fields hung off VCPUs and CPUs. */
#define EDOM_INFO(d)   ((struct sedf_vcpu_info *)((d)->sched_priv))
#define CPU_INFO(cpu)  ((struct sedf_cpu_info *)schedule_data[cpu].sched_priv)
#define LIST(d)        (&EDOM_INFO(d)->list)
#define EXTRALIST(d,i) (&(EDOM_INFO(d)->extralist[i]))
#define RUNQ(cpu)      (&CPU_INFO(cpu)->runnableq)
#define WAITQ(cpu)     (&CPU_INFO(cpu)->waitq)
#define EXTRAQ(cpu,i)  (&(CPU_INFO(cpu)->extraq[i]))
#define IDLETASK(cpu)  ((struct vcpu *)schedule_data[cpu].idle)

/* Start of the current period = absolute deadline minus one period. */
#define PERIOD_BEGIN(inf) ((inf)->deadl_abs - (inf)->period)

#define MIN(x,y) (((x)<(y))?(x):(y))
/*
 * Round-up integer division. NOTE: 'y' must be fully parenthesized in the
 * expansion, otherwise DIV_UP(a, b + c) divides by 'b' and then adds 'c'.
 */
#define DIV_UP(x,y) (((x) + (y) - 1) / (y))

/* Status tests: bits 1|2 == EXTRA_RUN_PEN | EXTRA_RUN_UTIL. */
#define extra_runs(inf)      (((inf)->status) & 6)
#define extra_get_cur_q(inf) ((((inf)->status & 6) >> 1)-1)
#define sedf_runnable(edom)  (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
134 static void sedf_dump_cpu_state(int i);
136 static inline int extraq_on(struct vcpu *d, int i)
137 {
138 return ((EXTRALIST(d,i)->next != NULL) &&
139 (EXTRALIST(d,i)->next != EXTRALIST(d,i)));
140 }
142 static inline void extraq_add_head(struct vcpu *d, int i)
143 {
144 list_add(EXTRALIST(d,i), EXTRAQ(d->processor,i));
145 ASSERT(extraq_on(d, i));
146 }
148 static inline void extraq_add_tail(struct vcpu *d, int i)
149 {
150 list_add_tail(EXTRALIST(d,i), EXTRAQ(d->processor,i));
151 ASSERT(extraq_on(d, i));
152 }
/* Unlink VCPU @d from extra queue @i and poison 'next' so that
 * extraq_on() reports it as off-queue afterwards. */
static inline void extraq_del(struct vcpu *d, int i)
{
    struct list_head *list = EXTRALIST(d,i);
    ASSERT(extraq_on(d,i));
    PRINT(3, "Removing domain %i.%i from L%i extraq\n",
          d->domain->domain_id, d->vcpu_id, i);
    list_del(list);
    list->next = NULL;  /* off-queue marker (see extraq_on) */
    ASSERT(!extraq_on(d, i));
}
/*
 * Adds a domain to the queue of processes which are aware of extra time.
 * The list is sorted by score, where a lower score means higher priority
 * for an extra slice. It also updates the scores of queued entries, by
 * simply subtracting a fixed value (@sub) from each one, in order to avoid
 * overflow. The algorithm works by simply charging each domain that
 * received extratime with an inverse of its weight.
 */
static inline void extraq_add_sort_update(struct vcpu *d, int i, int sub)
{
    struct list_head *cur;
    struct sedf_vcpu_info *curinf;

    ASSERT(!extraq_on(d,i));

    PRINT(3, "Adding domain %i.%i (score= %i, short_pen= %"PRIi64")"
          " to L%i extraq\n",
          d->domain->domain_id, d->vcpu_id, EDOM_INFO(d)->score[i],
          EDOM_INFO(d)->short_block_lost_tot, i);

    /*
     * Iterate through all elements to find our "hole" and on our way
     * update all the other scores.
     */
    list_for_each ( cur, EXTRAQ(d->processor, i) )
    {
        curinf = list_entry(cur,struct sedf_vcpu_info,extralist[i]);
        curinf->score[i] -= sub;
        if ( EDOM_INFO(d)->score[i] < curinf->score[i] )
            break;
        PRINT(4,"\tbehind domain %i.%i (score= %i)\n",
              curinf->vcpu->domain->domain_id,
              curinf->vcpu->vcpu_id, curinf->score[i]);
    }

    /* cur now contains the element, before which we'll enqueue. */
    PRINT(3, "\tlist_add to %p\n", cur->prev);
    list_add(EXTRALIST(d,i),cur->prev);

    /* Continue updating the scores on the remainder of the extraq. */
    if ( (cur != EXTRAQ(d->processor,i)) && sub )
    {
        for ( cur = cur->next; cur != EXTRAQ(d->processor,i); cur = cur->next )
        {
            curinf = list_entry(cur,struct sedf_vcpu_info, extralist[i]);
            curinf->score[i] -= sub;
            PRINT(4, "\tupdating domain %i.%i (score= %u)\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id, curinf->score[i]);
        }
    }

    ASSERT(extraq_on(d,i));
}
/*
 * Reconcile VCPU @d's presence on the L1 (utilization) extra queue with its
 * EXTRA_AWARE flag: dequeue it if it no longer wants extra time (and is not
 * currently running in extra time), enqueue it if it does and is runnable.
 */
static inline void extraq_check(struct vcpu *d)
{
    if ( extraq_on(d, EXTRA_UTIL_Q) )
    {
        PRINT(2,"Dom %i.%i is on L1 extraQ\n",
              d->domain->domain_id, d->vcpu_id);

        if ( !(EDOM_INFO(d)->status & EXTRA_AWARE) &&
             !extra_runs(EDOM_INFO(d)) )
        {
            extraq_del(d, EXTRA_UTIL_Q);
            PRINT(2,"Removed dom %i.%i from L1 extraQ\n",
                  d->domain->domain_id, d->vcpu_id);
        }
    }
    else
    {
        PRINT(2, "Dom %i.%i is NOT on L1 extraQ\n",
              d->domain->domain_id,
              d->vcpu_id);

        if ( (EDOM_INFO(d)->status & EXTRA_AWARE) && sedf_runnable(d) )
        {
            /* Enqueue without updating any scores (sub == 0). */
            extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
            PRINT(2,"Added dom %i.%i to L1 extraQ\n",
                  d->domain->domain_id, d->vcpu_id);
        }
    }
}
/*
 * Requeue an unblocked, extra-time-aware VCPU on the L1 (utilization)
 * extra queue. NOTE(review): @priority is currently unused.
 */
static inline void extraq_check_add_unblocked(struct vcpu *d, int priority)
{
    struct sedf_vcpu_info *inf = EDOM_INFO(d);

    if ( inf->status & EXTRA_AWARE )
        /* Put on the weighted extraq without updating any scores. */
        extraq_add_sort_update(d, EXTRA_UTIL_Q, 0);
}
256 static inline int __task_on_queue(struct vcpu *d)
257 {
258 return (((LIST(d))->next != NULL) && (LIST(d)->next != LIST(d)));
259 }
/* Unlink VCPU @d from its runq/waitq and poison 'next' so that
 * __task_on_queue() reports it as off-queue afterwards. */
static inline void __del_from_queue(struct vcpu *d)
{
    struct list_head *list = LIST(d);
    ASSERT(__task_on_queue(d));
    PRINT(3,"Removing domain %i.%i (bop= %"PRIu64") from runq/waitq\n",
          d->domain->domain_id, d->vcpu_id, PERIOD_BEGIN(EDOM_INFO(d)));
    list_del(list);
    list->next = NULL;  /* off-queue marker (see __task_on_queue) */
    ASSERT(!__task_on_queue(d));
}
272 typedef int(*list_comparer)(struct list_head* el1, struct list_head* el2);
274 static inline void list_insert_sort(
275 struct list_head *list, struct list_head *element, list_comparer comp)
276 {
277 struct list_head *cur;
279 /* Iterate through all elements to find our "hole". */
280 list_for_each( cur, list )
281 if ( comp(element, cur) < 0 )
282 break;
284 /* cur now contains the element, before which we'll enqueue. */
285 PRINT(3,"\tlist_add to %p\n",cur->prev);
286 list_add(element, cur->prev);
287 }
/*
 * Generates a comparison function 'name_comp' suitable for
 * list_insert_sort(): both list entries are converted back to
 * sedf_vcpu_info via 'field' (bound to d1/d2) and ordered by the
 * expressions comp1/comp2 (-1: less, 0: equal, 1: greater).
 */
#define DOMAIN_COMPARER(name, field, comp1, comp2)              \
int name##_comp(struct list_head* el1, struct list_head* el2)   \
{                                                               \
    struct sedf_vcpu_info *d1, *d2;                             \
    d1 = list_entry(el1,struct sedf_vcpu_info, field);          \
    d2 = list_entry(el2,struct sedf_vcpu_info, field);          \
    if ( (comp1) == (comp2) )                                   \
        return 0;                                               \
    if ( (comp1) < (comp2) )                                    \
        return -1;                                              \
    else                                                        \
        return 1;                                               \
}
/*
 * Adds a domain to the queue of processes which wait for the beginning of
 * the next period; this list is therefore sorted by this time, which is
 * simply absolute deadline - period.
 */
DOMAIN_COMPARER(waitq, list, PERIOD_BEGIN(d1), PERIOD_BEGIN(d2));
/* Insert @v into its CPU's waitqueue, keeping the period-begin ordering. */
static inline void __add_to_waitqueue_sort(struct vcpu *v)
{
    ASSERT(!__task_on_queue(v));
    PRINT(3,"Adding domain %i.%i (bop= %"PRIu64") to waitq\n",
          v->domain->domain_id, v->vcpu_id, PERIOD_BEGIN(EDOM_INFO(v)));
    list_insert_sort(WAITQ(v->processor), LIST(v), waitq_comp);
    ASSERT(__task_on_queue(v));
}
/*
 * Adds a domain to the queue of processes which have started their current
 * period and are runnable (i.e. not blocked, dying,...). The first element
 * on this list is running on the processor, if the list is empty the idle
 * task will run. As we are implementing EDF, this list is sorted by
 * deadlines.
 */
DOMAIN_COMPARER(runq, list, d1->deadl_abs, d2->deadl_abs);
/* Insert @v into its CPU's runqueue, keeping the deadline ordering. */
static inline void __add_to_runqueue_sort(struct vcpu *v)
{
    PRINT(3,"Adding domain %i.%i (deadl= %"PRIu64") to runq\n",
          v->domain->domain_id, v->vcpu_id, EDOM_INFO(v)->deadl_abs);
    list_insert_sort(RUNQ(v->processor), LIST(v), runq_comp);
}
/*
 * Allocate and initialise scheduler-private state for VCPU @v, creating the
 * per-domain and per-CPU structures on first use.
 * Returns 0 on success, -1 if an allocation fails.
 */
static int sedf_init_vcpu(struct vcpu *v)
{
    struct sedf_vcpu_info *inf;

    /* First VCPU of this domain: allocate the per-domain info. */
    if ( v->domain->sched_priv == NULL )
    {
        v->domain->sched_priv = xmalloc(struct sedf_dom_info);
        if ( v->domain->sched_priv == NULL )
            return -1;
        memset(v->domain->sched_priv, 0, sizeof(struct sedf_dom_info));
    }

    if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
        return -1;
    memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));

    inf = EDOM_INFO(v);
    inf->vcpu = v;

    /* Allocate per-CPU context if this is the first domain to be added. */
    if ( unlikely(schedule_data[v->processor].sched_priv == NULL) )
    {
        schedule_data[v->processor].sched_priv =
            xmalloc(struct sedf_cpu_info);
        BUG_ON(schedule_data[v->processor].sched_priv == NULL);
        memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor)));
        INIT_LIST_HEAD(WAITQ(v->processor));
        INIT_LIST_HEAD(RUNQ(v->processor));
        INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_PEN_Q));
        INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
    }

    /* Every VCPU gets an equal share of extratime by default. */
    inf->deadl_abs = 0;
    inf->latency = 0;
    inf->status = EXTRA_AWARE | SEDF_ASLEEP;
    inf->extraweight = 1;

    if ( v->domain->domain_id == 0 )
    {
        /* Domain0 gets 75% guaranteed (15ms every 20ms). */
        inf->period = MILLISECS(20);
        inf->slice = MILLISECS(15);
    }
    else
    {
        /* Best-effort extratime only. */
        inf->period = WEIGHT_PERIOD;
        inf->slice = 0;
    }

    inf->period_orig = inf->period; inf->slice_orig = inf->slice;
    INIT_LIST_HEAD(&(inf->list));
    INIT_LIST_HEAD(&(inf->extralist[EXTRA_PEN_Q]));
    INIT_LIST_HEAD(&(inf->extralist[EXTRA_UTIL_Q]));

    if ( !is_idle_vcpu(v) )
    {
        extraq_check(v);
    }
    else
    {
        /* Idle VCPUs are always "awake" and carry no deadline. */
        EDOM_INFO(v)->deadl_abs = 0;
        EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
    }

    return 0;
}
/* Free the scheduler-private state of domain @d and of each of its VCPUs. */
static void sedf_destroy_domain(struct domain *d)
{
    int i;

    xfree(d->sched_priv);

    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        if ( d->vcpu[i] )
            xfree(d->vcpu[i]->sched_priv);
}
/*
 * Handles the rescheduling & bookkeeping of domains running in their
 * guaranteed timeslice.
 */
static void desched_edf_dom(s_time_t now, struct vcpu* d)
{
    struct sedf_vcpu_info* inf = EDOM_INFO(d);

    /* Current domain is running in real time mode. */
    ASSERT(__task_on_queue(d));

    /* Update the domain's cputime. */
    inf->cputime += now - inf->sched_start_abs;

    /*
     * Scheduling decisions which don't remove the running domain from
     * the runq: slice not yet used up and still runnable.
     */
    if ( (inf->cputime < inf->slice) && sedf_runnable(d) )
        return;

    __del_from_queue(d);

    /*
     * Manage bookkeeping (i.e. calculate next deadline, memorise
     * overrun-time of slice) of finished domains.
     */
    if ( inf->cputime >= inf->slice )
    {
        /* Carry any overrun into the next period. */
        inf->cputime -= inf->slice;

        if ( inf->period < inf->period_orig )
        {
            /* This domain runs in latency scaling or burst mode. */
            inf->period *= 2;
            inf->slice *= 2;
            if ( (inf->period > inf->period_orig) ||
                 (inf->slice > inf->slice_orig) )
            {
                /* Reset slice and period to their original values. */
                inf->period = inf->period_orig;
                inf->slice = inf->slice_orig;
            }
        }

        /* Set next deadline. */
        inf->deadl_abs += inf->period;
    }

    /* Add a runnable domain to the waitqueue. */
    if ( sedf_runnable(d) )
    {
        __add_to_waitqueue_sort(d);
    }
    else
    {
        /* We have a blocked realtime task -> remove it from exqs too. */
        if ( extraq_on(d, EXTRA_PEN_Q) )
            extraq_del(d, EXTRA_PEN_Q);
        if ( extraq_on(d, EXTRA_UTIL_Q) )
            extraq_del(d, EXTRA_UTIL_Q);
    }

    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
                 sedf_runnable(d)));
}
/*
 * Update all elements on the queues: move domains whose next period has
 * started from the waitq to the runq, and fix up runq entries whose
 * deadline has passed or whose slice is already used up.
 */
static void update_queues(
    s_time_t now, struct list_head *runq, struct list_head *waitq)
{
    struct list_head *cur, *tmp;
    struct sedf_vcpu_info *curinf;

    PRINT(3,"Updating waitq..\n");

    /*
     * Check for the first elements of the waitqueue, whether their
     * next period has already started.
     */
    list_for_each_safe ( cur, tmp, waitq )
    {
        curinf = list_entry(cur, struct sedf_vcpu_info, list);
        PRINT(4,"\tLooking @ dom %i.%i\n",
              curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);
        if ( PERIOD_BEGIN(curinf) > now )
            break;  /* waitq is sorted: no later entry can be due either */
        __del_from_queue(curinf->vcpu);
        __add_to_runqueue_sort(curinf->vcpu);
    }

    PRINT(3,"Updating runq..\n");

    /* Process the runq, find domains that are on the runq that shouldn't be. */
    list_for_each_safe ( cur, tmp, runq )
    {
        curinf = list_entry(cur,struct sedf_vcpu_info,list);
        PRINT(4,"\tLooking @ dom %i.%i\n",
              curinf->vcpu->domain->domain_id, curinf->vcpu->vcpu_id);

        if ( unlikely(curinf->slice == 0) )
        {
            /* Ignore domains with empty slice. */
            PRINT(4,"\tUpdating zero-slice domain %i.%i\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id);
            __del_from_queue(curinf->vcpu);

            /* Move them to their next period. */
            curinf->deadl_abs += curinf->period;

            /* Ensure that the start of the next period is in the future. */
            if ( unlikely(PERIOD_BEGIN(curinf) < now) )
                curinf->deadl_abs +=
                    (DIV_UP(now - PERIOD_BEGIN(curinf),
                            curinf->period)) * curinf->period;

            /* Put them back into the queue. */
            __add_to_waitqueue_sort(curinf->vcpu);
        }
        else if ( unlikely((curinf->deadl_abs < now) ||
                           (curinf->cputime > curinf->slice)) )
        {
            /*
             * We missed the deadline or the slice was already finished.
             * Might happen because of dom_adj.
             */
            PRINT(4,"\tDomain %i.%i exceeded it's deadline/"
                  "slice (%"PRIu64" / %"PRIu64") now: %"PRIu64
                  " cputime: %"PRIu64"\n",
                  curinf->vcpu->domain->domain_id,
                  curinf->vcpu->vcpu_id,
                  curinf->deadl_abs, curinf->slice, now,
                  curinf->cputime);
            __del_from_queue(curinf->vcpu);

            /* Common case: we miss one period. */
            curinf->deadl_abs += curinf->period;

            /*
             * If we are still behind: modulo arithmetic, force deadline
             * to be in future and aligned to period borders.
             */
            if ( unlikely(curinf->deadl_abs < now) )
                curinf->deadl_abs +=
                    DIV_UP(now - curinf->deadl_abs,
                           curinf->period) * curinf->period;
            ASSERT(curinf->deadl_abs >= now);

            /* Give a fresh slice. */
            curinf->cputime = 0;
            if ( PERIOD_BEGIN(curinf) > now )
                __add_to_waitqueue_sort(curinf->vcpu);
            else
                __add_to_runqueue_sort(curinf->vcpu);
        }
        else
            break;  /* runq is deadline-sorted: the rest are fine */
    }

    PRINT(3,"done updating the queues\n");
}
/*
 * Removes a domain from the head of the according extraQ and requeues it at
 * a specified position:
 *   round-robin extratime: end of extraQ
 *   weighted ext.:         insert in sorted list by score
 * If the domain is blocked / has regained its short-block-loss time it is
 * not put on any queue.
 */
static void desched_extra_dom(s_time_t now, struct vcpu *d)
{
    struct sedf_vcpu_info *inf = EDOM_INFO(d);
    int i = extra_get_cur_q(inf);
    unsigned long oldscore;

    ASSERT(extraq_on(d, i));

    /* Unset all running flags. */
    inf->status &= ~(EXTRA_RUN_PEN | EXTRA_RUN_UTIL);
    /* Fresh slice for the next run. */
    inf->cputime = 0;
    /* Accumulate total extratime. */
    inf->extra_time_tot += now - inf->sched_start_abs;
    /* Remove extradomain from head of the queue. */
    extraq_del(d, i);

    /* Update the score. */
    oldscore = inf->score[i];
    if ( i == EXTRA_PEN_Q )
    {
        /*
         * Domain was running in the L0 (penalty) extraq: reduce the block
         * loss by the time just consumed (probably more sophistication
         * needed here).
         */
        /*inf->short_block_lost_tot -= EXTRA_QUANTUM;*/
        inf->short_block_lost_tot -= now - inf->sched_start_abs;
        PRINT(3,"Domain %i.%i: Short_block_loss: %"PRIi64"\n",
              inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id,
              inf->short_block_lost_tot);
#if 0
        /*
         * KAF: If we don't exit short-blocking state at this point
         * domain0 can steal all CPU for up to 10 seconds before
         * scheduling settles down (when competing against another
         * CPU-bound domain). Doing this seems to make things behave
         * nicely. Noone gets starved by default.
         */
        if ( inf->short_block_lost_tot <= 0 )
#endif
        /* NOTE: with the guard above compiled out, this block executes
         * unconditionally -- see KAF's rationale in the #if 0 comment. */
        {
            PRINT(4,"Domain %i.%i compensated short block loss!\n",
                  inf->vcpu->domain->domain_id, inf->vcpu->vcpu_id);
            /* We have (over-)compensated our block penalty. */
            inf->short_block_lost_tot = 0;
            /* We don't want a place on the penalty queue anymore! */
            inf->status &= ~EXTRA_WANT_PEN_Q;
            goto check_extra_queues;
        }

        /*
         * We have to go again for another try in the block-extraq;
         * the score is not used incrementally here, as this is
         * already done by recalculating the block_lost.
         */
        inf->score[EXTRA_PEN_Q] = (inf->period << 10) /
            inf->short_block_lost_tot;
        oldscore = 0;
    }
    else
    {
        /*
         * Domain was running in the L1 (utilization) extraq => score is
         * inverse of utilization and is used somewhat incrementally!
         */
        if ( !inf->extraweight )
            /* NB: use fixed point arithmetic with 10 bits. */
            inf->score[EXTRA_UTIL_Q] = (inf->period << 10) /
                inf->slice;
        else
            /*
             * Conversion between realtime utilisation and extraweight:
             * full (ie 100%) utilization is equivalent to 128 extraweight.
             */
            inf->score[EXTRA_UTIL_Q] = (1<<17) / inf->extraweight;
    }

 check_extra_queues:
    /* Add a runnable domain to the right queue and remove blocked ones. */
    if ( sedf_runnable(d) )
    {
        /* Add according to score: weighted round robin. */
        if (((inf->status & EXTRA_AWARE) && (i == EXTRA_UTIL_Q)) ||
            ((inf->status & EXTRA_WANT_PEN_Q) && (i == EXTRA_PEN_Q)))
            extraq_add_sort_update(d, i, oldscore);
    }
    else
    {
        /* Remove this blocked domain from the waitq! */
        __del_from_queue(d);
        /* Make sure that we remove a blocked domain from the other
         * extraq too. */
        if ( i == EXTRA_PEN_Q )
        {
            if ( extraq_on(d, EXTRA_UTIL_Q) )
                extraq_del(d, EXTRA_UTIL_Q);
        }
        else
        {
            if ( extraq_on(d, EXTRA_PEN_Q) )
                extraq_del(d, EXTRA_PEN_Q);
        }
    }

    ASSERT(EQ(sedf_runnable(d), __task_on_queue(d)));
    ASSERT(IMPLY(extraq_on(d, EXTRA_UTIL_Q) || extraq_on(d, EXTRA_PEN_Q),
                 sedf_runnable(d)));
}
/*
 * Pick a domain to run in extra time (no EDF-guaranteed work pending), or
 * the idle task if the gap until @end_xt is smaller than EXTRA_QUANTUM or
 * both extra queues are empty. L0 (penalty) entries take precedence over
 * L1 (utilization) entries; each runs for one EXTRA_QUANTUM at a time.
 */
static struct task_slice sedf_do_extra_schedule(
    s_time_t now, s_time_t end_xt, struct list_head *extraq[], int cpu)
{
    struct task_slice ret;
    struct sedf_vcpu_info *runinf;

    ASSERT(end_xt > now);

    /* Enough time left to use for extratime? */
    if ( end_xt - now < EXTRA_QUANTUM )
        goto return_idle;

    if ( !list_empty(extraq[EXTRA_PEN_Q]) )
    {
        /*
         * We still have elements on the level 0 extraq
         * => let those run first!
         */
        runinf = list_entry(extraq[EXTRA_PEN_Q]->next,
                            struct sedf_vcpu_info, extralist[EXTRA_PEN_Q]);
        runinf->status |= EXTRA_RUN_PEN;
        ret.task = runinf->vcpu;
        ret.time = EXTRA_QUANTUM;
#ifdef SEDF_STATS
        runinf->pen_extra_slices++;
#endif
    }
    else
    {
        if ( !list_empty(extraq[EXTRA_UTIL_Q]) )
        {
            /* Use elements from the normal extraqueue. */
            runinf = list_entry(extraq[EXTRA_UTIL_Q]->next,
                                struct sedf_vcpu_info,
                                extralist[EXTRA_UTIL_Q]);
            runinf->status |= EXTRA_RUN_UTIL;
            ret.task = runinf->vcpu;
            ret.time = EXTRA_QUANTUM;
        }
        else
            goto return_idle;
    }

    ASSERT(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    return ret;

 return_idle:
    /* Nothing to do in extra time: idle until end_xt. */
    ret.task = IDLETASK(cpu);
    ret.time = end_xt - now;
    ASSERT(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    return ret;
}
/*
 * Main scheduling function.
 * Reasons for calling this function are:
 *  - timeslice for the current period used up,
 *  - domain on waitqueue has started its period,
 *  - and various others ;) in general: determine which domain to run next.
 */
static struct task_slice sedf_do_schedule(s_time_t now)
{
    int cpu = smp_processor_id();
    struct list_head *runq = RUNQ(cpu);
    struct list_head *waitq = WAITQ(cpu);
    struct sedf_vcpu_info *inf = EDOM_INFO(current);
    struct list_head *extraq[] = {
        EXTRAQ(cpu, EXTRA_PEN_Q), EXTRAQ(cpu, EXTRA_UTIL_Q)};
    struct sedf_vcpu_info *runinf, *waitinf;
    struct task_slice ret;

    /* Idle tasks don't need any of the following stuff. */
    if ( is_idle_vcpu(current) )
        goto check_waitq;

    /*
     * Create local state of the status of the domain, in order to avoid
     * inconsistent state during scheduling decisions, because data for
     * vcpu_runnable is not protected by the scheduling lock!
     */
    if ( !vcpu_runnable(current) )
        inf->status |= SEDF_ASLEEP;

    if ( inf->status & SEDF_ASLEEP )
        inf->block_abs = now;

    if ( unlikely(extra_runs(inf)) )
    {
        /* Special treatment of domains running in extra time. */
        desched_extra_dom(now, current);
    }
    else
    {
        desched_edf_dom(now, current);
    }
 check_waitq:
    update_queues(now, runq, waitq);

    /*
     * Now simply pick the first domain from the runqueue, which has the
     * earliest deadline, because the list is sorted.
     */
    if ( !list_empty(runq) )
    {
        runinf   = list_entry(runq->next,struct sedf_vcpu_info,list);
        ret.task = runinf->vcpu;
        if ( !list_empty(waitq) )
        {
            waitinf  = list_entry(waitq->next,
                                  struct sedf_vcpu_info,list);
            /*
             * Rerun scheduler, when scheduled domain reaches its
             * end of slice or the first domain from the waitqueue
             * gets ready.
             */
            ret.time = MIN(now + runinf->slice - runinf->cputime,
                           PERIOD_BEGIN(waitinf)) - now;
        }
        else
        {
            ret.time = runinf->slice - runinf->cputime;
        }
        CHECK(ret.time > 0);
        goto sched_done;
    }

    if ( !list_empty(waitq) )
    {
        waitinf  = list_entry(waitq->next,struct sedf_vcpu_info, list);
        /*
         * We could not find any suitable domain
         * => look for domains that are aware of extratime.
         */
        ret = sedf_do_extra_schedule(now, PERIOD_BEGIN(waitinf),
                                     extraq, cpu);
        CHECK(ret.time > 0);
    }
    else
    {
        /*
         * This could probably never happen, but one never knows...
         * It can... imagine a second CPU, which is pure scifi ATM,
         * but one never knows ;)
         */
        ret.task = IDLETASK(cpu);
        ret.time = SECONDS(1);
    }

 sched_done:
    /*
     * TODO: Do something USEFUL when this happens and find out, why it
     * still can happen!!!
     */
    if ( ret.time < 0)
    {
        printk("Ouch! We are seriously BEHIND schedule! %"PRIi64"\n",
               ret.time);
        ret.time = EXTRA_QUANTUM;
    }

    EDOM_INFO(ret.task)->sched_start_abs = now;
    CHECK(ret.time > 0);
    ASSERT(sedf_runnable(ret.task));
    CPU_INFO(cpu)->current_slice_expires = now + ret.time;
    return ret;
}
/*
 * Mark VCPU @d as asleep. If it is the currently running VCPU, trigger a
 * reschedule (the scheduler dequeues it on the way out); otherwise remove
 * it from all queues immediately.
 */
static void sedf_sleep(struct vcpu *d)
{
    PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
          d->domain->domain_id, d->vcpu_id);

    if ( is_idle_vcpu(d) )
        return;

    EDOM_INFO(d)->status |= SEDF_ASLEEP;

    if ( schedule_data[d->processor].curr == d )
    {
        cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
    }
    else
    {
        if ( __task_on_queue(d) )
            __del_from_queue(d);
        if ( extraq_on(d, EXTRA_UTIL_Q) )
            extraq_del(d, EXTRA_UTIL_Q);
        if ( extraq_on(d, EXTRA_PEN_Q) )
            extraq_del(d, EXTRA_PEN_Q);
    }
}
866 /* This function wakes up a domain, i.e. moves them into the waitqueue
867 * things to mention are: admission control is taking place nowhere at
868 * the moment, so we can't be sure, whether it is safe to wake the domain
869 * up at all. Anyway, even if it is safe (total cpu usage <=100%) there are
870 * some considerations on when to allow the domain to wake up and have it's
871 * first deadline...
872 * I detected 3 cases, which could describe the possible behaviour of the
873 * scheduler,
874 * and I'll try to make them more clear:
875 *
876 * 1. Very conservative
877 * -when a blocked domain unblocks, it is allowed to start execution at
878 * the beginning of the next complete period
879 * (D..deadline, R..running, B..blocking/sleeping, U..unblocking/waking up
880 *
881 * DRRB_____D__U_____DRRRRR___D________ ...
882 *
883 * -this causes the domain to miss a period (and a deadlline)
884 * -doesn't disturb the schedule at all
885 * -deadlines keep occurring isochronously
886 *
887 * 2. Conservative Part 1: Short Unblocking
888 * -when a domain unblocks in the same period as it was blocked it
889 * unblocks and may consume the rest of it's original time-slice minus
890 * the time it was blocked
891 * (assume period=9, slice=5)
892 *
893 * DRB_UR___DRRRRR___D...
894 *
895 * -this also doesn't disturb scheduling, but might lead to the fact, that
896 * the domain can't finish it's workload in the period
897 * -in addition to that the domain can be treated prioritised when
898 * extratime is available
899 * -addition: experiments have shown that this may have a HUGE impact on
900 * performance of other domains, because it can lead to excessive context
901 * switches
902 *
903 * Part2: Long Unblocking
904 * Part 2a
905 * -it is obvious that such accounting of block time, applied when
906 * unblocking is happening in later periods, works fine as well
907 * -the domain is treated as if it would have been running since the start
908 * of its new period
909 *
910 * DRB______D___UR___D...
911 *
912 * Part 2b
913 * -if one needs the full slice in the next period, it is necessary to
914 * treat the unblocking time as the start of the new period, i.e. move
915 * the deadline further back (later)
916 * -this doesn't disturb scheduling as well, because for EDF periods can
917 * be treated as minimal inter-release times and scheduling stays
918 * correct, when deadlines are kept relative to the time the process
919 * unblocks
920 *
921 * DRB______D___URRRR___D...
922 * (D) <- old deadline was here
923 * -problem: deadlines don't occur isochronous anymore
924 * Part 2c (Improved Atropos design)
925 * -when a domain unblocks it is given a very short period (=latency hint)
926 * and slice length scaled accordingly
927 * -both rise again to the original value (e.g. get doubled every period)
928 *
929 * 3. Unconservative (i.e. incorrect)
930 * -to boost the performance of I/O dependent domains it would be possible
931 * to put the domain into the runnable queue immediately, and let it run
932 * for the remainder of the slice of the current period
933 * (or even worse: allocate a new full slice for the domain)
934 * -either behaviour can lead to missed deadlines in other domains as
935 * opposed to approaches 1,2a,2b
936 */
/*
 * Unblocking scheme "short unblocking with extra-time support": the domain
 * gives up its realtime execution for the rest of this period, but is
 * compensated with a priority in the L0 (penalty) extra-time distribution
 * proportional to the slice time lost due to blocking.
 */
static void unblock_short_extra_support(
    struct sedf_vcpu_info* inf, s_time_t now)
{
    s_time_t pen;

    /* No more realtime execution in this period! */
    inf->deadl_abs += inf->period;
    if ( likely(inf->block_abs) )
    {
        /* Treat blocked time as consumed by the domain. */
        /*inf->cputime += now - inf->block_abs;*/
        /* Penalty is the time the domain would have had
         * if it had continued to run. */
        pen = (inf->slice - inf->cputime);
        if ( pen < 0 )
            pen = 0;
        /* Accumulate all penalties over the periods... */
        /*inf->short_block_lost_tot += pen;*/
        /* ...or set penalty to the current value; not sure which one is
         * better, but this seems to work well. */
        inf->short_block_lost_tot = pen;

        if ( inf->short_block_lost_tot )
        {
            /* Lower score == higher priority; fixed point, 10 bits. */
            inf->score[0] = (inf->period << 10) /
                inf->short_block_lost_tot;
#ifdef SEDF_STATS
            inf->pen_extra_blocks++;
#endif
            if ( extraq_on(inf->vcpu, EXTRA_PEN_Q) )
                /* Remove domain for possible resorting! */
                extraq_del(inf->vcpu, EXTRA_PEN_Q);
            else
                /* Remember that we want to be on the penalty q
                 * so that we can continue when we (un-)block
                 * in penalty-extratime. */
                inf->status |= EXTRA_WANT_PEN_Q;

            /* (Re-)add domain to the penalty extraq. */
            extraq_add_sort_update(inf->vcpu, EXTRA_PEN_Q, 0);
        }
    }

    /* Give it a fresh slice in the next period! */
    inf->cputime = 0;
}
988 static void unblock_long_cons_b(struct sedf_vcpu_info* inf,s_time_t now)
989 {
990 /*Conservative 2b*/
991 /*Treat the unblocking time as a start of a new period */
992 inf->deadl_abs = now + inf->period;
993 inf->cputime = 0;
994 }
997 #define DOMAIN_EDF 1
998 #define DOMAIN_EXTRA_PEN 2
999 #define DOMAIN_EXTRA_UTIL 3
1000 #define DOMAIN_IDLE 4
1001 static inline int get_run_type(struct vcpu* d)
1003 struct sedf_vcpu_info* inf = EDOM_INFO(d);
1004 if (is_idle_vcpu(d))
1005 return DOMAIN_IDLE;
1006 if (inf->status & EXTRA_RUN_PEN)
1007 return DOMAIN_EXTRA_PEN;
1008 if (inf->status & EXTRA_RUN_UTIL)
1009 return DOMAIN_EXTRA_UTIL;
1010 return DOMAIN_EDF;
1014 /*Compares two domains in the relation of whether the one is allowed to
1015 interrupt the others execution.
1016 It returns true (!=0) if a switch to the other domain is good.
1017 Current Priority scheme is as follows:
1018 EDF > L0 (penalty based) extra-time >
1019 L1 (utilization) extra-time > idle-domain
1020 In the same class priorities are assigned as following:
1021 EDF: early deadline > late deadline
1022 L0 extra-time: lower score > higher score*/
1023 static inline int should_switch(struct vcpu *cur,
1024 struct vcpu *other,
1025 s_time_t now)
1027 struct sedf_vcpu_info *cur_inf, *other_inf;
1028 cur_inf = EDOM_INFO(cur);
1029 other_inf = EDOM_INFO(other);
1031 /* Check whether we need to make an earlier scheduling decision. */
1032 if ( PERIOD_BEGIN(other_inf) <
1033 CPU_INFO(other->processor)->current_slice_expires )
1034 return 1;
1036 /* No timing-based switches need to be taken into account here. */
1037 switch ( get_run_type(cur) )
1039 case DOMAIN_EDF:
1040 /* Do not interrupt a running EDF domain. */
1041 return 0;
1042 case DOMAIN_EXTRA_PEN:
1043 /* Check whether we also want the L0 ex-q with lower score. */
1044 return ((other_inf->status & EXTRA_WANT_PEN_Q) &&
1045 (other_inf->score[EXTRA_PEN_Q] <
1046 cur_inf->score[EXTRA_PEN_Q]));
1047 case DOMAIN_EXTRA_UTIL:
1048 /* Check whether we want the L0 extraq. Don't
1049 * switch if both domains want L1 extraq.
1050 */
1051 return !!(other_inf->status & EXTRA_WANT_PEN_Q);
1052 case DOMAIN_IDLE:
1053 return 1;
1056 return 1;
1059 void sedf_wake(struct vcpu *d)
1061 s_time_t now = NOW();
1062 struct sedf_vcpu_info* inf = EDOM_INFO(d);
1064 PRINT(3, "sedf_wake was called, domain-id %i.%i\n",d->domain->domain_id,
1065 d->vcpu_id);
1067 if ( unlikely(is_idle_vcpu(d)) )
1068 return;
1070 if ( unlikely(__task_on_queue(d)) )
1072 PRINT(3,"\tdomain %i.%i is already in some queue\n",
1073 d->domain->domain_id, d->vcpu_id);
1074 return;
1077 ASSERT(!sedf_runnable(d));
1078 inf->status &= ~SEDF_ASLEEP;
1079 ASSERT(!extraq_on(d, EXTRA_UTIL_Q));
1080 ASSERT(!extraq_on(d, EXTRA_PEN_Q));
1082 if ( unlikely(inf->deadl_abs == 0) )
1084 /*initial setup of the deadline*/
1085 inf->deadl_abs = now + inf->slice;
1088 PRINT(3, "waking up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
1089 "now= %"PRIu64")\n",
1090 d->domain->domain_id, d->vcpu_id, inf->deadl_abs, inf->period, now);
1092 #ifdef SEDF_STATS
1093 inf->block_tot++;
1094 #endif
1096 if ( unlikely(now < PERIOD_BEGIN(inf)) )
1098 PRINT(4,"extratime unblock\n");
1099 /* unblocking in extra-time! */
1100 if ( inf->status & EXTRA_WANT_PEN_Q )
1102 /*we have a domain that wants compensation
1103 for block penalty and did just block in
1104 its compensation time. Give it another
1105 chance!*/
1106 extraq_add_sort_update(d, EXTRA_PEN_Q, 0);
1108 extraq_check_add_unblocked(d, 0);
1110 else
1112 if ( now < inf->deadl_abs )
1114 PRINT(4,"short unblocking\n");
1115 /*short blocking*/
1116 #ifdef SEDF_STATS
1117 inf->short_block_tot++;
1118 #endif
1119 unblock_short_extra_support(inf, now);
1121 extraq_check_add_unblocked(d, 1);
1123 else
1125 PRINT(4,"long unblocking\n");
1126 /*long unblocking*/
1127 #ifdef SEDF_STATS
1128 inf->long_block_tot++;
1129 #endif
1130 unblock_long_cons_b(inf, now);
1132 extraq_check_add_unblocked(d, 1);
1136 PRINT(3, "woke up domain %i.%i (deadl= %"PRIu64" period= %"PRIu64
1137 "now= %"PRIu64")\n",
1138 d->domain->domain_id, d->vcpu_id, inf->deadl_abs,
1139 inf->period, now);
1141 if ( PERIOD_BEGIN(inf) > now )
1143 __add_to_waitqueue_sort(d);
1144 PRINT(3,"added to waitq\n");
1146 else
1148 __add_to_runqueue_sort(d);
1149 PRINT(3,"added to runq\n");
1152 #ifdef SEDF_STATS
1153 /*do some statistics here...*/
1154 if ( inf->block_abs != 0 )
1156 inf->block_time_tot += now - inf->block_abs;
1157 inf->penalty_time_tot +=
1158 PERIOD_BEGIN(inf) + inf->cputime - inf->block_abs;
1160 #endif
1162 /*sanity check: make sure each extra-aware domain IS on the util-q!*/
1163 ASSERT(IMPLY(inf->status & EXTRA_AWARE, extraq_on(d, EXTRA_UTIL_Q)));
1164 ASSERT(__task_on_queue(d));
1165 /*check whether the awakened task needs to invoke the do_schedule
1166 routine. Try to avoid unnecessary runs but:
1167 Save approximation: Always switch to scheduler!*/
1168 ASSERT(d->processor >= 0);
1169 ASSERT(d->processor < NR_CPUS);
1170 ASSERT(schedule_data[d->processor].curr);
1172 if ( should_switch(schedule_data[d->processor].curr, d, now) )
1173 cpu_raise_softirq(d->processor, SCHEDULE_SOFTIRQ);
1177 static int sedf_set_affinity(struct vcpu *v, cpumask_t *affinity)
1179 if ( v == current )
1180 return cpu_isset(v->processor, *affinity) ? 0 : -EBUSY;
1182 vcpu_pause(v);
1183 v->cpu_affinity = *affinity;
1184 v->processor = first_cpu(v->cpu_affinity);
1185 vcpu_unpause(v);
1187 return 0;
1191 /* Print a lot of useful information about a domains in the system */
1192 static void sedf_dump_domain(struct vcpu *d)
1194 printk("%i.%i has=%c ", d->domain->domain_id, d->vcpu_id,
1195 test_bit(_VCPUF_running, &d->vcpu_flags) ? 'T':'F');
1196 printk("p=%"PRIu64" sl=%"PRIu64" ddl=%"PRIu64" w=%hu"
1197 " sc=%i xtr(%s)=%"PRIu64" ew=%hu",
1198 EDOM_INFO(d)->period, EDOM_INFO(d)->slice, EDOM_INFO(d)->deadl_abs,
1199 EDOM_INFO(d)->weight,
1200 EDOM_INFO(d)->score[EXTRA_UTIL_Q],
1201 (EDOM_INFO(d)->status & EXTRA_AWARE) ? "yes" : "no",
1202 EDOM_INFO(d)->extra_time_tot, EDOM_INFO(d)->extraweight);
1204 #ifdef SEDF_STATS
1205 if ( EDOM_INFO(d)->block_time_tot != 0 )
1206 printf(" pen=%"PRIu64"%%", (EDOM_INFO(d)->penalty_time_tot * 100) /
1207 EDOM_INFO(d)->block_time_tot);
1208 if ( EDOM_INFO(d)->block_tot != 0 )
1209 printf("\n blks=%u sh=%u (%u%%) (shc=%u (%u%%) shex=%i "\
1210 "shexsl=%i) l=%u (%u%%) avg: b=%"PRIu64" p=%"PRIu64"",
1211 EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_block_tot,
1212 (EDOM_INFO(d)->short_block_tot * 100)
1213 / EDOM_INFO(d)->block_tot, EDOM_INFO(d)->short_cont,
1214 (EDOM_INFO(d)->short_cont * 100) / EDOM_INFO(d)->block_tot,
1215 EDOM_INFO(d)->pen_extra_blocks,
1216 EDOM_INFO(d)->pen_extra_slices,
1217 EDOM_INFO(d)->long_block_tot,
1218 (EDOM_INFO(d)->long_block_tot * 100) / EDOM_INFO(d)->block_tot,
1219 (EDOM_INFO(d)->block_time_tot) / EDOM_INFO(d)->block_tot,
1220 (EDOM_INFO(d)->penalty_time_tot) / EDOM_INFO(d)->block_tot);
1221 #endif
1222 printf("\n");
1226 /* dumps all domains on hte specified cpu */
1227 static void sedf_dump_cpu_state(int i)
1229 struct list_head *list, *queue, *tmp;
1230 struct sedf_vcpu_info *d_inf;
1231 struct domain *d;
1232 struct vcpu *ed;
1233 int loop = 0;
1235 printk("now=%"PRIu64"\n",NOW());
1236 queue = RUNQ(i);
1237 printk("RUNQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
1238 (unsigned long) queue->next, (unsigned long) queue->prev);
1239 list_for_each_safe ( list, tmp, queue )
1241 printk("%3d: ",loop++);
1242 d_inf = list_entry(list, struct sedf_vcpu_info, list);
1243 sedf_dump_domain(d_inf->vcpu);
1246 queue = WAITQ(i); loop = 0;
1247 printk("\nWAITQ rq %lx n: %lx, p: %lx\n", (unsigned long)queue,
1248 (unsigned long) queue->next, (unsigned long) queue->prev);
1249 list_for_each_safe ( list, tmp, queue )
1251 printk("%3d: ",loop++);
1252 d_inf = list_entry(list, struct sedf_vcpu_info, list);
1253 sedf_dump_domain(d_inf->vcpu);
1256 queue = EXTRAQ(i,EXTRA_PEN_Q); loop = 0;
1257 printk("\nEXTRAQ (penalty) rq %lx n: %lx, p: %lx\n",
1258 (unsigned long)queue, (unsigned long) queue->next,
1259 (unsigned long) queue->prev);
1260 list_for_each_safe ( list, tmp, queue )
1262 d_inf = list_entry(list, struct sedf_vcpu_info,
1263 extralist[EXTRA_PEN_Q]);
1264 printk("%3d: ",loop++);
1265 sedf_dump_domain(d_inf->vcpu);
1268 queue = EXTRAQ(i,EXTRA_UTIL_Q); loop = 0;
1269 printk("\nEXTRAQ (utilization) rq %lx n: %lx, p: %lx\n",
1270 (unsigned long)queue, (unsigned long) queue->next,
1271 (unsigned long) queue->prev);
1272 list_for_each_safe ( list, tmp, queue )
1274 d_inf = list_entry(list, struct sedf_vcpu_info,
1275 extralist[EXTRA_UTIL_Q]);
1276 printk("%3d: ",loop++);
1277 sedf_dump_domain(d_inf->vcpu);
1280 loop = 0;
1281 printk("\nnot on Q\n");
1283 for_each_domain ( d )
1285 for_each_vcpu(d, ed)
1287 if ( !__task_on_queue(ed) && (ed->processor == i) )
1289 printk("%3d: ",loop++);
1290 sedf_dump_domain(ed);
1297 /* Adjusts periods and slices of the domains accordingly to their weights. */
1298 static int sedf_adjust_weights(struct sched_adjdom_cmd *cmd)
1300 struct vcpu *p;
1301 struct domain *d;
1302 int sumw[NR_CPUS];
1303 s_time_t sumt[NR_CPUS];
1304 int cpu;
1306 for ( cpu = 0; cpu < NR_CPUS; cpu++ )
1308 sumw[cpu] = 0;
1309 sumt[cpu] = 0;
1312 /* Sum across all weights. */
1313 for_each_domain( d )
1315 for_each_vcpu( d, p )
1317 if ( EDOM_INFO(p)->weight )
1319 sumw[p->processor] += EDOM_INFO(p)->weight;
1321 else
1323 /*don't modify domains who don't have a weight, but sum
1324 up the time they need, projected to a WEIGHT_PERIOD,
1325 so that this time is not given to the weight-driven
1326 domains*/
1327 /*check for overflows*/
1328 ASSERT((WEIGHT_PERIOD < ULONG_MAX)
1329 && (EDOM_INFO(p)->slice_orig < ULONG_MAX));
1330 sumt[p->processor] +=
1331 (WEIGHT_PERIOD * EDOM_INFO(p)->slice_orig) /
1332 EDOM_INFO(p)->period_orig;
1337 /* Adjust all slices (and periods) to the new weight. */
1338 for_each_domain( d )
1340 for_each_vcpu ( d, p )
1342 if ( EDOM_INFO(p)->weight )
1344 EDOM_INFO(p)->period_orig =
1345 EDOM_INFO(p)->period = WEIGHT_PERIOD;
1346 EDOM_INFO(p)->slice_orig =
1347 EDOM_INFO(p)->slice =
1348 (EDOM_INFO(p)->weight *
1349 (WEIGHT_PERIOD - WEIGHT_SAFETY - sumt[p->processor])) /
1350 sumw[p->processor];
1355 return 0;
1359 /* set or fetch domain scheduling parameters */
1360 static int sedf_adjdom(struct domain *p, struct sched_adjdom_cmd *cmd)
1362 struct vcpu *v;
1364 PRINT(2,"sedf_adjdom was called, domain-id %i new period %"PRIu64" "
1365 "new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
1366 p->domain_id, cmd->u.sedf.period, cmd->u.sedf.slice,
1367 cmd->u.sedf.latency, (cmd->u.sedf.extratime)?"yes":"no");
1369 if ( cmd->direction == SCHED_INFO_PUT )
1371 /* Check for sane parameters. */
1372 if ( !cmd->u.sedf.period && !cmd->u.sedf.weight )
1373 return -EINVAL;
1374 if ( cmd->u.sedf.weight )
1376 if ( (cmd->u.sedf.extratime & EXTRA_AWARE) &&
1377 (!cmd->u.sedf.period) )
1379 /* Weight-driven domains with extratime only. */
1380 for_each_vcpu ( p, v )
1382 EDOM_INFO(v)->extraweight = cmd->u.sedf.weight;
1383 EDOM_INFO(v)->weight = 0;
1384 EDOM_INFO(v)->slice = 0;
1385 EDOM_INFO(v)->period = WEIGHT_PERIOD;
1388 else
1390 /* Weight-driven domains with real-time execution. */
1391 for_each_vcpu ( p, v )
1392 EDOM_INFO(v)->weight = cmd->u.sedf.weight;
1395 else
1397 /* Time-driven domains. */
1398 for_each_vcpu ( p, v )
1400 /*
1401 * Sanity checking: note that disabling extra weight requires
1402 * that we set a non-zero slice.
1403 */
1404 if ( (cmd->u.sedf.period > PERIOD_MAX) ||
1405 (cmd->u.sedf.period < PERIOD_MIN) ||
1406 (cmd->u.sedf.slice > cmd->u.sedf.period) ||
1407 (cmd->u.sedf.slice < SLICE_MIN) )
1408 return -EINVAL;
1409 EDOM_INFO(v)->weight = 0;
1410 EDOM_INFO(v)->extraweight = 0;
1411 EDOM_INFO(v)->period_orig =
1412 EDOM_INFO(v)->period = cmd->u.sedf.period;
1413 EDOM_INFO(v)->slice_orig =
1414 EDOM_INFO(v)->slice = cmd->u.sedf.slice;
1418 if ( sedf_adjust_weights(cmd) )
1419 return -EINVAL;
1421 for_each_vcpu ( p, v )
1423 EDOM_INFO(v)->status =
1424 (EDOM_INFO(v)->status &
1425 ~EXTRA_AWARE) | (cmd->u.sedf.extratime & EXTRA_AWARE);
1426 EDOM_INFO(v)->latency = cmd->u.sedf.latency;
1427 extraq_check(v);
1430 else if ( cmd->direction == SCHED_INFO_GET )
1432 cmd->u.sedf.period = EDOM_INFO(p->vcpu[0])->period;
1433 cmd->u.sedf.slice = EDOM_INFO(p->vcpu[0])->slice;
1434 cmd->u.sedf.extratime = EDOM_INFO(p->vcpu[0])->status & EXTRA_AWARE;
1435 cmd->u.sedf.latency = EDOM_INFO(p->vcpu[0])->latency;
1436 cmd->u.sedf.weight = EDOM_INFO(p->vcpu[0])->weight;
1439 PRINT(2,"sedf_adjdom_finished\n");
1440 return 0;
1443 struct scheduler sched_sedf_def = {
1444 .name = "Simple EDF Scheduler",
1445 .opt_name = "sedf",
1446 .sched_id = SCHED_SEDF,
1448 .init_vcpu = sedf_init_vcpu,
1449 .destroy_domain = sedf_destroy_domain,
1451 .do_schedule = sedf_do_schedule,
1452 .dump_cpu_state = sedf_dump_cpu_state,
1453 .sleep = sedf_sleep,
1454 .wake = sedf_wake,
1455 .adjdom = sedf_adjdom,
1456 .set_affinity = sedf_set_affinity
1457 };
1459 /*
1460 * Local variables:
1461 * mode: C
1462 * c-set-style: "BSD"
1463 * c-basic-offset: 4
1464 * tab-width: 4
1465 * indent-tabs-mode: nil
1466 * End:
1467 */