ia64/xen-unstable

view tools/xenmon/xenbaked.c @ 8740:3d7ea7972b39

Update patches for linux 2.6.15.

Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Thu Feb 02 17:16:00 2006 +0000 (2006-02-02)
parents 394390f6ff85
children a4dc14edd56b
line source
1 /******************************************************************************
2 * tools/xenbaked.c
3 *
4 * Tool for collecting raw trace buffer data from Xen and
5 * performing some accumulation operations and other processing
6 * on it.
7 *
8 * Copyright (C) 2004 by Intel Research Cambridge
9 * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
10 *
11 * Authors: Diwaker Gupta, diwaker.gupta@hp.com
12 * Rob Gardner, rob.gardner@hp.com
 * Lucy Cherkasova, lucy.cherkasova@hp.com
14 * Much code based on xentrace, authored by Mark Williamson, mark.a.williamson@intel.com
15 * Date: November, 2005
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; under version 2 of the License.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 */
31 #include <time.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <sys/mman.h>
35 #include <sys/stat.h>
36 #include <sys/types.h>
37 #include <fcntl.h>
38 #include <unistd.h>
39 #include <errno.h>
40 #include <argp.h>
41 #include <signal.h>
42 #include <xenctrl.h>
43 #include <xen/xen.h>
44 #include <string.h>
46 #include "xc_private.h"
47 typedef struct { int counter; } atomic_t;
48 #define _atomic_read(v) ((v).counter)
50 #include <xen/trace.h>
51 #include "xenbaked.h"
53 extern FILE *stderr;
55 /***** Compile time configuration of defaults ********************************/
57 /* when we've got more records than this waiting, we log it to the output */
58 #define NEW_DATA_THRESH 1
60 /* sleep for this long (milliseconds) between checking the trace buffers */
61 #define POLL_SLEEP_MILLIS 100
63 /* Size of time period represented by each sample */
64 #define MS_PER_SAMPLE 100
66 /* CPU Frequency */
67 #define MHZ
68 #define CPU_FREQ 2660 MHZ
70 /***** The code **************************************************************/
72 typedef struct settings_st {
73 char *outfile;
74 struct timespec poll_sleep;
75 unsigned long new_data_thresh;
76 unsigned long ms_per_sample;
77 double cpu_freq;
78 } settings_t;
80 settings_t opts;
82 int interrupted = 0; /* gets set if we get a SIGHUP */
83 int rec_count = 0;
84 time_t start_time;
85 int dom0_flips = 0;
87 _new_qos_data *new_qos;
88 _new_qos_data **cpu_qos_data;
91 #define ID(X) ((X>NDOMAINS-1)?(NDOMAINS-1):X)
93 // array of currently running domains, indexed by cpu
94 int *running = NULL;
96 // number of cpu's on this platform
97 int NCPU = 0;
100 void init_current(int ncpu)
101 {
102 running = calloc(ncpu, sizeof(int));
103 NCPU = ncpu;
104 printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpu's");
105 }
107 int is_current(int domain, int cpu)
108 {
109 // int i;
111 // for (i=0; i<NCPU; i++)
112 if (running[cpu] == domain)
113 return 1;
114 return 0;
115 }
118 // return the domain that's currently running on the given cpu
119 int current(int cpu)
120 {
121 return running[cpu];
122 }
124 void set_current(int cpu, int domain)
125 {
126 running[cpu] = domain;
127 }
131 void close_handler(int signal)
132 {
133 interrupted = 1;
134 }
#if 0
/* Debug helper (compiled out): print the raw fields of one trace record. */
void dump_record(int cpu, struct t_rec *x)
{
    printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n",
           cpu, x->cycles, x->event, x->data[0]);
}
#endif
/**
 * millis_to_timespec - convert a time in milliseconds to a struct timespec
 * @millis: time interval in milliseconds
 *
 * Returns the interval split into whole seconds plus nanoseconds.
 */
struct timespec millis_to_timespec(unsigned long millis)
{
    struct timespec spec;

    spec.tv_sec = millis / 1000;
    /* 1 ms = 1,000,000 ns.  The previous code multiplied by only 1000,
     * storing microseconds in tv_nsec and making every poll sleep 1000x
     * shorter than requested. */
    spec.tv_nsec = (millis % 1000) * 1000000;

    return spec;
}
/* One row of the event-statistics table: a trace event id, its display
 * name, and a running count of occurrences. */
typedef struct
{
    int event_count;   /* occurrences seen so far                 */
    int event_id;      /* TRC_* identifier from <xen/trace.h>     */
    char *text;        /* human-readable name for dump_stats()    */
} stat_map_t;

/* Entry 0 ("Other") accumulates any event id not matched by a later row;
 * the all-zero last entry terminates the table. */
stat_map_t stat_map[] = {
    { 0, 0, "Other" },
    { 0, TRC_SCHED_DOM_ADD, "Add Domain" },
    { 0, TRC_SCHED_DOM_REM, "Remove Domain" },
    { 0, TRC_SCHED_SLEEP, "Sleep" },
    { 0, TRC_SCHED_WAKE, "Wake" },
    { 0, TRC_SCHED_BLOCK, "Block" },
    { 0, TRC_SCHED_SWITCH, "Switch" },
    { 0, TRC_SCHED_S_TIMER_FN, "Timer Func"},
    { 0, TRC_SCHED_SWITCH_INFPREV, "Switch Prev" },
    { 0, TRC_SCHED_SWITCH_INFNEXT, "Switch Next" },
    { 0, TRC_MEM_PAGE_GRANT_MAP, "Page Map" },
    { 0, TRC_MEM_PAGE_GRANT_UNMAP, "Page Unmap" },
    { 0, TRC_MEM_PAGE_GRANT_TRANSFER, "Page Transfer" },
    { 0, 0, 0 }
};
/* Sanity check (compiled out): report, per cpu, what fraction of the last
 * second's worth of samples was accounted to ns_gotten across all domains. */
void check_gotten_sum(void)
{
#if 0
    uint64_t sum, ns;
    extern uint64_t total_ns_gotten(uint64_t*);
    double percent;
    int i;

    for (i=0; i<NCPU; i++) {
        new_qos = cpu_qos_data[i];
        ns = billion;
        sum = total_ns_gotten(&ns);

        printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
               i, sum, ns);
        percent = (double) sum;
        percent = (100.0*percent) / (double)ns;
        printf(" ==> ns_gotten = %7.3f%%\n", percent);
    }
#endif
}
208 void dump_stats(void)
209 {
210 stat_map_t *smt = stat_map;
211 time_t end_time, run_time;
213 time(&end_time);
215 run_time = end_time - start_time;
217 printf("Event counts:\n");
218 while (smt->text != NULL) {
219 printf("%08d\t%s\n", smt->event_count, smt->text);
220 smt++;
221 }
223 printf("processed %d total records in %d seconds (%ld per second)\n",
224 rec_count, (int)run_time, rec_count/run_time);
226 check_gotten_sum();
227 }
229 void log_event(int event_id)
230 {
231 stat_map_t *smt = stat_map;
233 // printf("event_id = 0x%x\n", event_id);
235 while (smt->text != NULL) {
236 if (smt->event_id == event_id) {
237 smt->event_count++;
238 return;
239 }
240 smt++;
241 }
242 if (smt->text == NULL)
243 stat_map[0].event_count++; // other
244 }
248 /**
249 * get_tbufs - get pointer to and size of the trace buffers
250 * @mfn: location to store mfn of the trace buffers to
251 * @size: location to store the size of a trace buffer to
252 *
253 * Gets the machine address of the trace pointer area and the size of the
254 * per CPU buffers.
255 */
256 void get_tbufs(unsigned long *mfn, unsigned long *size)
257 {
258 int ret;
259 dom0_op_t op; /* dom0 op we'll build */
260 int xc_handle = xc_interface_open(); /* for accessing control interface */
262 op.cmd = DOM0_TBUFCONTROL;
263 op.interface_version = DOM0_INTERFACE_VERSION;
264 op.u.tbufcontrol.op = DOM0_TBUF_GET_INFO;
266 ret = do_dom0_op(xc_handle, &op);
268 xc_interface_close(xc_handle);
270 if ( ret != 0 )
271 {
272 PERROR("Failure to get trace buffer pointer from Xen");
273 exit(EXIT_FAILURE);
274 }
276 *mfn = op.u.tbufcontrol.buffer_mfn;
277 *size = op.u.tbufcontrol.size;
278 }
/**
 * map_tbufs - memory map Xen trace buffers into user space
 * @tbufs_mfn: mfn of the trace buffers
 * @num: number of trace buffers to map
 * @size: size of each trace buffer
 *
 * Maps all @num per-cpu trace buffers into this process as one contiguous
 * read/write region and returns its base address.  Exits on failure.
 */
struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
                        unsigned long size)
{
    struct t_buf *base;
    int xc_handle = xc_interface_open(); /* fd for /proc/xen/privcmd */

    if ( xc_handle < 0 )
    {
        PERROR("Open /proc/xen/privcmd when mapping trace buffers\n");
        exit(EXIT_FAILURE);
    }

    base = xc_map_foreign_range(xc_handle, 0 /* Dom 0 ID */,
                                size * num, PROT_READ | PROT_WRITE,
                                tbufs_mfn);

    xc_interface_close(xc_handle);

    if ( base == 0 )
    {
        PERROR("Failed to mmap trace buffers");
        exit(EXIT_FAILURE);
    }

    return base;
}
/**
 * init_bufs_ptrs - initialises an array of pointers to the trace buffers
 * @bufs_mapped: the userspace address where the trace buffers are mapped
 * @num: number of trace buffers
 * @size: trace buffer size
 *
 * Returns a heap-allocated array (caller frees) in which entry i points
 * at the i-th trace buffer inside the mapped region.
 */
struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
                              unsigned long size)
{
    unsigned int i; /* unsigned to match @num; avoids sign-compare */
    struct t_buf **user_ptrs;

    /* no cast on calloc in C */
    user_ptrs = calloc(num, sizeof(struct t_buf *));
    if ( user_ptrs == NULL )
    {
        PERROR( "Failed to allocate memory for buffer pointers\n");
        exit(EXIT_FAILURE);
    }

    /* buffers are laid out back to back, each 'size' bytes long */
    for ( i = 0; i < num; i++ )
        user_ptrs[i] = (struct t_buf *)((unsigned long)bufs_mapped + size * i);

    return user_ptrs;
}
348 /**
349 * init_rec_ptrs - initialises data area pointers to locations in user space
350 * @tbufs_mfn: base mfn of the trace buffer area
351 * @tbufs_mapped: user virtual address of base of trace buffer area
352 * @meta: array of user-space pointers to struct t_buf's of metadata
353 * @num: number of trace buffers
354 *
355 * Initialises data area pointers to the locations that data areas have been
356 * mapped in user space. Note that the trace buffer metadata contains machine
357 * pointers - the array returned allows more convenient access to them.
358 */
359 struct t_rec **init_rec_ptrs(struct t_buf **meta, unsigned int num)
360 {
361 int i;
362 struct t_rec **data;
364 data = calloc(num, sizeof(struct t_rec *));
365 if ( data == NULL )
366 {
367 PERROR("Failed to allocate memory for data pointers\n");
368 exit(EXIT_FAILURE);
369 }
371 for ( i = 0; i < num; i++ )
372 data[i] = (struct t_rec *)(meta[i] + 1);
374 return data;
375 }
379 /**
380 * get_num_cpus - get the number of logical CPUs
381 */
382 unsigned int get_num_cpus()
383 {
384 dom0_op_t op;
385 int xc_handle = xc_interface_open();
386 int ret;
388 op.cmd = DOM0_PHYSINFO;
389 op.interface_version = DOM0_INTERFACE_VERSION;
391 ret = xc_dom0_op(xc_handle, &op);
393 if ( ret != 0 )
394 {
395 PERROR("Failure to get logical CPU count from Xen");
396 exit(EXIT_FAILURE);
397 }
399 xc_interface_close(xc_handle);
400 opts.cpu_freq = (double)op.u.physinfo.cpu_khz/1000.0;
402 return (op.u.physinfo.threads_per_core *
403 op.u.physinfo.cores_per_socket *
404 op.u.physinfo.sockets_per_node *
405 op.u.physinfo.nr_nodes);
406 }
/**
 * monitor_tbufs - monitor the contents of tbufs
 *
 * Main loop: maps the Xen trace buffers, then repeatedly drains each
 * per-cpu buffer through process_record() until a signal sets
 * 'interrupted'.  Always returns 0.
 */
int monitor_tbufs()
{
    int i;
    extern void process_record(int, struct t_rec *);
    extern void alloc_qos_data(int ncpu);

    void *tbufs_mapped;          /* pointer to where the tbufs are mapped    */
    struct t_buf **meta;         /* pointers to the trace buffer metadata    */
    struct t_rec **data;         /* pointers to the trace buffer data areas
                                  * where they are mapped into user space.   */
    unsigned long tbufs_mfn;     /* mfn of the tbufs                         */
    unsigned int num;            /* number of trace buffers / logical CPUS   */
    unsigned long size;          /* size of a single trace buffer            */

    int size_in_recs;            /* records per buffer, after the header     */

    /* get number of logical CPUs (and therefore number of trace buffers) */
    num = get_num_cpus();

    init_current(num);
    alloc_qos_data(num);

    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);

    /* setup access to trace buffers */
    get_tbufs(&tbufs_mfn, &size);

    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);

    /* each buffer holds a struct t_buf header followed by whole records */
    size_in_recs = (size - sizeof(struct t_buf)) / sizeof(struct t_rec);

    /* build arrays of convenience ptrs */
    meta  = init_bufs_ptrs (tbufs_mapped, num, size);
    data  = init_rec_ptrs(meta, num);

    /* now, scan buffers for events: a classic single-producer (Xen) /
     * single-consumer ring per cpu, indexed modulo size_in_recs */
    while ( !interrupted )
    {
        for ( i = 0; ( i < num ) && !interrupted; i++ )
            while ( meta[i]->cons != meta[i]->prod )
            {
                rmb(); /* read prod, then read item. */
                process_record(i, data[i] + meta[i]->cons % size_in_recs);
                mb(); /* read item, then update cons. */
                meta[i]->cons++;
            }

        nanosleep(&opts.poll_sleep, NULL);
    }

    /* cleanup */
    free(meta);
    free(data);
    /* don't need to munmap - cleanup is automatic */
    return 0;
}
475 /******************************************************************************
476 * Various declarations / definitions GNU argp needs to do its work
477 *****************************************************************************/
480 /* command parser for GNU argp - see GNU docs for more info */
481 error_t cmd_parser(int key, char *arg, struct argp_state *state)
482 {
483 settings_t *setup = (settings_t *)state->input;
485 switch ( key )
486 {
487 case 't': /* set new records threshold for logging */
488 {
489 char *inval;
490 setup->new_data_thresh = strtol(arg, &inval, 0);
491 if ( inval == arg )
492 argp_usage(state);
493 }
494 break;
496 case 's': /* set sleep time (given in milliseconds) */
497 {
498 char *inval;
499 setup->poll_sleep = millis_to_timespec(strtol(arg, &inval, 0));
500 if ( inval == arg )
501 argp_usage(state);
502 }
503 break;
505 case 'm': /* set ms_per_sample */
506 {
507 char *inval;
508 setup->ms_per_sample = strtol(arg, &inval, 0);
509 if ( inval == arg )
510 argp_usage(state);
511 }
512 break;
514 case ARGP_KEY_ARG:
515 {
516 if ( state->arg_num == 0 )
517 setup->outfile = arg;
518 else
519 argp_usage(state);
520 }
521 break;
523 default:
524 return ARGP_ERR_UNKNOWN;
525 }
527 return 0;
528 }
530 #define SHARED_MEM_FILE "/tmp/xenq-shm"
531 void alloc_qos_data(int ncpu)
532 {
533 int i, n, pgsize, off=0;
534 char *dummy;
535 int qos_fd;
536 void advance_next_datapoint(uint64_t);
538 cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));
541 qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
542 if (qos_fd < 0) {
543 PERROR(SHARED_MEM_FILE);
544 exit(2);
545 }
546 pgsize = getpagesize();
547 dummy = malloc(pgsize);
549 for (n=0; n<ncpu; n++) {
551 for (i=0; i<sizeof(_new_qos_data); i=i+pgsize)
552 write(qos_fd, dummy, pgsize);
554 new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data), PROT_READ|PROT_WRITE,
555 MAP_SHARED, qos_fd, off);
556 off += i;
557 if (new_qos == NULL) {
558 PERROR("mmap");
559 exit(3);
560 }
561 // printf("new_qos = %p\n", new_qos);
562 memset(new_qos, 0, sizeof(_new_qos_data));
563 new_qos->next_datapoint = 0;
564 advance_next_datapoint(0);
565 new_qos->structlen = i;
566 new_qos->ncpu = ncpu;
567 // printf("structlen = 0x%x\n", i);
568 cpu_qos_data[n] = new_qos;
569 }
570 free(dummy);
571 new_qos = NULL;
572 }
/* stringify the numeric defaults so they can appear in help text */
#define xstr(x) str(x)
#define str(x) #x

/* Command-line options understood by xenbaked (GNU argp table). */
const struct argp_option cmd_opts[] =
{
    { .name = "log-thresh", .key='t', .arg="l",
      .doc =
      "Set number, l, of new records required to trigger a write to output "
      "(default " xstr(NEW_DATA_THRESH) ")." },

    { .name = "poll-sleep", .key='s', .arg="p",
      .doc =
      "Set sleep time, p, in milliseconds between polling the trace buffer "
      "for new data (default " xstr(POLL_SLEEP_MILLIS) ")." },

    { .name = "ms_per_sample", .key='m', .arg="MS",
      .doc =
      "Specify the number of milliseconds per sample "
      " (default " xstr(MS_PER_SAMPLE) ")." },

    {0}
};

/* Top-level argp definition wiring the option table to cmd_parser(). */
const struct argp parser_def =
{
    .options = cmd_opts,
    .parser = cmd_parser,
    .doc =
    "Tool to capture and partially process Xen trace buffer data"
    "\v"
    "This tool is used to capture trace buffer data from Xen. The data is "
    "saved in a shared memory structure to be further processed by xenmon."
};

/* strings reported by argp's built-in --version / bug-address handling */
const char *argp_program_version = "xenbaked v1.3";
const char *argp_program_bug_address = "<rob.gardner@hp.com>";
/* Entry point: parse options, install signal handlers, run the trace
 * monitor loop, then dump statistics and flush the shared-memory data. */
int main(int argc, char **argv)
{
    int ret;
    struct sigaction act;

    time(&start_time);

    /* defaults; overridden by the command line via argp below */
    opts.outfile = 0;
    opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
    opts.new_data_thresh = NEW_DATA_THRESH;
    opts.ms_per_sample = MS_PER_SAMPLE;
    opts.cpu_freq = CPU_FREQ;

    argp_parse(&parser_def, argc, argv, 0, 0, &opts);
    fprintf(stderr, "ms_per_sample = %ld\n", opts.ms_per_sample);

    /* ensure that if we get a signal, we'll do cleanup, then exit */
    act.sa_handler = close_handler;
    act.sa_flags = 0;
    sigemptyset(&act.sa_mask);
    sigaction(SIGHUP, &act, NULL);
    sigaction(SIGTERM, &act, NULL);
    sigaction(SIGINT, &act, NULL);

    ret = monitor_tbufs();

    dump_stats();
    /* NOTE(review): alloc_qos_data() and process_record() both reset
     * new_qos to NULL when done, so new_qos may well be NULL here and
     * this msync a silent no-op -- confirm whether each cpu_qos_data[i]
     * region should be synced instead. */
    msync(new_qos, sizeof(_new_qos_data), MS_SYNC);

    return ret;
}
647 int domain_runnable(int domid)
648 {
649 return new_qos->domain_info[ID(domid)].runnable;
650 }
// Fold any in-progress blocked interval for @domid into the current
// datapoint's ns_blocked, then restart (or clear) the blocked timer
// depending on whether the domain is runnable again.
void update_blocked_time(int domid, uint64_t now)
{
    uint64_t t_blocked;
    int id = ID(domid);

    // a zero blocked_start_time means "not currently timing a block"
    if (new_qos->domain_info[id].blocked_start_time != 0) {
        if (now >= new_qos->domain_info[id].blocked_start_time)
            t_blocked = now - new_qos->domain_info[id].blocked_start_time;
        else
            // timestamp counter wrapped around since blocking began
            t_blocked = now + (~0ULL - new_qos->domain_info[id].blocked_start_time);
        new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
    }

    // runnable again: stop timing; still blocked: restart interval at 'now'
    if (domain_runnable(id))
        new_qos->domain_info[id].blocked_start_time = 0;
    else
        new_qos->domain_info[id].blocked_start_time = now;
}
673 // advance to next datapoint for all domains
674 void advance_next_datapoint(uint64_t now)
675 {
676 int new, old, didx;
678 old = new_qos->next_datapoint;
679 new = QOS_INCR(old);
680 new_qos->next_datapoint = new;
681 // memset(&new_qos->qdata[new], 0, sizeof(uint64_t)*(2+5*NDOMAINS));
682 for (didx = 0; didx < NDOMAINS; didx++) {
683 new_qos->qdata[new].ns_gotten[didx] = 0;
684 new_qos->qdata[new].ns_allocated[didx] = 0;
685 new_qos->qdata[new].ns_waiting[didx] = 0;
686 new_qos->qdata[new].ns_blocked[didx] = 0;
687 new_qos->qdata[new].switchin_count[didx] = 0;
688 new_qos->qdata[new].io_count[didx] = 0;
689 }
690 new_qos->qdata[new].ns_passed = 0;
691 new_qos->qdata[new].lost_records = 0;
692 new_qos->qdata[new].flip_free_periods = 0;
694 new_qos->qdata[new].timestamp = now;
695 }
// Core per-domain accounting step: reconcile @now against the domain's
// last_update_time (tolerating small cross-cpu timestamp skew and full
// counter wraparound), credit on-cpu run time if the domain is current,
// and advance the datapoint's ns_passed clock.
void qos_update_thread(int cpu, int domid, uint64_t now)
{
    int n, id;
    uint64_t last_update_time, start;
    int64_t time_since_update, run_time = 0;

    id = ID(domid);

    n = new_qos->next_datapoint;
    last_update_time = new_qos->domain_info[id].last_update_time;

    time_since_update = now - last_update_time;

    if (time_since_update < 0) {
        // what happened here? either a timestamp wraparound, or more likely,
        // a slight inconsistency among timestamps from various cpu's
        if (-time_since_update < billion) {
            // fairly small difference, let's just adjust 'now' to be a little
            // beyond last_update_time
            time_since_update = -time_since_update;
        }
        else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
            // difference is huge, must be a wraparound
            // last_update time should be "near" ~0ULL,
            // and now should be "near" 0
            time_since_update = now + (~0ULL - last_update_time);
            printf("time wraparound\n");
        }
        else {
            // none of the above, may be an out of order record
            // no good solution, just ignore and update again later
            return;
        }
    }

    new_qos->domain_info[id].last_update_time = now;

    // only credit run time if the domain was runnable at the previous
    // update AND is the one currently on this cpu
    if (new_qos->domain_info[id].runnable_at_last_update && is_current(domid, cpu)) {
        start = new_qos->domain_info[id].start_time;
        if (start > now) { // wrapped around
            run_time = now + (~0ULL - start);
            printf("warning: start > now\n");
        }
        else
            run_time = now - start;
        new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
        new_qos->domain_info[id].start_time = now;
        new_qos->domain_info[id].ns_since_boot += time_since_update;
#if 1
        new_qos->qdata[n].ns_gotten[id] += run_time;
        if (domid == 0 && cpu == 1)
            printf("adding run time for dom0 on cpu1\r\n");
#endif
    }

    new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);

    update_blocked_time(domid, now);

    // how much time passed since this datapoint was updated?
    if (now >= new_qos->qdata[n].timestamp) {
        // all is right with the world, time is increasing
        new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
    }
    else {
        // time wrapped around
        // NOTE(review): the wrapped interval is deliberately dropped here
        // rather than accumulated -- confirm this is intended.
    }
    new_qos->qdata[n].timestamp = now;
}
774 // called by dump routines to update all structures
775 void qos_update_all(uint64_t now, int cpu)
776 {
777 int i;
779 for (i=0; i<NDOMAINS; i++)
780 if (new_qos->domain_info[i].in_use)
781 qos_update_thread(cpu, i, now);
782 }
785 void qos_update_thread_stats(int cpu, int domid, uint64_t now)
786 {
787 if (new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample)) {
788 qos_update_all(now, cpu);
789 advance_next_datapoint(now);
790 return;
791 }
792 qos_update_thread(cpu, domid, now);
793 }
796 void qos_init_domain(int cpu, int domid, uint64_t now)
797 {
798 int i, id;
800 id = ID(domid);
802 if (new_qos->domain_info[id].in_use)
803 return;
806 memset(&new_qos->domain_info[id], 0, sizeof(_domain_info));
807 new_qos->domain_info[id].last_update_time = now;
808 // runnable_start_time[id] = 0;
809 new_qos->domain_info[id].runnable_start_time = 0; // invalidate
810 new_qos->domain_info[id].in_use = 1;
811 new_qos->domain_info[id].blocked_start_time = 0;
812 new_qos->domain_info[id].id = id;
813 if (domid == IDLE_DOMAIN_ID)
814 sprintf(new_qos->domain_info[id].name, "Idle Task%d", cpu);
815 else
816 sprintf(new_qos->domain_info[id].name, "Domain#%d", domid);
818 for (i=0; i<NSAMPLES; i++) {
819 new_qos->qdata[i].ns_gotten[id] = 0;
820 new_qos->qdata[i].ns_allocated[id] = 0;
821 new_qos->qdata[i].ns_waiting[id] = 0;
822 new_qos->qdata[i].ns_blocked[id] = 0;
823 new_qos->qdata[i].switchin_count[id] = 0;
824 new_qos->qdata[i].io_count[id] = 0;
825 }
826 }
829 // called when a new thread gets the cpu
830 void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc, unsigned long ns_waited)
831 {
832 int id = ID(domid);
834 new_qos->domain_info[id].runnable = 1;
835 update_blocked_time(domid, now);
836 new_qos->domain_info[id].blocked_start_time = 0; // invalidate
837 new_qos->domain_info[id].runnable_start_time = 0; // invalidate
838 //runnable_start_time[id] = 0;
840 new_qos->domain_info[id].start_time = now;
841 new_qos->qdata[new_qos->next_datapoint].switchin_count[id]++;
842 new_qos->qdata[new_qos->next_datapoint].ns_allocated[id] += ns_alloc;
843 new_qos->qdata[new_qos->next_datapoint].ns_waiting[id] += ns_waited;
844 qos_update_thread_stats(cpu, domid, now);
845 set_current(cpu, id);
847 // count up page flips for dom0 execution
848 if (id == 0)
849 dom0_flips = 0;
850 }
// called when the current thread is taken off the cpu, having received
// @gotten ns of cpu time since it switched in
void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
{
    int id = ID(domid);
    int n;

    // not fatal: trace records from different cpus can arrive out of order
    if (!is_current(id, cpu)) {
        // printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
    }

    // diagnostics: a zero or tiny 'gotten' suggests suspect scheduler data
    if (gotten == 0) {
        printf("gotten==0 in qos_switchout(domid=%d)\n", domid);
    }

    if (gotten < 100) {
        printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);
    }

    n = new_qos->next_datapoint;
#if 0
    new_qos->qdata[n].ns_gotten[id] += gotten;
    if (gotten > new_qos->qdata[n].ns_passed)
        printf("inconsistency #257, diff = %lld\n",
               gotten - new_qos->qdata[n].ns_passed );
#endif
    new_qos->domain_info[id].ns_oncpu_since_boot += gotten;
    new_qos->domain_info[id].runnable_start_time = now;
    qos_update_thread_stats(cpu, id, now);

    // a "flip-free period" is a dom0 residency during which no page
    // flips were counted (dom0_flips is reset at dom0 switch-in)
    if (id == 0)
        if (dom0_flips == 0)
            new_qos->qdata[n].flip_free_periods++;
}
889 // called when domain is put to sleep, may also be called
890 // when thread is already asleep
891 void qos_state_sleeping(int cpu, int domid, uint64_t now)
892 {
893 int id = ID(domid);
895 if (!domain_runnable(id)) // double call?
896 return;
898 new_qos->domain_info[id].runnable = 0;
899 new_qos->domain_info[id].blocked_start_time = now;
900 new_qos->domain_info[id].runnable_start_time = 0; // invalidate
901 // runnable_start_time[id] = 0; // invalidate
902 qos_update_thread_stats(cpu, domid, now);
903 }
907 void qos_kill_thread(int domid)
908 {
909 new_qos->domain_info[ID(domid)].in_use = 0;
910 }
913 // called when thread becomes runnable, may also be called
914 // when thread is already runnable
915 void qos_state_runnable(int cpu, int domid, uint64_t now)
916 {
917 int id = ID(domid);
919 if (domain_runnable(id)) // double call?
920 return;
921 new_qos->domain_info[id].runnable = 1;
922 update_blocked_time(domid, now);
924 qos_update_thread_stats(cpu, domid, now);
926 new_qos->domain_info[id].blocked_start_time = 0; /* invalidate */
927 new_qos->domain_info[id].runnable_start_time = now;
928 // runnable_start_time[id] = now;
929 }
932 void qos_count_packets(domid_t domid, uint64_t now)
933 {
934 int i, id = ID(domid);
935 _new_qos_data *cpu_data;
937 for (i=0; i<NCPU; i++) {
938 cpu_data = cpu_qos_data[i];
939 if (cpu_data->domain_info[id].in_use) {
940 cpu_data->qdata[cpu_data->next_datapoint].io_count[id]++;
941 }
942 }
944 new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
945 dom0_flips++;
946 }
949 int domain_ok(int cpu, int domid, uint64_t now)
950 {
951 if (domid == IDLE_DOMAIN_ID)
952 domid = NDOMAINS-1;
953 if (domid < 0 || domid >= NDOMAINS) {
954 printf("bad domain id: %d\n", domid);
955 return 0;
956 }
957 if (new_qos->domain_info[domid].in_use == 0)
958 qos_init_domain(cpu, domid, now);
959 return 1;
960 }
963 void process_record(int cpu, struct t_rec *r)
964 {
965 uint64_t now;
968 new_qos = cpu_qos_data[cpu];
970 rec_count++;
972 now = ((double)r->cycles) / (opts.cpu_freq / 1000.0);
974 log_event(r->event);
976 switch (r->event) {
978 case TRC_SCHED_SWITCH_INFPREV:
979 // domain data[0] just switched out and received data[1] ns of cpu time
980 if (domain_ok(cpu, r->data[0], now))
981 qos_switch_out(cpu, r->data[0], now, r->data[1]);
982 // printf("ns_gotten %ld\n", r->data[1]);
983 break;
985 case TRC_SCHED_SWITCH_INFNEXT:
986 // domain data[0] just switched in and
987 // waited data[1] ns, and was allocated data[2] ns of cpu time
988 if (domain_ok(cpu, r->data[0], now))
989 qos_switch_in(cpu, r->data[0], now, r->data[2], r->data[1]);
990 break;
992 case TRC_SCHED_DOM_ADD:
993 if (domain_ok(cpu, r->data[0], now))
994 qos_init_domain(cpu, r->data[0], now);
995 break;
997 case TRC_SCHED_DOM_REM:
998 if (domain_ok(cpu, r->data[0], now))
999 qos_kill_thread(r->data[0]);
1000 break;
1002 case TRC_SCHED_SLEEP:
1003 if (domain_ok(cpu, r->data[0], now))
1004 qos_state_sleeping(cpu, r->data[0], now);
1005 break;
1007 case TRC_SCHED_WAKE:
1008 if (domain_ok(cpu, r->data[0], now))
1009 qos_state_runnable(cpu, r->data[0], now);
1010 break;
1012 case TRC_SCHED_BLOCK:
1013 if (domain_ok(cpu, r->data[0], now))
1014 qos_state_sleeping(cpu, r->data[0], now);
1015 break;
1017 case TRC_MEM_PAGE_GRANT_TRANSFER:
1018 if (domain_ok(cpu, r->data[0], now))
1019 qos_count_packets(r->data[0], now);
1020 break;
1022 default:
1023 break;
1025 new_qos = NULL;