ia64/xen-unstable

view tools/xenmon/xenbaked.c @ 17838:e5c9c8e6e726

tools: replace sprintf with snprintf where applicable

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
Author:   Keir Fraser <keir.fraser@citrix.com>
Date:     Thu Jun 12 15:41:15 2008 +0100
Parents:  e382c13fab2b
Children: 750eee596adf
/******************************************************************************
 * tools/xenbaked.c
 *
 * Tool for collecting raw trace buffer data from Xen and
 * performing some accumulation operations and other processing
 * on it.
 *
 * Copyright (C) 2004 by Intel Research Cambridge
 * Copyright (C) 2005 by Hewlett Packard, Palo Alto and Fort Collins
 * Copyright (C) 2006 by Hewlett Packard Fort Collins
 *
 * Authors: Diwaker Gupta, diwaker.gupta@hp.com
 *          Rob Gardner, rob.gardner@hp.com
 *          Lucy Cherkasova, lucy.cherkasova@hp.com
 * Much code based on xentrace, authored by Mark Williamson,
 * mark.a.williamson@intel.com
 * Date:   November, 2005
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; under version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <time.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <signal.h>
#include <xenctrl.h>
#include <xen/xen.h>
#include <string.h>
#include <sys/select.h>
#include <getopt.h>
#define PERROR(_m, _a...)                                       \
    do {                                                        \
        int __saved_errno = errno;                              \
        fprintf(stderr, "ERROR: " _m " (%d = %s)\n" , ## _a ,   \
                __saved_errno, strerror(__saved_errno));        \
        errno = __saved_errno;                                  \
    } while (0)
typedef struct { int counter; } atomic_t;
#define _atomic_read(v) ((v).counter)

#include <xen/trace.h>
#include "xenbaked.h"
/***** Compile time configuration of defaults ********************************/

/* when we've got more records than this waiting, we log it to the output */
#define NEW_DATA_THRESH 1

/* sleep for this long (milliseconds) between checking the trace buffers */
#define POLL_SLEEP_MILLIS 100

/* Size of time period represented by each sample */
#define MS_PER_SAMPLE 100

/* CPU Frequency */
#define MHZ
#define CPU_FREQ 2660 MHZ
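/* Note: MHZ is deliberately defined empty, so "2660 MHZ" expands to the
 * plain constant 2660; the unit is purely documentation. This default is
 * only a fallback: get_num_cpus() below overwrites opts.cpu_freq with the
 * actual frequency reported by Xen (physinfo.cpu_khz / 1000). */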
/***** The code **************************************************************/

typedef struct settings_st {
    struct timespec poll_sleep;
    unsigned long new_data_thresh;
    unsigned long ms_per_sample;
    double cpu_freq;
} settings_t;

settings_t opts;

int interrupted = 0; /* gets set if we get a SIGHUP */
int rec_count = 0;
int wakeups = 0;
time_t start_time;
int dom0_flips = 0;

_new_qos_data *new_qos;
_new_qos_data **cpu_qos_data;

int global_cpu;
uint64_t global_now;

// array of currently running domains, indexed by cpu
int *running = NULL;

// number of cpus on this platform
int NCPU = 0;
void init_current(int ncpu)
{
    running = calloc(ncpu, sizeof(int));
    NCPU = ncpu;
    printf("Initialized with %d %s\n", ncpu, (ncpu == 1) ? "cpu" : "cpus");
}

int is_current(int domain, int cpu)
{
    return running[cpu] == domain;
}

// return the domain that's currently running on the given cpu
int current(int cpu)
{
    return running[cpu];
}

void set_current(int cpu, int domain)
{
    running[cpu] = domain;
}

void close_handler(int signal)
{
    interrupted = 1;
}
#if 0
void dump_record(int cpu, struct t_rec *x)
{
    printf("record: cpu=%x, tsc=%lx, event=%x, d1=%lx\n",
           cpu, x->cycles, x->event, x->data[0]);
}
#endif

/**
 * millis_to_timespec - convert a time in milliseconds to a struct timespec
 * @millis: time interval in milliseconds
 */
struct timespec millis_to_timespec(unsigned long millis)
{
    struct timespec spec;

    spec.tv_sec = millis / 1000;
    /* tv_nsec is in nanoseconds; the original multiplier of 1000 only
     * converted to microseconds, making poll sleeps 1000x too short. */
    spec.tv_nsec = (millis % 1000) * 1000000;

    return spec;
}
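/* Example: millis_to_timespec(1500) yields { .tv_sec = 1,
 * .tv_nsec = 500000000 }, i.e. 1.5 seconds. */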
typedef struct
{
    int event_count;
    int event_id;
    char *text;
} stat_map_t;

stat_map_t stat_map[] = {
    { 0, 0,                           "Other" },
    { 0, TRC_SCHED_DOM_ADD,           "Add Domain" },
    { 0, TRC_SCHED_DOM_REM,           "Remove Domain" },
    { 0, TRC_SCHED_SLEEP,             "Sleep" },
    { 0, TRC_SCHED_WAKE,              "Wake" },
    { 0, TRC_SCHED_BLOCK,             "Block" },
    { 0, TRC_SCHED_SWITCH,            "Switch" },
    { 0, TRC_SCHED_S_TIMER_FN,        "Timer Func" },
    { 0, TRC_SCHED_SWITCH_INFPREV,    "Switch Prev" },
    { 0, TRC_SCHED_SWITCH_INFNEXT,    "Switch Next" },
    { 0, TRC_MEM_PAGE_GRANT_MAP,      "Page Map" },
    { 0, TRC_MEM_PAGE_GRANT_UNMAP,    "Page Unmap" },
    { 0, TRC_MEM_PAGE_GRANT_TRANSFER, "Page Transfer" },
    { 0, 0, 0 }
};
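/* Per-event counters: log_event() bumps the matching entry (or the first,
 * "Other", for unrecognised event ids), and dump_stats() prints the whole
 * table at exit. The all-zero entry terminates the list. */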
void check_gotten_sum(void)
{
#if 0
    uint64_t sum, ns;
    extern uint64_t total_ns_gotten(uint64_t*);
    double percent;
    int i;

    for (i=0; i<NCPU; i++) {
        new_qos = cpu_qos_data[i];
        ns = billion;
        sum = total_ns_gotten(&ns);

        printf("[cpu%d] ns_gotten over all domains = %lldns, over %lldns\n",
               i, sum, ns);
        percent = (double) sum;
        percent = (100.0*percent) / (double)ns;
        printf(" ==> ns_gotten = %7.3f%%\n", percent);
    }
#endif
}
void dump_stats(void)
{
    stat_map_t *smt = stat_map;
    time_t end_time, run_time;

    time(&end_time);

    run_time = end_time - start_time;
    if (run_time == 0)  /* avoid dividing by zero on a sub-second run */
        run_time = 1;

    printf("Event counts:\n");
    while (smt->text != NULL) {
        printf("%08d\t%s\n", smt->event_count, smt->text);
        smt++;
    }

    printf("processed %d total records in %d seconds (%ld per second)\n",
           rec_count, (int)run_time, (long)(rec_count/run_time));

    printf("woke up %d times in %d seconds (%ld per second)\n", wakeups,
           (int)run_time, (long)(wakeups/run_time));

    check_gotten_sum();
}
void log_event(int event_id)
{
    stat_map_t *smt = stat_map;

    //  printf("event_id = 0x%x\n", event_id);

    while (smt->text != NULL) {
        if (smt->event_id == event_id) {
            smt->event_count++;
            return;
        }
        smt++;
    }
    stat_map[0].event_count++; // unrecognised event: count it as "Other"
}
int virq_port;
int xce_handle = -1;

/* Returns the event channel handle. */
/* Stolen from xenstore code */
int eventchn_init(void)
{
    int rc;

    // to revert to the old polling-only behaviour, change this to "if (1)"
    if (0)
        return -1;

    xce_handle = xc_evtchn_open();

    if (xce_handle < 0)
        perror("Failed to open evtchn device");

    if ((rc = xc_evtchn_bind_virq(xce_handle, VIRQ_TBUF)) == -1)
        perror("Failed to bind to domain exception virq port");
    virq_port = rc;

    return xce_handle;
}
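/* wait_for_event() blocks until Xen signals that new trace data may be
 * ready. If the event channel could not be set up (xce_handle < 0) it
 * degrades to a fixed nanosleep() poll; otherwise it select()s on the
 * event-channel fd with a one second timeout, then reads the pending port
 * and unmasks it so the next notification can be delivered. */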
void wait_for_event(void)
{
    int ret;
    fd_set inset;
    evtchn_port_t port;
    struct timeval tv;
    int evtchn_fd;

    if (xce_handle < 0) {
        nanosleep(&opts.poll_sleep, NULL);
        return;
    }

    evtchn_fd = xc_evtchn_fd(xce_handle);

    FD_ZERO(&inset);
    FD_SET(evtchn_fd, &inset);
    tv.tv_sec = 1;
    tv.tv_usec = 0;
    // tv = millis_to_timespec(&opts.poll_sleep);
    ret = select(evtchn_fd+1, &inset, NULL, NULL, &tv);

    if ( (ret == 1) && FD_ISSET(evtchn_fd, &inset) ) {
        if ((port = xc_evtchn_pending(xce_handle)) == -1)
            perror("Failed to read from event fd");

        //    if (port == virq_port)
        //      printf("got the event I was looking for\r\n");

        if (xc_evtchn_unmask(xce_handle, port) == -1)
            perror("Failed to write to event fd");
    }
}
static void get_tbufs(unsigned long *mfn, unsigned long *size)
{
    int xc_handle = xc_interface_open();
    int ret;

    if ( xc_handle < 0 )
    {
        exit(EXIT_FAILURE);
    }

    ret = xc_tbuf_enable(xc_handle, DEFAULT_TBUF_SIZE, mfn, size);

    if ( ret != 0 )
    {
        perror("Couldn't enable trace buffers");
        exit(1);
    }

    xc_interface_close(xc_handle);
}
void disable_tracing(void)
{
    int xc_handle = xc_interface_open();
    xc_tbuf_disable(xc_handle);
    xc_interface_close(xc_handle);
}
/**
 * map_tbufs - memory map Xen trace buffers into user space
 * @tbufs_mfn: mfn of the trace buffers
 * @num:       number of trace buffers to map
 * @size:      size of each trace buffer
 *
 * Maps the Xen trace buffers into process address space.
 */
struct t_buf *map_tbufs(unsigned long tbufs_mfn, unsigned int num,
                        unsigned long size)
{
    int xc_handle;
    struct t_buf *tbufs_mapped;

    xc_handle = xc_interface_open();

    if ( xc_handle < 0 )
    {
        exit(EXIT_FAILURE);
    }

    tbufs_mapped = xc_map_foreign_range(xc_handle, DOMID_XEN,
                                        size * num, PROT_READ | PROT_WRITE,
                                        tbufs_mfn);

    xc_interface_close(xc_handle);

    if ( tbufs_mapped == 0 )
    {
        PERROR("Failed to mmap trace buffers");
        exit(EXIT_FAILURE);
    }

    return tbufs_mapped;
}
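/* The trace buffers live in Xen's own heap rather than in any guest, hence
 * the mapping is requested from the pseudo-domain DOMID_XEN. Mapping all of
 * them with a single size*num range starting at tbufs_mfn relies on the
 * per-cpu buffers sitting back-to-back in machine memory. */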
/**
 * init_bufs_ptrs - initialises an array of pointers to the trace buffers
 * @bufs_mapped: the userspace address where the trace buffers are mapped
 * @num:         number of trace buffers
 * @size:        trace buffer size
 *
 * Initialises an array of pointers to individual trace buffers within the
 * mapped region containing all trace buffers.
 */
struct t_buf **init_bufs_ptrs(void *bufs_mapped, unsigned int num,
                              unsigned long size)
{
    int i;
    struct t_buf **user_ptrs;

    user_ptrs = (struct t_buf **)calloc(num, sizeof(struct t_buf *));
    if ( user_ptrs == NULL )
    {
        PERROR("Failed to allocate memory for buffer pointers\n");
        exit(EXIT_FAILURE);
    }

    /* initialise pointers to the trace buffers - given the size of a trace
     * buffer and the value of bufs_mapped, we can easily calculate these */
    for ( i = 0; i < num; i++ )
        user_ptrs[i] = (struct t_buf *)((unsigned long)bufs_mapped + size * i);

    return user_ptrs;
}
/**
 * init_rec_ptrs - initialises data area pointers to locations in user space
 * @meta: array of user-space pointers to struct t_buf's of metadata
 * @num:  number of trace buffers
 *
 * Initialises data area pointers to the locations that data areas have been
 * mapped in user space. Note that the trace buffer metadata contains machine
 * pointers - the array returned allows more convenient access to them.
 */
struct t_rec **init_rec_ptrs(struct t_buf **meta, unsigned int num)
{
    int i;
    struct t_rec **data;

    data = calloc(num, sizeof(struct t_rec *));
    if ( data == NULL )
    {
        PERROR("Failed to allocate memory for data pointers\n");
        exit(EXIT_FAILURE);
    }

    for ( i = 0; i < num; i++ )
        data[i] = (struct t_rec *)(meta[i] + 1);

    return data;
}
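/* meta[i] + 1 relies on each buffer's record area starting immediately
 * after its struct t_buf header: pointer arithmetic on a struct t_buf *
 * advances by sizeof(struct t_buf) bytes. */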
/**
 * get_num_cpus - get the number of logical CPUs
 */
unsigned int get_num_cpus(void)
{
    xc_physinfo_t physinfo = { 0 };
    int xc_handle = xc_interface_open();
    int ret;

    ret = xc_physinfo(xc_handle, &physinfo);

    if ( ret != 0 )
    {
        PERROR("Failure to get logical CPU count from Xen");
        exit(EXIT_FAILURE);
    }

    xc_interface_close(xc_handle);
    opts.cpu_freq = (double)physinfo.cpu_khz/1000.0;

    return physinfo.nr_cpus;
}
/**
 * monitor_tbufs - monitor the contents of tbufs
 */
int monitor_tbufs(void)
{
    int i;
    extern int process_record(int, struct t_rec *);
    extern void alloc_qos_data(int ncpu);

    void *tbufs_mapped;      /* pointer to where the tbufs are mapped */
    struct t_buf **meta;     /* pointers to the trace buffer metadata */
    char **data;             /* pointers to the trace buffer data areas
                              * where they are mapped into user space. */
    unsigned long tbufs_mfn; /* mfn of the tbufs */
    unsigned int num;        /* number of trace buffers / logical CPUS */
    unsigned long size;      /* size of a single trace buffer */

    unsigned long data_size, rec_size;

    /* get number of logical CPUs (and therefore number of trace buffers) */
    num = get_num_cpus();

    init_current(num);
    alloc_qos_data(num);

    printf("CPU Frequency = %7.2f\n", opts.cpu_freq);

    /* setup access to trace buffers */
    get_tbufs(&tbufs_mfn, &size);
    tbufs_mapped = map_tbufs(tbufs_mfn, num, size);

    data_size = size - sizeof(struct t_buf);

    /* build arrays of convenience ptrs */
    meta = init_bufs_ptrs(tbufs_mapped, num, size);
    data = (char **)init_rec_ptrs(meta, num);

    if ( eventchn_init() < 0 )
        fprintf(stderr, "Failed to initialize event channel; "
                "Using POLL method\r\n");

    /* now, scan buffers for events */
    while ( !interrupted )
    {
        for ( i = 0; (i < num) && !interrupted; i++ )
        {
            unsigned long start_offset, end_offset, cons, prod;

            cons = meta[i]->cons;
            prod = meta[i]->prod;
            xen_rmb(); /* read prod, then read item. */

            if ( cons == prod )
                continue;

            start_offset = cons % data_size;
            end_offset = prod % data_size;

            if ( start_offset >= end_offset )
            {
                /* the unread data wraps: consume up to the end of the
                 * buffer first, then fall through to the run at offset 0 */
                while ( start_offset != data_size )
                {
                    rec_size = process_record(
                        i, (struct t_rec *)(data[i] + start_offset));
                    start_offset += rec_size;
                }
                start_offset = 0;
            }
            while ( start_offset != end_offset )
            {
                rec_size = process_record(
                    i, (struct t_rec *)(data[i] + start_offset));
                start_offset += rec_size;
            }
            xen_mb(); /* read item, then update cons. */
            meta[i]->cons = prod;
        }

        wait_for_event();
        wakeups++;
    }

    /* cleanup */
    free(meta);
    free(data);
    /* don't need to munmap - cleanup is automatic */

    return 0;
}
/******************************************************************************
 * Command line handling
 *****************************************************************************/

const char *program_version     = "xenbaked v1.4";
const char *program_bug_address = "<rob.gardner@hp.com>";

#define xstr(x) str(x)
#define str(x) #x

void usage(void)
{
#define USAGE_STR \
"Usage: xenbaked [OPTION...]\n" \
"Tool to capture and partially process Xen trace buffer data\n" \
"\n" \
"  -m, --ms_per_sample=MS   Specify the number of milliseconds per sample\n" \
"                           (default " xstr(MS_PER_SAMPLE) ").\n" \
"  -s, --poll-sleep=p       Set sleep time, p, in milliseconds between\n" \
"                           polling the trace buffer for new data\n" \
"                           (default " xstr(POLL_SLEEP_MILLIS) ").\n" \
"  -t, --log-thresh=l       Set number, l, of new records required to\n" \
"                           trigger a write to output (default " \
                            xstr(NEW_DATA_THRESH) ").\n" \
"  -?, --help               Show this message\n" \
"  -V, --version            Print program version\n" \
"\n" \
"This tool is used to capture trace buffer data from Xen. The data is\n" \
"saved in a shared memory structure to be further processed by xenmon.\n"

    printf(USAGE_STR);
    printf("\nReport bugs to %s\n", program_bug_address);

    exit(EXIT_FAILURE);
}
/* convert the argument string pointed to by arg to a long int representation */
long argtol(const char *restrict arg, int base)
{
    char *endp;
    long val;

    errno = 0;
    val = strtol(arg, &endp, base);

    if (errno != 0) {
        fprintf(stderr, "Invalid option argument: %s\n", arg);
        fprintf(stderr, "Error: %s\n\n", strerror(errno));
        usage();
    } else if (endp == arg || *endp != '\0') {
        fprintf(stderr, "Invalid option argument: %s\n\n", arg);
        usage();
    }

    return val;
}
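/* With base 0, strtol() auto-detects the radix, so argtol("100", 0)
 * returns 100, argtol("0x64", 0) returns 100 (hex), and argtol("0144", 0)
 * also returns 100 (octal). The endp check rejects trailing garbage such
 * as "100ms". */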
/* parse command line arguments */
void parse_args(int argc, char **argv)
{
    int option;
    static struct option long_options[] = {
        { "log-thresh",    required_argument, 0, 't' },
        { "poll-sleep",    required_argument, 0, 's' },
        { "ms_per_sample", required_argument, 0, 'm' },
        { "help",          no_argument,       0, '?' },
        { "version",       no_argument,       0, 'V' },
        { 0, 0, 0, 0 }
    };

    while ( (option = getopt_long(argc, argv, "m:s:t:?V",
                                  long_options, NULL)) != -1 )
    {
        switch ( option )
        {
        case 't': /* set new records threshold for logging */
            opts.new_data_thresh = argtol(optarg, 0);
            break;

        case 's': /* set sleep time (given in milliseconds) */
            opts.poll_sleep = millis_to_timespec(argtol(optarg, 0));
            break;

        case 'm': /* set ms_per_sample */
            opts.ms_per_sample = argtol(optarg, 0);
            break;

        case 'V': /* print program version */
            printf("%s\n", program_version);
            exit(EXIT_SUCCESS);
            break;

        default:
            usage();
        }
    }

    /* all arguments should have been processed */
    if (optind != argc) {
        usage();
    }
}
#define SHARED_MEM_FILE "/var/run/xenq-shm"
void alloc_qos_data(int ncpu)
{
    int i, n, pgsize, off=0;
    char *dummy;
    int qos_fd;
    void advance_next_datapoint(uint64_t);

    cpu_qos_data = (_new_qos_data **) calloc(ncpu, sizeof(_new_qos_data *));

    qos_fd = open(SHARED_MEM_FILE, O_RDWR|O_CREAT|O_TRUNC, 0777);
    if (qos_fd < 0) {
        PERROR(SHARED_MEM_FILE);
        exit(2);
    }
    pgsize = getpagesize();
    dummy = malloc(pgsize);

    for (n=0; n<ncpu; n++) {
        /* extend the file by sizeof(_new_qos_data), rounded up to a whole
         * number of pages, so that each cpu's region is page aligned */
        for (i=0; i<sizeof(_new_qos_data); i=i+pgsize)
            if ((write(qos_fd, dummy, pgsize)) != pgsize) {
                PERROR(SHARED_MEM_FILE);
                exit(2);
            }

        new_qos = (_new_qos_data *) mmap(0, sizeof(_new_qos_data),
                                         PROT_READ|PROT_WRITE,
                                         MAP_SHARED, qos_fd, off);
        off += i;
        if (new_qos == MAP_FAILED) { /* mmap reports failure as MAP_FAILED,
                                      * not NULL */
            PERROR("mmap");
            exit(3);
        }
        //  printf("new_qos = %p\n", new_qos);
        memset(new_qos, 0, sizeof(_new_qos_data));
        new_qos->next_datapoint = 0;
        advance_next_datapoint(0);
        new_qos->structlen = i;
        new_qos->ncpu = ncpu;
        //  printf("structlen = 0x%x\n", i);
        cpu_qos_data[n] = new_qos;
    }
    free(dummy);
    new_qos = NULL;
}
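/* Resulting layout of /var/run/xenq-shm: ncpu consecutive, page-aligned
 * _new_qos_data regions, one per physical cpu. xenmon consumes this file
 * (see the usage text above); the structlen and ncpu fields recorded in
 * each region presumably let the reader locate the other regions. */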
int main(int argc, char **argv)
{
    int i, ret;
    struct sigaction act;

    time(&start_time);
    opts.poll_sleep = millis_to_timespec(POLL_SLEEP_MILLIS);
    opts.new_data_thresh = NEW_DATA_THRESH;
    opts.ms_per_sample = MS_PER_SAMPLE;
    opts.cpu_freq = CPU_FREQ;

    parse_args(argc, argv);
    fprintf(stderr, "ms_per_sample = %ld\n", opts.ms_per_sample);

    /* ensure that if we get a signal, we'll do cleanup, then exit */
    act.sa_handler = close_handler;
    act.sa_flags = 0;
    sigemptyset(&act.sa_mask);
    sigaction(SIGHUP,  &act, NULL);
    sigaction(SIGTERM, &act, NULL);
    sigaction(SIGINT,  &act, NULL);

    ret = monitor_tbufs();

    dump_stats();
    /* new_qos is left NULL between records, so flush each cpu's shared
     * region explicitly instead of msync()ing a stale pointer */
    for (i = 0; i < NCPU; i++)
        msync(cpu_qos_data[i], sizeof(_new_qos_data), MS_SYNC);
    disable_tracing();

    return ret;
}
void qos_init_domain(int domid, int idx)
{
    int i;

    memset(&new_qos->domain_info[idx], 0, sizeof(_domain_info));
    new_qos->domain_info[idx].last_update_time = global_now;
    //  runnable_start_time[idx] = 0;
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    new_qos->domain_info[idx].in_use = 1;
    new_qos->domain_info[idx].blocked_start_time = 0;
    new_qos->domain_info[idx].id = domid;
    if (domid == IDLE_DOMAIN_ID)
        snprintf(new_qos->domain_info[idx].name,
                 sizeof(new_qos->domain_info[idx].name),
                 "Idle Task%d", global_cpu);
    else
        snprintf(new_qos->domain_info[idx].name,
                 sizeof(new_qos->domain_info[idx].name),
                 "Domain#%d", domid);

    for (i=0; i<NSAMPLES; i++) {
        new_qos->qdata[i].ns_gotten[idx] = 0;
        new_qos->qdata[i].ns_allocated[idx] = 0;
        new_qos->qdata[i].ns_waiting[idx] = 0;
        new_qos->qdata[i].ns_blocked[idx] = 0;
        new_qos->qdata[i].switchin_count[idx] = 0;
        new_qos->qdata[i].io_count[idx] = 0;
    }
}
void global_init_domain(int domid, int idx)
{
    int cpu;
    _new_qos_data *saved_qos;

    saved_qos = new_qos;

    for (cpu=0; cpu<NCPU; cpu++) {
        new_qos = cpu_qos_data[cpu];
        qos_init_domain(domid, idx);
    }
    new_qos = saved_qos;
}
// give index of this domain in the qos data array
int indexof(int domid)
{
    int idx;
    xc_dominfo_t dominfo[NDOMAINS];
    int xc_handle, ndomains;
    extern void qos_kill_thread(int domid);

    if (domid < 0) { // shouldn't happen
        printf("bad domain id: %d\r\n", domid);
        return 0;
    }

    for (idx=0; idx<NDOMAINS; idx++)
        if ( (new_qos->domain_info[idx].id == domid) && new_qos->domain_info[idx].in_use)
            return idx;

    // not found, make a new entry
    for (idx=0; idx<NDOMAINS; idx++)
        if (new_qos->domain_info[idx].in_use == 0) {
            global_init_domain(domid, idx);
            return idx;
        }

    // call domaininfo hypercall to try and garbage collect unused entries
    xc_handle = xc_interface_open();
    ndomains = xc_domain_getinfo(xc_handle, 0, NDOMAINS, dominfo);
    xc_interface_close(xc_handle);

    // for each domain in our data, look for it in the system dominfo structure
    // and purge the domain's data from our state if it does not exist in the
    // dominfo structure
    for (idx=0; idx<NDOMAINS; idx++) {
        int domid = new_qos->domain_info[idx].id; // NB: shadows the parameter
        int jdx;

        for (jdx=0; jdx<ndomains; jdx++) {
            if (dominfo[jdx].domid == domid)
                break;
        }
        if (jdx == ndomains)             // we didn't find domid in dominfo
            if (domid != IDLE_DOMAIN_ID) // exception for idle domain, which
                                         // is not contained in dominfo
                qos_kill_thread(domid);  // purge our stale data
    }

    // look again for a free slot
    for (idx=0; idx<NDOMAINS; idx++)
        if (new_qos->domain_info[idx].in_use == 0) {
            global_init_domain(domid, idx);
            return idx;
        }

    // still no space found, so bail
    fprintf(stderr, "out of space in domain table, increase NDOMAINS\r\n");
    exit(2);
}
int domain_runnable(int domid)
{
    return new_qos->domain_info[indexof(domid)].runnable;
}


void update_blocked_time(int domid, uint64_t now)
{
    uint64_t t_blocked;
    int id = indexof(domid);

    if (new_qos->domain_info[id].blocked_start_time != 0) {
        if (now >= new_qos->domain_info[id].blocked_start_time)
            t_blocked = now - new_qos->domain_info[id].blocked_start_time;
        else
            t_blocked = now + (~0ULL - new_qos->domain_info[id].blocked_start_time);
        new_qos->qdata[new_qos->next_datapoint].ns_blocked[id] += t_blocked;
    }

    if (domain_runnable(domid))
        new_qos->domain_info[id].blocked_start_time = 0;
    else
        new_qos->domain_info[id].blocked_start_time = now;
}
// advance to next datapoint for all domains
void advance_next_datapoint(uint64_t now)
{
    int new, old, didx;

    old = new_qos->next_datapoint;
    new = QOS_INCR(old);
    new_qos->next_datapoint = new;
    //  memset(&new_qos->qdata[new], 0, sizeof(uint64_t)*(2+5*NDOMAINS));
    for (didx = 0; didx < NDOMAINS; didx++) {
        new_qos->qdata[new].ns_gotten[didx] = 0;
        new_qos->qdata[new].ns_allocated[didx] = 0;
        new_qos->qdata[new].ns_waiting[didx] = 0;
        new_qos->qdata[new].ns_blocked[didx] = 0;
        new_qos->qdata[new].switchin_count[didx] = 0;
        new_qos->qdata[new].io_count[didx] = 0;
    }
    new_qos->qdata[new].ns_passed = 0;
    new_qos->qdata[new].lost_records = 0;
    new_qos->qdata[new].flip_free_periods = 0;

    new_qos->qdata[new].timestamp = now;
}
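/* The qdata array is used as a ring: QOS_INCR (defined in xenbaked.h)
 * presumably wraps the index back to 0 after NSAMPLES-1, so the shared
 * region always holds the most recent NSAMPLES samples of roughly
 * ms_per_sample milliseconds each. */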
void qos_update_thread(int cpu, int domid, uint64_t now)
{
    int n, id;
    uint64_t last_update_time, start;
    int64_t time_since_update, run_time = 0;

    id = indexof(domid);

    n = new_qos->next_datapoint;
    last_update_time = new_qos->domain_info[id].last_update_time;

    time_since_update = now - last_update_time;

    if (time_since_update < 0) {
        // what happened here? either a timestamp wraparound, or more likely,
        // a slight inconsistency among timestamps from various cpu's
        if (-time_since_update < billion) {
            // fairly small difference, let's just adjust 'now' to be a little
            // beyond last_update_time
            time_since_update = -time_since_update;
        }
        else if ( ((~0ULL - last_update_time) < billion) && (now < billion) ) {
            // difference is huge, must be a wraparound
            // last_update time should be "near" ~0ULL,
            // and now should be "near" 0
            time_since_update = now + (~0ULL - last_update_time);
            printf("time wraparound\n");
        }
        else {
            // none of the above, may be an out of order record
            // no good solution, just ignore and update again later
            return;
        }
    }

    new_qos->domain_info[id].last_update_time = now;

    if (new_qos->domain_info[id].runnable_at_last_update && is_current(domid, cpu)) {
        start = new_qos->domain_info[id].start_time;
        if (start > now) { // wrapped around
            run_time = now + (~0ULL - start);
            // this could happen if there is nothing going on within a cpu;
            // in this case the idle domain would run forever
            //        printf("warning: start > now\n");
        }
        else
            run_time = now - start;
        //    if (run_time < 0) // should not happen
        //      printf("warning: run_time < 0; start = %lld now= %lld\n", start, now);
        new_qos->domain_info[id].ns_oncpu_since_boot += run_time;
        new_qos->domain_info[id].start_time = now;
        new_qos->domain_info[id].ns_since_boot += time_since_update;

        new_qos->qdata[n].ns_gotten[id] += run_time;
        //      if (domid == 0 && cpu == 1)
        //        printf("adding run time for dom0 on cpu1\r\n");
    }

    new_qos->domain_info[id].runnable_at_last_update = domain_runnable(domid);

    update_blocked_time(domid, now);

    // how much time passed since this datapoint was updated?
    if (now >= new_qos->qdata[n].timestamp) {
        // all is right with the world, time is increasing
        new_qos->qdata[n].ns_passed += (now - new_qos->qdata[n].timestamp);
    }
    else {
        // time wrapped around
        //new_qos->qdata[n].ns_passed += (now + (~0LL - new_qos->qdata[n].timestamp));
        //    printf("why timewrap?\r\n");
    }
    new_qos->qdata[n].timestamp = now;
}
// called by dump routines to update all structures
void qos_update_all(uint64_t now, int cpu)
{
    int i;

    for (i=0; i<NDOMAINS; i++)
        if (new_qos->domain_info[i].in_use)
            qos_update_thread(cpu, new_qos->domain_info[i].id, now);
}


void qos_update_thread_stats(int cpu, int domid, uint64_t now)
{
    if (new_qos->qdata[new_qos->next_datapoint].ns_passed > (million*opts.ms_per_sample)) {
        qos_update_all(now, cpu);
        advance_next_datapoint(now);
        return;
    }
    qos_update_thread(cpu, domid, now);
}
// called when a new thread gets the cpu
void qos_switch_in(int cpu, int domid, uint64_t now, unsigned long ns_alloc,
                   unsigned long ns_waited)
{
    int idx = indexof(domid);

    new_qos->domain_info[idx].runnable = 1;
    update_blocked_time(domid, now);
    new_qos->domain_info[idx].blocked_start_time = 0;  // invalidate
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    //runnable_start_time[idx] = 0;

    new_qos->domain_info[idx].start_time = now;
    new_qos->qdata[new_qos->next_datapoint].switchin_count[idx]++;
    new_qos->qdata[new_qos->next_datapoint].ns_allocated[idx] += ns_alloc;
    new_qos->qdata[new_qos->next_datapoint].ns_waiting[idx] += ns_waited;
    qos_update_thread_stats(cpu, domid, now);
    set_current(cpu, domid);

    // reset the page flip counter for this dom0 execution period;
    // qos_count_packets() increments it, qos_switch_out() inspects it
    if (domid == 0)
        dom0_flips = 0;
}
// called when the current thread is taken off the cpu
void qos_switch_out(int cpu, int domid, uint64_t now, unsigned long gotten)
{
    int idx = indexof(domid);
    int n;

    if (!is_current(domid, cpu)) {
        //    printf("switching out domain %d but it is not current. gotten=%ld\r\n", id, gotten);
    }

    if (gotten == 0) {
        printf("gotten==0 in qos_switchout(domid=%d)\n", domid);
    }

    if (gotten < 100) {
        printf("gotten<100ns in qos_switchout(domid=%d)\n", domid);
    }

    n = new_qos->next_datapoint;
#if 0
    new_qos->qdata[n].ns_gotten[idx] += gotten;
    if (gotten > new_qos->qdata[n].ns_passed)
        printf("inconsistency #257, diff = %lld\n",
               gotten - new_qos->qdata[n].ns_passed );
#endif
    new_qos->domain_info[idx].ns_oncpu_since_boot += gotten;
    new_qos->domain_info[idx].runnable_start_time = now;
    //  runnable_start_time[id] = now;
    qos_update_thread_stats(cpu, domid, now);

    // process dom0 page flips
    if (domid == 0)
        if (dom0_flips == 0)
            new_qos->qdata[n].flip_free_periods++;
}
// called when domain is put to sleep, may also be called
// when thread is already asleep
void qos_state_sleeping(int cpu, int domid, uint64_t now)
{
    int idx;

    if (!domain_runnable(domid)) // double call?
        return;

    idx = indexof(domid);
    new_qos->domain_info[idx].runnable = 0;
    new_qos->domain_info[idx].blocked_start_time = now;
    new_qos->domain_info[idx].runnable_start_time = 0; // invalidate
    //  runnable_start_time[idx] = 0; // invalidate
    qos_update_thread_stats(cpu, domid, now);
}
// domain died, presume it's dead on all cpu's, not just mostly dead
void qos_kill_thread(int domid)
{
    int cpu;

    for (cpu=0; cpu<NCPU; cpu++) {
        cpu_qos_data[cpu]->domain_info[indexof(domid)].in_use = 0;
    }
}
// called when thread becomes runnable, may also be called
// when thread is already runnable
void qos_state_runnable(int cpu, int domid, uint64_t now)
{
    int idx;

    qos_update_thread_stats(cpu, domid, now);

    if (domain_runnable(domid)) // double call?
        return;

    idx = indexof(domid);
    new_qos->domain_info[idx].runnable = 1;
    update_blocked_time(domid, now);

    new_qos->domain_info[idx].blocked_start_time = 0; /* invalidate */
    new_qos->domain_info[idx].runnable_start_time = now;
    //  runnable_start_time[id] = now;
}
void qos_count_packets(domid_t domid, uint64_t now)
{
    int i, idx = indexof(domid);
    _new_qos_data *cpu_data;

    for (i=0; i<NCPU; i++) {
        cpu_data = cpu_qos_data[i];
        if (cpu_data->domain_info[idx].in_use) {
            cpu_data->qdata[cpu_data->next_datapoint].io_count[idx]++;
        }
    }

    new_qos->qdata[new_qos->next_datapoint].io_count[0]++;
    dom0_flips++;
}
int process_record(int cpu, struct t_rec *r)
{
    uint64_t now = 0;
    uint32_t *extra_u32 = r->u.nocycles.extra_u32;

    new_qos = cpu_qos_data[cpu];

    rec_count++;

    if ( r->cycles_included )
    {
        now = ((uint64_t)r->u.cycles.cycles_hi << 32) | r->u.cycles.cycles_lo;
        now = ((double)now) / (opts.cpu_freq / 1000.0); /* cycles -> ns */
        extra_u32 = r->u.cycles.extra_u32;
    }

    global_now = now;
    global_cpu = cpu;

    log_event(r->event);

    switch (r->event) {

    case TRC_SCHED_SWITCH_INFPREV:
        // domain data[0] just switched out and received data[1] ns of cpu time
        qos_switch_out(cpu, extra_u32[0], now, extra_u32[1]);
        //    printf("ns_gotten %ld\n", extra_u32[1]);
        break;

    case TRC_SCHED_SWITCH_INFNEXT:
        // domain data[0] just switched in and
        // waited data[1] ns, and was allocated data[2] ns of cpu time
        qos_switch_in(cpu, extra_u32[0], now, extra_u32[2], extra_u32[1]);
        break;

    case TRC_SCHED_DOM_ADD:
        (void) indexof(extra_u32[0]);
        break;

    case TRC_SCHED_DOM_REM:
        qos_kill_thread(extra_u32[0]);
        break;

    case TRC_SCHED_SLEEP:
        qos_state_sleeping(cpu, extra_u32[0], now);
        break;

    case TRC_SCHED_WAKE:
        qos_state_runnable(cpu, extra_u32[0], now);
        break;

    case TRC_SCHED_BLOCK:
        qos_state_sleeping(cpu, extra_u32[0], now);
        break;

    case TRC_MEM_PAGE_GRANT_TRANSFER:
        qos_count_packets(extra_u32[0], now);
        break;

    default:
        break;
    }

    new_qos = NULL;

    /* report this record's size so the caller can advance to the next one:
     * a 4-byte header word, an optional 8-byte cycle count, and
     * extra_u32 payload words of 4 bytes each */
    return 4 + (r->cycles_included ? 8 : 0) + (r->extra_u32 * 4);
}