--- /dev/null
+#define _XOPEN_SOURCE 600
+#include <stdio.h>
+#include <stdlib.h>
+#include <argp.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "trace.h"
+#include "analyze.h"
+#include <errno.h>
+#include <strings.h>
+#include <string.h>
+#include <assert.h>
+
+typedef unsigned long long tsc_t;
+
+#define DEFAULT_CPU_HZ 2400000000LL
+#define ADDR_SPACE_BITS 48
+#define DEFAULT_SAMPLE_SIZE 10240
+#define DEFAULT_INTERVAL_LENGTH 1000
+
+/* -- Global variables -- */
+struct {
+ int fd;
+ struct symbol_struct * symbols;
+ char * symbol_file;
+ char * trace_file;
+ int output_defined;
+ loff_t file_size;
+ struct {
+ loff_t update_offset;
+ int pipe[2];
+ FILE* out;
+ int pid;
+ } progress;
+} G = {
+ .fd=-1,
+ .symbols = NULL,
+ .symbol_file = NULL,
+ .trace_file = NULL,
+ .output_defined = 0,
+ .file_size = 0,
+ .progress = { .update_offset = 0 },
+};
+
+struct {
+ unsigned
+ scatterplot_interrupt_eip:1,
+ scatterplot_cpi:1,
+ scatterplot_unpin_promote:1,
+ scatterplot_cr3_switch:1,
+ scatterplot_wake_to_halt:1,
+ scatterplot_io:1,
+ histogram_interrupt_eip:1,
+ interval_mode:1,
+ dump_cooked:1,
+ dump_all:1,
+ dump_raw_process:1,
+ dump_raw_reads:1,
+ dump_no_processing:1,
+ dump_ipi_latency:1,
+ dump_trace_volume_on_lost_record:1,
+ with_cr3_enumeration:1,
+ with_pio_enumeration:1,
+ show_default_domain_summary:1,
+ progress:1,
+ svm_mode:1,
+ summary:1,
+ summary_info;
+ long long cpu_hz;
+ int scatterplot_interrupt_vector;
+ int scatterplot_io_port;
+ int histogram_interrupt_vector;
+ unsigned long long histogram_interrupt_increment;
+ int default_guest_paging_levels;
+ int sample_size;
+ struct {
+ tsc_t cycles;
+        /* Used if the interval is specified in time (milliseconds) rather
+         * than cycles, to delay converting it to cycles until all arguments
+         * (specifically cpu_hz) have been processed. */
+ unsigned msec;
+ enum {
+ INTERVAL_CR3_SCHEDULE_TIME,
+ INTERVAL_CR3_SCHEDULE_ORDERED,
+ INTERVAL_CR3_SHORT_SUMMARY,
+ INTERVAL_DOMAIN_SHORT_SUMMARY,
+ INTERVAL_DOMAIN_GUEST_INTERRUPT,
+ INTERVAL_DOMAIN_GRANT_MAPS
+ } output;
+ /* Options for specific interval output types */
+ union {
+ struct {
+ unsigned long long *values;
+ int count;
+ } cr3;
+ struct {
+ int did;
+ } domain;
+ };
+ } interval;
+} opt = {
+ .scatterplot_interrupt_eip=0,
+ .scatterplot_cpi=0,
+ .scatterplot_unpin_promote=0,
+ .scatterplot_cr3_switch=0,
+ .scatterplot_wake_to_halt=0,
+ .histogram_interrupt_eip=0,
+ .dump_cooked = 0,
+ .dump_all = 0,
+ .dump_raw_process = 0,
+ .dump_raw_reads = 0,
+ .dump_no_processing = 0,
+ .dump_ipi_latency = 0,
+ .dump_trace_volume_on_lost_record = 0,
+ .with_cr3_enumeration = 0,
+ .with_pio_enumeration = 1,
+ .show_default_domain_summary = 0,
+ .progress = 0,
+ .svm_mode = 0,
+ .summary = 0,
+ .cpu_hz = DEFAULT_CPU_HZ,
+ .default_guest_paging_levels = 2,
+ .sample_size = DEFAULT_SAMPLE_SIZE,
+ .interval = { .msec = DEFAULT_INTERVAL_LENGTH },
+};
+
+/* -- on-disk trace buffer definitions -- */
+struct trace_record {
+ unsigned event:28,
+ extra_words:3,
+ cycle_flag:1;
+ union {
+ struct {
+ uint32_t tsc_lo, tsc_hi;
+ uint32_t data[7];
+ } tsc;
+ struct {
+ uint32_t data[7];
+ } notsc;
+ } u;
+};
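+
+/* Illustrative sketch (not referenced elsewhere in this file): the on-disk
+ * size of a record follows directly from the header bits -- one 32-bit
+ * header word, an optional 64-bit tsc when cycle_flag is set, plus
+ * extra_words of 32-bit payload. */
+static inline size_t trace_record_size(const struct trace_record *rec)
+{
+    return sizeof(uint32_t)                            /* header word */
+        + (rec->cycle_flag ? 2 * sizeof(uint32_t) : 0) /* tsc_lo, tsc_hi */
+        + rec->extra_words * sizeof(uint32_t);         /* payload words */
+}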
+
+FILE *warn = NULL;
+
+/* -- General info about the current record -- */
+struct time_struct {
+ unsigned long long time;
+ unsigned long s, ns;
+};
+
+#define DUMP_HEADER_MAX 256
+
+struct record_info {
+ int cpu;
+ tsc_t tsc;
+ union {
+ unsigned event;
+ struct {
+ unsigned minor:12,
+ sub:4,
+ main:12,
+ unused:4;
+ } evt;
+ };
+ int extra_words;
+ int size;
+ uint32_t *d;
+ char dump_header[DUMP_HEADER_MAX];
+ struct time_struct t;
+ struct trace_record rec;
+};
+
+/* -- Summary data -- */
+struct cycle_framework {
+ tsc_t first_tsc, last_tsc, total_cycles;
+};
+
+struct event_cycle_summary {
+ int count, cycles_count, interval_count;
+ long long cycles, interval_cycles;
+ long long *cycles_sample;
+};
+
+struct cycle_summary {
+ int count, interval_count;
+ unsigned long long cycles, interval_cycles;
+ long long *sample;
+};
+
+struct weighted_cpi_summary {
+ int count, interval_count;
+ unsigned long long instructions, interval_instructions;
+ unsigned long long cycles, interval_cycles;
+ float *cpi;
+ unsigned long long *cpi_weight;
+};
+
+/* -- Symbol list information -- */
+#define SYMBOL_ENTRIES_PER_STRUCT 1023
+#define SYMBOL_NAME_SIZE 124
+struct symbol_struct {
+ int count;
+ struct {
+ unsigned long long addr;
+ char name[SYMBOL_NAME_SIZE];
+ } symbols[SYMBOL_ENTRIES_PER_STRUCT];
+ struct symbol_struct *next;
+};
+
+
+void parse_symbol_file(char *fn) {
+ unsigned long long last_addr = 0;
+ FILE * symbol_file;
+ struct symbol_struct ** p=&G.symbols;
+
+ if((symbol_file=fopen(fn, "rb"))==NULL) {
+ fprintf(stderr, "Could not open symbol file %s\n", fn);
+ perror("open");
+ exit(1);
+ }
+ while(!feof(symbol_file)) {
+ /* Allocate a new struct if we need it */
+ if(!*p) {
+ *p = malloc(sizeof(**p));
+ if(!*p) {
+ fprintf(stderr, "Malloc failed!\n");
+ exit(1);
+ }
+ (*p)->count=0;
+ (*p)->next=NULL;
+ }
+
+        /* Field width leaves room for the terminating NUL
+         * (SYMBOL_NAME_SIZE - 1); stop on EOF or parse failure. */
+        if(fscanf(symbol_file, "%llx %123s",
+                  &(*p)->symbols[(*p)->count].addr,
+                  (*p)->symbols[(*p)->count].name) != 2)
+            break;
+
+ if( ((*p)->symbols[(*p)->count].addr > 0)
+ && ((*p)->symbols[(*p)->count].addr < last_addr) ) {
+ fprintf(stderr, "Symbol file not properly ordered: %llx %s < %llx!\n",
+ (*p)->symbols[(*p)->count].addr,
+ (*p)->symbols[(*p)->count].name,
+ last_addr);
+ exit(1);
+ } else
+ last_addr = (*p)->symbols[(*p)->count].addr;
+
+
+
+ (*p)->count++;
+
+ /* If this struct is full, point to the next. It will be allocated
+ if needed. */
+ if((*p)->count == SYMBOL_ENTRIES_PER_STRUCT) {
+ p=&((*p)->next);
+ }
+ }
+}
+
+/* WARNING not thread safe */
+char * find_symbol(unsigned long long addr) {
+ struct symbol_struct * p=G.symbols;
+ int i;
+ char * lastname="ZERO";
+ unsigned long long offset=addr;
+ static char name[128];
+
+ if(!p) {
+ name[0]=0;
+ return name;
+ }
+
+ while(1) {
+ if(!p)
+ goto finish;
+ for(i=0; i<p->count; i++) {
+ if(p->symbols[i].addr > addr)
+ goto finish;
+ else {
+ lastname=p->symbols[i].name;
+ offset=addr - p->symbols[i].addr;
+ }
+ }
+ p=p->next;
+ }
+ finish:
+ snprintf(name, 128, "(%s +%llx)",
+ lastname, offset);
+ return name;
+}
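+/* Illustrative use (values hypothetical): if the symbol file maps
+ * 0xffff8000 -> "do_domctl", then find_symbol(0xffff80a0) returns
+ * "(do_domctl +a0)".  The returned pointer refers to a static buffer,
+ * overwritten on the next call -- hence the thread-safety warning. */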
+
+/* -- Eip list data -- */
+enum {
+ EIP_LIST_TYPE_NONE=0,
+ EIP_LIST_TYPE_MAX
+};
+
+struct eip_list_struct {
+ struct eip_list_struct *next;
+ unsigned long long eip;
+ struct event_cycle_summary summary;
+ int type;
+ void * extra;
+};
+
+struct {
+ void (*update)(struct eip_list_struct *, void *);
+ void (*new)(struct eip_list_struct *, void *);
+ void (*dump)(struct eip_list_struct *);
+} eip_list_type[EIP_LIST_TYPE_MAX] = {
+ [EIP_LIST_TYPE_NONE] = {
+ .update=NULL,
+ .new=NULL,
+ .dump=NULL },
+};
+
+
+/* --- HVM class of events --- */
+
+/*
+ * -- Algorithms --
+ *
+ * Interrupt Wake-to-halt detection
+ *
+ * Purpose: To correlate device interrupts to vcpu runtime.
+ *
+ * Diagram:
+ * ...
+ * blocked -> runnable <- set to waking
+ * ...
+ * runnable -> running
+ * inj_virq A <- Note "waking" interrupt
+ * vmenter <- Start tsc of "wake-to-halt" interval.
+ Turn off 'waking'.
+ * ...
+ * inj_virq B <- Note alternate interrupt
+ * vmenter <- Start tsc of "interrupt-to-halt" interval
+ * ...
+ * vmexit <- End tsc of "x-to-halt" interval
+ * running -> blocked <- Process
+ *
+ * The "waking" interrupts we want to sub-classify into
+ * "wake-only" (when interrupt was the only interrupt from wake to halt) and
+ * "wake-all" (whether this was the only interrupt or not).
+ */
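+
+/* A sketch of how this diagram maps onto the cases defined later (see the
+ * GUEST_INTERRUPT_CASE_* enum and the w2h member of struct hvm_data): the
+ * "waking" vector is credited to "wake to halt any" and, if no other
+ * vector fired before the halt, also to "wake to halt alone"; vectors
+ * injected while already running are credited to "intr to halt". */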
+
+/* VMX data */
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+
+#define EXIT_REASON_PENDING_INTERRUPT 7
+
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MACHINE_CHECK 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+
+#define HVM_VMX_EXIT_REASON_MAX (EXIT_REASON_TPR_BELOW_THRESHOLD+1)
+
+char * hvm_vmx_exit_reason_name[HVM_VMX_EXIT_REASON_MAX] = {
+ [0] = "NONE",
+ [EXIT_REASON_EXCEPTION_NMI]="EXCEPTION_NMI",
+ [EXIT_REASON_EXTERNAL_INTERRUPT]="EXTERNAL_INTERRUPT",
+ [EXIT_REASON_PENDING_INTERRUPT]="PENDING_INTERRUPT",
+ [EXIT_REASON_TASK_SWITCH]="TASK_SWITCH",
+ [EXIT_REASON_CPUID]="CPUID",
+ [EXIT_REASON_HLT]="HLT",
+ [EXIT_REASON_INVLPG]="INVLPG",
+ [EXIT_REASON_RDPMC]="RDPMC",
+ [EXIT_REASON_RDTSC]="RDTSC",
+ [EXIT_REASON_VMCALL]="VMCALL",
+ [EXIT_REASON_VMCLEAR]="VMCLEAR",
+ [EXIT_REASON_VMLAUNCH]="VMLAUNCH",
+ [EXIT_REASON_VMPTRLD]="VMPTRLD",
+ [EXIT_REASON_VMPTRST]="VMPTRST",
+ [EXIT_REASON_VMREAD]="VMREAD",
+ [EXIT_REASON_VMRESUME]="VMRESUME",
+ [EXIT_REASON_VMWRITE]="VMWRITE",
+ [EXIT_REASON_VMOFF]="VMOFF",
+ [EXIT_REASON_VMON]="VMON",
+ [EXIT_REASON_CR_ACCESS]="CR_ACCESS",
+ [EXIT_REASON_DR_ACCESS]="DR_ACCESS",
+ [EXIT_REASON_IO_INSTRUCTION]="IO_INSTRUCTION",
+ [EXIT_REASON_MSR_READ]="MSR_READ",
+ [EXIT_REASON_MSR_WRITE]="MSR_WRITE",
+ [EXIT_REASON_MWAIT_INSTRUCTION]="MWAIT_INSTRUCTION",
+ [EXIT_REASON_MONITOR_INSTRUCTION]="MONITOR_INSTRUCTION",
+ [EXIT_REASON_PAUSE_INSTRUCTION]="PAUSE_INSTRUCTION",
+ [EXIT_REASON_MACHINE_CHECK]="MACHINE_CHECK",
+ [EXIT_REASON_TPR_BELOW_THRESHOLD]="TPR_BELOW_THRESHOLD",
+};
+
+/* SVM data */
+enum VMEXIT_EXITCODE
+{
+ /* control register read exitcodes */
+ VMEXIT_CR0_READ = 0,
+ VMEXIT_CR1_READ = 1,
+ VMEXIT_CR2_READ = 2,
+ VMEXIT_CR3_READ = 3,
+ VMEXIT_CR4_READ = 4,
+ VMEXIT_CR5_READ = 5,
+ VMEXIT_CR6_READ = 6,
+ VMEXIT_CR7_READ = 7,
+ VMEXIT_CR8_READ = 8,
+ VMEXIT_CR9_READ = 9,
+ VMEXIT_CR10_READ = 10,
+ VMEXIT_CR11_READ = 11,
+ VMEXIT_CR12_READ = 12,
+ VMEXIT_CR13_READ = 13,
+ VMEXIT_CR14_READ = 14,
+ VMEXIT_CR15_READ = 15,
+
+ /* control register write exitcodes */
+ VMEXIT_CR0_WRITE = 16,
+ VMEXIT_CR1_WRITE = 17,
+ VMEXIT_CR2_WRITE = 18,
+ VMEXIT_CR3_WRITE = 19,
+ VMEXIT_CR4_WRITE = 20,
+ VMEXIT_CR5_WRITE = 21,
+ VMEXIT_CR6_WRITE = 22,
+ VMEXIT_CR7_WRITE = 23,
+ VMEXIT_CR8_WRITE = 24,
+ VMEXIT_CR9_WRITE = 25,
+ VMEXIT_CR10_WRITE = 26,
+ VMEXIT_CR11_WRITE = 27,
+ VMEXIT_CR12_WRITE = 28,
+ VMEXIT_CR13_WRITE = 29,
+ VMEXIT_CR14_WRITE = 30,
+ VMEXIT_CR15_WRITE = 31,
+
+ /* debug register read exitcodes */
+ VMEXIT_DR0_READ = 32,
+ VMEXIT_DR1_READ = 33,
+ VMEXIT_DR2_READ = 34,
+ VMEXIT_DR3_READ = 35,
+ VMEXIT_DR4_READ = 36,
+ VMEXIT_DR5_READ = 37,
+ VMEXIT_DR6_READ = 38,
+ VMEXIT_DR7_READ = 39,
+ VMEXIT_DR8_READ = 40,
+ VMEXIT_DR9_READ = 41,
+ VMEXIT_DR10_READ = 42,
+ VMEXIT_DR11_READ = 43,
+ VMEXIT_DR12_READ = 44,
+ VMEXIT_DR13_READ = 45,
+ VMEXIT_DR14_READ = 46,
+ VMEXIT_DR15_READ = 47,
+
+ /* debug register write exitcodes */
+ VMEXIT_DR0_WRITE = 48,
+ VMEXIT_DR1_WRITE = 49,
+ VMEXIT_DR2_WRITE = 50,
+ VMEXIT_DR3_WRITE = 51,
+ VMEXIT_DR4_WRITE = 52,
+ VMEXIT_DR5_WRITE = 53,
+ VMEXIT_DR6_WRITE = 54,
+ VMEXIT_DR7_WRITE = 55,
+ VMEXIT_DR8_WRITE = 56,
+ VMEXIT_DR9_WRITE = 57,
+ VMEXIT_DR10_WRITE = 58,
+ VMEXIT_DR11_WRITE = 59,
+ VMEXIT_DR12_WRITE = 60,
+ VMEXIT_DR13_WRITE = 61,
+ VMEXIT_DR14_WRITE = 62,
+ VMEXIT_DR15_WRITE = 63,
+
+ /* processor exception exitcodes (VMEXIT_EXCP[0-31]) */
+ VMEXIT_EXCEPTION_DE = 64, /* divide-by-zero-error */
+ VMEXIT_EXCEPTION_DB = 65, /* debug */
+ VMEXIT_EXCEPTION_NMI = 66, /* non-maskable-interrupt */
+ VMEXIT_EXCEPTION_BP = 67, /* breakpoint */
+ VMEXIT_EXCEPTION_OF = 68, /* overflow */
+ VMEXIT_EXCEPTION_BR = 69, /* bound-range */
+ VMEXIT_EXCEPTION_UD = 70, /* invalid-opcode*/
+ VMEXIT_EXCEPTION_NM = 71, /* device-not-available */
+ VMEXIT_EXCEPTION_DF = 72, /* double-fault */
+ VMEXIT_EXCEPTION_09 = 73, /* unsupported (reserved) */
+ VMEXIT_EXCEPTION_TS = 74, /* invalid-tss */
+ VMEXIT_EXCEPTION_NP = 75, /* segment-not-present */
+ VMEXIT_EXCEPTION_SS = 76, /* stack */
+ VMEXIT_EXCEPTION_GP = 77, /* general-protection */
+ VMEXIT_EXCEPTION_PF = 78, /* page-fault */
+ VMEXIT_EXCEPTION_15 = 79, /* reserved */
+ VMEXIT_EXCEPTION_MF = 80, /* x87 floating-point exception-pending */
+ VMEXIT_EXCEPTION_AC = 81, /* alignment-check */
+ VMEXIT_EXCEPTION_MC = 82, /* machine-check */
+ VMEXIT_EXCEPTION_XF = 83, /* simd floating-point */
+
+ /* exceptions 20-31 (exitcodes 84-95) are reserved */
+
+ /* ...and the rest of the #VMEXITs */
+ VMEXIT_INTR = 96,
+ VMEXIT_NMI = 97,
+ VMEXIT_SMI = 98,
+ VMEXIT_INIT = 99,
+ VMEXIT_VINTR = 100,
+ VMEXIT_CR0_SEL_WRITE = 101,
+ VMEXIT_IDTR_READ = 102,
+ VMEXIT_GDTR_READ = 103,
+ VMEXIT_LDTR_READ = 104,
+ VMEXIT_TR_READ = 105,
+ VMEXIT_IDTR_WRITE = 106,
+ VMEXIT_GDTR_WRITE = 107,
+ VMEXIT_LDTR_WRITE = 108,
+ VMEXIT_TR_WRITE = 109,
+ VMEXIT_RDTSC = 110,
+ VMEXIT_RDPMC = 111,
+ VMEXIT_PUSHF = 112,
+ VMEXIT_POPF = 113,
+ VMEXIT_CPUID = 114,
+ VMEXIT_RSM = 115,
+ VMEXIT_IRET = 116,
+ VMEXIT_SWINT = 117,
+ VMEXIT_INVD = 118,
+ VMEXIT_PAUSE = 119,
+ VMEXIT_HLT = 120,
+ VMEXIT_INVLPG = 121,
+ VMEXIT_INVLPGA = 122,
+ VMEXIT_IOIO = 123,
+ VMEXIT_MSR = 124,
+ VMEXIT_TASK_SWITCH = 125,
+ VMEXIT_FERR_FREEZE = 126,
+ VMEXIT_SHUTDOWN = 127,
+ VMEXIT_VMRUN = 128,
+ VMEXIT_VMMCALL = 129,
+ VMEXIT_VMLOAD = 130,
+ VMEXIT_VMSAVE = 131,
+ VMEXIT_STGI = 132,
+ VMEXIT_CLGI = 133,
+ VMEXIT_SKINIT = 134,
+ VMEXIT_RDTSCP = 135,
+ VMEXIT_ICEBP = 136,
+ VMEXIT_WBINVD = 137,
+ VMEXIT_MONITOR = 138,
+ VMEXIT_MWAIT = 139,
+ VMEXIT_MWAIT_CONDITIONAL= 140,
+ VMEXIT_NPF = 1024, /* nested paging fault */
+ VMEXIT_INVALID = -1
+};
+
+#define HVM_SVM_EXIT_REASON_MAX (VMEXIT_NPF+1)
+char * hvm_svm_exit_reason_name[HVM_SVM_EXIT_REASON_MAX] = {
+ /* 0-15 */
+ "VMEXIT_CR0_READ",
+ "VMEXIT_CR1_READ",
+ "VMEXIT_CR2_READ",
+ "VMEXIT_CR3_READ",
+ "VMEXIT_CR4_READ",
+ "VMEXIT_CR5_READ",
+ "VMEXIT_CR6_READ",
+ "VMEXIT_CR7_READ",
+ "VMEXIT_CR8_READ",
+ "VMEXIT_CR9_READ",
+ "VMEXIT_CR10_READ",
+ "VMEXIT_CR11_READ",
+ "VMEXIT_CR12_READ",
+ "VMEXIT_CR13_READ",
+ "VMEXIT_CR14_READ",
+ "VMEXIT_CR15_READ",
+ /* 16-31 */
+ "VMEXIT_CR0_WRITE",
+ "VMEXIT_CR1_WRITE",
+ "VMEXIT_CR2_WRITE",
+ "VMEXIT_CR3_WRITE",
+ "VMEXIT_CR4_WRITE",
+ "VMEXIT_CR5_WRITE",
+ "VMEXIT_CR6_WRITE",
+ "VMEXIT_CR7_WRITE",
+ "VMEXIT_CR8_WRITE",
+ "VMEXIT_CR9_WRITE",
+ "VMEXIT_CR10_WRITE",
+ "VMEXIT_CR11_WRITE",
+ "VMEXIT_CR12_WRITE",
+ "VMEXIT_CR13_WRITE",
+ "VMEXIT_CR14_WRITE",
+ "VMEXIT_CR15_WRITE",
+ /* 32-47 */
+ "VMEXIT_DR0_READ",
+ "VMEXIT_DR1_READ",
+ "VMEXIT_DR2_READ",
+ "VMEXIT_DR3_READ",
+ "VMEXIT_DR4_READ",
+ "VMEXIT_DR5_READ",
+ "VMEXIT_DR6_READ",
+ "VMEXIT_DR7_READ",
+ "VMEXIT_DR8_READ",
+ "VMEXIT_DR9_READ",
+ "VMEXIT_DR10_READ",
+ "VMEXIT_DR11_READ",
+ "VMEXIT_DR12_READ",
+ "VMEXIT_DR13_READ",
+ "VMEXIT_DR14_READ",
+ "VMEXIT_DR15_READ",
+ /* 48-63 */
+ "VMEXIT_DR0_WRITE",
+ "VMEXIT_DR1_WRITE",
+ "VMEXIT_DR2_WRITE",
+ "VMEXIT_DR3_WRITE",
+ "VMEXIT_DR4_WRITE",
+ "VMEXIT_DR5_WRITE",
+ "VMEXIT_DR6_WRITE",
+ "VMEXIT_DR7_WRITE",
+ "VMEXIT_DR8_WRITE",
+ "VMEXIT_DR9_WRITE",
+ "VMEXIT_DR10_WRITE",
+ "VMEXIT_DR11_WRITE",
+ "VMEXIT_DR12_WRITE",
+ "VMEXIT_DR13_WRITE",
+ "VMEXIT_DR14_WRITE",
+ "VMEXIT_DR15_WRITE",
+ /* 64-83 */
+ "VMEXIT_EXCEPTION_DE",
+ "VMEXIT_EXCEPTION_DB",
+ "VMEXIT_EXCEPTION_NMI",
+ "VMEXIT_EXCEPTION_BP",
+ "VMEXIT_EXCEPTION_OF",
+ "VMEXIT_EXCEPTION_BR",
+ "VMEXIT_EXCEPTION_UD",
+ "VMEXIT_EXCEPTION_NM",
+ "VMEXIT_EXCEPTION_DF",
+ "VMEXIT_EXCEPTION_09",
+ "VMEXIT_EXCEPTION_TS",
+ "VMEXIT_EXCEPTION_NP",
+ "VMEXIT_EXCEPTION_SS",
+ "VMEXIT_EXCEPTION_GP",
+ "VMEXIT_EXCEPTION_PF",
+ "VMEXIT_EXCEPTION_15",
+ "VMEXIT_EXCEPTION_MF",
+ "VMEXIT_EXCEPTION_AC",
+ "VMEXIT_EXCEPTION_MC",
+ "VMEXIT_EXCEPTION_XF",
+ /* 84-95 */
+ "VMEXIT_EXCEPTION_20",
+ "VMEXIT_EXCEPTION_21",
+ "VMEXIT_EXCEPTION_22",
+ "VMEXIT_EXCEPTION_23",
+ "VMEXIT_EXCEPTION_24",
+ "VMEXIT_EXCEPTION_25",
+ "VMEXIT_EXCEPTION_26",
+ "VMEXIT_EXCEPTION_27",
+ "VMEXIT_EXCEPTION_28",
+ "VMEXIT_EXCEPTION_29",
+ "VMEXIT_EXCEPTION_30",
+ "VMEXIT_EXCEPTION_31",
+ /* 96-99 */
+ "VMEXIT_INTR",
+ "VMEXIT_NMI",
+ "VMEXIT_SMI",
+ "VMEXIT_INIT",
+ /* 100-109 */
+ "VMEXIT_VINTR",
+ "VMEXIT_CR0_SEL_WRITE",
+ "VMEXIT_IDTR_READ",
+ "VMEXIT_GDTR_READ",
+ "VMEXIT_LDTR_READ",
+ "VMEXIT_TR_READ",
+ "VMEXIT_IDTR_WRITE",
+ "VMEXIT_GDTR_WRITE",
+ "VMEXIT_LDTR_WRITE",
+ "VMEXIT_TR_WRITE",
+ /* 110-119 */
+ "VMEXIT_RDTSC",
+ "VMEXIT_RDPMC",
+ "VMEXIT_PUSHF",
+ "VMEXIT_POPF",
+ "VMEXIT_CPUID",
+ "VMEXIT_RSM",
+ "VMEXIT_IRET",
+ "VMEXIT_SWINT",
+ "VMEXIT_INVD",
+ "VMEXIT_PAUSE",
+ /* 120-129 */
+ "VMEXIT_HLT",
+ "VMEXIT_INVLPG",
+ "VMEXIT_INVLPGA",
+ "VMEXIT_IOIO",
+ "VMEXIT_MSR",
+ "VMEXIT_TASK_SWITCH",
+ "VMEXIT_FERR_FREEZE",
+ "VMEXIT_SHUTDOWN",
+ "VMEXIT_VMRUN",
+ "VMEXIT_VMMCALL",
+ /* 130-139 */
+ "VMEXIT_VMLOAD",
+ "VMEXIT_VMSAVE",
+ "VMEXIT_STGI",
+ "VMEXIT_CLGI",
+ "VMEXIT_SKINIT",
+ "VMEXIT_RDTSCP",
+ "VMEXIT_ICEBP",
+ "VMEXIT_WBINVD",
+ "VMEXIT_MONITOR",
+ "VMEXIT_MWAIT",
+ /* 140 */
+ "VMEXIT_MWAIT_CONDITIONAL",
+ [VMEXIT_NPF] = "VMEXIT_NPF", /* nested paging fault */
+};
+
+
+#if ( HVM_VMX_EXIT_REASON_MAX > HVM_SVM_EXIT_REASON_MAX )
+# define HVM_EXIT_REASON_MAX HVM_VMX_EXIT_REASON_MAX
+# error "HVM_VMX_EXIT_REASON_MAX unexpectedly exceeds HVM_SVM_EXIT_REASON_MAX"
+#else
+# define HVM_EXIT_REASON_MAX HVM_SVM_EXIT_REASON_MAX
+#endif
+
+/* General hvm information */
+#define SPURIOUS_APIC_VECTOR 0xff
+#define ERROR_APIC_VECTOR 0xfe
+#define INVALIDATE_TLB_VECTOR 0xfd
+#define EVENT_CHECK_VECTOR 0xfc
+#define CALL_FUNCTION_VECTOR 0xfb
+#define THERMAL_APIC_VECTOR 0xfa
+#define LOCAL_TIMER_VECTOR 0xf9
+
+#define EXTERNAL_INTERRUPT_MAX 256
+
+/* Stringify numbers */
+char * hvm_extint_vector_name[EXTERNAL_INTERRUPT_MAX] = {
+    [SPURIOUS_APIC_VECTOR] = "SPURIOUS_APIC",
+ [ERROR_APIC_VECTOR] = "ERROR_APIC",
+ [INVALIDATE_TLB_VECTOR]= "INVALIDATE_TLB",
+ [EVENT_CHECK_VECTOR]= "EVENT_CHECK",
+ [CALL_FUNCTION_VECTOR]= "CALL_FUNCTION",
+ [THERMAL_APIC_VECTOR]= "THERMAL_APIC",
+ [LOCAL_TIMER_VECTOR] = "LOCAL_TIMER",
+};
+
+#define HVM_TRAP_MAX 20
+
+char * hvm_trap_name[HVM_TRAP_MAX] = {
+ [0] = "Divide",
+ [1] = "RESERVED",
+ [2] = "NMI",
+ [3] = "Breakpoint",
+ [4] = "Overflow",
+ [5] = "BOUND",
+ [6] = "Invalid Op",
+ [7] = "Coprocessor not present",
+ [8] = "Double Fault",
+ [9] = "Coprocessor segment overrun",
+ [10] = "TSS",
+ [11] = "Segment not present",
+ [12] = "Stack-segment fault",
+ [13] = "GP",
+ [14] = "Page fault",
+ [15] = "RESERVED",
+ [16] = "FPU",
+ [17] = "Alignment check",
+ [18] = "Machine check",
+ [19] = "SIMD",
+};
+
+
+enum {
+ HVM_EVENT_HANDLER_PF_XEN = 1,
+ HVM_EVENT_HANDLER_PF_INJECT,
+ HVM_EVENT_HANDLER_INJ_EXC,
+ HVM_EVENT_HANDLER_INJ_VIRQ,
+ HVM_EVENT_HANDLER_REINJ_VIRQ,
+ HVM_EVENT_HANDLER_IO_READ,
+ HVM_EVENT_HANDLER_IO_WRITE,
+ HVM_EVENT_HANDLER_CR_READ, /* 8 */
+ HVM_EVENT_HANDLER_CR_WRITE,
+ HVM_EVENT_HANDLER_DR_READ,
+ HVM_EVENT_HANDLER_DR_WRITE,
+ HVM_EVENT_HANDLER_MSR_READ,
+ HVM_EVENT_HANDLER_MSR_WRITE,
+ HVM_EVENT_HANDLER_CPUID,
+ HVM_EVENT_HANDLER_INTR,
+ HVM_EVENT_HANDLER_NMI, /* 16 */
+ HVM_EVENT_HANDLER_SMI,
+ HVM_EVENT_HANDLER_VMCALL,
+ HVM_EVENT_HANDLER_HLT,
+ HVM_EVENT_HANDLER_INVLPG,
+ HVM_EVENT_HANDLER_MCE,
+ HVM_EVENT_HANDLER_IO_ASSIST,
+ HVM_EVENT_HANDLER_MMIO_ASSIST,
+ HVM_EVENT_HANDLER_CLTS,
+ HVM_EVENT_HANDLER_LMSW,
+ HVM_EVENT_HANDLER_MAX
+};
+char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
+ NULL,
+ "pf_xen",
+ "pf_inject",
+ "inj_exc",
+ "inj_virq",
+ "reinj_virq",
+ "io_read",
+ "io_write",
+ "cr_read", /* 8 */
+ "cr_write",
+ "dr_read",
+ "dr_write",
+ "msr_read",
+ "msr_write",
+ "cpuid",
+ "intr",
+ "nmi", /* 16 */
+ "smi",
+ "vmcall",
+ "hlt",
+ "invlpg",
+ "mce",
+ "io_assist",
+ "mmio_assist",
+ "clts",
+ "lmsw",
+};
+
+enum {
+ HVM_VOL_VMENTRY,
+ HVM_VOL_VMEXIT,
+ HVM_VOL_HANDLER,
+ HVM_VOL_MAX
+};
+
+enum {
+ GUEST_INTERRUPT_CASE_NONE,
+ /* This interrupt woke, no other interrupts until halt */
+ GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE,
+ /* This interrupt woke, maybe another interrupt before halt */
+ GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY,
+ /* Time from interrupt (running) to halt */
+ GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT,
+ GUEST_INTERRUPT_CASE_MAX,
+};
+
+char *guest_interrupt_case_name[] = {
+ [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE]="wake to halt alone",
+ /* This interrupt woke, maybe another interrupt before halt */
+ [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY] ="wake to halt any ",
+ /* Time from interrupt (running) to halt */
+ [GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT] ="intr to halt ",
+};
+
+char *hvm_vol_name[HVM_VOL_MAX] = {
+ [HVM_VOL_VMENTRY]="vmentry",
+ [HVM_VOL_VMEXIT] ="vmexit",
+ [HVM_VOL_HANDLER]="handler",
+};
+
+enum {
+ HYPERCALL_set_trap_table = 0,
+ HYPERCALL_mmu_update,
+ HYPERCALL_set_gdt,
+ HYPERCALL_stack_switch,
+ HYPERCALL_set_callbacks,
+ HYPERCALL_fpu_taskswitch,
+ HYPERCALL_sched_op_compat,
+ HYPERCALL_platform_op,
+ HYPERCALL_set_debugreg,
+ HYPERCALL_get_debugreg,
+ HYPERCALL_update_descriptor,
+ HYPERCALL_memory_op=12,
+ HYPERCALL_multicall,
+ HYPERCALL_update_va_mapping,
+ HYPERCALL_set_timer_op,
+ HYPERCALL_event_channel_op_compat,
+ HYPERCALL_xen_version,
+ HYPERCALL_console_io,
+ HYPERCALL_physdev_op_compat,
+ HYPERCALL_grant_table_op,
+ HYPERCALL_vm_assist,
+ HYPERCALL_update_va_mapping_otherdomain,
+ HYPERCALL_iret,
+ HYPERCALL_vcpu_op,
+ HYPERCALL_set_segment_base,
+ HYPERCALL_mmuext_op,
+ HYPERCALL_acm_op,
+ HYPERCALL_nmi_op,
+ HYPERCALL_sched_op,
+ HYPERCALL_callback_op,
+ HYPERCALL_xenoprof_op,
+ HYPERCALL_event_channel_op,
+ HYPERCALL_physdev_op,
+ HYPERCALL_hvm_op,
+ HYPERCALL_sysctl,
+ HYPERCALL_domctl,
+ HYPERCALL_kexec_op,
+ HYPERCALL_MAX
+};
+
+char *hypercall_name[HYPERCALL_MAX] = {
+ [HYPERCALL_set_trap_table]="set_trap_table",
+ [HYPERCALL_mmu_update]="mmu_update",
+ [HYPERCALL_set_gdt]="set_gdt",
+ [HYPERCALL_stack_switch]="stack_switch",
+ [HYPERCALL_set_callbacks]="set_callbacks",
+ [HYPERCALL_fpu_taskswitch]="fpu_taskswitch",
+ [HYPERCALL_sched_op_compat]="sched_op(compat)",
+ [HYPERCALL_platform_op]="platform_op",
+ [HYPERCALL_set_debugreg]="set_debugreg",
+ [HYPERCALL_get_debugreg]="get_debugreg",
+ [HYPERCALL_update_descriptor]="update_descriptor",
+ [HYPERCALL_memory_op]="memory_op",
+ [HYPERCALL_multicall]="multicall",
+ [HYPERCALL_update_va_mapping]="update_va_mapping",
+ [HYPERCALL_set_timer_op]="set_timer_op",
+ [HYPERCALL_event_channel_op_compat]="evtchn_op(compat)",
+ [HYPERCALL_xen_version]="xen_version",
+ [HYPERCALL_console_io]="console_io",
+ [HYPERCALL_physdev_op_compat]="physdev_op(compat)",
+ [HYPERCALL_grant_table_op]="grant_table_op",
+ [HYPERCALL_vm_assist]="vm_assist",
+ [HYPERCALL_update_va_mapping_otherdomain]="update_va_mapping_otherdomain",
+ [HYPERCALL_iret]="iret",
+ [HYPERCALL_vcpu_op]="vcpu_op",
+ [HYPERCALL_set_segment_base]="set_segment_base",
+ [HYPERCALL_mmuext_op]="mmuext_op",
+ [HYPERCALL_acm_op]="acm_op",
+ [HYPERCALL_nmi_op]="nmi_op",
+ [HYPERCALL_sched_op]="sched_op",
+ [HYPERCALL_callback_op]="callback_op",
+ [HYPERCALL_xenoprof_op]="xenoprof_op",
+ [HYPERCALL_event_channel_op]="evtchn_op",
+ [HYPERCALL_physdev_op]="physdev_op",
+ [HYPERCALL_hvm_op]="hvm_op",
+ [HYPERCALL_sysctl]="sysctl",
+ [HYPERCALL_domctl]="domctl",
+ [HYPERCALL_kexec_op]="kexec_op"
+};
+
+enum {
+ PF_XEN_EMUL_LVL_0,
+ PF_XEN_EMUL_LVL_1,
+ PF_XEN_EMUL_LVL_2,
+ PF_XEN_EMUL_LVL_3,
+ PF_XEN_EMUL_LVL_4,
+ PF_XEN_EMUL_EARLY_UNSHADOW,
+ PF_XEN_EMUL_SET_CHANGED,
+ PF_XEN_EMUL_SET_UNCHANGED,
+ PF_XEN_EMUL_SET_FLUSH,
+ PF_XEN_EMUL_SET_ERROR,
+ PF_XEN_EMUL_PROMOTE,
+ PF_XEN_EMUL_DEMOTE,
+ PF_XEN_EMUL_PREALLOC_UNPIN,
+ PF_XEN_EMUL_PREALLOC_UNHOOK,
+ PF_XEN_EMUL_MAX,
+};
+
+char * pf_xen_emul_name[PF_XEN_EMUL_MAX] = {
+ [PF_XEN_EMUL_LVL_0]="non-linmap",
+ [PF_XEN_EMUL_LVL_1]="linmap l1",
+ [PF_XEN_EMUL_LVL_2]="linmap l2",
+ [PF_XEN_EMUL_LVL_3]="linmap l3",
+ [PF_XEN_EMUL_LVL_4]="linmap l4",
+ [PF_XEN_EMUL_EARLY_UNSHADOW]="early unshadow",
+ [PF_XEN_EMUL_SET_UNCHANGED]="set unchanged",
+ [PF_XEN_EMUL_SET_CHANGED]="set changed",
+    [PF_XEN_EMUL_SET_FLUSH]="set flush",
+    [PF_XEN_EMUL_SET_ERROR]="set error",
+ [PF_XEN_EMUL_PROMOTE]="promote",
+ [PF_XEN_EMUL_DEMOTE]="demote",
+ [PF_XEN_EMUL_PREALLOC_UNPIN]="unpin",
+ [PF_XEN_EMUL_PREALLOC_UNHOOK]="unhook",
+};
+
+/* Rio only */
+enum {
+ PF_XEN_NON_EMUL_VA_USER,
+ PF_XEN_NON_EMUL_VA_KERNEL,
+ PF_XEN_NON_EMUL_EIP_USER,
+ PF_XEN_NON_EMUL_EIP_KERNEL,
+ PF_XEN_NON_EMUL_MAX,
+};
+
+char * pf_xen_non_emul_name[PF_XEN_NON_EMUL_MAX] = {
+ [PF_XEN_NON_EMUL_VA_USER]="va user",
+ [PF_XEN_NON_EMUL_VA_KERNEL]="va kernel",
+ [PF_XEN_NON_EMUL_EIP_USER]="eip user",
+ [PF_XEN_NON_EMUL_EIP_KERNEL]="eip kernel",
+};
+
+enum {
+ PF_XEN_FIXUP_PREALLOC_UNPIN,
+ PF_XEN_FIXUP_PREALLOC_UNHOOK,
+ PF_XEN_FIXUP_UNSYNC,
+ PF_XEN_FIXUP_OOS_ADD,
+ PF_XEN_FIXUP_OOS_EVICT,
+ PF_XEN_FIXUP_PROMOTE,
+ PF_XEN_FIXUP_UPDATE_ONLY,
+ PF_XEN_FIXUP_WRMAP,
+ PF_XEN_FIXUP_BRUTE_FORCE,
+ PF_XEN_FIXUP_MAX,
+};
+
+char * pf_xen_fixup_name[PF_XEN_FIXUP_MAX] = {
+ [PF_XEN_FIXUP_PREALLOC_UNPIN] = "unpin",
+ [PF_XEN_FIXUP_PREALLOC_UNHOOK] = "unhook",
+ [PF_XEN_FIXUP_UNSYNC] = "unsync",
+ [PF_XEN_FIXUP_OOS_ADD] = "oos-add",
+ [PF_XEN_FIXUP_OOS_EVICT] = "oos-evict",
+ [PF_XEN_FIXUP_PROMOTE] = "promote",
+ [PF_XEN_FIXUP_UPDATE_ONLY] = "update",
+ [PF_XEN_FIXUP_WRMAP] = "wrmap",
+ [PF_XEN_FIXUP_BRUTE_FORCE] = "wrmap-bf",
+};
+
+enum {
+ PF_XEN_NOT_SHADOW = 1,
+ PF_XEN_FAST_PROPAGATE,
+ PF_XEN_FAST_MMIO,
+ PF_XEN_FALSE_FAST_PATH,
+ PF_XEN_MMIO,
+ PF_XEN_FIXUP,
+ PF_XEN_DOMF_DYING,
+ PF_XEN_EMULATE,
+ PF_XEN_EMULATE_UNSHADOW_USER,
+ PF_XEN_EMULATE_UNSHADOW_EVTINJ,
+ PF_XEN_EMULATE_UNSHADOW_UNHANDLED,
+ PF_XEN_EMULATE_UNSHADOW_HEURISTIC,
+ PF_XEN_LAST_FAULT=PF_XEN_EMULATE_UNSHADOW_HEURISTIC,
+ PF_XEN_NON_EMULATE,
+ PF_XEN_NO_HANDLER,
+ PF_XEN_MAX,
+};
+
+#define SHADOW_PREALLOC_UNPIN 13
+#define SHADOW_RESYNC_FULL 24
+#define SHADOW_RESYNC_ONLY 25
+
+char * pf_xen_name[PF_XEN_MAX] = {
+ [PF_XEN_NOT_SHADOW]="propagate",
+ [PF_XEN_FAST_PROPAGATE]="fast propagate",
+ [PF_XEN_FAST_MMIO]="fast mmio",
+ [PF_XEN_FALSE_FAST_PATH]="false fast path",
+ [PF_XEN_MMIO]="mmio",
+ [PF_XEN_FIXUP]="fixup",
+ [PF_XEN_DOMF_DYING]="dom dying",
+ [PF_XEN_EMULATE]="emulate",
+ [PF_XEN_EMULATE_UNSHADOW_USER]="unshadow:user-mode",
+ [PF_XEN_EMULATE_UNSHADOW_EVTINJ]="unshadow:evt inj",
+    [PF_XEN_EMULATE_UNSHADOW_UNHANDLED]="unshadow:unhandled instr",
+    [PF_XEN_EMULATE_UNSHADOW_HEURISTIC]="unshadow:heuristic",
+ [PF_XEN_NON_EMULATE]="fixup|mmio",
+ [PF_XEN_NO_HANDLER]="(no handler)",
+};
+
+#define CORR_VA_INVALID (0ULL-1)
+
+struct pf_xen_extra {
+ unsigned long long va;
+ union {
+ unsigned flags;
+ struct {
+ unsigned flag_set_ad:1,
+ flag_set_a:1,
+ flag_shadow_l1_get_ref:1,
+ flag_shadow_l1_put_ref:1,
+ flag_l2_propagate:1,
+ flag_set_changed:1,
+ flag_set_flush:1,
+ flag_set_error:1,
+ flag_demote:1,
+ flag_promote:1,
+ flag_wrmap:1,
+ flag_wrmap_guess_found:1,
+ flag_wrmap_brute_force:1,
+ flag_early_unshadow:1,
+ flag_emulation_2nd_pt_written:1,
+ flag_emulation_last_failed:1,
+ flag_emulate_full_pt:1,
+ flag_prealloc_unhook:1,
+ flag_unsync:1,
+ flag_oos_fixup_add:1,
+ flag_oos_fixup_evict:1;
+ };
+ }; /* Miami + ; fixup & emulate */
+ unsigned long error_code; /* Rio only */
+
+ /* Calculated */
+ int pf_case; /* Rio */
+
+ /* MMIO only */
+ unsigned long long gpa;
+ unsigned long data;
+
+ /* Emulate only */
+ unsigned long long gl1e; /* Miami + */
+ unsigned long long wval; /* Miami */
+ unsigned long long corresponding_va;
+ unsigned int pt_index[5], pt_is_lo;
+ int pt_level;
+
+ /* Other */
+ unsigned long long gfn;
+
+ /* Flags */
+ unsigned corr_valid:1,
+ corr_is_kernel:1,
+ va_is_kernel:1,
+ mmio_data_valid:1,
+ mmio_is_write:1;
+};
+
+struct pcpu_info;
+
+#define GUEST_INTERRUPT_MAX 350
+#define CR_MAX 9
+#define RESYNCS_MAX 17
+#define PF_XEN_FIXUP_UNSYNC_RESYNC_MAX 2
+
+struct hvm_data {
+ /* Summary information */
+ int init;
+ int vmexit_valid;
+ int summary_info;
+ struct vcpu_data *v; /* up-pointer */
+
+ /* SVM / VMX compatibility. FIXME - should be global */
+ char ** exit_reason_name;
+ int exit_reason_max;
+ void (* exit_reason_summary_handler[HVM_EXIT_REASON_MAX])(struct hvm_data *);
+
+ /* Information about particular exit reasons */
+ struct {
+ struct event_cycle_summary exit_reason[HVM_EXIT_REASON_MAX];
+ int extint[EXTERNAL_INTERRUPT_MAX+1];
+ int *extint_histogram;
+ struct event_cycle_summary trap[HVM_TRAP_MAX];
+ struct event_cycle_summary pf_xen[PF_XEN_MAX];
+ struct event_cycle_summary pf_xen_emul[PF_XEN_EMUL_MAX];
+ struct event_cycle_summary pf_xen_emul_early_unshadow[5];
+ struct event_cycle_summary pf_xen_non_emul[PF_XEN_NON_EMUL_MAX];
+ struct event_cycle_summary pf_xen_fixup[PF_XEN_FIXUP_MAX];
+ struct event_cycle_summary pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX+1];
+ struct event_cycle_summary cr_write[CR_MAX];
+ struct event_cycle_summary cr3_write_resyncs[RESYNCS_MAX+1];
+ struct hvm_gi_struct {
+ int count;
+ struct cycle_summary runtime[GUEST_INTERRUPT_CASE_MAX];
+ /* OK, not summary info, but still... */
+ int is_wake;
+ tsc_t start_tsc;
+ } guest_interrupt[GUEST_INTERRUPT_MAX + 1];
+ /* IPI Latency */
+ struct event_cycle_summary ipi_latency;
+ int ipi_count[256];
+ struct io_address *mmio, *pio;
+ } summary;
+
+ /* In-flight accumulation information */
+ union {
+ struct {
+ unsigned port:16,size:16;
+ unsigned long val;
+ } io;
+ struct pf_xen_extra pf_xen;
+ struct {
+ unsigned cr;
+ unsigned long long val;
+ int repromote;
+ } cr_write;
+ struct {
+ unsigned addr;
+ unsigned long long val;
+ } msr;
+ struct {
+ unsigned long event;
+ uint32_t d[4];
+ } generic;
+ } inflight;
+ int resyncs;
+ void (*post_process)(struct hvm_data *);
+ tsc_t exit_tsc, arc_cycles, entry_tsc;
+ unsigned long long rip;
+ unsigned exit_reason, event_handler;
+ char dump_header[256];
+    unsigned short_summary_done:1, prealloc_unpin:1;
+
+ /* Immediate processing */
+ void *d;
+
+ /* Wake-to-halt detection. See comment above. */
+ struct {
+ unsigned waking:1;
+ /* Wake vector: keep track of time from vmentry until:
+ next halt, or next interrupt */
+ int vector, interrupts, interrupts_wanting_tsc;
+ } w2h;
+};
+
+enum {
+ HVM_SHORT_SUMMARY_EMULATE,
+ HVM_SHORT_SUMMARY_UNSYNC,
+ HVM_SHORT_SUMMARY_FIXUP,
+ HVM_SHORT_SUMMARY_MMIO,
+ HVM_SHORT_SUMMARY_PROPAGATE,
+ HVM_SHORT_SUMMARY_CR3,
+ HVM_SHORT_SUMMARY_VMCALL,
+ HVM_SHORT_SUMMARY_INTERRUPT,
+ HVM_SHORT_SUMMARY_HLT,
+ HVM_SHORT_SUMMARY_OTHER,
+ HVM_SHORT_SUMMARY_MAX,
+};
+
+char *hvm_short_summary_name[HVM_SHORT_SUMMARY_MAX] = {
+ [HVM_SHORT_SUMMARY_EMULATE] ="emulate",
+ [HVM_SHORT_SUMMARY_UNSYNC] ="unsync",
+ [HVM_SHORT_SUMMARY_FIXUP] ="fixup",
+ [HVM_SHORT_SUMMARY_MMIO] ="mmio",
+ [HVM_SHORT_SUMMARY_PROPAGATE]="propagate",
+ [HVM_SHORT_SUMMARY_CR3] ="cr3",
+ [HVM_SHORT_SUMMARY_VMCALL] ="vmcall",
+ [HVM_SHORT_SUMMARY_INTERRUPT]="intr",
+ [HVM_SHORT_SUMMARY_HLT] ="hlt",
+ [HVM_SHORT_SUMMARY_OTHER] ="other",
+};
+
+struct hvm_short_summary_struct {
+ struct cycle_summary s[HVM_SHORT_SUMMARY_MAX];
+};
+
+void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) {
+ int i;
+
+ if(h->init)
+ return;
+
+ h->v = v;
+
+ h->init = 1;
+
+ if(opt.svm_mode) {
+ h->exit_reason_max = HVM_SVM_EXIT_REASON_MAX;
+ h->exit_reason_name = hvm_svm_exit_reason_name;
+ } else {
+ h->exit_reason_max = HVM_VMX_EXIT_REASON_MAX;
+ h->exit_reason_name = hvm_vmx_exit_reason_name;
+ }
+
+ if(opt.histogram_interrupt_eip) {
+ int count = ((1ULL<<ADDR_SPACE_BITS)/opt.histogram_interrupt_increment);
+ size_t size = count * sizeof(int);
+ h->summary.extint_histogram = malloc(size);
+ if(h->summary.extint_histogram)
+ bzero(h->summary.extint_histogram, size);
+ else {
+ fprintf(stderr, "FATAL: Could not allocate %d bytes for interrupt histogram!\n",
+ size);
+ exit(1);
+ }
+
+ }
+ for(i=0; i<GUEST_INTERRUPT_MAX+1; i++)
+ h->summary.guest_interrupt[i].count=0;
+}
+
+/* PV data */
+enum {
+ PV_HYPERCALL=1,
+ PV_TRAP=3,
+ PV_PAGE_FAULT,
+ PV_FORCED_INVALID_OP,
+ PV_EMULATE_PRIVOP,
+ PV_EMULATE_4GB,
+ PV_MATH_STATE_RESTORE,
+ PV_PAGING_FIXUP,
+ PV_GDT_LDT_MAPPING_FAULT,
+ PV_PTWR_EMULATION,
+ PV_PTWR_EMULATION_PAE,
+ PV_MAX
+};
+
+char *pv_name[PV_MAX] = {
+ [PV_HYPERCALL]="hypercall",
+ [PV_TRAP]="trap",
+ [PV_PAGE_FAULT]="page_fault",
+ [PV_FORCED_INVALID_OP]="forced_invalid_op",
+ [PV_EMULATE_PRIVOP]="emulate privop",
+ [PV_EMULATE_4GB]="emulate 4g",
+ [PV_MATH_STATE_RESTORE]="math state restore",
+ [PV_PAGING_FIXUP]="paging fixup",
+ [PV_GDT_LDT_MAPPING_FAULT]="gdt/ldt mapping fault",
+    [PV_PTWR_EMULATION]="writable pt emulation",
+    [PV_PTWR_EMULATION_PAE]="writable pt emulation (pae)",
+};
+
+#define PV_HYPERCALL_MAX 56
+#define PV_TRAP_MAX 20
+
+struct pv_data {
+ unsigned summary_info:1;
+ int count[PV_MAX];
+ int hypercall_count[PV_HYPERCALL_MAX];
+ int trap_count[PV_TRAP_MAX];
+};
+
+/* Sched data */
+
+enum {
+ SCHED_DOM_ADD=1,
+ SCHED_DOM_REM,
+ SCHED_SLEEP,
+ SCHED_WAKE,
+ SCHED_YIELD,
+ SCHED_BLOCK,
+ SCHED_SHUTDOWN,
+ SCHED_CTL,
+ SCHED_ADJDOM,
+ SCHED_SWITCH,
+ SCHED_S_TIMER_FN,
+ SCHED_T_TIMER_FN,
+ SCHED_DOM_TIMER_FN,
+ SCHED_SWITCH_INFPREV,
+ SCHED_SWITCH_INFNEXT,
+ SCHED_SHUTDOWN_CODE,
+ SCHED_MAX
+};
+
+enum {
+ RUNSTATE_RUNNING=0,
+ RUNSTATE_RUNNABLE,
+ RUNSTATE_BLOCKED,
+ RUNSTATE_OFFLINE,
+ RUNSTATE_LOST,
+ RUNSTATE_QUEUED,
+ RUNSTATE_INIT,
+ RUNSTATE_MAX
+};
+
+char * runstate_name[RUNSTATE_MAX]={
+ [RUNSTATE_RUNNING]= "running",
+ [RUNSTATE_RUNNABLE]="runnable",
+ [RUNSTATE_BLOCKED]= "blocked", /* to be blocked */
+ [RUNSTATE_OFFLINE]= "offline",
+ [RUNSTATE_QUEUED]= "queued",
+ [RUNSTATE_INIT]= "init",
+ [RUNSTATE_LOST]= "lost",
+};
+
+/* Memory data */
+enum {
+ MEM_PAGE_GRANT_MAP = 1,
+ MEM_PAGE_GRANT_UNMAP,
+ MEM_PAGE_GRANT_TRANSFER,
+ MEM_MAX
+};
+
+char *mem_name[MEM_MAX] = {
+ [MEM_PAGE_GRANT_MAP] = "grant-map",
+ [MEM_PAGE_GRANT_UNMAP] = "grant-unmap",
+ [MEM_PAGE_GRANT_TRANSFER] = "grant-transfer",
+};
+
+/* Per-unit information. */
+
+struct cr3_value_struct {
+ struct cr3_value_struct * next;
+ struct cr3_value_struct * gnext;
+ unsigned long long gmfn;
+ int cr3_id;
+ unsigned long long first_time, last_time, run_time;
+ struct cycle_summary total_time, guest_time, hv_time;
+ int switch_count, flush_count;
+
+ struct hvm_short_summary_struct hvm;
+
+ struct {
+ int now;
+ int count;
+ } prealloc_unpin;
+
+ struct {
+ unsigned callback:1;
+ unsigned flush_count, switch_count;
+ unsigned fixup_user, emulate_corr_user;
+ } destroy;
+};
+
+#define MAX_CPUS 32
+typedef uint32_t cpu_mask_t;
+
+#define IDLE_DOMAIN 32767
+#define DEFAULT_DOMAIN 32768
+
+struct vlapic_struct {
+ int outstanding_ipis;
+ tsc_t first_ipi_tsc;
+ int ipi_virq_injected;
+};
+
+struct vcpu_data {
+ int vid;
+ struct domain_data *d; /* up-pointer */
+ unsigned activated:1;
+
+ int guest_paging_levels;
+
+ /* Schedule info */
+ struct {
+ int state;
+ tsc_t tsc;
+ /* TSC skew detection/correction */
+ struct last_oldstate_struct {
+ int wrong, actual, pid;
+ tsc_t tsc;
+ } last_oldstate;
+ /* Performance counters */
+ unsigned long long p1_start, p2_start;
+ } runstate;
+ struct pcpu_info *p;
+ tsc_t pcpu_tsc;
+
+ /* Hardware tracking */
+ struct {
+ long long val;
+ tsc_t start_time;
+ struct cr3_value_struct *data;
+ } cr3;
+
+ /* IPI latency tracking */
+ struct vlapic_struct vlapic;
+
+ /* Summary info */
+ struct cycle_framework f;
+ struct cycle_summary runstates[RUNSTATE_MAX];
+ struct weighted_cpi_summary cpi;
+ struct cycle_summary cpu_affinity;
+ enum {
+ VCPU_DATA_NONE=0,
+ VCPU_DATA_HVM,
+ VCPU_DATA_PV
+ } data_type;
+ union {
+ struct hvm_data hvm;
+ struct pv_data pv;
+ };
+};
+
+enum {
+ DOMAIN_RUNSTATE_BLOCKED=0,
+ DOMAIN_RUNSTATE_PARTIAL_RUN,
+ DOMAIN_RUNSTATE_FULL_RUN,
+ DOMAIN_RUNSTATE_PARTIAL_CONTENTION,
+ DOMAIN_RUNSTATE_CONCURRENCY_HAZARD,
+ DOMAIN_RUNSTATE_FULL_CONTENTION,
+ DOMAIN_RUNSTATE_LOST,
+ DOMAIN_RUNSTATE_MAX
+};
+
+char * domain_runstate_name[] = {
+ [DOMAIN_RUNSTATE_BLOCKED]="blocked",
+ [DOMAIN_RUNSTATE_PARTIAL_RUN]="partial run",
+ [DOMAIN_RUNSTATE_FULL_RUN]="full run",
+ [DOMAIN_RUNSTATE_PARTIAL_CONTENTION]="partial contention",
+ [DOMAIN_RUNSTATE_CONCURRENCY_HAZARD]="concurrency_hazard",
+ [DOMAIN_RUNSTATE_FULL_CONTENTION]="full_contention",
+ [DOMAIN_RUNSTATE_LOST]="lost",
+};
+
+struct domain_data {
+ struct domain_data *next;
+ int did;
+ struct vcpu_data *vcpu[MAX_CPUS];
+
+ int max_vid;
+
+ int runstate;
+ tsc_t runstate_tsc;
+ struct cycle_summary total_time;
+ struct cycle_summary runstates[DOMAIN_RUNSTATE_MAX];
+ struct cr3_value_struct *cr3_value_head;
+ struct eip_list_struct *emulate_eip_list;
+
+ int guest_interrupt[GUEST_INTERRUPT_MAX+1];
+ struct hvm_short_summary_struct hvm_short;
+ struct {
+ int done[MEM_MAX];
+ int done_interval[MEM_MAX];
+
+ int done_for[MEM_MAX];
+ int done_for_interval[MEM_MAX];
+ } memops;
+};
+
+struct domain_data * domain_list=NULL;
+
+struct domain_data default_domain;
+
+enum {
+ TOPLEVEL_GEN=0,
+ TOPLEVEL_SCHED,
+ TOPLEVEL_DOM0OP,
+ TOPLEVEL_HVM,
+ TOPLEVEL_MEM,
+ TOPLEVEL_PV,
+ TOPLEVEL_SHADOW,
+ TOPLEVEL_MAX=12
+};
+
+char * toplevel_name[TOPLEVEL_MAX] = {
+ [TOPLEVEL_GEN]="gen",
+ [TOPLEVEL_SCHED]="sched",
+ [TOPLEVEL_DOM0OP]="dom0op",
+ [TOPLEVEL_HVM]="hvm",
+ [TOPLEVEL_MEM]="mem",
+ [TOPLEVEL_PV]="pv",
+ [TOPLEVEL_SHADOW]="shadow",
+};
+
+struct trace_volume {
+ unsigned long long toplevel[TOPLEVEL_MAX];
+ unsigned long long sched_verbose;
+ unsigned long long hvm[HVM_VOL_MAX];
+} volume;
+
+#define UPDATE_VOLUME(_p,_x,_s) \
+ do { \
+ (_p)->volume.total._x += _s; \
+ (_p)->volume.last_buffer._x += _s; \
+ } while(0)
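+
+/* Usage sketch (call site hypothetical): given a struct pcpu_info *p and a
+ * record of _s bytes, UPDATE_VOLUME(p, toplevel[TOPLEVEL_HVM], _s) adds the
+ * bytes to both the running total and the per-buffer accumulator, which
+ * volume_clear() resets between buffers. */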
+
+void volume_clear(struct trace_volume *vol)
+{
+ bzero(vol, sizeof(*vol));
+}
+
+void volume_summary(struct trace_volume *vol)
+{
+ int j, k;
+ for(j=0; j<TOPLEVEL_MAX; j++)
+ if(vol->toplevel[j]) {
+ printf(" %-6s: %10lld\n",
+ toplevel_name[j], vol->toplevel[j]);
+ switch(j) {
+ case TOPLEVEL_SCHED:
+ if(vol->sched_verbose)
+ printf(" +-verbose: %10lld\n",
+ vol->sched_verbose);
+ break;
+ case TOPLEVEL_HVM:
+ for(k=0; k<HVM_VOL_MAX; k++) {
+ if(vol->hvm[k])
+ printf(" +-%-7s: %10lld\n",
+ hvm_vol_name[k], vol->hvm[k]);
+ }
+
+ break;
+ }
+ }
+}
+
+struct pcpu_info {
+ /* Information about this pcpu */
+ unsigned active:1, summary:1;
+ int pid;
+
+    /* Information related to scanning through the file */
+ tsc_t first_tsc, last_tsc, order_tsc;
+ loff_t file_offset;
+ loff_t next_cpu_change_offset;
+ struct record_info ri;
+ int last_cpu_change_pid;
+
+ /* Information related to tsc skew detection / correction */
+ struct {
+ tsc_t offset;
+ cpu_mask_t downstream; /* To detect cycles in dependencies */
+ } tsc_skew;
+
+ /* Information related to domain tracking */
+ struct vcpu_data * current;
+ struct {
+ unsigned active:1,
+ domain_valid:1;
+ unsigned did:16,vid:16;
+ tsc_t tsc;
+ } lost_record;
+
+ /* Record volume */
+ struct {
+ tsc_t buffer_first_tsc,
+ buffer_dom0_runstate_tsc,
+ buffer_dom0_runstate_cycles[RUNSTATE_MAX];
+ int buffer_dom0_runstate;
+ unsigned buffer_size;
+ struct trace_volume total, last_buffer;
+ } volume;
+};
+
+void __fill_in_record_info(struct pcpu_info *p);
+
+#define INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX 10
+
+struct {
+ int max_active_pcpu;
+ loff_t last_epoch_offset;
+ int early_eof;
+ int lost_cpus;
+ tsc_t now;
+ struct cycle_framework f;
+ tsc_t buffer_trace_virq_tsc;
+ struct pcpu_info pcpu[MAX_CPUS];
+
+ struct {
+ int id;
+ /* Invariant: head null => tail null; head !null => tail valid */
+ struct cr3_value_struct *head, **tail;
+ } cr3;
+
+ struct {
+ tsc_t start_tsc;
+ /* Information about specific interval output types */
+ union {
+ struct {
+ struct cr3_value_struct ** array;
+ } cr3;
+ struct {
+ struct domain_data *d;
+ int guest_vector[INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX];
+ } domain;
+ };
+ } interval;
+} P = { 0 };
+
+char * pcpu_string(int pcpu);
+void process_generic(struct record_info *ri);
+void dump_generic(FILE *f, struct record_info *ri);
+ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset);
+
+void cpumask_init(cpu_mask_t *c) {
+ *c = 0UL;
+}
+
+void cpumask_clear(cpu_mask_t *c, int cpu) {
+ *c &= ~(1UL << cpu);
+}
+
+void cpumask_set(cpu_mask_t *c, int cpu) {
+ *c |= (1UL << cpu);
+}
+
+int cpumask_isset(const cpu_mask_t *c, int cpu) {
+ if(*c & (1UL<<cpu))
+ return 1;
+ else
+ return 0;
+}
+
+void cpumask_union(cpu_mask_t *d, const cpu_mask_t *s) {
+ *d |= *s;
+}
+
+/* -- Time code -- */
+
+void cycles_to_time(unsigned long long c, struct time_struct *t) {
+ t->time = ((c) * 1000) / (opt.cpu_hz / 1000000 );
+ t->s = t->time / 1000000000;
+ t->ns = t->time % 1000000000;
+}
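+
+/* Worked example: with the default 2.4 GHz clock, c = 2,400,000,000 cycles
+ * gives t->time = (c * 1000) / 2400 = 1,000,000,000 ns, i.e. s = 1, ns = 0.
+ * Dividing cpu_hz by 10^6 first keeps the intermediate product within
+ * 64 bits for plausible cycle counts. */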
+
+void abs_cycles_to_time(unsigned long long ac, struct time_struct *t) {
+ if(ac > P.f.first_tsc) {
+ t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 );
+ t->s = t->time / 1000000000;
+ t->ns = t->time % 1000000000;
+ } else {
+ t->time = t->s = t->ns = 0;
+ }
+}
+
+tsc_t abs_cycles_to_global(unsigned long long ac) {
+ if(ac > P.f.first_tsc)
+ return ac - P.f.first_tsc;
+ else
+ return 0;
+}
+
+void scatterplot_vs_time(tsc_t atsc, long long y) {
+ struct time_struct t;
+
+ abs_cycles_to_time(atsc, &t);
+
+ printf("%lu.%09lu %lld\n", t.s, t.ns, y);
+}
+
+/* -- Summary Code -- */
+
+/* With compliments to "Numerical Recipes in C", which provided the algorithm
+ * and basic template for this function. */
+long long percentile(long long * A, int N, int ple) {
+ int I, J, L, R, K;
+
+ long long X, W;
+
+ /* Find K, the element # we want */
+ K=N*ple/100;
+
+ /* Set the left and right boundaries of the current search space */
+ L=0; R=N-1;
+
+ while(L < R) {
+ /* X: The value to order everything higher / lower than */
+ X=A[K];
+
+ /* Starting at the left and the right... */
+ I=L;
+ J=R;
+
+ do {
+ /* Find the first element on the left that is out-of-order w/ X */
+ while(A[I]<X)
+ I++;
+ /* Find the first element on the right that is out-of-order w/ X */
+ while(X<A[J])
+ J--;
+
+ /* If we found something out-of-order */
+ if(I<=J) {
+ /* Switch the values */
+ W=A[I];
+ A[I]=A[J];
+ A[J]=W;
+
+ /* And move on */
+ I++; J--;
+ }
+ } while (I <= J); /* Keep going until our pointers meet or pass */
+
+ /* Re-adjust L and R, based on which element we're looking for */
+ if(J<K)
+ L=I;
+ if(K<I)
+ R=J;
+ }
+
+ return A[K];
+}
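+
+/* Worked example: for A = {9, 1, 7, 3, 5}, N = 5, ple = 50, the target
+ * index is K = 5*50/100 = 2, and the quickselect loop partially reorders A
+ * until A[2] holds the 3rd-smallest value, 5.  Note the array is modified
+ * in place, which is why callers pass their scratch sample buffers. */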
+
+float weighted_percentile(float * A, /* values */
+ unsigned long long * w, /* weights */
+ int N, /* total */
+ int ple) /* percentile */
+{
+ int L, R, I, J, K;
+ unsigned long long L_weight, R_weight, I_weight, J_weight,
+ K_weight, N_weight;
+
+ float X, t1;
+ unsigned long long t2;
+
+ int progress;
+
+ /* Calculate total weight */
+ N_weight=0;
+
+ for(I=0; I<N; I++) {
+ assert(w[I]!=0);
+ N_weight += w[I];
+ }
+
+ /* Find K_weight, the target weight we want */
+ K_weight = N_weight * ple / 100;
+
+ /* Set the left and right boundaries of the current search space */
+ L=0;
+ L_weight = 0;
+ R=N-1;
+ R_weight = N_weight - w[R];
+
+ /* Search between L and R, narrowing down until we're done */
+ while(L < R) {
+        /* Choose an ordering value from right in the middle */
+ K = (L + R) >> 1;
+ /* X: The value to order everything higher / lower than */
+ X=A[K];
+
+ /* Starting at the left and the right... */
+ I=L; I_weight = L_weight;
+ J=R; J_weight = R_weight;
+
+ do {
+ /* Find the first element on the left that is out-of-order w/ X */
+ while(A[I]<X) {
+ I_weight += w[I];
+ I++;
+ }
+ /* Find the first element on the right that is out-of-order w/ X */
+ while(X<A[J]) {
+ J_weight -= w[J];
+ J--;
+ }
+
+ /* If we actually found something... */
+ if(I<=J) {
+ /* Switch the values */
+ t1=A[I];
+ A[I]=A[J];
+ A[J]=t1;
+
+ t2=w[I];
+ w[I]=w[J];
+ w[J]=t2;
+
+ /* And move in */
+ I_weight += w[I];
+ I++;
+
+ J_weight -= w[J];
+ J--;
+ }
+ } while (I <= J); /* Keep going until our pointers meet or pass */
+
+ progress = 0;
+
+ /* Re-adjust L and R, based on which element we're looking for */
+ if(J_weight<K_weight) {
+ progress = 1;
+ L=I; L_weight = I_weight;
+ }
+ if(K_weight<I_weight) {
+ progress = 1;
+ R=J; R_weight = J_weight;
+ }
+ }
+
+ return A[L];
+}
+
+long long self_weighted_percentile(long long * A,
+ int N, /* total */
+ int ple) /* percentile */
+{
+ int L, R, I, J, K;
+ long long L_weight, R_weight, I_weight, J_weight,
+ K_weight, N_weight;
+
+ long long X, t1;
+
+ int progress;
+
+ /* Calculate total weight */
+ N_weight=0;
+
+ for(I=0; I<N; I++) {
+ if(A[I] < 0)
+ fprintf(warn, "%s: Value %lld less than zero!\n",
+ __func__, A[I]);
+ assert(A[I]!=0);
+ N_weight += A[I];
+ }
+
+ /* Find K_weight, the target weight we want */
+ K_weight = N_weight * ple / 100;
+
+ /* Set the left and right boundaries of the current search space */
+ L=0;
+ L_weight = 0;
+ R=N-1;
+ R_weight = N_weight - A[R];
+
+ /* Search between L and R, narrowing down until we're done */
+ while(L < R) {
+        /* Choose an ordering value from right in the middle */
+ K = (L + R) >> 1;
+ /* X: The value to order everything higher / lower than */
+ X=A[K];
+
+ /* Starting at the left and the right... */
+ I=L; I_weight = L_weight;
+ J=R; J_weight = R_weight;
+
+ do {
+ /* Find the first element on the left that is out-of-order w/ X */
+ while(A[I]<X) {
+ I_weight += A[I];
+ I++;
+ }
+ /* Find the first element on the right that is out-of-order w/ X */
+ while(X<A[J]) {
+ J_weight -= A[J];
+ J--;
+ }
+
+ /* If we actually found something... */
+ if(I<=J) {
+ /* Switch the values */
+ t1=A[I];
+ A[I]=A[J];
+ A[J]=t1;
+
+ /* And move in */
+ I_weight += A[I];
+ I++;
+
+ J_weight -= A[J];
+ J--;
+ }
+ } while (I <= J); /* Keep going until our pointers meet or pass */
+
+ progress = 0;
+
+ /* Re-adjust L and R, based on which element we're looking for */
+ if(J_weight<K_weight) {
+ progress = 1;
+ L=I; L_weight = I_weight;
+ }
+ if(K_weight<I_weight) {
+ progress = 1;
+ R=J; R_weight = J_weight;
+ }
+ }
+
+ return A[L];
+}
+
+static inline double __cycles_percent(long long cycles, long long total) {
+ return (double)(cycles*100) / total;
+}
+
+static inline double __summary_percent(struct event_cycle_summary *s,
+ struct cycle_framework *f) {
+ return __cycles_percent(s->cycles, f->total_cycles);
+}
+
+static inline double summary_percent_global(struct event_cycle_summary *s) {
+ return __summary_percent(s, &P.f);
+}
+
+static inline void update_summary(struct event_cycle_summary *s, long long c) {
+/* We don't know ahead of time how many samples there are, and working
+ * with dynamic stuff is a pain, and unnecessary. This algorithm will
+ * generate a sample set that approximates an even sample. We can
+ * then take the percentiles on this, and get an approximate value. */
+ if(c) {
+ if(opt.sample_size) {
+ int lap = (s->cycles_count/opt.sample_size)+1,
+ index =s->cycles_count % opt.sample_size;
+ if((index - (lap/3))%lap == 0) {
+ if(!s->cycles_sample) {
+ s->cycles_sample = malloc(sizeof(*s->cycles_sample) * opt.sample_size);
+ if(!s->cycles_sample) {
+ fprintf(stderr, "%s: malloc failed!\n", __func__);
+ exit(1);
+ }
+ }
+ s->cycles_sample[index]=c;
+ }
+ }
+ s->cycles_count++;
+ s->cycles += c;
+
+ s->interval_count++;
+ s->interval_cycles += c;
+ }
+ s->count++;
+}
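+
+/* Sampling sketch: with a (hypothetically small) sample_size of 4, the
+ * first 4 values land in slots 0-3 (lap 1); during lap 2 roughly every
+ * 2nd value overwrites a slot, during lap 3 roughly every 3rd, and so on.
+ * Old entries decay gradually, so the buffer approximates an even sample
+ * of the whole run without knowing the total count in advance. */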
+
+static inline void clear_interval_summary(struct event_cycle_summary *s) {
+ s->interval_count = 0;
+ s->interval_cycles = 0;
+}
+
+static inline void update_cycles(struct cycle_summary *s, long long c) {
+/* We don't know ahead of time how many samples there are, and working
+ * with dynamic stuff is a pain, and unnecessary. This algorithm will
+ * generate a sample set that approximates an even sample. We can
+ * then take the percentiles on this, and get an approximate value. */
+ int lap, index;
+
+ assert(c!=0);
+
+ if ( opt.sample_size ) {
+ lap = (s->count/opt.sample_size)+1;
+ index =s->count % opt.sample_size;
+
+ if((index - (lap/3))%lap == 0) {
+ if(!s->sample) {
+ s->sample = malloc(sizeof(*s->sample) * opt.sample_size);
+ if(!s->sample) {
+ fprintf(stderr, "%s: malloc failed!\n", __func__);
+ exit(1);
+ }
+ }
+ s->sample[index] = c;
+ }
+ }
+
+ if(c > 0) {
+ s->cycles += c;
+ s->interval_cycles += c;
+ } else {
+ s->cycles += -c;
+ s->interval_cycles += -c;
+ }
+ s->count++;
+ s->interval_count++;
+}
+
+static inline void clear_interval_cycles(struct cycle_summary *s) {
+ s->interval_cycles = 0;
+ s->interval_count = 0;
+}
+
+static inline void update_cpi(struct weighted_cpi_summary *s,
+ unsigned long long i,
+ unsigned long long c) {
+/* We don't know ahead of time how many samples there are, and working
+ * with dynamic stuff is a pain, and unnecessary. This algorithm will
+ * generate a sample set that approximates an even sample. We can
+ * then take the percentiles on this, and get an approximate value. */
+ int lap, index;
+
+ if ( opt.sample_size ) {
+ lap = (s->count/opt.sample_size)+1;
+ index =s->count % opt.sample_size;
+
+ if((index - (lap/3))%lap == 0) {
+ if(!s->cpi) {
+ assert(!s->cpi_weight);
+
+ s->cpi = malloc(sizeof(*s->cpi) * opt.sample_size);
+ s->cpi_weight = malloc(sizeof(*s->cpi_weight) * opt.sample_size);
+ if(!s->cpi || !s->cpi_weight) {
+ fprintf(stderr, "%s: malloc failed!\n", __func__);
+ exit(1);
+ }
+ }
+ assert(s->cpi_weight);
+
+ s->cpi[index] = (float) c / i;
+ s->cpi_weight[index]=c;
+ }
+ }
+
+ s->instructions += i;
+ s->cycles += c;
+ s->count++;
+
+ s->interval_instructions += i;
+ s->interval_cycles += c;
+ s->interval_count++;
+}
+
+static inline void clear_interval_cpi(struct weighted_cpi_summary *s) {
+ s->interval_cycles = 0;
+ s->interval_count = 0;
+ s->interval_instructions = 0;
+}
+
+static inline void print_cpu_affinity(struct cycle_summary *s, char *p) {
+ if(s->count) {
+ long long avg;
+
+ avg = s->cycles / s->count;
+
+ if ( opt.sample_size ) {
+ long long p5, p50, p95;
+ int data_size = s->count;
+ if(data_size > opt.sample_size)
+ data_size = opt.sample_size;
+
+ p50 = percentile(s->sample, data_size, 50);
+ p5 = percentile(s->sample, data_size, 5);
+ p95 = percentile(s->sample, data_size, 95);
+
+ printf("%s: %7d %6lld {%6lld|%6lld|%6lld}\n",
+ p, s->count, avg, p5, p50, p95);
+ } else {
+ printf("%s: %7d %6lld\n",
+ p, s->count, avg);
+ }
+ }
+}
+
+static inline void print_cpi_summary(struct weighted_cpi_summary *s) {
+ if(s->count) {
+ float avg;
+
+ avg = (float)s->cycles / s->instructions;
+
+ if ( opt.sample_size ) {
+ float p5, p50, p95;
+ int data_size = s->count;
+
+ if(data_size > opt.sample_size)
+ data_size = opt.sample_size;
+
+ p50 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 50);
+ p5 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 5);
+ p95 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 95);
+
+ printf(" CPI summary: %2.2f {%2.2f|%2.2f|%2.2f}\n",
+ avg, p5, p50, p95);
+ } else {
+ printf(" CPI summary: %2.2f\n", avg);
+ }
+ }
+}
+
+static inline void print_cycle_percent_summary(struct cycle_summary *s,
+ tsc_t total, char *p) {
+ if(s->count) {
+ long long avg;
+ double percent, seconds;
+
+ avg = s->cycles / s->count;
+
+ seconds = ((double)s->cycles) / opt.cpu_hz;
+
+ percent = ((double)(s->cycles * 100)) / total;
+
+ if ( opt.sample_size ) {
+ long long p5, p50, p95;
+ int data_size = s->count;
+
+ if(data_size > opt.sample_size)
+ data_size = opt.sample_size;
+
+ p50 = self_weighted_percentile(s->sample, data_size, 50);
+ p5 = self_weighted_percentile(s->sample, data_size, 5);
+ p95 = self_weighted_percentile(s->sample, data_size, 95);
+
+ printf("%s: %7d %5.2lfs %5.2lf%% %6lld {%6lld|%6lld|%6lld}\n",
+ p, s->count,
+ seconds,
+ percent,
+ avg, p5, p50, p95);
+ } else {
+ printf("%s: %7d %5.2lfs %5.2lf%% %6lld\n",
+ p, s->count,
+ seconds,
+ percent,
+ avg);
+ }
+ }
+}
+
+static inline void print_cycle_summary(struct cycle_summary *s, char *p) {
+ if(s->count) {
+ long long avg;
+
+ avg = s->cycles / s->count;
+
+ if ( opt.sample_size ) {
+ long long p5, p50, p95;
+ int data_size = s->count;
+
+ if(data_size > opt.sample_size)
+ data_size = opt.sample_size;
+
+ p50 = self_weighted_percentile(s->sample, data_size, 50);
+ p5 = self_weighted_percentile(s->sample, data_size, 5);
+ p95 = self_weighted_percentile(s->sample, data_size, 95);
+
+ printf("%s: %7d %5.2lfs %6lld {%6lld|%6lld|%6lld}\n",
+ p, s->count, ((double)s->cycles)/opt.cpu_hz,
+ avg, p5, p50, p95);
+ } else {
+ printf("%s: %7d %5.2lfs %6lld\n",
+ p, s->count, ((double)s->cycles)/opt.cpu_hz, avg);
+ }
+ }
+}
+
+#define PRINT_SUMMARY(_s, _p...) \
+ do { \
+ if((_s).count) { \
+ if ( opt.sample_size ) { \
+ unsigned long long p5, p50, p95; \
+ int data_size=(_s).cycles_count; \
+ if(data_size > opt.sample_size) \
+ data_size=opt.sample_size; \
+ p50=percentile((_s).cycles_sample, data_size, 50); \
+ p5=percentile((_s).cycles_sample, data_size, 5); \
+ p95=percentile((_s).cycles_sample, data_size, 95); \
+ printf(_p); \
+ printf(" %7d %5.2lfs %5.2lf%% %5lld cyc {%5lld|%5lld|%5lld}\n", \
+ (_s).count, \
+ ((double)(_s).cycles)/opt.cpu_hz, \
+ summary_percent_global(&(_s)), \
+ (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0, \
+ p5, p50, p95); \
+ } else { \
+ printf(_p); \
+ printf(" %7d %5.2lfs %5.2lf%% %5lld cyc\n", \
+ (_s).count, \
+ ((double)(_s).cycles)/opt.cpu_hz, \
+ summary_percent_global(&(_s)), \
+ (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0); \
+ } \
+ } \
+ } while(0)
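+
+/* Usage sketch (call site hypothetical): the _p arguments are passed
+ * straight to printf() as the row label, so a caller can write e.g.
+ *   PRINT_SUMMARY(h->summary.trap[i], "   %-12s ", hvm_trap_name[i]);
+ * and the macro appends count, time, percent, and the 5/50/95th
+ * percentile cycle figures on the same line. */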
+
+void __interval_cycle_percent_output(struct cycle_summary *s, tsc_t cycles) {
+ printf(" %.02lf",
+ __cycles_percent(s->interval_cycles, cycles));
+ clear_interval_cycles(s);
+}
+
+void interval_cycle_percent_output(struct cycle_summary *s) {
+ __interval_cycle_percent_output(s, opt.interval.cycles);
+}
+
+void interval_time_output(void) {
+ struct time_struct t;
+ abs_cycles_to_time(P.interval.start_tsc, &t);
+
+ printf("%lu.%09lu", t.s, t.ns);
+}
+
+void interval_cr3_schedule_time_header(void) {
+ if( opt.interval.cr3.count ) {
+ int i;
+
+ printf("time");
+ for(i=0; i<opt.interval.cr3.count; i++) {
+ printf(" %llx", opt.interval.cr3.values[i]);
+ }
+ printf("\n");
+ }
+ /* Can't see into the future, so no header if cr3 values are
+ not specified. */
+}
+
+void interval_cr3_schedule_time_tail(void) {
+ if( !opt.interval.cr3.count ) {
+ struct cr3_value_struct *cr3;
+
+ printf("time");
+ for(cr3=P.cr3.head; cr3; cr3 = cr3->gnext) {
+ printf(" %llx", cr3->gmfn);
+ }
+ printf("\n");
+ }
+}
+
+void interval_cr3_value_check(struct cr3_value_struct *cr3) {
+ if( opt.interval.cr3.count ) {
+ int i;
+
+ for(i=0; i<opt.interval.cr3.count; i++) {
+ if(cr3->gmfn == opt.interval.cr3.values[i]) {
+ if(P.interval.cr3.array[i]) {
+ fprintf(stderr, "Fatal: duplicate cr3 value %llx!\n",
+ cr3->gmfn);
+ exit(1);
+ }
+ fprintf(stderr, "%s: found gmfn %llx\n",
+ __func__, cr3->gmfn);
+ P.interval.cr3.array[i] = cr3;
+ }
+ }
+ }
+}
+
+void interval_cr3_schedule_time_output(void) {
+ struct cr3_value_struct *cr3;
+ int i;
+
+ interval_time_output();
+
+ if(opt.interval.cr3.count) {
+ for(i=0; i<opt.interval.cr3.count; i++) {
+ cr3 = P.interval.cr3.array[i];
+ if(cr3) {
+ printf(" %.02lf",
+ __cycles_percent(cr3->total_time.interval_cycles,
+ opt.interval.cycles));
+ clear_interval_cycles(&cr3->total_time);
+ } else {
+ printf(" 0.0");
+ }
+ }
+ } else {
+ struct cr3_value_struct *cr3;
+ for(cr3 = P.cr3.head; cr3; cr3 = cr3->gnext) {
+ printf(" %.02lf",
+ __cycles_percent(cr3->total_time.interval_cycles,
+ opt.interval.cycles));
+ clear_interval_cycles(&cr3->total_time);
+ }
+ }
+ printf("\n");
+}
+
+void interval_cr3_schedule_ordered_output(void) {
+ struct cr3_value_struct *p;
+ int i;
+
+ struct cr3_value_struct **qsort_array;
+ int N=0;
+
+ int cr3_time_compare(const void *_a, const void *_b) {
+ struct cr3_value_struct *a=*(typeof(&a))_a;
+ struct cr3_value_struct *b=*(typeof(&a))_b;
+
+ if(a->total_time.interval_cycles < b->total_time.interval_cycles)
+ return 1;
+ else if(b->total_time.interval_cycles == a->total_time.interval_cycles) {
+ if(a->total_time.interval_count < b->total_time.interval_count)
+ return 1;
+ else if(a->total_time.interval_count == b->total_time.interval_count)
+ return 0;
+ else
+ return -1;
+ } else
+ return -1;
+ }
+
+ for(p=P.cr3.head; p; p=p->gnext)
+ N++;
+
+ if(!N)
+ return;
+
+    qsort_array = malloc(N * sizeof(struct cr3_value_struct *));
+
+ for(i=0, p=P.cr3.head; p; p=p->gnext, i++)
+ qsort_array[i]=p;
+
+    qsort(qsort_array, N, sizeof(struct cr3_value_struct *),
+ cr3_time_compare);
+
+ interval_time_output();
+
+ for(i=0; i<N; i++) {
+ p = qsort_array[i];
+        /* Print anything with nonzero time, even if it rounds down to 0.00% */
+ if(p->total_time.interval_cycles > 0) {
+ printf(" %8llx: %.02lf %c\n",
+ p->gmfn,
+ __cycles_percent(p->total_time.interval_cycles,
+ opt.interval.cycles),
+ (p->first_time > P.interval.start_tsc)?'*':' ');
+ }
+ clear_interval_cycles(&p->total_time);
+ }
+
+ free(qsort_array);
+}
+
+void interval_cr3_short_summary_header(void) {
+ int i;
+
+ printf("time guest");
+ for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++)
+ printf(" %s", hvm_short_summary_name[i]);
+ printf("\n");
+}
+
+void interval_cr3_short_summary_output(void) {
+ struct cycle_summary *hss_array;
+ int i;
+
+ if(P.interval.cr3.array[0]) {
+ struct cr3_value_struct *p = P.interval.cr3.array[0];
+
+ interval_time_output();
+
+ hss_array = p->hvm.s;
+
+ printf(" %.02lf",
+ __cycles_percent(p->total_time.interval_cycles,
+ opt.interval.cycles));
+
+ for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++)
+ __interval_cycle_percent_output(hss_array + i,
+ p->total_time.interval_cycles);
+
+ clear_interval_cycles(&p->total_time);
+
+ printf("\n");
+ }
+}
+
+void interval_domain_short_summary_header(void) {
+ int i;
+
+ printf("time running");
+ for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++)
+ printf(" %s", hvm_short_summary_name[i]);
+ printf("\n");
+}
+
+void interval_domain_short_summary_output(void) {
+
+ if(P.interval.domain.d) {
+ struct domain_data *d;
+ tsc_t interval_cycles;
+ int i;
+
+ d=P.interval.domain.d;
+
+ interval_cycles = d->total_time.interval_cycles;
+
+ interval_time_output();
+
+ interval_cycle_percent_output(&d->total_time);
+
+ for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++)
+ __interval_cycle_percent_output(d->hvm_short.s + i,
+ interval_cycles);
+
+ printf("\n");
+ }
+}
+
+void interval_domain_guest_interrupt(struct hvm_data *h, int vector) {
+ struct domain_data *d = h->v->d;
+ int i;
+
+ /* Check to see if this vector is in the "print list" */
+ for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) {
+ if(P.interval.domain.guest_vector[i] == 0) {
+ P.interval.domain.guest_vector[i] = vector;
+ break;
+ }
+ if(P.interval.domain.guest_vector[i] == vector)
+ break;
+ }
+
+ if(i == INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX) {
+ fprintf(stderr, "FATAL: used up all %d guest interrupt slots!\n",
+ INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX);
+ exit(1);
+ }
+
+ d->guest_interrupt[vector]++;
+}
+
+void interval_domain_guest_interrupt_tail(void) {
+ int i;
+
+ printf("time running");
+ for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) {
+ if(P.interval.domain.guest_vector[i] == 0)
+ break;
+ printf(" %d", P.interval.domain.guest_vector[i]);
+ }
+ printf("\n");
+}
+
+void interval_domain_guest_interrupt_output(void) {
+
+ if(P.interval.domain.d) {
+ struct domain_data *d;
+ int i;
+
+ d=P.interval.domain.d;
+
+ interval_time_output();
+
+ for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) {
+ int v = P.interval.domain.guest_vector[i];
+
+ if(v == 0)
+ break;
+
+ printf(" %d", d->guest_interrupt[v]);
+
+ d->guest_interrupt[v]=0;
+ }
+
+ printf("\n");
+ }
+
+}
+
+void interval_domain_grant_maps_output(void) {
+
+ if(P.interval.domain.d) {
+ struct domain_data *d;
+
+ d=P.interval.domain.d;
+
+ interval_time_output();
+
+ printf(" %d", d->memops.done_for_interval[MEM_PAGE_GRANT_MAP]);
+
+ d->memops.done_for_interval[MEM_PAGE_GRANT_MAP] = 0;
+
+ printf("\n");
+ }
+}
+
+/* General interval gateways */
+
+void interval_callback(void) {
+ switch(opt.interval.output) {
+ case INTERVAL_CR3_SCHEDULE_TIME:
+ interval_cr3_schedule_time_output();
+ break;
+ case INTERVAL_CR3_SCHEDULE_ORDERED:
+ interval_cr3_schedule_ordered_output();
+ break;
+ case INTERVAL_CR3_SHORT_SUMMARY:
+ interval_cr3_short_summary_output();
+ break;
+ case INTERVAL_DOMAIN_SHORT_SUMMARY:
+ interval_domain_short_summary_output();
+ break;
+ case INTERVAL_DOMAIN_GUEST_INTERRUPT:
+ interval_domain_guest_interrupt_output();
+ break;
+ case INTERVAL_DOMAIN_GRANT_MAPS:
+ interval_domain_grant_maps_output();
+ break;
+ }
+}
+
+void interval_header(void) {
+ switch(opt.interval.output) {
+ case INTERVAL_CR3_SCHEDULE_TIME:
+ interval_cr3_schedule_time_header();
+ break;
+ case INTERVAL_CR3_SHORT_SUMMARY:
+ interval_cr3_short_summary_header();
+ break;
+ case INTERVAL_DOMAIN_SHORT_SUMMARY:
+ interval_domain_short_summary_header();
+ break;
+ default:
+ break;
+ }
+}
+
+void interval_tail(void) {
+ switch(opt.interval.output) {
+ case INTERVAL_CR3_SCHEDULE_TIME:
+ interval_cr3_schedule_time_tail();
+ break;
+ case INTERVAL_DOMAIN_GUEST_INTERRUPT:
+ interval_domain_guest_interrupt_tail();
+ break;
+ default:
+ break;
+ }
+}
+
+/* -- Eip list data -- */
+
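+/* Each eip list is kept sorted by address.  A node carries a generic
+ * event_cycle_summary plus optional type-specific data, managed through
+ * the eip_list_type[] callbacks (new / update / dump). */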
+void update_eip(struct eip_list_struct **head, unsigned long long eip,
+ unsigned long long cycles, int type, void * extra) {
+ struct eip_list_struct *p, **last=head;
+
+ for(p=*head; p; last = (&p->next), p=p->next)
+ if(p->eip >= eip)
+ break;
+
+ if(!p || p->eip != eip) {
+ p=malloc(sizeof(*p));
+ if(!p) {
+ perror("malloc failed");
+ exit(1);
+ }
+
+ bzero(p, sizeof(*p));
+
+ p->eip=eip;
+ p->type = type;
+
+ if(eip_list_type[type].new) {
+ eip_list_type[type].new(p, extra);
+ }
+ p->next = *last;
+ *last=p;
+ } else if(p->type != type) {
+ fprintf(stderr, "WARNING, mixed types! %d %d\n", p->type, type);
+ } else if(eip_list_type[type].update) {
+ eip_list_type[type].update(p, extra);
+ }
+
+ update_summary(&p->summary, cycles);
+}
+
+void dump_eip(struct eip_list_struct *head) {
+ struct eip_list_struct *p;
+ int i;
+
+ struct eip_list_struct **qsort_array;
+ int N=0;
+
+ int eip_compare(const void *_a, const void *_b) {
+ struct eip_list_struct *a=*(typeof(&a))_a;
+ struct eip_list_struct *b=*(typeof(&a))_b;
+
+ if(a->summary.cycles < b->summary.cycles)
+ return 1;
+ else if(b->summary.cycles == a->summary.cycles) {
+ if(a->summary.count < b->summary.count)
+ return 1;
+ else if(a->summary.count == b->summary.count)
+ return 0;
+ else
+ return -1;
+ } else
+ return -1;
+ }
+
+ for(p=head; p; p=p->next)
+ N++;
+
+ if(!N)
+ return;
+
+ qsort_array = malloc(N * sizeof(struct eip_list_struct *));
+ if(!qsort_array) {
+ fprintf(stderr, "malloc() failed.\n");
+ exit(1);
+ }
+
+ for(i=0, p=head; p; p=p->next, i++)
+ qsort_array[i]=p;
+
+ qsort(qsort_array, N, sizeof(struct eip_list_struct *),
+ eip_compare);
+
+ /* WARNING: don't use N after this point unless you copy this variable */
+#if 0
+ if(opt.summary_eip_limit && opt.summary_eip_limit < N)
+ N=opt.summary_eip_limit;
+#endif
+
+ for(i=0; i<N; i++) {
+ p = qsort_array[i];
+ PRINT_SUMMARY(p->summary, " %8llx%-45s: ",
+ p->eip,
+ find_symbol(p->eip));
+ if(eip_list_type[p->type].dump) {
+ eip_list_type[p->type].dump(p);
+ }
+ }
+
+ free(qsort_array);
+}
+
+/* -- HVM code -- */
+struct hvm_pf_xen_record {
+ //unsigned vcpu:16, domain:16;
+ union {
+ struct {
+ unsigned long long va;
+ unsigned long error_code;
+ } x64;
+ struct {
+ unsigned long va;
+ unsigned long error_code;
+ } x32;
+ };
+};
+
+void hvm_update_short_summary(struct hvm_data *h, int element) {
+ struct vcpu_data *v = h->v;
+
+ if(v->cr3.data)
+ update_cycles(&v->cr3.data->hvm.s[element], h->arc_cycles);
+
+ update_cycles(&v->d->hvm_short.s[element], h->arc_cycles);
+
+ h->short_summary_done=1;
+}
+
+void hvm_short_summary(struct hvm_short_summary_struct *hss,
+ tsc_t total, char *prefix) {
+ char desc[80];
+ int i;
+
+ for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) {
+ snprintf(desc, 80, "%s%s", prefix, hvm_short_summary_name[i]);
+ print_cycle_percent_summary(hss->s + i, total, desc);
+ }
+}
+
+void hvm_set_summary_handler(struct hvm_data *h, void (*s)(struct hvm_data *h)) {
+ /* Set summary handler */
+ if(h->exit_reason < h->exit_reason_max)
+ {
+ if(h->exit_reason_summary_handler[h->exit_reason])
+ {
+ if(h->exit_reason_summary_handler[h->exit_reason]!= s)
+ fprintf(warn, "Strange, unexpected summary handler for exit_reason %s (%d!)\n",
+ h->exit_reason_name[h->exit_reason], h->exit_reason);
+ }
+ else
+ {
+ h->exit_reason_summary_handler[h->exit_reason] = s;
+ fprintf(warn, "Setting exit_reason %s (%d) summary handler\n",
+ h->exit_reason_name[h->exit_reason], h->exit_reason);
+ }
+ }
+}
+
+#define SIGN_EXTENDED_BITS (~((1ULL<<48)-1))
+#define HIGH_BIT(_v) ((_v) & (1ULL<<47))
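+/* An x86-64 virtual address is only valid ("canonical") if bits 63:48
+ * are all copies of bit 47; the checks below verify sign-extension in
+ * both directions. */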
+static inline int is_valid_addr64(unsigned long long va)
+{
+ if(HIGH_BIT(va))
+ return ((va & SIGN_EXTENDED_BITS) == SIGN_EXTENDED_BITS);
+ else
+ return ((va & SIGN_EXTENDED_BITS) == 0);
+}
+
+void hvm_pf_xen_summary(struct hvm_data *h) {
+ int i,j, k;
+
+ for(i=0; i<PF_XEN_MAX; i++)
+ {
+ PRINT_SUMMARY(h->summary.pf_xen[i],
+ " %-25s ", pf_xen_name[i]);
+ switch(i){
+ case PF_XEN_NON_EMULATE:
+ for(j=0; j<PF_XEN_NON_EMUL_MAX; j++)
+ PRINT_SUMMARY(h->summary.pf_xen_non_emul[j],
+ " *%-13s ", pf_xen_non_emul_name[j]);
+ break;
+ case PF_XEN_EMULATE:
+ for(j=0; j<PF_XEN_EMUL_MAX; j++) {
+ PRINT_SUMMARY(h->summary.pf_xen_emul[j],
+ " *%-13s ", pf_xen_emul_name[j]);
+ if(j == PF_XEN_EMUL_EARLY_UNSHADOW) {
+ for(k=0; k<5; k++) {
+ PRINT_SUMMARY(h->summary.pf_xen_emul_early_unshadow[k],
+ " +[%d] ", k);
+ }
+ }
+ }
+ break;
+ case PF_XEN_FIXUP:
+ for(j=0; j<PF_XEN_FIXUP_MAX; j++) {
+ PRINT_SUMMARY(h->summary.pf_xen_fixup[j],
+ " *%-13s ", pf_xen_fixup_name[j]);
+ if(j == PF_XEN_FIXUP_UNSYNC ) {
+ for(k=0; k<PF_XEN_FIXUP_UNSYNC_RESYNC_MAX; k++) {
+ PRINT_SUMMARY(h->summary.pf_xen_fixup_unsync_resync[k],
+ " +[%3d] ", k);
+ }
+ PRINT_SUMMARY(h->summary.pf_xen_fixup_unsync_resync[k],
+ " +[max] ");
+ }
+ }
+ break;
+ }
+ }
+}
+
+void pf_preprocess(struct pf_xen_extra *e, int guest_paging_levels)
+{
+ switch(guest_paging_levels) {
+ /* Select a subfield of _bits bits starting at bit _shift from _x */
+#define _SUBFIELD(_bits, _shift, _x) \
+ (((_x)>>(_shift)) & ((1ULL<<(_bits))-1))
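+ /* Worked example for a 4-level guest: va bits 47:39 index the L4
+ table, 38:30 the L3, 29:21 the L2, and 20:12 the L1, so
+ _SUBFIELD(9,39,va) == (va>>39) & 0x1ff is the L4 index. */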
+ case 4:
+ /* Verify sign-extension */
+ if((HIGH_BIT(e->va)
+ &&((e->va & SIGN_EXTENDED_BITS) != SIGN_EXTENDED_BITS))
+ || (!HIGH_BIT(e->va)
+ && ((e->va & SIGN_EXTENDED_BITS) != 0))) {
+ fprintf(warn, "Strange, va %llx not properly sign extended for 4-level pagetables\n",
+ e->va);
+ }
+ e->pt_index[4]=_SUBFIELD(9,39,e->va);
+ e->pt_index[3]=_SUBFIELD(9,30,e->va);
+ e->pt_index[2]=_SUBFIELD(9,21,e->va);
+ e->pt_index[1]=_SUBFIELD(9,12,e->va);
+ /* These are only useful for the linear-pagetable code */
+ e->pt_index[0]=_SUBFIELD(9,3,e->va);
+ if(e->va & 0x4)
+ e->pt_is_lo=0;
+ break;
+ case 3:
+ e->pt_index[3]=_SUBFIELD(2,30,e->va);
+ e->pt_index[2]=_SUBFIELD(9,21,e->va);
+ e->pt_index[1]=_SUBFIELD(9,12,e->va);
+ /* These are only useful for the linear-pagetable code */
+ e->pt_index[0]=_SUBFIELD(9,3,e->va);
+ if(e->va & 0x4)
+ e->pt_is_lo=0;
+ break;
+ case 2:
+ e->pt_index[2]=_SUBFIELD(10,22,e->va);
+ e->pt_index[1]=_SUBFIELD(10,12,e->va);
+ /* This is only useful for the linear pagetable code */
+ e->pt_index[0]=_SUBFIELD(10,2,e->va);
+ break;
+ case 0:
+ break;
+ default:
+ fprintf(warn, "Don't know how to handle %d-level pagetables\n",
+ guest_paging_levels);
+ }
+
+ e->corresponding_va = CORR_VA_INVALID;
+ e->pt_level = 0;
+
+ /* Detect accesses to Windows linear pagetables */
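+ /* Windows self-maps the pagetables: one fixed toplevel entry points
+ back at the toplevel pagetable itself, so a faulting va whose upper
+ indices repeat that magic index (768 for 2-level, 3 for PAE, 0x1ed
+ for 4-level) is an access to a pagetable entry rather than to
+ ordinary memory.  corresponding_va reconstructs the guest va that
+ the accessed entry maps, and pt_level records how deep the
+ recursion went. */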
+ switch(guest_paging_levels)
+ {
+ case 2:
+ if(e->pt_index[2] == 768) {
+ if(e->pt_index[1] == 768)
+ {
+ e->pt_level = 2;
+ e->corresponding_va=((1UL<<22)-1)
+ | e->pt_index[0]<<22;
+ }
+ else
+ {
+ e->pt_level = 1;
+ e->corresponding_va = ((1UL<<12)-1)
+ | e->pt_index[1]<<22
+ | e->pt_index[0]<<12;
+ }
+ }
+ break;
+ case 3:
+ if(e->pt_index[3]==3 && (e->pt_index[2]>>2==0))
+ {
+ if(e->pt_index[2]==3 && e->pt_index[1]>>2==0)
+ {
+ if(e->pt_index[1] == 3 && e->pt_index[0]>>2==0)
+ {
+ e->pt_level = 3;
+ e->corresponding_va=((1UL<<30)-1)
+ | e->pt_index[0]<<30;
+ }
+ else
+ {
+ e->pt_level = 2;
+ e->corresponding_va=((1UL<<21)-1)
+ | e->pt_index[1]<<30
+ | e->pt_index[2]<<21;
+ }
+ }
+ else
+ {
+ e->pt_level = 1;
+ e->corresponding_va = ((1UL<<12)-1)
+ | e->pt_index[0]<<12
+ | e->pt_index[1]<<21
+ | e->pt_index[2]<<30;
+ }
+ }
+ break;
+ case 4:
+ if(e->pt_index[4] == 0x1ed)
+ {
+ if(e->pt_index[3] == 0x1ed)
+ {
+ if(e->pt_index[2] == 0x1ed)
+ {
+ if(e->pt_index[1] == 0x1ed)
+ {
+ e->pt_level = 4;
+ e->corresponding_va = ((1ULL<<39)-1)
+ | (unsigned long long)e->pt_index[0]<<39;
+ }
+ else
+ {
+ e->pt_level = 3;
+ e->corresponding_va = ((1ULL<<30)-1)
+ | (unsigned long long)e->pt_index[0]<<30
+ | (unsigned long long)e->pt_index[1]<<39;
+ }
+ }
+ else
+ {
+ e->pt_level = 2;
+ e->corresponding_va = ((1ULL<<21)-1)
+ | (unsigned long long)e->pt_index[0]<<21
+ | (unsigned long long)e->pt_index[1]<<30
+ | (unsigned long long)e->pt_index[2]<<39;
+ }
+ }
+ else
+ {
+ e->pt_level = 1;
+ e->corresponding_va = ((1ULL<<12)-1)
+ | (unsigned long long)e->pt_index[0]<<12
+ | (unsigned long long)e->pt_index[1]<<21
+ | (unsigned long long)e->pt_index[2]<<30
+ | (unsigned long long)e->pt_index[3]<<39;
+ }
+
+ if(HIGH_BIT(e->corresponding_va))
+ e->corresponding_va |= SIGN_EXTENDED_BITS;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+void hvm_pf_xen_preprocess(unsigned event, struct hvm_data *h) {
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+ struct hvm_pf_xen_record *r = (typeof(r))h->d;
+
+ if(event == TRC_HVM_PF_XEN64)
+ {
+ if(!is_valid_addr64(r->x64.va))
+ fprintf(warn, "%s: invalid va %llx",
+ __func__, r->x64.va);
+ e->va = r->x64.va;
+ e->error_code = r->x64.error_code;
+ }
+ else
+ {
+ e->va = r->x32.va;
+ e->error_code = r->x32.error_code;
+ }
+
+ if(e->mmio_data_valid)
+ e->pf_case = PF_XEN_MMIO;
+ else
+ {
+ pf_preprocess(e, h->v->guest_paging_levels);
+
+ /* On rio traces, we try to infer emulation by looking for accesses
+ in the linear pagetable */
+ if(e->pt_level > 0)
+ e->pf_case = PF_XEN_EMULATE;
+ else
+ e->pf_case = PF_XEN_NON_EMULATE;
+ }
+}
+
+static inline int is_kernel(int paging_levels, unsigned long long va) {
+ switch(paging_levels) {
+ case 2:
+ case 3:
+ if(va & 0x80000000)
+ return 1;
+ else
+ return 0;
+ break;
+ case 4:
+ if(HIGH_BIT(va))
+ return 1;
+ else return 0;
+ default:
+ return 0;
+ }
+
+}
+
+void hvm_pf_xen_postprocess(struct hvm_data *h) {
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+ if(opt.summary_info) {
+ update_summary(&h->summary.pf_xen[e->pf_case],
+ h->arc_cycles);
+ switch(e->pf_case)
+ {
+ case PF_XEN_EMULATE:
+ update_eip(&h->v->d->emulate_eip_list,
+ h->rip,
+ h->arc_cycles,
+ 0, NULL);
+ break;
+ case PF_XEN_NON_EMULATE:
+ if(is_kernel(h->v->guest_paging_levels, h->rip))
+ update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_KERNEL],
+ h->arc_cycles);
+ else
+ update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_USER],
+ h->arc_cycles);
+ if(is_kernel(h->v->guest_paging_levels, e->va))
+ update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_KERNEL],
+ h->arc_cycles);
+
+ else
+ update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_USER],
+ h->arc_cycles);
+ }
+
+ /* Set summary handler */
+ hvm_set_summary_handler(h, hvm_pf_xen_summary);
+ }
+
+ if(opt.dump_cooked)
+ {
+ switch(e->pf_case)
+ {
+ case PF_XEN_EMULATE:
+ printf(" %s pf_xen:emulate va %llx ec %lx eip %llx%s lvl %d corr %llx\n",
+ h->dump_header, e->va, e->error_code,
+ h->rip, find_symbol(h->rip),
+ e->pt_level, e->corresponding_va);
+ break;
+ case PF_XEN_MMIO:
+ printf(" %s pf_xen:mmio va %llx ec %lx eip %llx%s data %lx\n",
+ h->dump_header, e->va, e->error_code,
+ h->rip, find_symbol(h->rip),
+ e->data);
+ break;
+ default:
+ printf(" %s pf_xen va %llx ec %lx eip %llx%s\n",
+ h->dump_header, e->va, e->error_code,
+ h->rip, find_symbol(h->rip));
+ break;
+ }
+ }
+}
+
+void hvm_pf_xen_process(struct record_info *ri, struct hvm_data *h) {
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+ if(ri->event == TRC_HVM_PF_XEN64
+ && h->v->guest_paging_levels != 4)
+ fprintf(warn, "Strange, PF_XEN64 but guest_paging_levels %d!\n",
+ h->v->guest_paging_levels);
+ else if(ri->event == TRC_HVM_PF_XEN
+ && h->v->guest_paging_levels == 4)
+ fprintf(warn, "Strange, PF_XEN but guest_paging_levels %d!\n",
+ h->v->guest_paging_levels);
+
+ hvm_pf_xen_preprocess(ri->event, h);
+
+ if(opt.dump_all)
+ {
+ if(e->pf_case == PF_XEN_EMULATE)
+ printf("]%s pf_xen:emulate va %llx ec %lx level %d corr %llx e->pt_index[%d %d %d %d %d]\n",
+ ri->dump_header, e->va, e->error_code,
+ e->pt_level, e->corresponding_va,
+ e->pt_index[0], e->pt_index[1], e->pt_index[2],
+ e->pt_index[3],
+ e->pt_index[4]);
+ else
+ printf("]%s pf_xen va %llx ec %lx e->pt_index[%d %d %d %d %d]\n",
+ ri->dump_header, e->va, e->error_code,
+ e->pt_index[0], e->pt_index[1], e->pt_index[2],
+ e->pt_index[3],
+ e->pt_index[4]);
+ }
+
+ h->post_process = hvm_pf_xen_postprocess;
+}
+
+char * hvm_vlapic_icr_dest_shorthand_name[4] = {
+ "dest_field", "self", "all-inc", "all-exc"
+};
+
+void clear_vlapic(struct vlapic_struct *vla) {
+ vla->outstanding_ipis = 0;
+ vla->first_ipi_tsc = 0;
+ vla->ipi_virq_injected = 0;
+}
+
+void hvm_vlapic_icr_handler(struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
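+ /* Low 32 bits of the local APIC ICR: vector in bits 7:0, delivery
+ mode 10:8, destination mode 11, delivery status 12, level 14,
+ trigger mode 15, destination shorthand 19:18. */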
+ union {
+ unsigned long val;
+ struct {
+ unsigned vec:8,
+ delivery_mode:3,
+ dest_mode:1,
+ delivery_status:1,
+ _res1:1,
+ level:1,
+ trigger:1,
+ _res2:2,
+ dest_shorthand:2;
+ };
+ } icr = { .val = e->data };
+
+ if(e->mmio_is_write) {
+ if(opt.dump_all || opt.dump_cooked) {
+ printf(" vlapic icr vec %d %s\n",
+ icr.vec,
+ hvm_vlapic_icr_dest_shorthand_name[icr.dest_shorthand]);
+ }
+
+ if(icr.dest_shorthand == 3 && icr.vec == 0xd1)
+ {
+ struct vcpu_data *ov, *v = h->v;
+ struct vlapic_struct *vla;
+ struct domain_data *d = v->d;
+ int i;
+
+ for(i=0; i<MAX_CPUS; i++)
+ {
+ ov = d->vcpu[i];
+ if(!ov || ov == v)
+ continue;
+
+ if(ov->runstate.state == RUNSTATE_LOST) {
+ if(opt.dump_all)
+ fprintf(warn, "%s: v%d in state RUNSTATE_LOST, not counting ipi\n",
+ __func__, ov->vid);
+ continue;
+ }
+
+ vla = &ov->vlapic;
+
+ if(!vla->first_ipi_tsc)
+ {
+ struct record_info *ri;
+ ri = &v->p->ri;
+ vla->first_ipi_tsc = ri->tsc;
+ }
+
+ vla->outstanding_ipis++;
+
+ if((opt.dump_all || opt.dump_cooked) &&
+ (ov->runstate.state != RUNSTATE_RUNNING))
+ printf(" v%d in state %s (outstanding ipis %d)\n",
+ ov->vid, runstate_name[ov->runstate.state],
+ vla->outstanding_ipis);
+ }
+ }
+ } else {
+ /* Read */
+ if(opt.dump_all || opt.dump_cooked) {
+ printf(" vlapic icr status %s\n",
+ icr.delivery_status?"pending":"idle");
+ }
+ }
+
+}
+
+void hvm_vlapic_eoi_handler(struct hvm_data *h) {
+ if(opt.dump_all || opt.dump_cooked)
+ printf(" vlapic eoi\n");
+}
+
+void hvm_vlapic_handler(struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+ switch(e->gpa) {
+ case 0xfee00300:
+ hvm_vlapic_icr_handler(h);
+ break;
+ case 0xfee000b0:
+ hvm_vlapic_eoi_handler(h);
+ break;
+ }
+
+}
+
+
+#define HVM_MMIO_ASSIST_WRITE 0x200
+void hvm_mmio_assist_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+ union {
+ struct {
+ unsigned long gpa;
+ unsigned long data;
+ } x32;
+ struct {
+ unsigned long long gpa;
+ unsigned long data;
+ } x64;
+ } *r = (typeof(r))h->d;
+
+ union {
+ unsigned event;
+ struct {
+ unsigned minor:8,
+ x64:1,
+ write:2;
+ };
+ } mevt = { .event = ri->event };
+
+ if(mevt.x64) {
+ e->gpa = r->x64.gpa;
+ e->data = r->x64.data;
+ if(ri->extra_words*(sizeof(unsigned long))==sizeof(r->x64))
+ e->mmio_data_valid=1;
+ } else {
+ e->gpa = r->x32.gpa;
+ e->data = r->x32.data;
+ if(ri->extra_words*(sizeof(unsigned long))==sizeof(r->x32))
+ e->mmio_data_valid=1;
+ }
+
+ e->mmio_is_write = mevt.write;
+
+ if(opt.dump_all)
+ {
+ if(e->mmio_data_valid)
+ printf("]%s mmio_assist %c gpa %llx data %lx\n", h->dump_header,
+ mevt.write?'w':'r', e->gpa, e->data);
+ else
+ printf("]%s mmio_assist %c gpa %llx (no data)\n", h->dump_header,
+ mevt.write?'w':'r', e->gpa);
+ }
+
+ if((e->gpa & 0xfffff000) == 0xfee00000)
+ hvm_vlapic_handler(h);
+}
+
+void hvm_inj_virq_process(struct record_info *ri, struct hvm_data *h) {
+ struct {
+ int vector, fake;
+ } *r = (typeof(r))h->d;
+
+ if(opt.dump_cooked || opt.dump_all) {
+ printf(" %3lu.%09lu %s inj_virq vec %u %s\n",
+ ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+ r->vector, r->fake?"fake":"real");
+ }
+
+ if(opt.summary_info)
+ {
+ int vector = r->vector;
+
+ if(vector >= GUEST_INTERRUPT_MAX)
+ vector = GUEST_INTERRUPT_MAX;
+ h->summary.guest_interrupt[vector].count++;
+
+ if(opt.interval.output == INTERVAL_DOMAIN_GUEST_INTERRUPT)
+ interval_domain_guest_interrupt(h, vector);
+ }
+
+ /* If we're waking, make this the wake vector */
+ if(r->vector < GUEST_INTERRUPT_MAX ) {
+ int vector = r->vector;
+ if ( h->w2h.waking && h->w2h.vector == 0 ) {
+ if(h->summary.guest_interrupt[vector].start_tsc) {
+ fprintf(warn, "Strange, d%dv%d waking && wake_vector 0 but vec %d start_tsc %lld!\n",
+ h->v->d->did, h->v->vid,
+ vector,
+ h->summary.guest_interrupt[vector].start_tsc);
+ exit(1);
+ }
+ if(h->w2h.interrupts)
+ fprintf(warn, "Strange, waking && wake_vector 0 but interrupts_this_wait_to_halt %d!\n",
+ h->w2h.interrupts);
+
+ if(opt.dump_all)
+ printf(" [w2h] d%dv%d Setting wake_vector %d\n",
+ h->v->d->did, h->v->vid, vector);
+
+ h->w2h.vector = vector;
+ h->summary.guest_interrupt[vector].is_wake = 1;
+ }
+
+ if( h->summary.guest_interrupt[vector].start_tsc == 0 ) {
+ /* Note that we want start_tsc set at the next vmentry */
+ h->summary.guest_interrupt[vector].start_tsc = 1;
+ h->w2h.interrupts_wanting_tsc++;
+ h->w2h.interrupts++;
+
+ if(opt.dump_all)
+ printf(" [w2h] d%dv%d Starting vec %d\n",
+ h->v->d->did, h->v->vid, vector);
+ }
+ }
+
+
+ /* Vista bug */
+ if(r->vector == 0xd1 && h->v->vlapic.outstanding_ipis)
+ h->v->vlapic.ipi_virq_injected = 1;
+}
+
+/* I/O Handling */
+struct io_address {
+ struct io_address *next;
+ unsigned long pa;
+ struct event_cycle_summary summary[2];
+};
+
+void update_io_address(struct io_address ** list, unsigned long pa, int dir,
+ tsc_t arc_cycles) {
+ struct io_address *p, *q=NULL;
+
+ /* Keep list in order */
+ for(p=*list; p && (p->pa < pa); q=p, p=p->next);
+
+ /* If we didn't find it, make a new element. */
+ if(!p || (p->pa != pa)) {
+ if((p=malloc(sizeof(*p)))==NULL) {
+ fprintf(stderr, "malloc() failed.\n");
+ exit(1);
+ }
+
+ bzero(p, sizeof(*p));
+
+ p->pa=pa;
+
+ /* If we stopped in the middle or at the end, add it in */
+ if(q) {
+ p->next=q->next;
+ q->next=p;
+ } else {
+ /* Otherwise the loop never advanced past the head; insert at the beginning */
+ p->next = *list;
+ *list = p;
+ }
+ }
+ update_summary(&p->summary[dir], arc_cycles);
+}
+
+void hvm_io_address_summary(struct io_address *list, char * s) {
+ if(!list)
+ return;
+
+ printf("%s\n", s);
+
+ for(; list; list=list->next) {
+ PRINT_SUMMARY(list->summary[0], "%8lx:[r] ", list->pa);
+ PRINT_SUMMARY(list->summary[1], "%8lx:[w] ", list->pa);
+ }
+}
+
+void hvm_io_write_postprocess(struct hvm_data *h)
+{
+ if(opt.dump_cooked)
+ {
+ printf(" %s io_write port %x size %u val %lx\n",
+ h->dump_header, h->inflight.io.port,
+ h->inflight.io.size,
+ h->inflight.io.val);
+ }
+ if(opt.with_pio_enumeration)
+ update_io_address(&h->summary.pio, h->inflight.io.port, 1, h->arc_cycles);
+}
+
+void hvm_io_write_process(struct hvm_data *h)
+{
+ struct {
+ unsigned long port;
+ int size;
+ unsigned long val;
+ } *r = (typeof(r))h->d;
+
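+ /* r->size is in bytes; (size<<3) is the width in bits, so the mask
+ keeps only the bytes the guest actually wrote. */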
+ unsigned long long mask = ((1ULL<<(r->size<<3))-1);
+
+ h->inflight.io.port = r->port;
+ h->inflight.io.size = r->size;
+ h->inflight.io.val = r->val & mask;
+
+ if(opt.dump_all)
+ {
+ printf(" %s io_write port %lx size %d val %lx\n",
+ h->dump_header,
+ r->port, r->size, r->val);
+ }
+
+ h->post_process = hvm_io_write_postprocess;
+}
+
+void hvm_io_read_postprocess(struct hvm_data *h)
+{
+ if(opt.dump_cooked)
+ {
+ printf(" %s io_read port %x size %u val %lx\n",
+ h->dump_header, h->inflight.io.port,
+ h->inflight.io.size,
+ h->inflight.io.val);
+ }
+ if(opt.with_pio_enumeration)
+ update_io_address(&h->summary.pio, h->inflight.io.port, 0, h->arc_cycles);
+ if(opt.scatterplot_io && h->inflight.io.port == opt.scatterplot_io_port)
+ scatterplot_vs_time(h->exit_tsc, P.now - h->exit_tsc);
+}
+
+void hvm_io_assist_process(struct hvm_data *h)
+{
+ struct {
+ unsigned long val;
+ } *r = (typeof(r))h->d;
+
+ int size = h->inflight.io.size;
+ unsigned long long mask = ((1ULL<<(size<<3))-1);
+
+ h->inflight.io.val = r->val & mask;
+
+ if(opt.dump_all)
+ {
+ printf(" %s io_assist val %lx\n",
+ h->dump_header,
+ r->val);
+ }
+
+
+ if(!h->post_process)
+ fprintf(warn, "Strange, io_assist but post_process not set!\n");
+}
+
+void hvm_io_read_process(struct hvm_data *h)
+{
+ struct {
+ unsigned long port;
+ int size;
+ } *r = (typeof(r))h->d;
+
+ h->inflight.io.port = r->port;
+ h->inflight.io.size = r->size;
+ h->inflight.io.val = 0;
+
+ if(opt.dump_all)
+ {
+ printf(" %s io_read port %lx size %d\n",
+ h->dump_header,
+ r->port, r->size);
+ }
+
+ h->post_process = hvm_io_read_postprocess;
+}
+
+/* cr_write */
+/* CR3 list */
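+/* Each cr3_value_struct is threaded onto two lists: a per-domain list
+ * through ->next (anchored at d->cr3_value_head, used for summaries)
+ * and a global list through ->gnext (P.cr3.head/tail, used for
+ * interval output).  Both lists append at the tail. */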
+void cr3_switch(unsigned long long val, struct hvm_data *h) {
+ struct vcpu_data *v = h->v;
+ /* Really only need absolute tsc here. Later change to global time. */
+ unsigned long long now = P.now;
+ unsigned long long gmfn = val >> 12;
+
+ if(opt.with_cr3_enumeration) {
+ if(v->cr3.data) {
+ struct cr3_value_struct *cur = v->cr3.data;
+ unsigned long long cycles = now - v->cr3.start_time;
+
+ if(opt.summary_info)
+ update_cycles(&cur->total_time, cycles);
+
+ cur->last_time = now;
+ }
+
+ if(gmfn) {
+ struct cr3_value_struct *p, **last=&v->d->cr3_value_head;
+
+ /* Always add to tail, so that we get consistent interval
+ output as the number of cr3s grows */
+ for(p=*last; p; last = (&p->next), p=p->next)
+ if(p->gmfn == gmfn)
+ break;
+
+ if(!p) {
+ if((p=malloc(sizeof(*p)))==NULL) {
+ fprintf(stderr, "malloc() failed.\n");
+ exit(1);
+ }
+
+ bzero(p, sizeof(*p));
+
+ p->gmfn = gmfn;
+ p->cr3_id = P.cr3.id;
+ p->first_time = now;
+
+ p->next=*last;
+ *last=p;
+
+ p->gnext = NULL;
+ if(P.cr3.head)
+ *P.cr3.tail = p;
+ else
+ P.cr3.head = p;
+ P.cr3.tail = &p->gnext;
+
+ P.cr3.id++;
+
+ /* Add to the interval list if appropriate */
+ if(v->d->did != DEFAULT_DOMAIN)
+ interval_cr3_value_check(p);
+ }
+
+ if(p->prealloc_unpin.now) {
+ fprintf(warn, "Re-promoting previously unpinned cr3 %llx!\n",
+ p->gmfn);
+ p->prealloc_unpin.now = 0;
+ h->inflight.cr_write.repromote = 1;
+ }
+
+ /* Accounting for new toplevel */
+ v->cr3.start_time = now;
+ p->switch_count++;
+ if(p->destroy.callback)
+ p->destroy.switch_count++;
+ v->cr3.data = p;
+ } else {
+ v->cr3.data = NULL;
+ }
+
+ if (opt.scatterplot_cr3_switch) {
+ scatterplot_vs_time(h->exit_tsc,
+ v->cr3.data ? (v->cr3.data->cr3_id) : 0);
+ }
+ } else {
+ if (opt.scatterplot_cr3_switch)
+ scatterplot_vs_time(h->exit_tsc, gmfn);
+ }
+
+ v->cr3.val = val;
+};
+
+void cr3_prealloc_unpin(struct vcpu_data *v, unsigned long long gmfn) {
+ struct cr3_value_struct *cr3;
+
+ /* Look for it in the list */
+ for(cr3 = v->d->cr3_value_head; cr3; cr3=cr3->next)
+ if(cr3->gmfn == gmfn)
+ break;
+
+ if(!cr3)
+ return;
+
+ if(cr3->prealloc_unpin.now)
+ fprintf(warn, "Strange, gmfn %llx multiple unpins w/o access!\n",
+ gmfn);
+
+ cr3->prealloc_unpin.now = 1;
+ cr3->prealloc_unpin.count++;
+
+ if(opt.dump_all)
+ printf(" cr3 %llx unpinned %d times\n",
+ gmfn, cr3->prealloc_unpin.count);
+}
+
+void cr3_dump_list(struct cr3_value_struct *head){
+ struct cr3_value_struct *p;
+ struct cr3_value_struct **qsort_array;
+ int i, N=0;
+
+ int cr3_compare(const void *_a, const void *_b) {
+ struct cr3_value_struct *a=*(typeof(&a))_a;
+ struct cr3_value_struct *b=*(typeof(&a))_b;
+
+ if(a->total_time.cycles < b->total_time.cycles)
+ return 1;
+ else if(b->total_time.cycles == a->total_time.cycles) {
+ if(a->total_time.count < b->total_time.count)
+ return 1;
+ else if(a->total_time.count == b->total_time.count)
+ return 0;
+ else
+ return -1;
+ } else
+ return -1;
+ }
+
+ if(!head)
+ return;
+
+ /* Count the number of elements */
+ for(p=head; p; p=p->next)
+ N++;
+
+ if(!N)
+ return;
+
+ /* Alloc an array of the right size */
+ qsort_array = malloc(N * sizeof(struct cr3_value_struct *));
+ if(!qsort_array) {
+ fprintf(stderr, "malloc() failed.\n");
+ exit(1);
+ }
+
+ /* Point the array into it */
+ for(i=0, p=head; p; p=p->next, i++)
+ qsort_array[i]=p;
+
+ /* Sort the array by time */
+ qsort(qsort_array, N, sizeof(struct cr3_value_struct *),
+ cr3_compare);
+
+ /* WARNING: don't use N after this point unless you copy this variable */
+#if 0
+ if(opt.summary_eip_limit && opt.summary_eip_limit < N)
+ N=opt.summary_eip_limit;
+#endif
+
+ /* Now print the results */
+ printf(" cr3 values:\n");
+ for(i=0; i<N; i++) {
+ char desc[30];
+ struct time_struct first, last;
+
+ p = qsort_array[i];
+
+ abs_cycles_to_time(p->first_time, &first);
+ abs_cycles_to_time(p->last_time, &last);
+
+
+ snprintf(desc, 30, " %8llx (id %d)", p->gmfn, p->cr3_id);
+ print_cycle_summary(&p->total_time, desc);
+ snprintf(desc, 30, " guest");
+ print_cycle_percent_summary(&p->guest_time, p->run_time, desc);
+ snprintf(desc, 30, " hv ");
+ print_cycle_percent_summary(&p->hv_time, p->run_time, desc);
+
+ hvm_short_summary(&p->hvm, p->run_time, " + ");
+#if 0
+ printf(" Seen: %4lu.%09lu-%4lu.%09lu switch %d flush %d\n",
+ first.s, first.ns,
+ last.s, last.ns,
+ p->switch_count, p->flush_count);
+ if(p->destroy.callback)
+ printf(" destroy: flush %u switch %u fixup %u emulate %u\n",
+ p->destroy.flush_count,
+ p->destroy.switch_count,
+ p->destroy.fixup_user,
+ p->destroy.emulate_corr_user);
+#endif
+ }
+
+ free(qsort_array);
+}
+
+void hvm_cr3_write_summary(struct hvm_data *h) {
+ int j;
+
+ for(j=0; j<RESYNCS_MAX; j++)
+ PRINT_SUMMARY(h->summary.cr3_write_resyncs[j],
+ " *[%3d] ", j);
+ PRINT_SUMMARY(h->summary.cr3_write_resyncs[j],
+ " *[MAX] ");
+}
+
+void hvm_cr_write_summary(struct hvm_data *h)
+{
+ int i;
+
+ for(i=0; i<CR_MAX; i++)
+ {
+ PRINT_SUMMARY(h->summary.cr_write[i],
+ " cr%d ", i);
+ switch(i) {
+ case 3:
+ hvm_cr3_write_summary(h);
+ break;
+ }
+ }
+}
+
+void hvm_cr_write_postprocess(struct hvm_data *h)
+{
+ if(h->inflight.cr_write.cr == 3) {
+ struct vcpu_data *v = h->v;
+ unsigned long long new_val = h->inflight.cr_write.val;
+ unsigned long long oval;
+ int flush=0;
+
+ if(v->cr3.val) {
+ oval = v->cr3.val;
+
+ if(new_val == oval) {
+ if(v->cr3.data) {
+ v->cr3.data->flush_count++;
+ if(v->cr3.data->destroy.callback)
+ v->cr3.data->destroy.flush_count++;
+ }
+ flush=1;
+ }
+
+ if(opt.dump_cooked)
+ {
+ printf(" %s cr_write cr3 val %llx oval %llx (%d resyncs) %s\n",
+ h->dump_header,
+ new_val,
+ oval,
+ h->resyncs,
+ flush?"flush":"");
+ }
+ } else {
+ if(opt.dump_cooked)
+ {
+ printf(" %s cr_write cr3 val %llx (%d resyncs)\n",
+ h->dump_header,
+ h->inflight.cr_write.val,
+ h->resyncs);
+ }
+ }
+
+ if(opt.summary_info) {
+ int resyncs = h->resyncs;
+
+ if(resyncs > RESYNCS_MAX)
+ resyncs = RESYNCS_MAX;
+
+ update_summary(&h->summary.cr3_write_resyncs[resyncs],
+ h->arc_cycles);
+
+ update_summary(&h->summary.cr_write[3],
+ h->arc_cycles);
+
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_CR3);
+ }
+
+ if(!flush)
+ cr3_switch(new_val, h);
+ } else {
+ if(opt.dump_cooked)
+ {
+ printf(" %s cr_write cr%d val %llx\n",
+ h->dump_header,
+ h->inflight.cr_write.cr,
+ h->inflight.cr_write.val);
+ }
+
+ if(opt.summary_info)
+ {
+ if(h->inflight.cr_write.cr < CR_MAX)
+ update_summary(&h->summary.cr_write[h->inflight.cr_write.cr],
+ h->arc_cycles);
+
+ }
+ }
+
+ /* Set summary handler */
+ /* FIXME - deal with cr_read_summary */
+ if(h->exit_reason < h->exit_reason_max)
+ {
+ if ( opt.svm_mode ) {
+ /* For svm, only need a summary for cr3 */
+ if ( h->exit_reason == VMEXIT_CR3_WRITE )
+ hvm_set_summary_handler(h, hvm_cr3_write_summary);
+ } else {
+ hvm_set_summary_handler(h, hvm_cr_write_summary);
+ }
+ }
+
+}
+
+void hvm_cr_write_process(struct hvm_data *h)
+{
+ struct {
+ /* FIXME -- 64-bit values */
+ unsigned cr;
+ unsigned val;
+ } *r = (typeof(r))h->d;
+
+ h->inflight.cr_write.cr = r->cr;
+ h->inflight.cr_write.val = r->val;
+
+ h->post_process = hvm_cr_write_postprocess;
+
+ if(opt.dump_all)
+ {
+ if(r->cr == 3 && h->v->cr3.val) {
+ printf("]%s cr_write cr3 val %llx oval %llx %s\n",
+ h->dump_header,
+ (unsigned long long)r->val,
+ h->v->cr3.val,
+ (h->v->cr3.val == r->val)?"flush":"switch");
+ } else {
+ printf(" %s cr_write cr%d val %llx\n",
+ h->dump_header,
+ r->cr, (unsigned long long)r->val);
+
+ }
+ }
+
+}
+
+/* msr_write */
+void hvm_msr_write_summary(struct hvm_data *h)
+{
+}
+
+void hvm_msr_write_postprocess(struct hvm_data *h)
+{
+ if(opt.dump_cooked)
+ {
+ printf(" %s msr_write msr %d val %llx\n",
+ h->dump_header,
+ h->inflight.msr.addr,
+ h->inflight.msr.val);
+ }
+
+ if(opt.summary_info) {
+ }
+
+ /* Set summary handler */
+ hvm_set_summary_handler(h, hvm_msr_write_summary);
+}
+
+void hvm_msr_write_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct {
+ unsigned long long val;
+ unsigned long addr;
+ } *r = (typeof(r))h->d;
+
+ if(ri->extra_words != (sizeof(*r)/sizeof(unsigned long)))
+ {
+ fprintf(warn, "FATAL: msr_write extra_words %d, expected %d!\n",
+ ri->extra_words, sizeof(*r)/sizeof(unsigned long));
+ exit(1);
+ }
+
+ h->inflight.msr.addr = r->addr;
+ h->inflight.msr.val = r->val;
+
+ if(opt.dump_all)
+ {
+ printf(" %s msr_write addr %lx val %llx\n",
+ h->dump_header,
+ r->addr, r->val);
+ }
+
+ h->post_process = hvm_msr_write_postprocess;
+}
+
+/* msr_read */
+void hvm_msr_read_summary(struct hvm_data *h)
+{
+}
+
+void hvm_msr_read_postprocess(struct hvm_data *h)
+{
+ if(opt.dump_cooked)
+ {
+ printf(" %s msr_read msr %d val %llx\n",
+ h->dump_header,
+ h->inflight.msr.addr,
+ h->inflight.msr.val);
+ }
+
+ if(opt.summary_info) {
+ }
+
+ /* Set summary handler */
+ hvm_set_summary_handler(h, hvm_msr_read_summary);
+}
+
+void hvm_msr_read_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct {
+ unsigned long long val;
+ unsigned long addr;
+ } *r = (typeof(r))h->d;
+
+ if(ri->extra_words != (sizeof(*r)/sizeof(unsigned long)))
+ {
+ fprintf(warn, "FATAL: msr_read extra_words %d, expected %d!\n",
+ ri->extra_words, sizeof(*r)/sizeof(unsigned long));
+ exit(1);
+ }
+
+ h->inflight.msr.addr = r->addr;
+ h->inflight.msr.val = r->val;
+
+ if(opt.dump_all)
+ {
+ printf(" %s msr_read addr %lx val %llx\n",
+ h->dump_header,
+ r->addr, r->val);
+ }
+
+ h->post_process = hvm_msr_read_postprocess;
+}
+
+void hvm_vmcall_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct {
+ unsigned long eax;
+ } *r = (typeof(r))h->d;
+
+ if(opt.dump_all) {
+ if(r->eax < HYPERCALL_MAX)
+ printf(" %s vmcall %2lx (%s)\n",
+ h->dump_header,
+ r->eax,
+ hypercall_name[r->eax]);
+ else
+ printf(" %s vmcall %2lx\n",
+ h->dump_header,
+ r->eax);
+ }
+
+#if 0
+ if(opt.summary)
+ hvm_set_summary_handler(h, hvm_vmcall_summary);
+#endif
+}
+
+void hvm_inj_exc_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct {
+ unsigned vec, ec;
+ } *r = (typeof(r))h->d;
+
+ if ( opt.dump_cooked || opt.dump_all )
+ {
+ if(r->vec < HVM_TRAP_MAX)
+ printf(" %3lu.%09lu %s inj_exc trap %s ec %x\n",
+ ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+ hvm_trap_name[r->vec], r->ec);
+ else
+ printf(" %3lu.%09lu %s inj_exc trap %u ec %x\n",
+ ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+ r->vec, r->ec);
+ }
+
+}
+
+void hvm_intr_summary(struct hvm_data *h)
+{
+ int i;
+
+ for(i=0; i<EXTERNAL_INTERRUPT_MAX; i++)
+ if(h->summary.extint[i])
+ {
+ if(hvm_extint_vector_name[i])
+ printf(" %10s(%3d): %d\n",
+ hvm_extint_vector_name[i],
+ i,
+ h->summary.extint[i]);
+ else
+ printf(" [%3d]: %d\n",
+ i,
+ h->summary.extint[i]);
+ }
+ if(h->summary.extint[EXTERNAL_INTERRUPT_MAX])
+ printf(" Other: : %d\n",
+ h->summary.extint[EXTERNAL_INTERRUPT_MAX]);
+}
+
+
+void hvm_intr_process(struct hvm_data *h)
+{
+ unsigned vec = *(unsigned *)h->d;
+
+ if( (h->rip >> ADDR_SPACE_BITS) != 0
+ && (h->rip >> ADDR_SPACE_BITS) != ((0ULL-1) >> ADDR_SPACE_BITS) ) {
+ fprintf(stderr, "Unexpected rip %llx (shift %llx)\n",
+ h->rip,
+ h->rip >> ADDR_SPACE_BITS);
+ exit(1);
+ }
+
+ if ( opt.dump_cooked || opt.dump_all )
+ {
+ if ( vec < EXTERNAL_INTERRUPT_MAX &&
+ hvm_extint_vector_name[vec] )
+ printf(" %s intr vec %s(%x)\n",
+ h->dump_header,
+ hvm_extint_vector_name[vec],
+ vec);
+ else
+ printf(" %s intr vec %x\n",
+ h->dump_header, vec);
+ }
+
+ if(opt.scatterplot_interrupt_eip
+ && vec == opt.scatterplot_interrupt_vector)
+ {
+ /* Truncate to ADDR_SPACE_BITS (48) bits */
+ unsigned long long rip = h->rip & ((1ULL << ADDR_SPACE_BITS)-1);
+
+ /* Want absolute tsc to global tsc */
+ scatterplot_vs_time(h->exit_tsc, rip);
+ }
+
+ if(opt.histogram_interrupt_eip
+ && vec == opt.histogram_interrupt_vector)
+ {
+ /* Truncate to ADDR_SPACE_BITS (48) bits */
+ unsigned long long rip = h->rip & ((1ULL << ADDR_SPACE_BITS)-1);
+ unsigned index = rip / opt.histogram_interrupt_increment;
+
+ h->summary.extint_histogram[index]++;
+ }
+
+ if(opt.summary_info) {
+ if(opt.summary)
+ hvm_set_summary_handler(h, hvm_intr_summary);
+
+ if(vec < EXTERNAL_INTERRUPT_MAX)
+ h->summary.extint[vec]++;
+ else
+ h->summary.extint[EXTERNAL_INTERRUPT_MAX]++;
+ }
+}
+
+void hvm_pf_inject_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct {
+ unsigned cr2, ec;
+ } *r = (typeof(r))h->d;
+
+ if ( opt.dump_cooked || opt.dump_all )
+ {
+ printf(" %3lu.%09lu %s pf_inject guest_cr2 %x guest_ec %x\n",
+ ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+ r->cr2, r->ec);
+ }
+}
+
+void hvm_exception_nmi_generic_postprocess(struct hvm_data *h)
+{
+ static int warned = 0;
+ if(opt.dump_cooked)
+ {
+ printf(" %s exnmi no_handler\n",
+ h->dump_header);
+ }
+
+ if(opt.summary_info)
+ update_summary(&h->summary.pf_xen[PF_XEN_NO_HANDLER],
+ h->arc_cycles);
+
+ if(!warned) {
+ warned++;
+ fprintf(warn, "Strange, no handler for exnmi!\n");
+ }
+}
+
+void hvm_generic_postprocess(struct hvm_data *h)
+{
+ if(opt.summary_info) {
+ /* HLT checked at hvm_vmexit_close() */
+ }
+
+ if(opt.dump_cooked)
+ {
+ int i, evt = h->event_handler;
+
+ if(evt < HVM_EVENT_HANDLER_MAX)
+ printf(" %s %s [",
+ h->dump_header,
+ hvm_event_handler_name[evt]);
+ else
+ printf(" %s %d [",
+ h->dump_header,
+ evt);
+
+ for(i=0; i<4; i++) {
+ printf(" %x", h->inflight.generic.d[i]);
+ }
+
+ printf(" ]\n");
+ }
+}
+
+void hvm_generic_dump(struct record_info *ri, char * prefix)
+{
+ struct {
+ unsigned vcpu:16, domain:16;
+ unsigned d[4];
+ } *cr = (typeof(cr))ri->d;
+
+ char *evt_string, evt_number[256];
+ unsigned *d;
+ int i, evt = ri->event - TRC_HVM_HANDLER;
+
+ if(evt < HVM_EVENT_HANDLER_MAX)
+ {
+ evt_string = hvm_event_handler_name[evt];
+ }
+ else
+ {
+ snprintf(evt_number, 256, "hvm_handler %d", evt);
+ evt_string = evt_number;
+ }
+
+ d = ri->d;
+ printf("%s%s %s [",
+ prefix,
+ ri->dump_header,
+ evt_string);
+
+ for(i=0; i<4; i++)
+ {
+ printf(" %x", d[i]);
+ }
+
+ printf(" ]\n");
+}
+
+void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
+ /* Wait for first vmexit to initialize */
+ if(!h->init)
+ {
+ if(opt.dump_all)
+ hvm_generic_dump(ri,"!");
+ return;
+ }
+
+ h->d = ri->d;
+
+ /* Handle things that don't need a vmexit */
+ switch(ri->event) {
+ default:
+ goto needs_vmexit;
+ /* Records about changing guest state */
+ case TRC_HVM_PF_INJECT:
+ hvm_pf_inject_process(ri, h);
+ break;
+ case TRC_HVM_REINJ_VIRQ:
+ if ( opt.dump_cooked || opt.dump_all )
+ {
+ printf(" %3lu.%09lu %s inj_virq vec %u\n",
+ ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+ *(unsigned*)h->d);
+ }
+ break;
+ case TRC_HVM_INJ_EXC:
+ hvm_inj_exc_process(ri, h);
+ break;
+ case TRC_HVM_INJ_VIRQ:
+ hvm_inj_virq_process(ri, h);
+ break;
+ case TRC_HVM_OP_DESTROY_PROC:
+ if(h->v->cr3.data) {
+ struct cr3_value_struct *cur = h->v->cr3.data;
+ if(cur->destroy.callback)
+ fprintf(warn, "Strange, double callback for cr3 gmfn %llx!\n",
+ cur->gmfn);
+ cur->destroy.callback = 1;
+ } else if(opt.with_cr3_enumeration) {
+ fprintf(warn, "Warning: destroy_proc: don't know current cr3\n");
+ }
+ if ( opt.dump_cooked || opt.dump_all )
+ {
+ printf(" %3lu.%09lu %s destroy_proc cur_cr3 %llx\n",
+ ri->t.s, ri->t.ns, pcpu_string(ri->cpu), h->v->cr3.val);
+ }
+ break;
+ }
+
+ return;
+
+needs_vmexit:
+ /* Wait for the next vmexit */
+ if(!h->vmexit_valid)
+ {
+ if(opt.dump_all)
+ hvm_generic_dump(ri,"!");
+ return;
+ }
+
+ /* Keep generic "event handler" info */
+ h->event_handler = ri->event - TRC_HVM_HANDLER;
+
+ switch(ri->event) {
+ /* Records adding to the vmexit reason */
+ case TRC_HVM_INTR:
+ hvm_intr_process(h);
+ break;
+ case TRC_HVM_PF_XEN:
+ case TRC_HVM_PF_XEN64:
+ hvm_pf_xen_process(ri, h);
+ break;
+ case TRC_HVM_IO_READ:
+ hvm_io_read_process(h);
+ break;
+ case TRC_HVM_IO_WRITE:
+ hvm_io_write_process(h);
+ break;
+ case TRC_HVM_IO_ASSIST:
+ hvm_io_assist_process(h);
+ break;
+ case TRC_HVM_MMIO_ASSIST:
+ case TRC_HVM_MMIO_ASSIST|HVM_MMIO_ASSIST_WRITE:
+ hvm_mmio_assist_process(ri, h);
+ break;
+ case TRC_HVM_CR_WRITE:
+ hvm_cr_write_process(h);
+ break;
+ case TRC_HVM_MSR_WRITE:
+ hvm_msr_write_process(ri, h);
+ break;
+ case TRC_HVM_MSR_READ:
+ hvm_msr_read_process(ri, h);
+ break;
+ case TRC_HVM_VMMCALL:
+ hvm_vmcall_process(ri, h);
+ break;
+ case TRC_HVM_CR_READ:
+ case TRC_HVM_DR_READ:
+ case TRC_HVM_DR_WRITE:
+ case TRC_HVM_CPUID:
+ case TRC_HVM_NMI:
+ case TRC_HVM_SMI:
+ case TRC_HVM_HLT:
+ case TRC_HVM_INVLPG:
+ case TRC_HVM_MCE:
+ case TRC_HVM_CLTS:
+ case TRC_HVM_LMSW:
+ if ( h->post_process != NULL )
+ fprintf(warn, "Strange, h->postprocess already set!\n");
+ h->inflight.generic.event = ri->event;
+ bcopy(h->d, h->inflight.generic.d, sizeof(unsigned int) * 4);
+ h->post_process = hvm_generic_postprocess;
+ if(opt.dump_all)
+ {
+ hvm_generic_dump(ri, "]");
+ }
+ break;
+ }
+}
+
+void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc);
+void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev,
+ tsc_t tsc, int new_runstate);
+struct vcpu_data * vcpu_find(int did, int vid);
+void lose_vcpu(struct vcpu_data *v, tsc_t tsc);
+
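+/* Classify the domain's runstate from its vcpus' runstates: any lost
+ * vcpu makes the domain LOST; a running vcpu alongside a runnable one
+ * is a CONCURRENCY_HAZARD (work is waiting while the domain runs),
+ * running with only blocked/offline siblings is PARTIAL_RUN, and
+ * running alone is FULL_RUN; the runnable-but-not-running combinations
+ * map to PARTIAL/FULL_CONTENTION, and all-blocked/offline is BLOCKED. */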
+int domain_runstate(struct domain_data *d) {
+ int i;
+ int runstates[RUNSTATE_MAX];
+ int ret=-1;
+
+ if(d->did == DEFAULT_DOMAIN)
+ return 0;
+
+ for(i=0; i<RUNSTATE_MAX; i++)
+ runstates[i]=0;
+
+ for(i=0; i<=d->max_vid; i++)
+ if(d->vcpu[i])
+ runstates[d->vcpu[i]->runstate.state]++;
+
+ if(runstates[RUNSTATE_LOST])
+ ret = DOMAIN_RUNSTATE_LOST;
+ else if(runstates[RUNSTATE_RUNNING])
+ {
+ if(runstates[RUNSTATE_RUNNABLE])
+ ret = DOMAIN_RUNSTATE_CONCURRENCY_HAZARD;
+ else if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE])
+ ret = DOMAIN_RUNSTATE_PARTIAL_RUN;
+ else
+ ret = DOMAIN_RUNSTATE_FULL_RUN;
+ }
+ else if(runstates[RUNSTATE_RUNNABLE])
+ {
+ if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE])
+ ret = DOMAIN_RUNSTATE_PARTIAL_CONTENTION;
+ else
+ ret = DOMAIN_RUNSTATE_FULL_CONTENTION;
+ }
+ else if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE])
+ {
+ ret = DOMAIN_RUNSTATE_BLOCKED;
+ } else {
+ fprintf(warn, "Strange, no meaningful runstates for d%d!\n",
+ d->did);
+ }
+
+#if 0
+ printf(" Max vid: %d\n", d->max_vid);
+ for(i=0; i<=d->max_vid; i++)
+ if(d->vcpu[i])
+ fprintf(warn, " v%d: %s\n",
+ i, runstate_name[d->vcpu[i]->runstate.state]);
+
+ for(i=0; i<RUNSTATE_MAX; i++)
+ fprintf(warn, " %s: %d\n",
+ runstate_name[i], runstates[i]);
+#endif
+
+ if(ret >= 0)
+ return ret;
+ else
+ exit(1);
+}
+
+static inline void runstate_update(struct vcpu_data *v, int new_runstate,
+ tsc_t tsc)
+{
+ struct domain_data *d = v->d;
+ if(v->runstate.tsc > 0 && v->runstate.tsc < tsc) {
+ update_cycles(v->runstates + v->runstate.state, tsc - v->runstate.tsc);
+
+ if(v->runstate.state == RUNSTATE_RUNNING)
+ update_cycles(&v->d->total_time, tsc - v->runstate.tsc);
+
+ /* How much did dom0 run this buffer? */
+ if(v->d->did == 0) {
+ int i;
+ for(i=0; i<MAX_CPUS; i++) {
+ struct pcpu_info * p = P.pcpu + i;
+ tsc_t start_tsc;
+ if(!p->active)
+ continue;
+ start_tsc = (p->volume.buffer_first_tsc > v->runstate.tsc) ?
+ p->volume.buffer_first_tsc :
+ v->runstate.tsc;
+ p->volume.buffer_dom0_runstate_cycles[v->runstate.state]
+ += tsc - start_tsc;
+#if 0
+ printf(" - updated p%d dom0_runstate %s to %lld cycles (+%lld)\n",
+ p->pid, runstate_name[v->runstate.state],
+ p->volume.buffer_dom0_runstate_cycles[v->runstate.state],
+ tsc - start_tsc);
+#endif
+ p->volume.buffer_dom0_runstate = new_runstate;
+ p->volume.buffer_dom0_runstate_tsc = tsc;
+ }
+ }
+ }
+ v->runstate.state = new_runstate;
+ v->runstate.tsc = tsc;
+
+ /* Determine the domain runstate */
+ if(d->runstate_tsc > 0 && d->runstate_tsc < tsc)
+ update_cycles(d->runstates + d->runstate, tsc - d->runstate_tsc);
+
+ d->runstate = domain_runstate(d);
+
+ d->runstate_tsc = tsc;
+}
+
+void hvm_vmexit_process(struct record_info *ri, struct hvm_data *h,
+ struct vcpu_data *v) {
+ struct {
+ union {
+ struct {
+ unsigned long long rip;
+ unsigned long exit_reason;
+ } x64;
+ struct {
+ unsigned long eip;
+ unsigned long exit_reason;
+ } x32;
+ };
+ } *r;
+
+ if(ri->extra_words != 2 && ri->extra_words != 3)
+ {
+ fprintf(warn, "FATAL: vmexit has unexpected extra words %d!\n",
+ ri->extra_words);
+ dump_generic(warn, ri);
+ exit(1);
+ }
+
+
+ r = (typeof(r))ri->d;
+
+ if(!h->init)
+ init_hvm_data(h, v);
+
+ h->vmexit_valid=1;
+
+ if(ri->event == TRC_HVM_VMEXIT64) {
+ if(v->guest_paging_levels != 4)
+ {
+ fprintf(warn, "%s: VMEXIT64, but guest_paging_levels %d. Switching to 4.\n",
+ __func__, v->guest_paging_levels);
+ v->guest_paging_levels = 4;
+ }
+ if(!is_valid_addr64(r->x64.rip))
+ fprintf(warn, "%s: invalid va %llx",
+ __func__, r->x64.rip);
+ h->rip = r->x64.rip;
+ h->exit_reason = r->x64.exit_reason;
+ } else {
+ if(v->guest_paging_levels == 4)
+ {
+ int new_paging_levels = opt.default_guest_paging_levels;
+ if(new_paging_levels == 4)
+ new_paging_levels = 2; /* Wild guess */
+ fprintf(warn, "%s: VMEXIT, but guest_paging_levels %d. Switching to %d(default).\n",
+ __func__, v->guest_paging_levels, new_paging_levels);
+ v->guest_paging_levels = new_paging_levels;
+ }
+ h->rip = r->x32.eip;
+ h->exit_reason = r->x32.exit_reason;
+ }
+
+ if(h->exit_reason > h->exit_reason_max)
+ {
+ fprintf(warn, "h->exit_reason %lx > exit_reason_max %lx!\n",
+ (unsigned long)h->exit_reason,
+ (unsigned long)h->exit_reason_max);
+ exit(1);
+ }
+
+ if(opt.dump_all) {
+ if ( h->exit_reason < h->exit_reason_max )
+ printf("]%s vmexit exit_reason %s eip %llx%s\n",
+ ri->dump_header,
+ h->exit_reason_name[h->exit_reason],
+ h->rip,
+ find_symbol(h->rip));
+ else
+ printf("]%s vmexit exit_reason %x eip %llx%s\n",
+ ri->dump_header,
+ h->exit_reason,
+ h->rip,
+ find_symbol(h->rip));
+ }
+
+ if(h->v->cr3.data && h->entry_tsc) {
+ update_cycles(&h->v->cr3.data->guest_time,
+ ri->tsc - h->entry_tsc);
+ h->v->cr3.data->run_time += (ri->tsc - h->entry_tsc);
+ }
+
+ h->exit_tsc = ri->tsc;
+ h->entry_tsc = 0;
+ h->resyncs = 0;
+ h->prealloc_unpin = 0;
+ h->short_summary_done = 0;
+
+ if(!opt.svm_mode && h->exit_reason == EXIT_REASON_EXCEPTION_NMI)
+ {
+ bzero(&h->inflight.pf_xen, sizeof(h->inflight.pf_xen));
+ h->post_process = hvm_exception_nmi_generic_postprocess;
+ } else {
+ h->post_process = NULL;
+ }
+
+ {
+ struct time_struct t;
+
+ abs_cycles_to_time(h->exit_tsc, &t);
+
+ if(t.time)
+ {
+ snprintf(h->dump_header,
+ DUMP_HEADER_MAX,
+ "%3lu.%09lu %s", t.s, t.ns, pcpu_string(ri->cpu));
+ }
+ else
+ {
+ snprintf(h->dump_header,
+ DUMP_HEADER_MAX,
+ " %s", pcpu_string(ri->cpu));
+ }
+ }
+}
+
+void hvm_close_vmexit(struct hvm_data *h, tsc_t tsc) {
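+ /* Close out the exit-to-entry arc: arc_cycles is the time spent
+ handling this vmexit, from the exit record up to this tsc (normally
+ the following vmentry).  It is charged to the exit reason and, via
+ post_process, to whatever handler records arrived in between. */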
+
+ if(h->exit_tsc) {
+ if(h->exit_tsc > tsc)
+ h->arc_cycles = 0;
+ else {
+ h->arc_cycles = tsc - h->exit_tsc;
+
+ if(opt.summary_info) {
+ update_summary(&h->summary.exit_reason[h->exit_reason],
+ h->arc_cycles);
+ h->summary_info = 1;
+ }
+ }
+ }
+
+ if(h->post_process)
+ (h->post_process)(h);
+
+ if(h->arc_cycles) {
+ if(opt.summary_info && !h->short_summary_done) {
+ switch(h->event_handler) {
+ case HVM_EVENT_HANDLER_VMCALL:
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_VMCALL);
+ break;
+ case HVM_EVENT_HANDLER_INTR:
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_INTERRUPT);
+ break;
+ case HVM_EVENT_HANDLER_HLT:
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_HLT);
+ break;
+ default:
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_OTHER);
+ break;
+ }
+ }
+
+
+ if(h->v->cr3.data) {
+ h->v->cr3.data->run_time += h->arc_cycles;
+
+ if(opt.summary_info)
+ update_cycles(&h->v->cr3.data->hv_time,
+ h->arc_cycles);
+ }
+ }
+
+ h->exit_tsc = 0;
+ h->vmexit_valid = 0;
+ h->post_process = NULL;
+
+}
+
+void hvm_vmentry_process(struct record_info *ri, struct hvm_data *h) {
+ if(!h->init)
+ {
+ if(opt.dump_all)
+ printf("!%s vmentry\n",
+ ri->dump_header);
+ return;
+ }
+
+ /* Vista bug
+ * This has to be done here because irqs are injected on the path out
+ * to vmexit. */
+ if(h->v->vlapic.ipi_virq_injected)
+ {
+ struct vcpu_data *v = h->v;
+ struct vlapic_struct *vla = &v->vlapic;
+ unsigned long long lat=0;
+
+ if(ri->tsc > vla->first_ipi_tsc)
+ lat = ri->tsc - vla->first_ipi_tsc;
+ else
+ fprintf(stderr, "Strange, first_ipi_tsc %lld > ri->tsc %lld!\n",
+ vla->first_ipi_tsc, ri->tsc);
+
+ if((opt.dump_all || opt.dump_cooked) && vla->outstanding_ipis > 1)
+ {
+ struct time_struct t;
+ cycles_to_time(lat, &t);
+ printf(" d%dv%d received %d ipis, latency %lld (%lu.%09lu s)\n",
+ v->d->did, v->vid, vla->outstanding_ipis, lat,
+ t.s, t.ns);
+ }
+
+ if(opt.summary_info)
+ {
+ update_summary(&h->summary.ipi_latency, lat);
+ h->summary.ipi_count[vla->outstanding_ipis]++;
+
+ }
+
+ if(opt.dump_cooked || opt.dump_ipi_latency) {
+ struct time_struct t;
+ abs_cycles_to_time(ri->tsc, &t);
+ printf("Ipis: %d latency: %lld (time %lu.%09lu s)\n",
+ vla->outstanding_ipis, lat, t.s, t.ns);
+ }
+
+ clear_vlapic(vla);
+ }
+
+ if(h->w2h.waking && opt.dump_all)
+ printf(" [w2h] d%dv%d Finishing waking\n",
+ h->v->d->did, h->v->vid);
+
+ h->w2h.waking = 0;
+
+ if ( h->w2h.interrupts_wanting_tsc ) {
+ int i;
+ for(i=0; i<GUEST_INTERRUPT_MAX; i++)
+ {
+ if ( h->summary.guest_interrupt[i].start_tsc == 1 )
+ {
+ if(opt.dump_all)
+ printf(" [w2h] d%dv%d Setting vec %d tsc to %lld\n",
+ h->v->d->did, h->v->vid, i, ri->tsc);
+ h->summary.guest_interrupt[i].start_tsc = ri->tsc;
+ h->w2h.interrupts_wanting_tsc--;
+ if ( h->w2h.interrupts_wanting_tsc == 0 )
+ break;
+ }
+ }
+ }
+
+ if(!h->vmexit_valid)
+ {
+ if(opt.dump_all)
+ printf("!%s vmentry\n",
+ ri->dump_header);
+ return;
+ }
+
+ if(opt.dump_all) {
+ printf("]%s vmentry\n",
+ ri->dump_header);
+ }
+
+ hvm_close_vmexit(h, ri->tsc);
+ h->entry_tsc = ri->tsc;
+}
+
+void hvm_process(struct pcpu_info *p)
+{
+ struct record_info *ri = &p->ri;
+ struct vcpu_data *v = p->current;
+ struct hvm_data *h = &v->hvm;
+
+ if(v->d->did == IDLE_DOMAIN) {
+ fprintf(warn, "%s: Strange, hvm record for idle domain!\n",
+ __func__);
+ return;
+ }
+
+ if(!v->data_type)
+ v->data_type = VCPU_DATA_HVM;
+ else if(v->data_type != VCPU_DATA_HVM) {
+ fprintf(warn, "Non-hvm data type %d for d%dv%d on proc %d, skipping\n",
+ v->data_type, v->d->did, v->vid, p->pid);
+ dump_generic(warn, ri);
+ return;
+ }
+
+
+ if(ri->evt.sub == 2)
+ {
+ UPDATE_VOLUME(p, hvm[HVM_VOL_HANDLER], ri->size);
+ hvm_handler_process(ri, h);
+ }
+ else
+ {
+ switch(ri->event) {
+ /* HVM */
+ case TRC_HVM_VMEXIT:
+ case TRC_HVM_VMEXIT64:
+ UPDATE_VOLUME(p, hvm[HVM_VOL_VMEXIT], ri->size);
+ hvm_vmexit_process(ri, h, v);
+ break;
+ case TRC_HVM_VMENTRY:
+ UPDATE_VOLUME(p, hvm[HVM_VOL_VMENTRY], ri->size);
+ hvm_vmentry_process(ri, &p->current->hvm);
+ break;
+ default:
+ fprintf(warn, "Unknown hvm event: %x", ri->event);
+ }
+ }
+}
+
+void hvm_summary(struct hvm_data *h) {
+ int i;
+
+ if(!h->summary_info)
+ return;
+
+ printf("Exit reasons:\n");
+ for(i=0; i<h->exit_reason_max; i++) {
+ PRINT_SUMMARY(h->summary.exit_reason[i],
+ " %-20s ", h->exit_reason_name[i]);
+ if(h->exit_reason_summary_handler[i])
+ h->exit_reason_summary_handler[i](h);
+ }
+
+ printf("Guest interrupt counts:\n");
+ for(i=0; i<GUEST_INTERRUPT_MAX; i++)
+ if(h->summary.guest_interrupt[i].count) {
+ int j;
+ printf(" [%3d] %d\n",
+ i, h->summary.guest_interrupt[i].count);
+ for(j=1; j<GUEST_INTERRUPT_CASE_MAX; j++) {
+ char desc[80];
+ snprintf(desc, 80, " * %s", guest_interrupt_case_name[j]);
+ print_cycle_summary(h->summary.guest_interrupt[i].runtime+j, desc);
+ }
+ }
+ if(h->summary.guest_interrupt[i].count)
+ printf(" [%d+] %d\n",
+ i, h->summary.guest_interrupt[i].count);
+
+ if(opt.histogram_interrupt_eip)
+ {
+ unsigned max = ((1ULL<<ADDR_SPACE_BITS)/opt.histogram_interrupt_increment);
+ printf("Interrupt eip histogram:\n");
+ for(i=0; i<max; i++)
+ if(h->summary.extint_histogram[i])
+ {
+ printf("[%llx-%llx]: %d\n",
+ opt.histogram_interrupt_increment * i,
+ (opt.histogram_interrupt_increment * (i+1)) - 1,
+ h->summary.extint_histogram[i]);
+ }
+ }
+
+ PRINT_SUMMARY(h->summary.ipi_latency,
+ "IPI latency \n");
+ for(i=0; i<256; i++)
+ if(h->summary.ipi_count[i])
+ printf(" [%3d] %10d\n",
+ i, h->summary.ipi_count[i]);
+ hvm_io_address_summary(h->summary.pio, "IO address summary:");
+}
+
+/* ---- Shadow records ---- */
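+/* Shadow trace records encode the shadow tracepoint in the low 8 bits
+ * of the event number ("minor") and the guest paging level in bits
+ * 11:8. */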
+union shadow_event
+{
+ unsigned event;
+ struct {
+ unsigned minor:8,
+ paging_levels:4;
+ };
+};
+
+/* WARNING - not thread safe */
+#define FLAGSTRING(_name, _char) \
+ if(e->flag_ ## _name) \
+ flagstring[i] = _char; \
+ i++;
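+/* Note the unconditional i++ above: every flag advances the cursor
+ * whether or not it is set, so each flag always occupies the same
+ * column of the output string. */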
+
+char * flag_string(struct pf_xen_extra *e)
+{
+ static char flagstring[32];
+ int i=0;
+
+ for(i=0; i<32; i++)
+ flagstring[i]='-';
+
+ i=0;
+
+ if(e->flag_set_ad)
+ flagstring[i]='d';
+ else if(e->flag_set_a)
+ flagstring[i]='a';
+ i++;
+
+ FLAGSTRING(shadow_l1_get_ref, 'g');
+ FLAGSTRING(shadow_l1_put_ref, 'p');
+ //FLAGSTRING(l2_propagate, '2');
+ FLAGSTRING(demote, 'D');
+ FLAGSTRING(promote, 'P');
+ FLAGSTRING(wrmap, 'w');
+ FLAGSTRING(wrmap_guess_found, 'G');
+ //FLAGSTRING(wrmap_brute_force, 'b');
+ FLAGSTRING(early_unshadow, 'e');
+ FLAGSTRING(prealloc_unhook, 'H');
+ FLAGSTRING(unsync, 'u');
+ FLAGSTRING(oos_fixup_add, 'a');
+ FLAGSTRING(oos_fixup_evict, 'x');
+
+ flagstring[i]=0;
+
+ return flagstring;
+}
+
+void shadow_emulate_postprocess(struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+ if ( opt.summary_info )
+ {
+ update_eip(&h->v->d->emulate_eip_list,
+ h->rip,
+ h->arc_cycles,
+ 0, NULL);
+ update_summary(&h->summary.pf_xen[PF_XEN_EMULATE], h->arc_cycles);
+ update_summary(&h->summary.pf_xen_emul[e->pt_level], h->arc_cycles);
+ if(h->prealloc_unpin)
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNPIN], h->arc_cycles);
+ if(e->flag_prealloc_unhook)
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNHOOK], h->arc_cycles);
+ if(e->flag_early_unshadow)
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_EARLY_UNSHADOW], h->arc_cycles);
+ if(e->flag_set_changed)
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_CHANGED], h->arc_cycles);
+ else
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_UNCHANGED], h->arc_cycles);
+ if(e->flag_set_flush)
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_FLUSH], h->arc_cycles);
+ if(e->flag_set_error)
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_ERROR], h->arc_cycles);
+ if(e->flag_promote)
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PROMOTE], h->arc_cycles);
+ if(e->flag_demote)
+ update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_DEMOTE], h->arc_cycles);
+ /* more summary info */
+
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_EMULATE);
+ }
+
+ if ( opt.dump_cooked )
+ {
+ printf(" %s emulate va %llx eip %llx%s lvl %d/%d corr %llx wval %llx flags %s\n",
+ h->dump_header, e->va,
+ h->rip, find_symbol(h->rip),
+ e->pt_level, h->v->guest_paging_levels,
+ e->corresponding_va,
+ e->wval,
+ flag_string(e));
+ }
+
+
+ if(opt.scatterplot_unpin_promote) {
+ if(e->flag_early_unshadow)
+ scatterplot_vs_time(h->exit_tsc, -10);
+ if(h->prealloc_unpin)
+ scatterplot_vs_time(h->exit_tsc, 0);
+ if(e->flag_promote) {
+ if(opt.with_cr3_enumeration) {
+ if(h->v->cr3.data)
+ scatterplot_vs_time(h->exit_tsc, h->v->cr3.data->cr3_id);
+ } else
+ scatterplot_vs_time(h->exit_tsc, 2);
+ }
+ }
+
+
+}
+
+void shadow_emulate_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+ union {
+ /* For PAE, guest_l1e may be 64 bits while guest_va may be 32;
+ put it first for alignment's sake. */
+ struct {
+ unsigned gl1e, write_val;
+ unsigned va;
+ unsigned flags:29, emulation_count:3;
+ } gpl2;
+ struct {
+ unsigned long long gl1e, write_val;
+ unsigned va;
+ unsigned flags:29, emulation_count:3;
+ } gpl3;
+ struct {
+ unsigned long long gl1e, write_val;
+ unsigned long long va;
+ unsigned flags:29, emulation_count:3;
+ } gpl4;
+ } *r = (typeof(r))ri->d;
+
+ int gpl = h->v->guest_paging_levels;
+
+ /* Fill in extended information */
+ switch(gpl)
+ {
+ case 2:
+ if(sizeof(r->gpl2) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, (int)sizeof(r->gpl2), h->v->guest_paging_levels,
+ ri->extra_words * 4);
+ break;
+ }
+ e->va = r->gpl2.va;
+ e->flags = r->gpl2.flags;
+ e->gl1e = r->gpl2.gl1e;
+ e->wval = r->gpl2.write_val;
+ break;
+ case 3:
+ if(sizeof(r->gpl3) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, (int)sizeof(r->gpl3), h->v->guest_paging_levels,
+ ri->extra_words * 4);
+ break;
+ }
+ e->va = r->gpl3.va;
+ e->flags = r->gpl3.flags;
+ e->gl1e = r->gpl3.gl1e;
+ e->wval = r->gpl3.write_val;
+ break;
+ case 4:
+ if(sizeof(r->gpl4) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, (int)sizeof(r->gpl4), h->v->guest_paging_levels,
+ ri->extra_words * 4);
+ break;
+ }
+ e->va = r->gpl4.va;
+ e->flags = r->gpl4.flags;
+ e->gl1e = r->gpl4.gl1e;
+ e->wval = r->gpl4.write_val;
+ break;
+ }
+
+ pf_preprocess(e,gpl);
+
+ if(opt.dump_all)
+ printf("]%s emulate va %llx gl1e %8llx wval %8llx flags %s(%x) pt_level %d corr %8llx\n",
+ ri->dump_header,
+ e->va,
+ e->gl1e, e->wval,
+ flag_string(e), e->flags,
+ e->pt_level, e->corresponding_va);
+
+ h->post_process = shadow_emulate_postprocess;
+
+}
+
+struct shadow_emulate_other {
+ unsigned long long gfn, va;
+};
+
+#define SHADOW_OTHER_LOGS_GFN_NOT_GMFN 1
+
+void shadow_parse_other(struct record_info *ri,
+ struct shadow_emulate_other *o,
+ int gpl) {
+ union {
+ /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+ so put it first for alignment's sake. */
+#if SHADOW_OTHER_LOGS_GFN_NOT_GMFN
+ /* D'OH! Accidentally used mfn_t in the struct, so gmfns are always
+ 64-bit... :-/ */
+ struct {
+ unsigned long gfn, va;
+ } gpl2;
+#endif
+ struct {
+ unsigned long long gfn;
+ unsigned long va;
+ } gpl3;
+ struct {
+ unsigned long long gfn, va;
+ } gpl4;
+ } *r = (typeof(r))ri->d;
+
+
+ switch(gpl)
+ {
+#if SHADOW_OTHER_LOGS_GFN_NOT_GMFN
+ case 2:
+ if(sizeof(r->gpl2) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, sizeof(r->gpl2), gpl,
+ ri->extra_words * 4);
+ break;
+ }
+ o->va = r->gpl2.va;
+ o->gfn = r->gpl2.gfn;
+ break;
+#else
+ case 2:
+ /* FALLTHRU */
+#endif
+ case 3:
+ if(sizeof(r->gpl3) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, sizeof(r->gpl3), gpl,
+ ri->extra_words * 4);
+ break;
+ }
+ o->va = r->gpl3.va;
+ o->gfn = r->gpl3.gfn;
+ break;
+ case 4:
+ if(sizeof(r->gpl4) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, sizeof(r->gpl4), gpl,
+ ri->extra_words * 4);
+ break;
+ }
+ o->va = r->gpl4.va;
+ o->gfn = r->gpl4.gfn;
+ break;
+ }
+}
+
+#if 0
+void shadow_unsync_postprocess(struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+ if(h->resyncs > 1)
+ fprintf(warn, "Strange, %d resyncs for an unsync!\n",
+ h->resyncs);
+
+ if(opt.summary_info) {
+ update_summary(&h->summary.pf_xen[PF_XEN_EMULATE_UNSYNC],
+ h->arc_cycles);
+ if(h->resyncs <= 1)
+ update_summary(&h->summary.pf_xen_unsync[h->resyncs],
+ h->arc_cycles);
+ }
+
+ if(opt.dump_cooked)
+ {
+ printf(" %s unsync gfn %llx %s (%d resyncs)\n",
+ h->dump_header, e->gfn,
+ h->resyncs?"(resync)":"",
+ h->resyncs);
+ }
+}
+
+
+void shadow_unsync_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+ struct shadow_emulate_other r;
+
+ shadow_parse_other(ri, &r, h->v->guest_paging_levels);
+
+ e->gmfn = r.gfn;
+ e->va = r.va;
+
+ pf_preprocess(e, h->v->guest_paging_levels);
+
+ if(opt.dump_all)
+ printf("]%s shadow unsync gmfn %llx va %llx pt_level %d corr %llx\n",
+ h->dump_header,
+ e->gmfn,
+ e->va,
+ e->pt_level,
+ e->corresponding_va);
+
+ h->post_process = shadow_unsync_postprocess;
+}
+#endif
+
+void shadow_fault_generic_postprocess(struct hvm_data *h);
+
+void shadow_emulate_other_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+ struct shadow_emulate_other r;
+ union shadow_event sevt = { .event = ri->event };
+
+ shadow_parse_other(ri, &r, h->v->guest_paging_levels);
+
+ e->gfn = r.gfn;
+ e->va = r.va;
+
+ pf_preprocess(e, h->v->guest_paging_levels);
+
+ if(opt.dump_all)
+ printf("]%s shadow %s gfn %llx va %llx\n",
+ h->dump_header,
+ pf_xen_name[sevt.minor],
+ e->gfn,
+ e->va);
+
+ h->post_process = shadow_fault_generic_postprocess;
+}
+
+void shadow_fixup_postprocess(struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+ if ( opt.summary_info )
+ {
+ update_summary(&h->summary.pf_xen[PF_XEN_FIXUP], h->arc_cycles);
+ if(h->prealloc_unpin) {
+ update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PREALLOC_UNPIN], h->arc_cycles);
+ }
+ if(e->flag_unsync) {
+ update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UNSYNC], h->arc_cycles);
+ if(h->resyncs < PF_XEN_FIXUP_UNSYNC_RESYNC_MAX)
+ update_summary(&h->summary.pf_xen_fixup_unsync_resync[h->resyncs],
+ h->arc_cycles);
+ else
+ update_summary(&h->summary.pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX],
+ h->arc_cycles);
+ }
+ if(e->flag_oos_fixup_add)
+ update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_ADD], h->arc_cycles);
+ if(e->flag_oos_fixup_evict)
+ update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_EVICT], h->arc_cycles);
+ if(e->flag_promote) {
+ update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PROMOTE], h->arc_cycles);
+ if(e->flag_wrmap) {
+ update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_WRMAP], h->arc_cycles);
+ if(e->flag_wrmap_brute_force)
+ update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_BRUTE_FORCE], h->arc_cycles);
+ } else if(e->flag_wrmap_brute_force) {
+ fprintf(warn, "Strange: wrmap_bf but not wrmap!\n");
+ }
+ } else if(e->flag_wrmap)
+ fprintf(warn, "Strange, wrmap but not promote!\n");
+
+ if(!(e->flag_promote || h->prealloc_unpin || e->flag_unsync))
+ update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UPDATE_ONLY], h->arc_cycles);
+ /* more summary info */
+
+ if(e->flag_unsync)
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_UNSYNC);
+ else
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_FIXUP);
+ }
+
+ if ( opt.dump_cooked )
+ {
+ printf(" %s fixup%s va %llx eip %llx%s gl1e %llx flags %s\n",
+ h->dump_header,
+ e->flag_unsync?":unsync":"",
+ e->va,
+ h->rip, find_symbol(h->rip),
+ e->gl1e,
+ flag_string(e));
+ }
+
+ if(opt.scatterplot_unpin_promote) {
+ if(h->prealloc_unpin)
+ scatterplot_vs_time(h->exit_tsc, 0);
+ if(e->flag_promote) {
+ if(opt.with_cr3_enumeration) {
+ if(h->v->cr3.data)
+ scatterplot_vs_time(h->exit_tsc, h->v->cr3.data->cr3_id);
+ } else
+ scatterplot_vs_time(h->exit_tsc, 2);
+ }
+ }
+}
+
+void shadow_fixup_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+ union {
+ /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+ so put it first for alignment's sake. */
+ struct {
+ unsigned long gl1e, va, flags;
+ } gpl2;
+ struct {
+ unsigned long long gl1e;
+ unsigned long va, flags;
+ } gpl3;
+ struct {
+ unsigned long long gl1e, va;
+ unsigned long flags;
+ } gpl4;
+ } *r = (typeof(r))ri->d;
+ int gpl = h->v->guest_paging_levels;
+
+ switch(gpl)
+ {
+ case 2:
+ if(sizeof(r->gpl2) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, sizeof(r->gpl2), h->v->guest_paging_levels,
+ ri->extra_words * 4);
+ break;
+ }
+ e->va = r->gpl2.va;
+ e->flags = r->gpl2.flags;
+ e->gl1e = r->gpl2.gl1e;
+ break;
+ case 3:
+ if(sizeof(r->gpl3) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, sizeof(r->gpl3), h->v->guest_paging_levels,
+ ri->extra_words * 4);
+ break;
+ }
+ e->va = r->gpl3.va;
+ e->flags = r->gpl3.flags;
+ e->gl1e = r->gpl3.gl1e;
+ break;
+ case 4:
+ if(sizeof(r->gpl4) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, sizeof(r->gpl4), h->v->guest_paging_levels,
+ ri->extra_words * 4);
+ break;
+ }
+ e->va = r->gpl4.va;
+ e->flags = r->gpl4.flags;
+ e->gl1e = r->gpl4.gl1e;
+ break;
+ }
+
+ if(opt.dump_all)
+ printf("]%s fixup%s va %llx gl1e %llx flags (%x)%s\n",
+ h->dump_header,
+ e->flag_unsync?":unsync":"",
+ e->va, e->gl1e, e->flags,
+ flag_string(e));
+
+ h->post_process = shadow_fixup_postprocess;
+}
+
+void shadow_mmio_postprocess(struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+ if ( opt.summary_info )
+ {
+ update_summary(&h->summary.pf_xen[e->pf_case], h->arc_cycles);
+
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_MMIO);
+ }
+
+ if ( opt.dump_cooked )
+ {
+ if(e->mmio_data_valid)
+ printf(" %s %smmio %s va %llx eip %llx%s gpa %llx data %lx\n",
+ h->dump_header,
+ (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
+ e->mmio_is_write?"write":"read",
+ e->va,
+ h->rip, find_symbol(h->rip),
+ e->gpa,
+ e->data);
+ else
+ printf(" %s %smmio %s va %llx eip %llx%s gpa %llx (no data)\n",
+ h->dump_header,
+ (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
+ e->mmio_is_write?"write":"read",
+ e->va,
+ h->rip, find_symbol(h->rip),
+ e->gpa);
+ }
+
+}
+
+void shadow_mmio_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct pf_xen_extra *e = &h->inflight.pf_xen;
+ union {
+ /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+ so put it first for alignment's sake. */
+ struct {
+ unsigned long va;
+ } gpl2;
+ struct {
+ unsigned long long va;
+ } gpl4;
+ } *r = (typeof(r))ri->d;
+ int gpl = h->v->guest_paging_levels;
+
+ switch(gpl)
+ {
+ case 2:
+ case 3:
+ if(sizeof(r->gpl2) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, sizeof(r->gpl2), h->v->guest_paging_levels,
+ ri->extra_words * 4);
+ break;
+ }
+ e->va = r->gpl2.va;
+ break;
+ case 4:
+ if(sizeof(r->gpl4) != ri->extra_words * 4)
+ {
+ fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+ __func__, sizeof(r->gpl4), h->v->guest_paging_levels,
+ ri->extra_words * 4);
+ break;
+ }
+ e->va = r->gpl4.va;
+ break;
+ }
+
+ if(opt.dump_all)
+ printf("]%s %smmio va %llx\n",
+ h->dump_header,
+ (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
+ e->va);
+
+ h->post_process = shadow_mmio_postprocess;
+}
+
+void shadow_fault_generic_dump(unsigned long event, uint32_t *d, char *prefix,
+ char * dump_header)
+{
+ char *evt_string, evt_number[10];
+ union shadow_event sevt = { .event = event };
+ int i;
+
+ if(sevt.minor < PF_XEN_MAX)
+ {
+ evt_string = pf_xen_name[sevt.minor];
+ }
+ else
+ {
+ snprintf(evt_number, 10, "%d", sevt.minor);
+ evt_string = evt_number;
+ }
+
+ printf("%s%s shadow %s gl %d [",
+ prefix,
+ dump_header,
+ evt_string,
+ sevt.paging_levels);
+
+ for(i=0; i<4; i++)
+ {
+ printf(" %x", d[i]);
+ }
+
+ printf(" ]\n");
+}
+
+void shadow_fault_generic_postprocess(struct hvm_data *h)
+{
+ union shadow_event sevt = { .event = h->inflight.generic.event };
+ if(opt.summary_info) {
+ update_summary(&h->summary.pf_xen[sevt.minor],
+ h->arc_cycles);
+
+ hvm_update_short_summary(h, HVM_SHORT_SUMMARY_PROPAGATE);
+ }
+
+ if(opt.dump_cooked)
+ {
+ shadow_fault_generic_dump(h->inflight.generic.event,
+ h->inflight.generic.d,
+ " ", h->dump_header);
+ }
+}
+
+void shadow_fault_generic_process(struct record_info *ri, struct hvm_data *h)
+{
+ union shadow_event sevt = { .event = ri->event };
+
+ /* pf-case traces, vs others */
+ h->inflight.generic.event = ri->event;
+ bcopy(ri->d, h->inflight.generic.d, sizeof(unsigned int) * 4);
+
+ if(opt.dump_all)
+ shadow_fault_generic_dump(h->inflight.generic.event,
+ h->inflight.generic.d,
+ "]", ri->dump_header);
+
+ h->inflight.pf_xen.pf_case = sevt.minor;
+ h->post_process = shadow_fault_generic_postprocess;
+}
+
+void shadow_resync_process(struct record_info *ri, struct hvm_data *h)
+{
+ struct {
+ unsigned long long gfn;
+ } *r = (typeof(r))ri->d;
+
+ if(opt.dump_all || opt.dump_cooked)
+ printf(" %s oos resync %s gfn %llx\n",
+ ri->dump_header,
+ (ri->event == TRC_SHADOW_RESYNC_FULL)?"full":"only",
+ r->gfn);
+
+ h->resyncs++;
+}
+
+void shadow_prealloc_unpin_process(struct record_info *ri, struct hvm_data *h) {
+ struct {
+ unsigned long long gfn;
+ } *r = (typeof(r))ri->d;
+
+ if(opt.dump_all || opt.dump_cooked)
+ printf(" %s prealloc-unpin gfn %llx\n",
+ ri->dump_header, r->gfn);
+
+ if(h->prealloc_unpin)
+ fprintf(warn, "Strange, more than one prealloc_unpin per arc!\n");
+
+ h->prealloc_unpin = 1;
+
+ if(opt.with_cr3_enumeration)
+ cr3_prealloc_unpin(h->v, r->gfn);
+}
+
+void shadow_process(struct pcpu_info *p)
+{
+ struct record_info *ri = &p->ri;
+ struct hvm_data *h = &p->current->hvm;
+
+ union shadow_event sevt = { .event = ri->event };
+
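+ /* Shadow trace events encode the guest paging levels as (levels - 2). */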
+ int gpl = sevt.paging_levels + 2;
+
+ if(!h->init || !h->vmexit_valid)
+ {
+ if(opt.dump_all)
+ shadow_fault_generic_dump(ri->event,
+ ri->d,
+ "!", ri->dump_header);
+ return;
+ }
+
+ if(sevt.minor <= PF_XEN_NOT_SHADOW) {
+ if(p->current->guest_paging_levels != gpl)
+ {
+ fprintf(warn, "%s: Changing guest paging levels to %d\n",
+ __func__, gpl);
+ p->current->guest_paging_levels = gpl;
+ }
+ }
+
+ if(sevt.minor <= PF_XEN_LAST_FAULT) {
+ h->inflight.pf_xen.pf_case = sevt.minor;
+ if(opt.summary) {
+ hvm_set_summary_handler(h, hvm_pf_xen_summary);
+ }
+ }
+
+ /* FIXME - mask out paging levels */
+ switch(sevt.minor)
+ {
+ case PF_XEN_EMULATE:
+ shadow_emulate_process(ri, h);
+ break;
+ case PF_XEN_FIXUP:
+ shadow_fixup_process(ri, h);
+ break;
+ case PF_XEN_MMIO:
+ case PF_XEN_FAST_MMIO:
+ shadow_mmio_process(ri, h);
+ break;
+ case PF_XEN_EMULATE_UNSHADOW_USER:
+ case PF_XEN_EMULATE_UNSHADOW_EVTINJ:
+ case PF_XEN_EMULATE_UNSHADOW_UNHANDLED:
+ shadow_emulate_other_process(ri, h);
+ break;
+#if 0
+ case PF_XEN_EMULATE_UNSYNC:
+ shadow_unsync_process(ri, h);
+ break;
+#endif
+ case SHADOW_RESYNC_FULL:
+ case SHADOW_RESYNC_ONLY:
+ shadow_resync_process(ri, h);
+ break;
+ case SHADOW_PREALLOC_UNPIN:
+ shadow_prealloc_unpin_process(ri, h);
+ break;
+ default:
+ if(sevt.minor <= PF_XEN_LAST_FAULT) {
+ shadow_fault_generic_process(ri, h);
+ } else {
+ fprintf(warn, "Warning: processing shadow as generic\n");
+ process_generic(ri);
+ }
+ break;
+ }
+}
+
+/* ---- PV guests ---- */
+union pv_event {
+ unsigned event;
+ struct {
+ unsigned minor:8,
+ x64:1,
+ unused1:3,
+ sub:4,
+ main:12,
+ unused:4;
+ };
+};
+
+void pv_hypercall_process(struct record_info *ri, struct pv_data *pv) {
+ union {
+ struct {
+ uint32_t eip, eax;
+ } x32;
+ struct {
+ uint64_t eip;
+ uint32_t eax;
+ } x64;
+ } * r = (typeof(r)) ri->d;
+ union pv_event pevt = { .event = ri->event };
+ unsigned long long eip;
+ unsigned long eax;
+
+ if(pevt.x64) {
+ eip = r->x64.eip;
+ eax = r->x64.eax;
+ } else {
+ eip = r->x32.eip;
+ eax = r->x32.eax;
+ }
+
+ if(opt.summary_info) {
+ if(eax < PV_HYPERCALL_MAX)
+ pv->hypercall_count[eax]++;
+ }
+
+ if(opt.dump_cooked || opt.dump_all) {
+ if(eax < HYPERCALL_MAX)
+ printf(" %s hypercall %2lx (%s) eip %llx\n",
+ ri->dump_header, eax,
+ hypercall_name[eax], eip);
+ else
+ printf(" %s hypercall %lx eip %llx\n",
+ ri->dump_header, eax, eip);
+ }
+}
+
+void pv_trap_process(struct record_info *ri, struct pv_data *pv) {
+ union {
+ struct {
+ unsigned long eip;
+ unsigned trapnr:15,
+ use_error_code:1,
+ error_code:16;
+ } x32;
+ struct {
+ unsigned long long eip;
+ unsigned trapnr:15,
+ use_error_code:1,
+ error_code:16;
+ } x64;
+ } * r = (typeof(r)) ri->d;
+ union pv_event pevt = { .event = ri->event };
+ unsigned long long eip;
+ unsigned trapnr, use_ec, ec;
+
+ if(pevt.x64) {
+ eip = r->x64.eip;
+ trapnr = r->x64.trapnr;
+ use_ec = r->x64.use_error_code;
+ ec = r->x64.error_code;
+ } else {
+ eip = r->x32.eip;
+ trapnr = r->x32.trapnr;
+ use_ec = r->x32.use_error_code;
+ ec = r->x32.error_code;
+ }
+
+ if(opt.summary_info) {
+ if(trapnr < PV_TRAP_MAX)
+ pv->trap_count[trapnr]++;
+ }
+
+ if(opt.dump_cooked || opt.dump_all) {
+ printf(" %s trap %x eip %llx",
+ ri->dump_header, trapnr, eip);
+ if(use_ec)
+ printf(" ec %x\n", ec);
+ else
+ printf("\n");
+ }
+}
+
+void pv_generic_process(struct record_info *ri, struct pv_data *pv) {
+ union pv_event pevt = { .event = ri->event };
+ if ( opt.dump_cooked || opt.dump_all ) {
+ if(pevt.minor < PV_MAX && pv_name[pevt.minor])
+ printf(" %s %s",
+ ri->dump_header,
+ pv_name[pevt.minor]);
+ else
+ printf(" %s PV-%d ",
+ ri->dump_header, pevt.minor);
+ if(ri->extra_words) {
+ int i;
+ printf("[ ");
+ for(i=0; i<ri->extra_words; i++) {
+ printf("%x ", (unsigned)ri->d[i]);
+ }
+ printf("]\n");
+
+ }
+ }
+}
+
+void pv_summary(struct pv_data *pv) {
+ int i, j;
+
+ if(!pv->summary_info)
+ return;
+
+ printf("PV events:\n");
+ for(i=0; i<PV_MAX; i++) {
+ if(pv->count[i])
+ printf(" %s %d\n", pv_name[i], pv->count[i]);
+ switch(i) {
+ case PV_HYPERCALL:
+ for(j=0; j<PV_HYPERCALL_MAX; j++) {
+ if(pv->hypercall_count[j])
+ printf(" %-29s[%2d]: %6d\n",
+ hypercall_name[j],
+ j,
+ pv->hypercall_count[j]);
+ }
+ break;
+ case PV_TRAP:
+ for(j=0; j<PV_TRAP_MAX; j++) {
+ if(pv->trap_count[j])
+ printf(" [%d] %d\n",
+ j, pv->trap_count[j]);
+ }
+ break;
+ }
+ }
+}
+
+void pv_process(struct pcpu_info *p)
+{
+ struct record_info *ri = &p->ri;
+ struct vcpu_data *v = p->current;
+ struct pv_data *pv = &v->pv;
+
+ union pv_event pevt = { .event = ri->event };
+
+ if(v->d->did == IDLE_DOMAIN) {
+ fprintf(warn, "%s: Strange, pv record for idle domain!\n",
+ __func__);
+ return;
+ }
+
+ if(!v->data_type)
+ v->data_type = VCPU_DATA_PV;
+ else if(v->data_type != VCPU_DATA_PV) {
+ fprintf(warn, "ERROR: non-pv data type %d for d%dv%d\n",
+ v->data_type, v->d->did, v->vid);
+ exit(1);
+ }
+
+ if(opt.summary_info) {
+ pv->summary_info=1;
+
+ if(pevt.minor == PV_PTWR_EMULATION_PAE)
+ pv->count[PV_PTWR_EMULATION]++;
+ else
+ pv->count[pevt.minor]++;
+ }
+
+ switch(pevt.minor)
+ {
+ case PV_HYPERCALL:
+ pv_hypercall_process(ri, pv);
+ break;
+ case PV_TRAP:
+ pv_trap_process(ri, pv);
+ break;
+ case PV_PAGE_FAULT:
+ //pv_pf_process(ri, pv);
+ //break;
+ /* FALLTHRU */
+ case PV_PTWR_EMULATION:
+ case PV_PTWR_EMULATION_PAE:
+ //pv_ptwr_emulation_process(ri, pv);
+ //break;
+ /* FALLTHRU */
+ default:
+ pv_generic_process(ri, pv);
+ break;
+ }
+}
+
+/* ---- Schedule ---- */
+struct vcpu_data * vcpu_create(struct domain_data *d, int vid)
+{
+ struct vcpu_data *v;
+
+ assert(d->vcpu[vid] == NULL);
+
+ fprintf(warn, "Creating vcpu %d for dom %d\n", vid, d->did);
+
+ if((v=malloc(sizeof(*v)))==NULL)
+ {
+ fprintf(stderr, "%s: malloc %d failed!\n", __func__, sizeof(*d));
+ exit(1);
+ }
+
+ bzero(v, sizeof(*v));
+
+ v->vid = vid;
+ v->d = d;
+ v->p = NULL;
+ v->runstate.state = RUNSTATE_INIT;
+ v->runstate.last_oldstate.wrong = RUNSTATE_INIT;
+
+ d->vcpu[vid] = v;
+
+ assert(v == v->d->vcpu[v->vid]);
+
+ if(vid > d->max_vid)
+ d->max_vid = vid;
+
+ return v;
+}
+
+/* Called by both domain_create and sched_default_domain_init */
+void domain_init(struct domain_data *d, int did)
+{
+ bzero(d, sizeof(*d));
+
+ d->did = did;
+ d->next = NULL;
+
+ if((opt.interval.output == INTERVAL_DOMAIN_SHORT_SUMMARY
+ || opt.interval.output == INTERVAL_DOMAIN_GUEST_INTERRUPT
+ || opt.interval.output == INTERVAL_DOMAIN_GRANT_MAPS)
+ && opt.interval.domain.did == did) {
+ P.interval.domain.d = d;
+ }
+}
+
+struct domain_data * domain_create(int did)
+{
+ struct domain_data *d;
+
+ fprintf(warn, "Creating domain %d\n", did);
+
+ if((d=malloc(sizeof(*d)))==NULL)
+ {
+ fprintf(stderr, "%s: malloc %d failed!\n", __func__, sizeof(*d));
+ exit(1);
+ }
+
+ /* Initialize domain & vcpus */
+ domain_init(d, did);
+
+ return d;
+ }
+
+struct domain_data * domain_find(int did)
+{
+ struct domain_data *d, *n, **q;
+
+ /* Look for domain, keeping track of the last pointer so we can add
+ a domain if we need to. */
+ for ( d = domain_list, q=&domain_list ;
+ d && (d->did < did) ;
+ q = &d->next, d=d->next ) ;
+
+ if(d && d->did == did)
+ return d;
+
+ /* Make a new domain */
+ n = domain_create(did);
+
+ /* Insert it into the list */
+ n->next = d;
+ *q = n;
+
+ return n;
+}
+
+struct vcpu_data * vcpu_find(int did, int vid)
+{
+ struct domain_data *d;
+ struct vcpu_data *v;
+
+ d = domain_find(did);
+
+ v = d->vcpu[vid];
+
+ if(!v)
+ v = vcpu_create(d, vid);
+
+ return v;
+}
+
+void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev,
+ tsc_t tsc, int new_runstate)
+{
+ assert(prev == prev->d->vcpu[prev->vid]);
+
+ if(prev->p != p)
+ {
+ fprintf(warn, "Strange, sched_switch on pcpu %d, prev->pcpu %d!\n",
+ p->pid, prev->p->pid);
+ prev->runstate.tsc = 0;
+ goto set;
+ }
+
+ assert(p->current);
+
+ if(p->current != prev)
+ {
+ fprintf(warn, "Strange, sched_switch prev d%dv%d, pcpu %d current d%dv%d!\n",
+ prev->d->did, prev->vid,
+ p->pid, p->current->d->did, p->current->vid);
+ prev->runstate.tsc = 0;
+ goto set;
+ }
+
+ if(prev->runstate.state != RUNSTATE_RUNNING)
+ {
+ fprintf(warn, "Strange, prev d%dv%d not running!\n",
+ prev->d->did, prev->vid);
+ prev->runstate.tsc = 0;
+ goto set;
+ }
+
+set:
+ p->current = NULL;
+ runstate_update(prev, new_runstate, tsc);
+}
+
+void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc)
+{
+ assert(next == next->d->vcpu[next->vid]);
+ assert(p->current == NULL);
+
+ if(next->activated)
+ {
+ /* We may get lost records at start-of-day, so ignore
+ setting runstate of default vcpus */
+ if(next->runstate.state == RUNSTATE_RUNNING
+ && next->d->did != DEFAULT_DOMAIN)
+ {
+ fprintf(warn, "Strange, next d%dv%d already running on proc %d!\n",
+ next->d->did, next->vid,
+ next->p->pid);
+ next->runstate.tsc = 0;
+ }
+ }
+ else
+ {
+ next->guest_paging_levels = opt.default_guest_paging_levels;
+ next->activated = 1;
+ }
+
+ runstate_update(next, RUNSTATE_RUNNING, tsc);
+
+ /* If we're moving from one pcpu to another, record change & update tsc */
+ if(next->p != p) {
+ if(next->pcpu_tsc)
+ update_cycles(&next->cpu_affinity, tsc - next->pcpu_tsc);
+ next->pcpu_tsc = tsc;
+ }
+
+ next->p = p;
+ p->current = next;
+}
+
+/* If current is the default domain, we're fixing up from something
+ * like start-of-day. Update what we can. */
+void vcpu_start(struct pcpu_info *p, struct vcpu_data *v) {
+ if( !p->current || p->current->d->did != DEFAULT_DOMAIN) {
+ fprintf(stderr, "Strange, p->current not default domain!\n");
+ exit(1);
+ }
+
+ if(!p->first_tsc) {
+ fprintf(stderr, "Strange, p%d first_tsc 0!\n", p->pid);
+ exit(1);
+ }
+
+ if(p->first_tsc <= p->current->runstate.tsc) {
+ fprintf(stderr, "Strange, first_tsc %llx < default_domain runstate tsc %llx!\n",
+ p->first_tsc,
+ p->current->runstate.tsc);
+ exit(1);
+ }
+
+ /* Change default domain to 'queued' */
+ runstate_update(p->current, RUNSTATE_QUEUED, p->first_tsc);
+
+ /* FIXME: Copy over data from the default domain this interval */
+ fprintf(warn, "Using first_tsc for d%dv%d (%lld cycles)\n",
+ v->d->did, v->vid, p->last_tsc - p->first_tsc);
+
+ /* Simulate the time since the first tsc */
+ runstate_update(v, RUNSTATE_RUNNING, p->first_tsc);
+ p->current = v;
+ v->p = p;
+}
+
+void sched_runstate_change_process(struct pcpu_info *p)
+{
+ struct vcpu_data *v;
+ struct record_info *ri = &p->ri;
+ struct {
+ unsigned vcpu:16, dom:16;
+ unsigned long long p1, p2;
+ } * r = (typeof(r))ri->d;
+ union {
+ unsigned long event;
+ struct {
+ unsigned lo:4,
+ new_runstate:4,
+ old_runstate:4,
+ sub:4,
+ main:12,
+ unused:4;
+ };
+ } sevt = { .event = ri->event };
+ int perfctrs;
+ struct last_oldstate_struct last_oldstate;
+
+ perfctrs = (ri->extra_words == 5);
+
+ if(opt.dump_cooked || opt.dump_all) {
+ if( perfctrs ) {
+ printf(" %s runstate_change_process {%lld,%lld} d%uv%u %s->%s\n",
+ ri->dump_header,
+ r->p1, r->p2,
+ r->dom, r->vcpu,
+ runstate_name[sevt.old_runstate],
+ runstate_name[sevt.new_runstate]);
+ } else {
+ printf(" %s runstate_change_process d%uv%u %s->%s\n",
+ ri->dump_header,
+ r->dom, r->vcpu,
+ runstate_name[sevt.old_runstate],
+ runstate_name[sevt.new_runstate]);
+ }
+ }
+
+ /* Sanity check: expected transitions */
+ if( (sevt.new_runstate == RUNSTATE_RUNNING
+ && sevt.old_runstate != RUNSTATE_RUNNABLE)
+ || (sevt.new_runstate == RUNSTATE_BLOCKED
+ && sevt.old_runstate == RUNSTATE_RUNNABLE ) )
+ {
+ fprintf(warn, "Strange, d%dv%d unexpected runstate transition %s->%s\n",
+ r->dom, r->vcpu,
+ runstate_name[sevt.old_runstate],
+ runstate_name[sevt.new_runstate]);
+ }
+
+ if(r->vcpu >= MAX_CPUS)
+ {
+ fprintf(warn, "%s: vcpu %u >= MAX_CPUS %d!\n",
+ __func__, r->vcpu, MAX_CPUS);
+ return;
+ }
+
+ v = vcpu_find(r->dom, r->vcpu);
+
+ /* We want last_oldstate reset every time; so copy the last one and use
+ * that locally, clobbering the one in the vcpu struct. If it needs to
+ * be set, it will be set again below. */
+ last_oldstate = v->runstate.last_oldstate;
+ v->runstate.last_oldstate.wrong = RUNSTATE_INIT;
+
+ /* Close vmexits when the putative reason for blocking / &c stops.
+ * This way, we don't account cpu contention to some other overhead. */
+ if(sevt.new_runstate == RUNSTATE_RUNNABLE
+ && v->data_type == VCPU_DATA_HVM
+ && v->hvm.vmexit_valid) {
+ if(opt.dump_cooked)
+ printf("%s: d%dv%d changing to state runnable, closing vmexit\n",
+ __func__, r->dom, r->vcpu);
+ hvm_close_vmexit(&v->hvm, ri->tsc);
+ }
+
+ /* Track waking state */
+ if ( v->data_type == VCPU_DATA_HVM && v->runstate.state != RUNSTATE_LOST ) {
+ if ( sevt.new_runstate == RUNSTATE_RUNNABLE
+ && sevt.old_runstate == RUNSTATE_BLOCKED )
+ {
+ /* Hmm... want to make sure we're not in some weird
+ vmexit state... have to look later. */
+ if(opt.dump_all)
+ printf(" [w2h] d%dv%d Setting waking\n", v->d->did, v->vid);
+ v->hvm.w2h.waking = 1;
+ }
+ else if ( sevt.new_runstate != RUNSTATE_RUNNING
+ || sevt.old_runstate != RUNSTATE_RUNNABLE )
+ {
+ if( v->hvm.w2h.waking
+ && sevt.old_runstate == RUNSTATE_RUNNING
+ && sevt.new_runstate != RUNSTATE_OFFLINE )
+ {
+ fprintf(warn, "Strange, unexpected waking transition for d%dv%d: %s -> %s\n",
+ v->d->did, v->vid,
+ runstate_name[sevt.old_runstate],
+ runstate_name[sevt.new_runstate]);
+ v->hvm.w2h.waking = 0;
+ }
+
+ /* Close wake-to-halt summary */
+ if (sevt.new_runstate == RUNSTATE_BLOCKED
+ && sevt.old_runstate == RUNSTATE_RUNNING
+ && v->hvm.w2h.interrupts ) {
+ int i;
+ for(i=0; i<GUEST_INTERRUPT_MAX; i++) {
+ struct hvm_gi_struct *g=v->hvm.summary.guest_interrupt + i;
+ tsc_t start_tsc = g->start_tsc;
+ if(start_tsc) {
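+ /* A start_tsc of exactly 1 appears to act as a sentinel meaning
+ * "began before tracing started"; report 0 rather than a bogus delta. */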
+ tsc_t t = (start_tsc == 1) ? 0 : ri->tsc - start_tsc;
+ if(opt.dump_all)
+ printf(" [w2h] Halting vec %d is_wake %d time %lld\n",
+ i,
+ g->is_wake,
+ t);
+
+ if(opt.scatterplot_wake_to_halt
+ && t
+ && g->is_wake)
+ scatterplot_vs_time(ri->tsc, t);
+
+ if(opt.summary && t) {
+ if(g->is_wake) {
+ if(v->hvm.w2h.interrupts==1)
+ update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE],
+ t);
+ update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY],
+ t);
+ } else {
+ update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT],
+ t);
+ }
+ }
+ g->start_tsc = 0;
+ g->is_wake = 0;
+ }
+ }
+ v->hvm.w2h.interrupts = 0;
+ v->hvm.w2h.vector = 0;
+ }
+ }
+ }
+
+ /* Sanity checks / tsc skew detection */
+ if( v->runstate.state != sevt.old_runstate
+ && v->runstate.state != RUNSTATE_INIT )
+ {
+ if(v->runstate.state == RUNSTATE_LOST) {
+ if( sevt.new_runstate == RUNSTATE_RUNNING )
+ goto update;
+ else if(opt.dump_all)
+ fprintf(warn, "%s: d%dv%d in runstate lost, not updating to %s\n",
+ __func__, v->d->did, v->vid,
+ runstate_name[sevt.new_runstate]);
+ goto no_update;
+ } else if (last_oldstate.wrong == sevt.new_runstate
+ && last_oldstate.actual == sevt.old_runstate) {
+ tsc_t lag, old_offset;
+ struct pcpu_info *p2;
+
+ if(ri->tsc < last_oldstate.tsc) {
+ fprintf(stderr, "FATAL: new tsc < detected runstate tsc!\n");
+ exit(1);
+ }
+
+ p2 = P.pcpu + last_oldstate.pid;
+
+ lag = ri->tsc
+ - last_oldstate.tsc;
+
+ old_offset = p2->tsc_skew.offset;
+
+ cpumask_union(&p2->tsc_skew.downstream, &p->tsc_skew.downstream);
+ cpumask_set(&p2->tsc_skew.downstream, p->pid);
+
+ if(cpumask_isset(&p2->tsc_skew.downstream, p2->pid)) {
+ fprintf(stderr, "FATAL: tsc skew dependency loop detected!\n");
+ exit(1);
+ }
+
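+ /* Push p2's clock forward past the conflicting record; doubling the
+ * observed lag appears to be a heuristic safety margin. */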
+ p2->tsc_skew.offset += lag * 2;
+
+ fprintf(warn, "TSC skew detected p%d->p%d, %lld cycles. Changing p%d offset from %lld to %lld\n",
+ p->pid, p2->pid, lag,
+ p2->pid,
+ old_offset,
+ p2->tsc_skew.offset);
+
+ goto no_update;
+ } else {
+ fprintf(warn, "runstate_change old_runstate %s, d%dv%d runstate %s. Possible tsc skew.\n",
+ runstate_name[sevt.old_runstate],
+ v->d->did, v->vid,
+ runstate_name[v->runstate.state]);
+
+ v->runstate.last_oldstate.wrong = sevt.old_runstate;
+ v->runstate.last_oldstate.actual = v->runstate.state;
+ v->runstate.last_oldstate.tsc = ri->tsc;
+ v->runstate.last_oldstate.pid = p->pid;
+ goto update;
+ }
+ fprintf(stderr, "FATAL: Logic hole in %s\n", __func__);
+ exit(1);
+ }
+
+update:
+ /* Actually update the runstate. Special things to do if we're starting
+ * or stopping actually running on a physical cpu. */
+ if ( sevt.old_runstate == RUNSTATE_RUNNING
+ || v->runstate.state == RUNSTATE_RUNNING )
+ {
+#if 0
+ /* A lot of traces include cpi that shouldn't... */
+ if(perfctrs && v->runstate.tsc) {
+ unsigned long long run_cycles, run_instr;
+ double cpi;
+
+ //run_cycles = r->p1 - v->runstate_p1_start;
+ run_cycles = ri->tsc - v->runstate.tsc;
+ run_instr = r->p2 - v->runstate.p2_start;
+
+ cpi = ((double)run_cycles) / run_instr;
+
+ if(opt.dump_cooked || opt.dump_all) {
+ printf(" cpi: %2.2lf ( %lld / %lld )\n",
+ cpi, run_cycles, run_instr);
+ }
+
+ if(opt.scatterplot_cpi && v->d->did == 1)
+ printf("%lld,%2.2lf\n",
+ ri->tsc, cpi);
+
+ if(opt.summary_info)
+ update_cpi(&v->cpi, run_instr, run_cycles);
+ }
+#endif
+ /*
+ * Cases:
+ * old running, v running:
+ * normal (prev update p, lost record check)
+ * v running, old !running:
+ * tsc skew (prev update v->p, lost record check)
+ * old running, v init:
+ * start-of-day (fake update, prev p, lost record)
+ * old running, v !{running,init}:
+ * (should never happen)
+ */
+ if( sevt.old_runstate == RUNSTATE_RUNNING ) {
+ if( v->runstate.state == RUNSTATE_INIT ) {
+ /* Start-of-day; account first tsc -> now to v */
+ vcpu_start(p, v);
+ } else if( v->runstate.state != RUNSTATE_RUNNING) {
+ /* This should never happen. */
+ fprintf(stderr, "FATAL: sevt.old_runstate running, but d%dv%d runstate %s!\n",
+ v->d->did, v->vid, runstate_name[v->runstate.state]);
+ exit(1);
+ }
+
+ vcpu_prev_update(p, v, ri->tsc, sevt.new_runstate);
+ } else {
+ vcpu_prev_update(v->p, v, ri->tsc, sevt.new_runstate);
+ }
+
+ if(P.lost_cpus && v->d->did != IDLE_DOMAIN) {
+ if(opt.dump_all)
+ fprintf(warn, "%s: %d lost cpus, setting d%dv%d runstate to RUNSTATE_LOST\n",
+ __func__, P.lost_cpus, v->d->did, v->vid);
+ lose_vcpu(v, ri->tsc);
+ }
+ }
+ else if ( sevt.new_runstate == RUNSTATE_RUNNING )
+ {
+ if(perfctrs) {
+ v->runstate.p1_start = r->p1;
+ v->runstate.p2_start = r->p2;
+ }
+
+ vcpu_next_update(p, v, ri->tsc);
+ }
+ else
+ {
+ runstate_update(v, sevt.new_runstate, ri->tsc);
+ }
+
+no_update:
+ return;
+}
+
+void sched_switch_process(struct pcpu_info *p)
+{
+ struct vcpu_data *prev, *next;
+ struct record_info *ri = &p->ri;
+ struct {
+ unsigned long prev_dom, prev_vcpu, next_dom, next_vcpu;
+ } * r = (typeof(r))ri->d;
+
+ if(opt.dump_cooked || opt.dump_all)
+ printf("%s sched_switch prev d%luv%lu next d%luv%lu\n",
+ ri->dump_header,
+ r->prev_dom, r->prev_vcpu,
+ r->next_dom, r->next_vcpu);
+
+ if(r->prev_vcpu >= MAX_CPUS)
+ {
+ fprintf(warn, "%s: prev_vcpu %lu >= MAX_CPUS %d!\n",
+ __func__, r->prev_vcpu, MAX_CPUS);
+ return;
+ }
+
+ if(r->next_vcpu >= MAX_CPUS)
+ {
+ fprintf(warn, "%s: next_vcpu %lu >= MAX_CPUS %d!\n",
+ __func__, r->next_vcpu, MAX_CPUS);
+ return;
+ }
+
+ prev = vcpu_find(r->prev_dom, r->prev_vcpu);
+ next = vcpu_find(r->next_dom, r->next_vcpu);
+
+ vcpu_prev_update(p, prev, ri->tsc, RUNSTATE_QUEUED); /* FIXME */
+
+ vcpu_next_update(p, next, ri->tsc);
+}
+
+void sched_default_vcpu_activate(struct pcpu_info *p)
+{
+ struct vcpu_data *v = default_domain.vcpu[p->pid];
+
+ if(!v)
+ v = vcpu_create(&default_domain, p->pid);
+
+ assert(v == v->d->vcpu[v->vid]);
+
+ v->activated = 1;
+ v->guest_paging_levels = opt.default_guest_paging_levels;
+ v->p = p;
+ v->runstate.state = RUNSTATE_RUNNING;
+
+ p->current = v;
+}
+
+void sched_default_domain_init(void)
+{
+ struct domain_data *d = &default_domain;
+
+ domain_init(d, DEFAULT_DOMAIN);
+}
+
+void runstate_clear(tsc_t * runstate_cycles)
+{
+ int i;
+ for(i=0; i<RUNSTATE_MAX; i++)
+ runstate_cycles[i]=0;
+}
+
+void runstate_summary(tsc_t * runstate_cycles)
+{
+ int i;
+ for(i=0; i<RUNSTATE_MAX; i++)
+ if(runstate_cycles[i]) {
+ struct time_struct t;
+ cycles_to_time(runstate_cycles[i], &t);
+ printf(" %s: %lu.%09lu s\n",
+ runstate_name[i], t.s, t.ns);
+ }
+}
+
+void sched_summary_vcpu(struct vcpu_data *v)
+{
+ int i;
+ char desc[30];
+
+ printf(" Runstates:\n");
+ for(i=0; i<RUNSTATE_MAX; i++) {
+ snprintf(desc,30, " %8s", runstate_name[i]);
+ print_cycle_summary(v->runstates+i, desc);
+ }
+ print_cpi_summary(&v->cpi);
+ print_cpu_affinity(&v->cpu_affinity, " cpu affinity");
+}
+
+void sched_summary_domain(struct domain_data *d)
+{
+ int i;
+ char desc[30];
+
+ printf(" Runstates:\n");
+ for(i=0; i<DOMAIN_RUNSTATE_MAX; i++) {
+ snprintf(desc,30, " %8s", domain_runstate_name[i]);
+ print_cycle_summary(d->runstates+i, desc);
+ }
+}
+
+
+void sched_process(struct pcpu_info *p)
+{
+ struct record_info *ri = &p->ri;
+
+ if(ri->evt.sub == 0xf) {
+ switch(ri->event)
+ {
+ case TRC_SCHED_SWITCH:
+ sched_switch_process(p);
+ break;
+ default:
+ process_generic(&p->ri);
+ }
+ } else {
+ if(ri->evt.sub == 1)
+ sched_runstate_change_process(p);
+ else {
+ UPDATE_VOLUME(p, sched_verbose, ri->size);
+ process_generic(&p->ri);
+ }
+ }
+}
+
+/* ---- Memory ---- */
+void mem_summary_domain(struct domain_data *d) {
+ int i;
+
+ printf(" Grant table ops:\n");
+
+ printf(" Done by:\n");
+ for(i=0; i<MEM_MAX; i++)
+ if(d->memops.done[i])
+ printf(" %-14s: %d\n",
+ mem_name[i],
+ d->memops.done[i]);
+
+ printf(" Done for:\n");
+ for(i=0; i<MEM_MAX; i++)
+ if(d->memops.done_for[i])
+ printf(" %-14s: %d\n",
+ mem_name[i],
+ d->memops.done_for[i]);
+}
+
+void mem_process(struct pcpu_info *p) {
+ struct record_info *ri = &p->ri;
+ struct {
+ int dom;
+ } *r = (typeof(r))ri->d;
+
+ int minor = ri->evt.minor;
+
+ if(opt.dump_all || opt.dump_cooked) {
+ if(minor < MEM_MAX)
+ printf(" %s %s d%d\n",
+ ri->dump_header,
+ mem_name[minor],
+ r->dom);
+ else
+ dump_generic(stdout, ri);
+ }
+
+ if(opt.summary_info && minor < MEM_MAX) {
+ struct domain_data *d;
+
+ if(p->current->d) {
+ p->current->d->memops.done[minor]++;
+ p->current->d->memops.done_interval[minor]++;
+ }
+ if((d=domain_find(r->dom))) {
+ d->memops.done_for[minor]++;
+ d->memops.done_for_interval[minor]++;
+ }
+ }
+}
+
+/* ---- Base ----- */
+void dump_generic(FILE * f, struct record_info *ri)
+{
+ int i;
+
+ fprintf(f, "]%s %7x(%x:%x:%x) %u [",
+ ri->dump_header,
+ ri->event,
+ ri->evt.main,
+ ri->evt.sub,
+ ri->evt.minor,
+ ri->extra_words);
+
+ for(i=0; i<ri->extra_words; i++) {
+ fprintf(f, " %x", ri->d[i]);
+ }
+
+ fprintf(f, " ]\n");
+}
+
+void process_generic(struct record_info *ri) {
+
+ if(opt.dump_cooked || opt.dump_all) {
+ dump_generic(stdout, ri);
+ }
+}
+
+void lose_vcpu(struct vcpu_data *v, tsc_t tsc)
+{
+ if(v->data_type == VCPU_DATA_HVM)
+ v->hvm.vmexit_valid=0;
+ runstate_update(v, RUNSTATE_LOST, tsc);
+ clear_vlapic(&v->vlapic);
+
+ if(v->data_type == VCPU_DATA_HVM) {
+ int i;
+ if(opt.dump_all)
+ printf(" [w2h] Clearing w2h state for d%dv%d\n",
+ v->d->did, v->vid);
+ v->hvm.w2h.interrupts=0;
+ v->hvm.w2h.vector=0;
+ v->hvm.w2h.waking = 0;
+ for(i=0; i<GUEST_INTERRUPT_MAX; i++) {
+ if(opt.dump_all && v->hvm.summary.guest_interrupt[i].start_tsc) {
+ printf(" Interrupt %d clearing start_tsc %lld\n",
+ i, v->hvm.summary.guest_interrupt[i].start_tsc);
+ }
+ v->hvm.summary.guest_interrupt[i].start_tsc = 0;
+ }
+ }
+}
+
+struct lost_record_struct {
+ int lost_records;
+ unsigned did:16,vid:16;
+ tsc_t first_tsc;
+};
+
+void process_lost_records(struct pcpu_info *p)
+{
+ struct record_info *ri = &p->ri;
+ struct lost_record_struct *r = (typeof(r))ri->d;
+ tsc_t first_tsc; /* TSC of first record that was lost */
+
+ /* Sanity checks */
+ if(ri->extra_words != 4)
+ {
+ fprintf(warn, "FATAL: Lost record has unexpected extra words %d!\n",
+ ri->extra_words);
+ exit(1);
+ }
+ else
+ first_tsc = r->first_tsc;
+
+ if(opt.dump_cooked || opt.dump_all)
+ {
+ printf(" %s lost_records count %d d%uv%u (cur d%dv%d) first_tsc %lld\n",
+ ri->dump_header, r->lost_records,
+ r->did, r->vid,
+ p->current->d->did, p->current->vid,
+ r->first_tsc);
+ }
+
+#if 0
+ if(opt.dump_trace_volume_on_lost_record)
+ volume_summary(&p->volume.last_buffer);
+#endif
+
+ clear_vlapic(&p->current->vlapic);
+ if(p->current->data_type == VCPU_DATA_HVM) {
+ p->current->hvm.vmexit_valid=0;
+ cr3_switch(0, &p->current->hvm);
+ }
+
+ /* We may lose scheduling records; so we need to:
+ * - Point all records until now to the next schedule in the
+ * "default" domain
+ * - Make sure there are no warnings / strangeness with the
+ * current vcpu (if it gets scheduled elsewhere).
+ */
+ vcpu_prev_update(p, p->current, first_tsc, RUNSTATE_LOST);
+
+#if 0
+ vcpu_next_update(p, default_domain.vcpu[p->pid], first_tsc);
+ if(p->current->data_type == VCPU_DATA_HVM) {
+ p->current->hvm.vmexit_valid=0;
+ }
+#endif
+
+ /* The lost record trace is processed early -- i.e.,
+ * After the last good record, rather than when the next
+ * record is processed. Between the time it's processed and
+ * the time it actually went in, the vcpu may be scheduled on
+ * other processors. So we can't switch vcpus until the first
+ * TSC'd record after the lost record. */
+ if(!p->lost_record.active) {
+ P.lost_cpus++;
+ if(P.lost_cpus > P.max_active_pcpu + 1) {
+ fprintf(warn, "ERROR: P.lost_cpus %d > P.max_active_pcpu + 1 %d!\n",
+ P.lost_cpus, P.max_active_pcpu + 1);
+ exit(1);
+ }
+ } else
+ fprintf(warn, "Strange, lost record for pcpu %d, but lost_record still active!\n",
+ p->pid);
+
+ p->lost_record.active = 1;
+ p->lost_record.tsc = first_tsc;
+
+ {
+ /* Any vcpu which is not actively running may be scheduled on the
+ * lost cpu. To avoid mis-accounting, reset such vcpus to RUNSTATE_LOST. */
+ struct domain_data *d;
+ int i;
+ for(d=domain_list ; d; d=d->next)
+ {
+ if(d->did != DEFAULT_DOMAIN) {
+ for(i=0; i<MAX_CPUS; i++)
+ if(d->vcpu[i] &&
+ d->vcpu[i]->runstate.state != RUNSTATE_RUNNING) {
+ if(opt.dump_all)
+ fprintf(warn, "%s: setting d%dv%d to RUNSTATE_LOST\n",
+ __func__, d->did, i);
+ lose_vcpu(d->vcpu[i], first_tsc);
+ }
+ }
+ }
+ }
+
+ p->lost_record.domain_valid=1;
+ p->lost_record.did=r->did;
+ p->lost_record.vid=r->vid;
+}
+
+
+void process_lost_records_end(struct pcpu_info *p)
+{
+ struct record_info *ri = &p->ri;
+ struct lost_record_struct *r = (typeof(r))ri->d;
+
+ if(!p->lost_record.active) {
+ fprintf(warn, "FATAL: lost_records_end but pid %d not lost!\n",
+ p->pid);
+ exit(1);
+ }
+
+ /* Lost records. If this is the first record on a pcpu after the loss,
+ * update the information. */
+ if(ri->tsc > p->lost_record.tsc)
+ {
+ if(opt.dump_cooked || opt.dump_all)
+ printf(" %s lost_records end ---\n",
+ pcpu_string(p->pid));
+ if(p->lost_record.domain_valid) {
+ int did = p->lost_record.did,
+ vid = p->lost_record.vid;
+ if(p->current)
+ {
+ fprintf(warn, "FATAL: lost_record valid (d%dv%d), but current d%dv%d!\n",
+ did, vid,
+ p->current->d->did, p->current->vid);
+ exit(1);
+ }
+
+ if(opt.dump_all)
+ fprintf(warn, "Changing p%d current to d%dv%d\n",
+ p->pid, did, vid);
+ vcpu_next_update(p,
+ vcpu_find(did, vid),
+ ri->tsc);
+ p->lost_record.domain_valid=0;
+ }
+
+
+ p->lost_record.active = 0;
+ P.lost_cpus--;
+ if(P.lost_cpus < 0) {
+ fprintf(warn, "ERROR: lost_cpus fell below 0 for pcpu %d!\n",
+ p->pid);
+ exit(1);
+ }
+ }
+}
+
+void base_process(struct pcpu_info *p) {
+ struct record_info *ri = &p->ri;
+ switch(ri->event)
+ {
+ case TRC_LOST_RECORDS:
+ process_lost_records(p);
+ break;
+ case TRC_LOST_RECORDS_END:
+ process_lost_records_end(p);
+ break;
+ case TRC_TRACE_VIRQ:
+ if(opt.dump_all || opt.dump_cooked) {
+ printf(" %s trace_virq\n", ri->dump_header);
+ }
+ P.buffer_trace_virq_tsc = ri->tsc;
+ break;
+ default:
+ process_generic(ri);
+ }
+ }
+
+
+
+/* Non-compat only */
+struct cpu_change_data {
+ int cpu;
+ unsigned window_size;
+};
+
+void activate_early_eof(void) {
+ struct pcpu_info *p;
+ int i;
+
+ fprintf(warn, "Short cpu_change window, activating early_eof\n");
+
+ P.early_eof = 1;
+
+ for(i=0; i<=P.max_active_pcpu; i++) {
+ p = P.pcpu + i;
+ if(p->active && p->file_offset > P.last_epoch_offset) {
+ fprintf(warn, " deactivating pid %d\n",
+ p->pid);
+ p->active = 0;
+ }
+ }
+}
+
+loff_t scan_for_new_pcpu(loff_t offset) {
+ ssize_t r;
+ struct trace_record rec;
+ struct cpu_change_data *cd;
+
+ r=__read_record(G.fd, &rec, offset);
+
+ if(r==0)
+ return 0;
+
+ if(rec.event != TRC_TRACE_CPU_CHANGE
+ || rec.cycle_flag)
+ {
+ fprintf(stderr, "%s: Unexpected record event %x!\n",
+ __func__, rec.event);
+ exit(1);
+ }
+
+ cd = (typeof(cd))rec.u.notsc.data;
+
+ if(cd->cpu > P.max_active_pcpu) {
+ struct pcpu_info *p = P.pcpu + cd->cpu;
+
+ fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
+ __func__, cd->cpu, offset);
+
+ p->active = 1;
+ /* Process this cpu_change record first */
+ p->ri.rec = rec;
+ p->ri.size = r;
+ __fill_in_record_info(p);
+
+ p->file_offset = offset;
+ p->next_cpu_change_offset = offset;
+
+ offset += r + cd->window_size;
+
+ sched_default_vcpu_activate(p);
+
+ P.max_active_pcpu = cd->cpu;
+
+ return offset;
+ } else {
+ return 0;
+ }
+}
+
+/*
+ * Conceptually, when we reach a cpu_change record that's not for our pcpu,
+ * we want to scan forward through the file until we reach one that's for us.
+ * However, looping through involves reading the file, which we'd rather
+ * do in one place. Because cpu_change records don't include a tsc,
+ * the same pcpu will be processed repeatedly until the cpu_change
+ * equals p->pid.
+ *
+ * There are two additional things we need to do in this algorithm:
+ * + Detect new pcpus as they come online
+ * + De-activate pcpus which don't have any more records
+ *
+ * Detecting new pcpus which are less than P.max_active_pcpu is straight-
+ * forward: when max_active_pcpu is searching for its next cpu window,
+ * it will pass by the new cpu's window, and can activate it then.
+ *
+ * Detecting new pcpus greater than P.max_active_pcpu is a little harder;
+ * When max_active_pcpu is scanning for its next cpu window, after it's found
+ * it, we need to scan one more window forward to see if it's an already-active
+ * pcpu; if not, activate it.
+ *
+ * We also need to deal with truncated files, where records from one pcpu may
+ * be present but not from another pcpu due to lack of disk space. The best
+ * thing to do is to find the last "epoch" and essentially truncate the file
+ * to that.
+ */
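+/*
+ * Illustrative file layout (a sketch; real window sizes vary per buffer):
+ *
+ * [cpu_change c0][c0 records...][cpu_change c1][c1 records...] <- epoch N
+ * [cpu_change c0][c0 records...][cpu_change c1][c1 records...] <- epoch N+1
+ *
+ * Each pcpu keeps its own file_offset and hops from one of its windows to
+ * the next, skipping over windows that belong to other pcpus.
+ */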
+void process_cpu_change(struct pcpu_info *p) {
+ struct record_info *ri = &p->ri;
+ struct cpu_change_data *r = (typeof(r))ri->d;
+
+ if(opt.dump_all) {
+ printf("]%s cpu_change this-cpu %u record-cpu %u window_size %u(0x%08x)\n",
+ ri->dump_header, p->pid, r->cpu, r->window_size,
+ r->window_size);
+ }
+
+ /* File sanity check */
+ if(p->file_offset != p->next_cpu_change_offset) {
+ fprintf(warn, "Strange, pcpu %d expected offet %llx, actual %llx!\n",
+ p->pid, p->next_cpu_change_offset, p->file_offset);
+ }
+
+ if(r->cpu >= MAX_CPUS)
+ {
+ fprintf(stderr, "FATAL: cpu %d >= MAX_CPUS %d.\n",
+ r->cpu, MAX_CPUS);
+ exit(1);
+ }
+
+ /* Detect beginning of new "epoch" while scanning thru file */
+ if((p->last_cpu_change_pid > r->cpu)
+ && (p->file_offset > P.last_epoch_offset)) {
+ P.last_epoch_offset = p->file_offset;
+ }
+
+ /* If that pcpu has never been activated, activate it. */
+ if(!P.pcpu[r->cpu].active && P.pcpu[r->cpu].file_offset == 0)
+ {
+ struct pcpu_info * p2 = P.pcpu + r->cpu;
+
+ p2->active = 1;
+ if(r->cpu > P.max_active_pcpu)
+ P.max_active_pcpu = r->cpu;
+
+ fprintf(warn, "%s: Activating pcpu %d at offset %llx\n",
+ __func__, r->cpu, p->file_offset);
+
+ sched_default_vcpu_activate(p2);
+
+ /* Taking this record as the first record should make everything
+ * run swimmingly. */
+ p2->ri = *ri;
+ p2->ri.cpu = r->cpu;
+ p2->ri.d = p2->ri.rec.u.notsc.data;
+ p2->file_offset = p->file_offset;
+ p2->next_cpu_change_offset = p->file_offset;
+ }
+
+ p->last_cpu_change_pid = r->cpu;
+
+ /* If this isn't the cpu we're looking for, skip the whole bunch */
+ if(p->pid != r->cpu)
+ {
+ p->file_offset += ri->size + r->window_size;
+ p->next_cpu_change_offset = p->file_offset;
+
+ if(p->file_offset > G.file_size) {
+ activate_early_eof();
+ } else if(P.early_eof && p->file_offset > P.last_epoch_offset) {
+ fprintf(warn, "%s: early_eof activated, pcpu %d past last_epoch_offset %llx, deactivating.\n",
+ __func__, p->pid, P.last_epoch_offset);
+ p->active = 0;
+ }
+ }
+ else
+ {
+ /* Track information about dom0 scheduling and records */
+ if(opt.dump_trace_volume_on_lost_record) {
+ tsc_t cycles;
+ struct time_struct t;
+
+ /* Update dom0 runstates */
+ cycles = (p->volume.buffer_first_tsc > p->volume.buffer_dom0_runstate_tsc) ?
+ p->volume.buffer_first_tsc :
+ p->volume.buffer_dom0_runstate_tsc;
+ p->volume.buffer_dom0_runstate_cycles[p->volume.buffer_dom0_runstate]
+ += ri->tsc - cycles;
+
+ printf(" - updated p%d dom0_runstate %s to %lld cycles (+%lld)\n",
+ p->pid, runstate_name[p->volume.buffer_dom0_runstate],
+ p->volume.buffer_dom0_runstate_cycles[p->volume.buffer_dom0_runstate],
+ ri->tsc - cycles);
+
+ /* print info */
+ cycles = ri->tsc - p->volume.buffer_first_tsc;
+ cycles_to_time(cycles, &t);
+ printf("Buffer time: %lu.%09lu (%lld cycles)\n",
+ t.s, t.ns, cycles);
+ if(p->volume.buffer_size)
+ printf("Rate: %lld cycles / byte\n",
+ cycles / p->volume.buffer_size);
+ if(P.buffer_trace_virq_tsc)
+ {
+ cycles = ri->tsc - P.buffer_trace_virq_tsc;
+ cycles_to_time(cycles, &t);
+ printf("trace_virq latency: %lu.%09lu (%lld cycles)\n",
+ t.s, t.ns, cycles);
+ P.buffer_trace_virq_tsc = 0;
+ }
+ else
+ {
+ printf("No trace_virq record found.\n");
+ }
+ printf("Dom0 runstates this buffer:\n");
+ runstate_summary(p->volume.buffer_dom0_runstate_cycles);
+ volume_summary(&p->volume.last_buffer);
+
+ /* reset info */
+ p->volume.buffer_first_tsc = 0;
+ p->volume.buffer_size = r->window_size;
+ runstate_clear(p->volume.buffer_dom0_runstate_cycles);
+ volume_clear(&p->volume.last_buffer);
+ }
+
+ p->file_offset += ri->size;
+ p->next_cpu_change_offset = p->file_offset + r->window_size;
+
+ if(p->next_cpu_change_offset > G.file_size)
+ activate_early_eof();
+ else if(p->pid == P.max_active_pcpu)
+ scan_for_new_pcpu(p->next_cpu_change_offset);
+
+ }
+}
+
+void process_record(struct pcpu_info *p) {
+ struct record_info *ri = &p->ri;
+ int toplevel = -1;
+
+ /* Process only TRC_TRACE_CPU_CHANGE */
+ if(ri->event == TRC_TRACE_CPU_CHANGE) {
+ process_cpu_change(p);
+ return;
+ }
+
+ if ( opt.dump_no_processing )
+ goto out;
+
+ p->summary = 1;
+
+ if( opt.dump_raw_process )
+ {
+ if(ri->rec.cycle_flag)
+ printf("* %x %d %lld\n",
+ ri->event, ri->extra_words, ri->tsc);
+ else
+ printf("* %x %d -\n",
+ ri->event, ri->extra_words);
+ }
+
+ /* Find the first tsc set */
+ if(ri->tsc && ri->tsc >= P.f.first_tsc) {
+ /* We use the order_tsc to account for the second processing of
+ * a lost record. */
+ tsc_t tsc = p->order_tsc;
+
+ if(P.f.first_tsc == 0) {
+ P.f.first_tsc = tsc;
+ if ( opt.interval_mode ) {
+ P.interval.start_tsc = tsc;
+ }
+ } else {
+ if ( opt.interval_mode ) {
+ if(P.interval.start_tsc > tsc) {
+ fprintf(warn, "FATAL: order_tsc %lld < interval.start_tsc %lld!\n",
+ tsc, P.interval.start_tsc);
+ exit(1);
+ }
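+ /* Catch up: fire the callback once per whole interval elapsed,
+ * so a long gap in the trace still yields one callback per interval. */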
+ while ( tsc - P.interval.start_tsc > opt.interval.cycles ) {
+ interval_callback();
+ P.interval.start_tsc += opt.interval.cycles;
+ }
+ }
+ }
+
+ P.f.last_tsc=tsc;
+
+ P.f.total_cycles = P.f.last_tsc - P.f.first_tsc;
+
+ P.now = tsc;
+ }
+
+ abs_cycles_to_time(ri->tsc, &ri->t);
+
+ if(opt.dump_cooked || opt.dump_all) {
+ if(ri->t.time) {
+ snprintf(ri->dump_header,
+ DUMP_HEADER_MAX,
+ "%3lu.%09lu %s ", ri->t.s, ri->t.ns, pcpu_string(ri->cpu));
+ } else {
+ snprintf(ri->dump_header,
+ DUMP_HEADER_MAX,
+ " %s ", pcpu_string(ri->cpu));
+ }
+ }
+
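+ /* ri->evt.main is expected to be one-hot: exactly one toplevel
+ * class bit set. Decode it to an index, and bail on malformed records. */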
+ {
+ unsigned int i, count;
+ for(i=0, count=0; i<TOPLEVEL_MAX; i++)
+ if(ri->evt.main & (1<<i))
+ {
+ toplevel=i;
+ count++;
+ }
+ if(count != 1)
+ {
+ fprintf(warn, "FATAL: unexpected number bits(%d) in evt.main! event %x main %x sub %x minor %x\n",
+ count,
+ ri->event,
+ ri->evt.main, ri->evt.sub, ri->evt.minor);
+ exit(1);
+ }
+ }
+
+ switch(toplevel) {
+ case TRC_GEN_MAIN:
+ base_process(p);
+ break;
+ case TRC_SCHED_MAIN:
+ sched_process(p);
+ break;
+ case TRC_HVM_MAIN:
+ hvm_process(p);
+ break;
+ case TRC_SHADOW_MAIN:
+ shadow_process(p);
+ break;
+ case TRC_PV_MAIN:
+ pv_process(p);
+ break;
+ case TRC_MEM_MAIN:
+ mem_process(p);
+ break;
+ case TRC_DOM0OP_MAIN:
+ default:
+ process_generic(ri);
+ }
+
+ UPDATE_VOLUME(p, toplevel[toplevel], ri->size);
+
+ if(!p->volume.buffer_first_tsc)
+ p->volume.buffer_first_tsc = ri->tsc;
+
+ out:
+ /* Lost records get processed twice */
+ if(ri->event != TRC_LOST_RECORDS)
+ p->file_offset += ri->size;
+}
+
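+/* On-disk record size: one 32-bit header word, an optional 64-bit tsc
+ * (when cycle_flag is set), plus extra_words 32-bit payload words.
+ * For example, a record with cycle_flag set and 3 extra words occupies
+ * 4 + 8 + 12 = 24 bytes. */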
+static inline ssize_t get_rec_size(struct trace_record *rec) {
+ ssize_t s;
+
+ s = sizeof(uint32_t);
+
+ if(rec->cycle_flag)
+ s += sizeof(tsc_t);
+
+ s += rec->extra_words * sizeof(uint32_t);
+
+ return s;
+}
+
+#define STDIN 0
+
+void progress_child_exec(void) {
+ fclose(stdin);
+ dup2(G.progress.pipe[0], STDIN);
+
+ execlp("zenity", "zenity", "--progress", "--auto-close", "--title",
+ "Analyzing", "--text", G.trace_file, "--auto-kill", NULL);
+}
+
+void progress_init(void) {
+ int pid;
+
+ if(pipe(G.progress.pipe) < 0) {
+ perror("pipe");
+ opt.progress = 0;
+ return;
+ }
+
+ if(!(pid = fork())) {
+ progress_child_exec();
+
+ fprintf(stderr, "%s: exec failed (%s), disabling progress bar\n",
+ __func__, strerror(errno));
+ opt.progress = 0;
+ exit(1);
+ } else if( pid < 0 ) {
+ fprintf(stderr, "%s: could not fork: %s, disabling progress bar\n",
+ __func__, strerror(errno));
+ opt.progress = 0;
+ }
+
+ if( (G.progress.out = fdopen(G.progress.pipe[1], "w")) == NULL ) {
+ fprintf(stderr, "%s: could not fdopen pipe: %s, disabling progress bar\n",
+ __func__, strerror(errno));
+ opt.progress = 0;
+ }
+
+}
+
+void progress_update(loff_t offset) {
+ long long p;
+
+ p = ( offset * 100 ) / G.file_size;
+
+ fprintf(G.progress.out, "%lld\n", p);
+ fflush(G.progress.out);
+
+ p += 1;
+
+ G.progress.update_offset = ( G.file_size * p ) / 100;
+
+#if 0
+ fprintf(stderr, "Progress: %lld %% Next update_offset: %lld\n",
+ p-1,
+ G.progress.update_offset);
+#endif
+}
+
+void progress_finish(void) {
+ int pid;
+
+ fprintf(G.progress.out, "100\n");
+ fflush(G.progress.out);
+ fclose(G.progress.out);
+
+ wait(NULL);
+
+ if(!(pid = fork())) {
+ /* Child */
+ char text[128];
+
+ snprintf(text, 128, "Finished analyzing %s",
+ G.trace_file);
+ execlp("zenity", "zenity", "--info", "--text", text, NULL);
+ }
+}
+
+ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset)
+{
+ ssize_t r, rsize;
+
+ r=pread64(fd, rec, sizeof(*rec), offset);
+
+ if(r < 0) {
+ /* Read error */
+ perror("read");
+ fprintf(stderr, "offset %llx\n", (unsigned long long)offset);
+ return 0;
+ } else if(r==0) {
+ /* End-of-file */
+ return 0;
+ } else if(r < sizeof(uint32_t)) {
+ /* Full header not read */
+ fprintf(stderr, "%s: short read (%d bytes)\n",
+ __func__, r);
+ exit(1);
+ }
+
+ rsize=get_rec_size(rec);
+
+ if(r < rsize) {
+ /* Full record not read */
+ fprintf(stderr, "%s: short read (%d, expected %d)\n",
+ __func__, r, rsize);
+ return 0;
+ }
+
+ return rsize;
+}
+
+void __fill_in_record_info(struct pcpu_info *p)
+{
+ struct record_info *ri;
+ tsc_t tsc=0;
+
+ ri = &p->ri;
+
+ ri->event = ri->rec.event;
+ ri->extra_words = ri->rec.extra_words;
+
+ if(ri->rec.cycle_flag) {
+ tsc = (((tsc_t)ri->rec.u.tsc.tsc_hi) << 32)
+ | ri->rec.u.tsc.tsc_lo;
+
+ tsc += p->tsc_skew.offset;
+
+ ri->tsc = tsc;
+ ri->d = ri->rec.u.tsc.data;
+
+ if(p->first_tsc == 0)
+ p->first_tsc = tsc;
+
+ /* We process lost record twice: once at the first_tsc,
+ once at the time it was placed in the log */
+ if(ri->event == TRC_LOST_RECORDS && ri->extra_words == 4) {
+ struct lost_record_struct *r = (typeof(r))ri->d;
+ p->order_tsc = r->first_tsc + p->tsc_skew.offset;
+ } else
+ p->order_tsc = tsc;
+
+ p->last_tsc = tsc;
+ } else {
+ ri->tsc = p->last_tsc;
+ ri->d = ri->rec.u.notsc.data;
+ }
+
+ if ( opt.dump_raw_reads ) {
+ int i;
+ printf("R p%2d o%016llx %8lx %d ",
+ p->pid, p->file_offset,
+ (unsigned long)ri->rec.event, ri->rec.extra_words);
+
+ if(ri->rec.cycle_flag)
+ printf("t%016llx [ ", tsc);
+ else
+ printf(" [ ");
+
+ for(i=0; i<ri->rec.extra_words; i++)
+ printf("%x ",
+ ri->d[i]);
+ printf("]\n");
+ }
+
+ /* Updated tracing uses CPU_CHANGE. If we hit one of these,
+ * it will be processed next (since the tsc isn't updated), and
+ * we'll skip forward appropriately. */
+ ri->cpu = p->pid;
+}
+
+ssize_t read_record(int fd, struct pcpu_info * p) {
+ loff_t * offset;
+ struct record_info *ri;
+
+ offset = &p->file_offset;
+ ri = &p->ri;
+
+ ri->size = __read_record(fd, &ri->rec, *offset);
+ if(ri->size)
+ {
+ __fill_in_record_info(p);
+ }
+ else
+ {
+ fprintf(warn, "%s: read returned zero, deactivating pcpu %d\n",
+ __func__, p->pid);
+ p->active = 0;
+ }
+
+ return ri->size;
+}
+
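+/* Render one character per active pcpu, e.g. "-x--": 'x' marks the pcpu
+ * that produced the current record, '-' any other active pcpu, and 'l'
+ * a pcpu that was in lost-record state when it came online. */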
+/* WARNING not thread-safe */
+char * pcpu_string(int pcpu)
+{
+ static char s[MAX_CPUS+1] = { 0 };
+ static int max_active_pcpu=-1, last_pcpu=-1;
+
+ assert(P.max_active_pcpu < MAX_CPUS);
+ assert(pcpu <= P.max_active_pcpu);
+
+ if(P.max_active_pcpu > max_active_pcpu)
+ {
+ int i;
+ for(i=max_active_pcpu + 1; i<= P.max_active_pcpu; i++) {
+ if(P.pcpu[i].lost_record.active)
+ s[i]='l';
+ else
+ s[i]='-';
+ }
+ }
+
+ if(last_pcpu >= 0)
+ s[last_pcpu]='-';
+
+ s[pcpu]='x';
+ last_pcpu = pcpu;
+
+ return s;
+}
+
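+/* Choose the active pcpu whose pending record has the lowest order_tsc,
+ * so records are consumed in (skew-adjusted) global time order; also
+ * recomputes max_active_pcpu and drives the progress bar from the
+ * minimum file offset seen. */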
+struct pcpu_info * choose_next_record(void)
+{
+ int i, max_active_pcpu = -1;
+ struct pcpu_info * p, *min_p=NULL;
+ loff_t min_offset = 0;
+
+ for(i=0; i<=P.max_active_pcpu; i++)
+ {
+ p = P.pcpu+i;
+ if(!p->active)
+ continue;
+
+ max_active_pcpu = i;
+ if(!min_p || p->order_tsc < min_p->order_tsc)
+ min_p = p;
+
+ if(!min_offset || p->file_offset < min_offset)
+ min_offset = p->file_offset;
+ }
+
+ P.max_active_pcpu = max_active_pcpu;
+
+ if(opt.progress && min_offset >= G.progress.update_offset)
+ progress_update(min_offset);
+
+ /* If there are active pcpus, make sure we chose one */
+ assert(min_p || (max_active_pcpu==-1));
+
+ return min_p;
+}
+
+void process_records(void) {
+ while(1) {
+ struct pcpu_info *p = NULL;
+
+ if(!(p=choose_next_record()))
+ return;
+
+ process_record(p);
+
+ /* Lost records get processed twice. */
+ if(p->ri.event == TRC_LOST_RECORDS) {
+ p->ri.event = TRC_LOST_RECORDS_END;
+ if(p->ri.tsc > p->order_tsc)
+ p->order_tsc = p->ri.tsc;
+ else {
+ fprintf(warn, "Strange, lost_record ri->tsc %lld !> p->order_tsc %lld!\n",
+ p->ri.tsc, p->order_tsc);
+ exit(1);
+ }
+ }
+ else
+ read_record(G.fd, p);
+
+ }
+}
+
+void vcpu_summary(struct vcpu_data *v)
+{
+ printf("-- v%d --\n", v->vid);
+ sched_summary_vcpu(v);
+ switch(v->data_type) {
+ case VCPU_DATA_HVM:
+ hvm_summary(&v->hvm);
+ break;
+ case VCPU_DATA_PV:
+ pv_summary(&v->pv);
+ break;
+ default:
+ break;
+ }
+}
+
+void domain_summary(void)
+{
+ struct domain_data * d;
+ int i;
+
+ if(opt.show_default_domain_summary) {
+ d = &default_domain;
+ printf("|-- Default domain --|\n");
+
+ for( i = 0; i < MAX_CPUS ; i++ )
+ {
+ if(d->vcpu[i])
+ vcpu_summary(d->vcpu[i]);
+ }
+ }
+
+ for ( d = domain_list ; d ; d=d->next )
+ {
+ int i;
+ printf("|-- Domain %d --|\n", d->did);
+
+ sched_summary_domain(d);
+
+ mem_summary_domain(d);
+
+ for( i = 0; i < MAX_CPUS ; i++ )
+ {
+ if(d->vcpu[i])
+ vcpu_summary(d->vcpu[i]);
+ }
+
+ printf("Emulate eip list\n");
+ dump_eip(d->emulate_eip_list);
+
+ cr3_dump_list(d->cr3_value_head);
+ }
+}
+
+char * stringify_cpu_hz(long long cpu_hz);
+
+void summary(void) {
+ int i;
+ printf("Total time: %.2lf seconds (using cpu speed %s)\n",
+ ((double)(P.f.total_cycles))/opt.cpu_hz,
+ stringify_cpu_hz(opt.cpu_hz));
+ printf("--- Log volume summary ---\n");
+ for(i=0; i<MAX_CPUS; i++)
+ {
+ struct pcpu_info *p = P.pcpu+i;
+ if(!p->summary)
+ continue;
+ printf(" - cpu %d -\n", i);
+ volume_summary(&p->volume.total);
+ }
+ domain_summary();
+}
+
+void init_pcpus(void) {
+ int i=0;
+ loff_t offset = 0;
+
+ for(i=0; i<MAX_CPUS; i++)
+ {
+ P.pcpu[i].pid=i;
+ }
+
+ P.max_active_pcpu = -1;
+
+ sched_default_domain_init();
+
+ /* Scan through the cpu_change recs until we see a duplicate */
+ do {
+ offset = scan_for_new_pcpu(offset);
+
+ if(!offset) {
+ fprintf(warn, "%s: through first trace write, done for now.\n",
+ __func__);
+ }
+ } while(offset);
+
+}
+
+enum {
+ OPT_NULL=0,
+ /* Dumping info */
+ OPT_DUMP_COOKED,
+ OPT_DUMP_RAW_READS,
+ OPT_DUMP_RAW_PROCESS,
+ OPT_DUMP_NO_PROCESSING,
+ OPT_DUMP_IPI_LATENCY,
+ OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD,
+ /* Extra tracking functionality */
+ OPT_WITH_CR3_ENUMERATION,
+ OPT_WITH_PIO_ENUMERATION,
+ OPT_SCATTERPLOT_INTERRUPT_EIP,
+ OPT_SCATTERPLOT_CPI,
+ OPT_SCATTERPLOT_UNPIN_PROMOTE,
+ OPT_SCATTERPLOT_CR3_SWITCH,
+ OPT_SCATTERPLOT_WAKE_TO_HALT,
+ OPT_SCATTERPLOT_IO,
+ OPT_HISTOGRAM_INTERRUPT_EIP,
+ /* Interval options */
+ OPT_INTERVAL_CR3_SCHEDULE_TIME,
+ OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL,
+ OPT_INTERVAL_CR3_SCHEDULE_ORDERED,
+ OPT_INTERVAL_CR3_SHORT_SUMMARY,
+ OPT_INTERVAL_DOMAIN_SHORT_SUMMARY,
+ OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT,
+ OPT_INTERVAL_DOMAIN_GRANT_MAPS,
+ /* Summary info */
+ OPT_SHOW_DEFAULT_DOMAIN_SUMMARY,
+ OPT_SAMPLE_SIZE,
+ /* Guest info */
+ OPT_DEFAULT_GUEST_PAGING_LEVELS,
+ OPT_SYMBOL_FILE,
+ /* Hardware info */
+ OPT_SVM_MODE,
+ OPT_CPU_HZ,
+ /* Misc */
+ OPT_PROGRESS,
+ /* Specific letters */
+ OPT_DUMP_ALL='a',
+ OPT_INTERVAL_LENGTH='i',
+ OPT_SUMMARY='s',
+};
+
+enum {
+ OPT_GROUP_SUMMARY=1,
+ OPT_GROUP_DUMP,
+ OPT_GROUP_INTERVAL,
+ OPT_GROUP_EXTRA,
+ OPT_GROUP_GUEST,
+ OPT_GROUP_HARDWARE
+};
+
+#define xstr(x) str(x)
+#define str(x) #x
+
+#define GHZ 1000000000LL
+#define MHZ 1000000LL
+#define KHZ 1000LL
+
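+/*
+ * Parse a cpu speed argument with an optional G/M/K suffix; for
+ * example, "2.4G", "2400M", and "2400000000" all yield 2400000000.
+ */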
+void parse_cpu_hz(char * arg) {
+ float hz_base;
+ char * next_ptr;
+
+ hz_base=strtof(arg, &next_ptr);
+ if(next_ptr == arg) {
+ fprintf(stderr, "Invalid cpu_hz %s\n", arg);
+ exit(1);
+ }
+ switch(*next_ptr) {
+ case '\0':
+ opt.cpu_hz=(long long)hz_base;
+ break;
+ case 'G':
+ opt.cpu_hz= hz_base * GHZ;
+ break;
+ case 'M':
+ opt.cpu_hz=hz_base * MHZ;
+ break;
+ case 'K':
+ opt.cpu_hz=hz_base * KHZ;
+ break;
+ default:
+ fprintf(stderr, "Unknown suffix %c\n", *next_ptr);
+ exit(1);
+ }
+}
+
+/* WARNING: returns a static buffer, so this is not thread-safe and
+ * each call overwrites the previous result. */
+char * stringify_cpu_hz(long long cpu_hz) {
+ static char cpu_string[20], suffix;
+ float hz;
+
+ if(cpu_hz > GHZ) {
+ hz = (float)cpu_hz / GHZ;
+ suffix = 'G';
+ } else if(cpu_hz > MHZ) {
+ hz = (float)cpu_hz / MHZ;
+ suffix = 'M';
+ } else if(cpu_hz > KHZ) {
+ hz = (float)cpu_hz / KHZ;
+ suffix = 'k';
+ } else {
+ hz = cpu_hz;
+ suffix = ' ';
+ }
+
+ snprintf(cpu_string, 20, "%1.2lf %cHz", hz, suffix);
+
+ return cpu_string;
+}
+
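+/*
+ * Parse a comma-separated list of cr3 values (e.g. "0x1a3000,0x5cf000")
+ * into opt.interval.cr3.values, sizing P.interval.cr3.array to match.
+ * Returns the number of values parsed, or -1 on a parse error.
+ */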
+int parse_cr3_array(char *arg) {
+ char *p, *q;
+ int n=1, i;
+
+ /* Count the number of commas (and thus the number of elements) */
+ for(p=arg; *p; p++)
+ if(*p == ',')
+ n++;
+
+ opt.interval.cr3.count = n;
+ opt.interval.cr3.values = malloc(n * sizeof(unsigned long long));
+ P.interval.cr3.array = malloc(n * sizeof(struct cr3_value_struct *));
+
+ if(!opt.interval.cr3.values || !P.interval.cr3.array) {
+ fprintf(stderr, "Malloc failed!\n");
+ exit(1);
+ }
+
+ bzero(P.interval.cr3.array, n*sizeof(struct cr3_value_struct *));
+
+ /* Now parse the elements */
+ p = q = arg;
+ for(i=0; i<n; i++) {
+ opt.interval.cr3.values[i] = strtoull(p, &q, 0);
+ if(p == q) {
+ fprintf(stderr, "Bad format: %s\n", q);
+ return -1;
+ }
+ if(*q == ',')
+ q++;
+ else if(*q != '\0') {
+ fprintf(stderr, "Bad format: %s\n", q);
+ return -1;
+ }
+ p=q;
+ }
+
+ return n;
+}
+
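+/* argp parser callback: handles one option (or the positional trace
+ * file argument) per call. */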
+error_t cmd_parser(int key, char *arg, struct argp_state *state)
+{
+ switch (key)
+ {
+ /* Dump group */
+ case OPT_DUMP_COOKED:
+ opt.dump_cooked = 1;
+ G.output_defined = 1;
+ break;
+ case OPT_DUMP_ALL:
+ opt.dump_all = 1;
+ G.output_defined = 1;
+ break;
+ case OPT_DUMP_RAW_READS:
+ opt.dump_raw_reads = 1;
+ G.output_defined = 1;
+ break;
+ case OPT_DUMP_NO_PROCESSING:
+ opt.dump_no_processing = 1;
+ opt.dump_raw_reads = 1;
+ G.output_defined = 1;
+ break;
+ case OPT_DUMP_RAW_PROCESS:
+ opt.dump_raw_process = 1;
+ G.output_defined = 1;
+ break;
+ case OPT_DUMP_IPI_LATENCY:
+ opt.dump_ipi_latency = 1;
+ break;
+ case OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD:
+ opt.dump_trace_volume_on_lost_record = 1;
+ break;
+ /* Extra group */
+ case OPT_WITH_CR3_ENUMERATION:
+ opt.with_cr3_enumeration=1;
+ break;
+ case OPT_WITH_PIO_ENUMERATION:
+ opt.with_pio_enumeration=1;
+ break;
+ case OPT_SHOW_DEFAULT_DOMAIN_SUMMARY:
+ opt.show_default_domain_summary=1;
+ break;
+ case OPT_SAMPLE_SIZE:
+ {
+ char * inval;
+ opt.sample_size = (int)strtol(arg, &inval, 0);
+ if( inval == arg )
+ argp_usage(state);
+ break;
+ }
+ case OPT_SCATTERPLOT_INTERRUPT_EIP:
+ {
+ char * inval;
+ G.output_defined = 1;
+ opt.scatterplot_interrupt_eip=1;
+ opt.scatterplot_interrupt_vector = (int)strtol(arg, &inval, 0);
+ if( inval == arg )
+ argp_usage(state);
+ }
+ break;
+ case OPT_SCATTERPLOT_CPI:
+ G.output_defined = 1;
+ opt.scatterplot_cpi=1;
+ break;
+ case OPT_SCATTERPLOT_UNPIN_PROMOTE:
+ G.output_defined = 1;
+ opt.scatterplot_unpin_promote=1;
+ break;
+ case OPT_SCATTERPLOT_CR3_SWITCH:
+ G.output_defined = 1;
+ opt.scatterplot_cr3_switch=1;
+ break;
+ case OPT_SCATTERPLOT_WAKE_TO_HALT:
+ G.output_defined = 1;
+ opt.scatterplot_wake_to_halt=1;
+ break;
+ case OPT_SCATTERPLOT_IO:
+ {
+ char * inval;
+ G.output_defined = 1;
+ opt.scatterplot_io=1;
+ opt.scatterplot_io_port = (int)strtol(arg, &inval, 0);
+ if( inval == arg )
+ argp_usage(state);
+ }
+ break;
+ case OPT_HISTOGRAM_INTERRUPT_EIP:
+ {
+ char * inval, *p;
+
+ opt.histogram_interrupt_eip=1;
+ opt.histogram_interrupt_vector = (int)strtol(arg, &inval, 0);
+
+ if( inval == arg )
+ argp_usage(state);
+
+ p = inval;
+
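+ /* A second comma-separated value overrides the default increment
+ * of 0x1000000. */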
+ if(*p == ',')
+ opt.histogram_interrupt_increment = (unsigned long long)strtoull(p+1, &inval, 0);
+ else
+ opt.histogram_interrupt_increment = 0x1000000;
+
+ printf("Making histogram of eips at interrupt %d, increment %llx\n",
+ opt.histogram_interrupt_vector,
+ opt.histogram_interrupt_increment);
+ }
+ break;
+
+ case OPT_INTERVAL_LENGTH:
+ {
+ char * inval;
+
+ opt.interval.msec = (unsigned) (strtof(arg, &inval) * 1000);
+
+ if ( inval == arg )
+ argp_usage(state);
+
+ break;
+ }
+
+ case OPT_INTERVAL_CR3_SCHEDULE_TIME:
+ {
+ if(parse_cr3_array(arg) < 0)
+ goto usage;
+ opt.interval.output = INTERVAL_CR3_SCHEDULE_TIME;
+ opt.interval_mode = 1;
+ opt.summary_info = 1;
+ G.output_defined = 1;
+ break;
+ usage: /* Also reached via goto from OPT_INTERVAL_CR3_SHORT_SUMMARY below */
+ fprintf(stderr, "Invalid cr3 list: %s\n", arg);
+ argp_usage(state);
+ break;
+ }
+
+ case OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL:
+ opt.interval.output = INTERVAL_CR3_SCHEDULE_TIME;
+ opt.interval_mode = 1;
+ opt.summary_info = 1;
+ G.output_defined = 1;
+ break;
+
+ case OPT_INTERVAL_CR3_SCHEDULE_ORDERED:
+ opt.interval.output = INTERVAL_CR3_SCHEDULE_ORDERED;
+ opt.interval_mode = 1;
+ opt.summary_info = 1;
+ G.output_defined = 1;
+ break;
+
+ case OPT_INTERVAL_CR3_SHORT_SUMMARY:
+ {
+ if(parse_cr3_array(arg) < 0 || opt.interval.cr3.count != 1)
+ goto usage;
+ opt.interval.output = INTERVAL_CR3_SHORT_SUMMARY;
+ opt.interval_mode = 1;
+ opt.summary_info = 1;
+ G.output_defined = 1;
+ break;
+ }
+
+ case OPT_INTERVAL_DOMAIN_SHORT_SUMMARY:
+ {
+ char *inval;
+
+ opt.interval.domain.did = (int)strtol(arg, &inval, 0);
+ if( inval == arg )
+ argp_usage(state);
+
+ opt.interval.output = INTERVAL_DOMAIN_SHORT_SUMMARY;
+ opt.interval_mode = 1;
+ opt.summary_info = 1;
+ G.output_defined = 1;
+ break;
+ }
+
+ case OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT:
+ {
+ char *inval;
+
+ opt.interval.domain.did = (int)strtol(arg, &inval, 0);
+ if( inval == arg )
+ argp_usage(state);
+
+ opt.interval.output = INTERVAL_DOMAIN_GUEST_INTERRUPT;
+ opt.interval_mode = 1;
+ opt.summary_info = 1;
+ G.output_defined = 1;
+ break;
+ }
+
+ case OPT_INTERVAL_DOMAIN_GRANT_MAPS:
+ {
+ char *inval;
+
+ opt.interval.domain.did = (int)strtol(arg, &inval, 0);
+ if( inval == arg )
+ argp_usage(state);
+
+ opt.interval.output = INTERVAL_DOMAIN_GRANT_MAPS;
+ opt.interval_mode = 1;
+ opt.summary_info = 1;
+ G.output_defined = 1;
+ break;
+ }
+
+ /* Summary group */
+ case OPT_SUMMARY:
+ opt.summary = 1;
+ opt.summary_info = 1;
+ G.output_defined = 1;
+ break;
+ /* Guest info group */
+ case OPT_DEFAULT_GUEST_PAGING_LEVELS:
+ {
+ char *inval;
+ opt.default_guest_paging_levels = (int)strtol(arg, &inval, 0);
+ if ( inval == arg )
+ argp_usage(state);
+ }
+ break;
+ case OPT_SYMBOL_FILE:
+ /* FIXME - strcpy */
+ G.symbol_file = arg;
+ break;
+ /* Hardware info group */
+ case OPT_SVM_MODE:
+ opt.svm_mode = 1;
+ break;
+ case OPT_CPU_HZ:
+ parse_cpu_hz(arg);
+ break;
+
+ case OPT_PROGRESS:
+ opt.progress = 1;
+ break;
+
+ case ARGP_KEY_ARG:
+ {
+ /* FIXME - strcpy */
+ if (state->arg_num == 0)
+ G.trace_file = arg;
+ else
+ argp_usage(state);
+ }
+ break;
+ case ARGP_KEY_END:
+ {
+ if(opt.interval_mode) {
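+ /* interval.msec was held in milliseconds until now; convert it to
+ * tsc cycles once cpu_hz is final. */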
+ opt.interval.cycles = ( opt.interval.msec * opt.cpu_hz ) / 1000 ;
+ interval_header();
+ }
+
+ if(!G.output_defined)
+ {
+ fprintf(stderr, "No output defined, using summary.\n");
+ opt.summary = 1;
+ opt.summary_info = 1;
+ }
+ fprintf(stderr, "Using %s hardware-assisted virtualization.\n",
+ opt.svm_mode?"SVM":"VMX");
+ }
+ break;
+
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp_option cmd_opts[] = {
+ /* Dump group */
+ { .name = "dump-cooked",
+ .key = OPT_DUMP_COOKED,
+ .group = OPT_GROUP_DUMP,
+ .doc = "Dump a sanitized summary of vmexit/vmentry.", },
+
+ { .name = "dump-all",
+ .key = OPT_DUMP_ALL,
+ .group = OPT_GROUP_DUMP,
+ .doc = "Dump all records as they come in.", },
+
+ { .name = "dump-raw-reads",
+ .key = OPT_DUMP_RAW_READS,
+ .group = OPT_GROUP_DUMP,
+ .doc = "Dump raw data as it's read from disk. Useful mainly for debugging the analysis tool.", },
+
+ { .name = "dump-no-processing",
+ .key = OPT_DUMP_NO_PROCESSING,
+ .group = OPT_GROUP_DUMP,
+ .doc = "Don't do any processing on records other than cpu changes. Implies dump-raw-reads (or you wouldn't get anything).", },
+
+ { .name = "dump-raw-process",
+ .key = OPT_DUMP_RAW_PROCESS,
+ .group = OPT_GROUP_DUMP,
+ .doc = "Dump raw data as it's processed. Useful mainly for debugging the analysis tool.", },
+
+ { .name = "dump-ipi-latency",
+ .key = OPT_DUMP_IPI_LATENCY,
+ .group = OPT_GROUP_DUMP,
+ .doc = "Dump IPI latency info as IPIs are delivered (vector 0xd1 only).", },
+
+ { .name = "dump-trace-volume-on-lost-record",
+ .key = OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD,
+ .group = OPT_GROUP_DUMP,
+ .doc = "Dump the volume of trace types in the previous cpu buffer when a lost record is created.", },
+
+ /* Extra processing group */
+ { .name = "with-cr3-enumeration",
+ .key = OPT_WITH_CR3_ENUMERATION,
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Keep track of per-cr3 values", },
+
+ { .name = "with-pio-enumeration",
+ .key = OPT_WITH_PIO_ENUMERATION,
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Report summary info on indiviaul IO addresses", },
+
+ { .name = "scatterplot-interrupt-eip",
+ .key = OPT_SCATTERPLOT_INTERRUPT_EIP,
+ .arg = "vector",
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Output scatterplot of eips as a function of time.", },
+
+ { .name = "scatterplot-cpi",
+ .key = OPT_SCATTERPLOT_CPI,
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Output scatterplot of cpi.", },
+
+ { .name = "scatterplot-unpin-promote",
+ .key = OPT_SCATTERPLOT_UNPIN_PROMOTE,
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Output scatterplot of unpins and promotions. If " \
+ "--with-cr3-enumeration is included, promotions include current cr3.", },
+
+ { .name = "scatterplot-cr3-switch",
+ .key = OPT_SCATTERPLOT_CR3_SWITCH,
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Output scatterplot of cr3 switches.", },
+
+ { .name = "scatterplot-wake-to-halt",
+ .key = OPT_SCATTERPLOT_WAKE_TO_HALT,
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Output scatterplot of wake-to-halt.", },
+
+ { .name = "scatterplot-io",
+ .key = OPT_SCATTERPLOT_IO,
+ .arg = "port",
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Output scatterplot of io latencies for givein address as a function of time.", },
+
+ { .name = "histogram-interrupt-eip",
+ .key = OPT_HISTOGRAM_INTERRUPT_EIP,
+ .arg = "vector[,increment]",
+ .group = OPT_GROUP_EXTRA,
+ .doc = "Output histograms of eips.", },
+
+ { .name = "interval",
+ .key = OPT_INTERVAL_LENGTH,
+ .arg = "sec",
+ .group = OPT_GROUP_INTERVAL,
+ .doc = "Interval length to do time-based graphs, in seconds", },
+
+ { .name = "interval-cr3-schedule-time",
+ .key = OPT_INTERVAL_CR3_SCHEDULE_TIME,
+ .arg = "gmfn[,gmfn...]",
+ .group = OPT_GROUP_INTERVAL,
+ .doc = "Print a csv with the listed cr3 value(s) every interval.", },
+
+ { .name = "interval-cr3-schedule-time-all",
+ .key = OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL,
+ .group = OPT_GROUP_INTERVAL,
+ .doc = "Print a csv with all cr3 values every interval.", },
+
+ { .name = "interval-cr3-schedule-ordered",
+ .key = OPT_INTERVAL_CR3_SCHEDULE_ORDERED,
+ .group = OPT_GROUP_INTERVAL,
+ .doc = "Print summary with the top 10 cr3 values every interval.", },
+
+ { .name = "interval-cr3-short-summary",
+ .key = OPT_INTERVAL_CR3_SHORT_SUMMARY,
+ .arg = "gmfn",
+ .group = OPT_GROUP_INTERVAL,
+ .doc = "Print a csv with the hvm short summary of cr3 value every interval.", },
+
+ { .name = "interval-domain-short-summary",
+ .key = OPT_INTERVAL_DOMAIN_SHORT_SUMMARY,
+ .arg = "domain-id",
+ .group = OPT_GROUP_INTERVAL,
+ .doc = "Print a csv with the hvm short summary of given domain every interval.", },
+
+ { .name = "interval-domain-guest-interrupt",
+ .key = OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT,
+ .arg = "domain-id",
+ .group = OPT_GROUP_INTERVAL,
+ .doc = "Print a csv with the guest interrupt count of given domain every interval.", },
+
+ { .name = "interval-domain-grant-maps",
+ .key = OPT_INTERVAL_DOMAIN_GRANT_MAPS,
+ .arg = "domain-id",
+ .group = OPT_GROUP_INTERVAL,
+ .doc = "Print a csv with the grant maps done on behalf of a given domain every interval.", },
+
+ /* Summary group */
+ { .name = "show-default-domain-summary",
+ .key = OPT_SHOW_DEFAULT_DOMAIN_SUMMARY,
+ .group = OPT_GROUP_SUMMARY,
+ .doc = "Show default domain information on summary", },
+
+ { .name = "sample-size",
+ .key = OPT_SAMPLE_SIZE,
+ .arg = "size",
+ .group = OPT_GROUP_SUMMARY,
+ .doc = "Keep [size] samples for percentile purposes. Enter 0 to " \
+ "disable. Default 10240.", },
+
+ { .name = "summary",
+ .key = OPT_SUMMARY,
+ .group = OPT_GROUP_SUMMARY,
+ .doc = "Output a summary", },
+
+ /* Guest info */
+ { .name = "default-guest-paging-levels",
+ .key = OPT_DEFAULT_GUEST_PAGING_LEVELS,
+ .group = OPT_GROUP_GUEST,
+ .arg = "L",
+ .doc = "Default guest paging levels. Mainly necessary for Rio, as Miami traces include guest paging levels where appropriate.", },
+
+ { .name = "symbol-file",
+ .key = OPT_SYMBOL_FILE,
+ .group = OPT_GROUP_GUEST,
+ .arg = "filename",
+ .doc = "A symbol file for interpreting guest eips.", },
+
+ /* Hardware info */
+ { .name = "cpu-hz",
+ .key = OPT_CPU_HZ,
+ .group = OPT_GROUP_HARDWARE,
+ .arg = "HZ",
+ .doc = "Cpu speed of the tracing host, used to convert tsc into seconds.", },
+
+ { .name = "svm-mode",
+ .key = OPT_SVM_MODE,
+ .group = OPT_GROUP_HARDWARE,
+ .doc = "Assume AMD SVM-style vmexit error codes. (Default is Intel VMX.)", },
+
+ { .name = "progress",
+ .key = OPT_PROGRESS,
+ .doc = "Progress dialog. Requires the zenity (GTK+) executable.", },
+
+ { 0 },
+};
+
+const struct argp parser_def = {
+ .options = cmd_opts,
+ .parser = cmd_parser,
+ .args_doc = "[trace file]",
+ .doc = "",
+};
+
+const char *argp_program_version = "xenalyze - XenServer Trunk";
+const char *argp_program_bug_address = "George Dunlap <george.dunlap@eu.citrix.com>";
+
+
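+/*
+ * Example invocation (the trace file name is illustrative):
+ *   xenalyze --summary --cpu-hz 2.4G xen-trace.bin
+ */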
+int main(int argc, char *argv[]) {
+
+ argp_parse(&parser_def, argc, argv, 0, NULL, NULL);
+
+ if (G.trace_file == NULL) {
+ fprintf(stderr, "No trace file specified.\n");
+ exit(1);
+ }
+
+ if ( (G.fd = open(G.trace_file, O_RDONLY|O_LARGEFILE)) < 0) {
+ perror("open");
+ exit(1);
+ } else {
+ struct stat s;
+ if (fstat(G.fd, &s) < 0) {
+ perror("fstat");
+ exit(1);
+ }
+ G.file_size = s.st_size;
+ }
+ if (G.symbol_file != NULL)
+ parse_symbol_file(G.symbol_file);
+
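+ /* Dump modes write records to stdout, so send warnings there too,
+ * keeping them interleaved with the records they refer to. */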
+ if(opt.dump_cooked || opt.dump_all)
+ warn = stdout;
+ else
+ warn = stderr;
+
+ init_pcpus();
+
+ if(opt.progress)
+ progress_init();
+
+ process_records();
+
+ if(opt.interval_mode)
+ interval_tail();
+
+ if(opt.summary)
+ summary();
+
+ if(opt.progress)
+ progress_finish();
+
+ return 0;
+}
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */