]> xenbits.xensource.com Git - xenalyze.git/commitdiff
Initial commit.
author George Dunlap <gdunlap@gdunlap-desktop.(none)>
Thu, 11 Sep 2008 11:36:53 +0000 (12:36 +0100)
committer George Dunlap <gdunlap@gdunlap-desktop.(none)>
Thu, 11 Sep 2008 11:36:53 +0000 (12:36 +0100)
Makefile [new file with mode: 0644]
analyze.c [new file with mode: 0644]
analyze.h [new file with mode: 0644]
code-intro.txt [new file with mode: 0644]
dump-raw.c [new file with mode: 0644]
trace.h [new file with mode: 0644]

diff --git a/Makefile b/Makefile
new file mode 100644 (file)
index 0000000..4be0477
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,25 @@
+# Build the xenalyze trace tools: each binary is compiled directly
+# from its single .c file via the pattern rule at the bottom.
+CC = gcc
+
+# Debugging plus the dialect/diagnostic options the code relies on.
+CFLAGS += -g -fno-strict-aliasing
+CFLAGS += -std=gnu99
+CFLAGS += -Wall -Wstrict-prototypes -Werror
+CFLAGS += -Wno-unused-value -Wdeclaration-after-statement
+
+# Large-file support for >2GB trace files; avoid TLS segment refs.
+CFLAGS += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
+CFLAGS += -mno-tls-direct-seg-refs
+
+BIN  = analyze dump-raw
+HDRS = trace.h analyze.h
+
+all: $(BIN)
+
+.PHONY: clean
+clean:
+	$(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN)
+
+# Rebuild whenever the source, shared headers, or this Makefile change.
+%: %.c $(HDRS) Makefile
+	$(CC) $(CFLAGS) -o $@ $<
diff --git a/analyze.c b/analyze.c
new file mode 100644 (file)
index 0000000..0ccd852
--- /dev/null
+++ b/analyze.c
@@ -0,0 +1,7935 @@
+#define _XOPEN_SOURCE 600
+#include <stdio.h>
+#include <stdlib.h>
+#include <argp.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "trace.h"
+#include "analyze.h"
+#include <errno.h>
+#include <strings.h>
+#include <string.h>
+#include <assert.h>
+
+typedef unsigned long long tsc_t; /* 64-bit timestamp-counter value */
+
+#define DEFAULT_CPU_HZ 2400000000LL /* fallback cpu frequency (2.4 GHz), used as opt.cpu_hz default */
+#define ADDR_SPACE_BITS 48
+#define DEFAULT_SAMPLE_SIZE 10240
+#define DEFAULT_INTERVAL_LENGTH 1000 /* default interval length in msec (see opt.interval.msec) */
+
+/* -- Global variables -- */
+struct {
+    int fd;                          /* trace file descriptor; -1 when not open (see initializer) */
+    struct symbol_struct * symbols;  /* head of symbol list filled by parse_symbol_file() */
+    char * symbol_file;              /* path of symbol file, if given */
+    char * trace_file;               /* path of trace file, if given */
+    int output_defined;
+    loff_t file_size;
+    struct {                         /* state for the progress-reporting child process */
+        loff_t update_offset;
+        int pipe[2];
+        FILE* out;
+        int pid;
+    } progress;
+} G = {
+    .fd=-1,
+    .symbols = NULL,
+    .symbol_file = NULL,
+    .trace_file = NULL,
+    .output_defined = 0,
+    .file_size = 0,
+    .progress = { .update_offset = 0 },
+};
+
+struct { /* command-line options; flags first, then values */
+    unsigned 
+        scatterplot_interrupt_eip:1,
+        scatterplot_cpi:1,
+        scatterplot_unpin_promote:1,
+        scatterplot_cr3_switch:1,
+        scatterplot_wake_to_halt:1,
+        scatterplot_io:1,
+        histogram_interrupt_eip:1,
+        interval_mode:1,
+        dump_cooked:1,
+        dump_all:1,
+        dump_raw_process:1,
+        dump_raw_reads:1,
+        dump_no_processing:1,
+        dump_ipi_latency:1,
+        dump_trace_volume_on_lost_record:1,
+        with_cr3_enumeration:1,
+        with_pio_enumeration:1,
+        show_default_domain_summary:1,
+        progress:1,
+        svm_mode:1,
+        summary:1,
+        summary_info; /* NOTE(review): no :1 width -- takes the remaining bits; confirm intentional */
+    long long cpu_hz;                /* cpu frequency used for tsc->time conversion */
+    int scatterplot_interrupt_vector;
+    int scatterplot_io_port;
+    int histogram_interrupt_vector;
+    unsigned long long histogram_interrupt_increment;
+    int default_guest_paging_levels;
+    int sample_size;
+    struct {
+        tsc_t cycles;
+        /* Used if interval is specified in seconds to delay calculating
+         * time_interval until all arguments have been processed (specifically,
+         * cpu_hz). */
+        unsigned msec;
+        enum { /* which report to emit per interval */
+            INTERVAL_CR3_SCHEDULE_TIME,
+            INTERVAL_CR3_SCHEDULE_ORDERED,
+            INTERVAL_CR3_SHORT_SUMMARY,
+            INTERVAL_DOMAIN_SHORT_SUMMARY,
+            INTERVAL_DOMAIN_GUEST_INTERRUPT,
+            INTERVAL_DOMAIN_GRANT_MAPS
+        } output;
+        /* Options for specific interval output types */
+        union {
+            struct {
+                unsigned long long *values;
+                int count;
+            } cr3;
+            struct {
+                int did;  /* domain id to report on */
+            } domain;
+        };
+    } interval;
+} opt = { /* fields not listed here are zero-initialized by C rules */
+    .scatterplot_interrupt_eip=0,
+    .scatterplot_cpi=0,
+    .scatterplot_unpin_promote=0, 
+    .scatterplot_cr3_switch=0,
+    .scatterplot_wake_to_halt=0,
+    .histogram_interrupt_eip=0,
+    .dump_cooked = 0,
+    .dump_all = 0,
+    .dump_raw_process = 0,
+    .dump_raw_reads = 0,
+    .dump_no_processing = 0,
+    .dump_ipi_latency = 0,
+    .dump_trace_volume_on_lost_record = 0,
+    .with_cr3_enumeration = 0,
+    .with_pio_enumeration = 1,
+    .show_default_domain_summary = 0,
+    .progress = 0,
+    .svm_mode = 0,
+    .summary = 0,
+    .cpu_hz = DEFAULT_CPU_HZ,
+    .default_guest_paging_levels = 2,
+    .sample_size = DEFAULT_SAMPLE_SIZE,
+    .interval = { .msec = DEFAULT_INTERVAL_LENGTH },
+};
+
+/* -- on-disk trace buffer definitions -- */
+struct trace_record {
+    unsigned event:28,      /* event:28 + extra_words:3 + cycle_flag:1 = one 32-bit header word */
+        extra_words:3,      /* number of extra data words following the header */
+        cycle_flag:1;       /* when set, record carries a 64-bit tsc before the data */
+    union {
+        struct {
+            uint32_t tsc_lo, tsc_hi;
+            uint32_t data[7];
+        } tsc;
+        struct {
+            uint32_t data[7];
+        } notsc;
+    } u;
+};
+
+FILE *warn = NULL; /* destination stream for warnings */
+
+/* -- General info about a current record -- */
+struct time_struct {
+    unsigned long long time;
+    unsigned long s, ns;   /* time split into seconds / nanoseconds */
+};
+
+#define DUMP_HEADER_MAX 256
+
+struct record_info {
+    int cpu;               /* physical cpu the record was logged on */
+    tsc_t tsc;             /* 64-bit tsc reassembled from trace_record.u.tsc */
+    union {
+        unsigned event;    /* raw 28-bit event id, or decomposed via evt below */
+        struct {
+            unsigned minor:12,
+                sub:4,
+                main:12,
+                unused:4;
+        } evt;
+    };
+    int extra_words;
+    int size;
+    uint32_t *d;           /* pointer to the record's extra data words */
+    char dump_header[DUMP_HEADER_MAX];
+    struct time_struct t;
+    struct trace_record rec; /* copy of the on-disk record */
+};
+
+/* -- Summary data -- */
+struct cycle_framework {
+    tsc_t first_tsc, last_tsc, total_cycles;
+};
+
+struct event_cycle_summary {
+    int count, cycles_count, interval_count;
+    long long cycles, interval_cycles;
+    long long *cycles_sample; /* sample buffer for percentile reporting */
+};
+
+struct cycle_summary {
+    int count, interval_count;
+    unsigned long long cycles, interval_cycles;
+    long long *sample;
+};
+
+struct weighted_cpi_summary {
+    int count, interval_count;
+    unsigned long long instructions, interval_instructions;
+    unsigned long long cycles, interval_cycles;
+    float *cpi;
+    unsigned long long *cpi_weight;
+};
+
+/* -- Symbol list information -- */
+#define SYMBOL_ENTRIES_PER_STRUCT 1023
+#define SYMBOL_NAME_SIZE 124
+struct symbol_struct {
+    int count;  /* entries used in symbols[]; chunk is full at SYMBOL_ENTRIES_PER_STRUCT */
+    struct {
+        unsigned long long addr;
+        char name[SYMBOL_NAME_SIZE];
+    } symbols[SYMBOL_ENTRIES_PER_STRUCT];
+    struct symbol_struct *next; /* next chunk of the singly-linked list */
+};
+
+
+/* Read a sorted "address name" symbol file (nm-style) into the global
+ * chunked list G.symbols.  Exits on open failure, allocation failure,
+ * or an out-of-order symbol table. */
+void parse_symbol_file(char *fn) {
+    unsigned long long last_addr = 0;
+    FILE * symbol_file;
+    struct symbol_struct ** p=&G.symbols;
+
+    if((symbol_file=fopen(fn, "rb"))==NULL) {
+        fprintf(stderr, "Could not open symbol file %s\n", fn);
+        perror("open");
+        exit(1);
+    }
+    while(!feof(symbol_file)) {
+        /* Allocate a new struct if we need it */
+        if(!*p) {
+            *p = malloc(sizeof(**p));
+            if(!*p) {
+                fprintf(stderr, "Malloc failed!\n");
+                exit(1);
+            }
+            (*p)->count=0;
+            (*p)->next=NULL;
+        }
+
+        /* Width 123 leaves room for the NUL terminator in
+         * name[SYMBOL_NAME_SIZE] (124); the old "%128s" could overflow
+         * the buffer.  Also stop on a short or failed read instead of
+         * counting a garbage entry after the last record. */
+        if(fscanf(symbol_file, "%llx %123s",
+                  &(*p)->symbols[(*p)->count].addr,
+                  (*p)->symbols[(*p)->count].name) != 2)
+            break;
+
+        if( ((*p)->symbols[(*p)->count].addr > 0)
+            && ((*p)->symbols[(*p)->count].addr < last_addr) )  {
+            fprintf(stderr, "Symbol file not properly ordered: %llx %s < %llx!\n",
+                    (*p)->symbols[(*p)->count].addr,
+                    (*p)->symbols[(*p)->count].name,
+                    last_addr);
+            exit(1);
+        } else
+            last_addr = (*p)->symbols[(*p)->count].addr;
+
+        (*p)->count++;
+
+        /* If this struct is full, point to the next.  It will be allocated
+           if needed. */
+        if((*p)->count == SYMBOL_ENTRIES_PER_STRUCT) {
+            p=&((*p)->next);
+        }
+    }
+    fclose(symbol_file); /* was leaked before */
+}
+
+/* WARNING not thread safe */
+/* Translate an address into "(symbol +offset)" form using the sorted
+ * chunked list G.symbols.  Returns "" when no symbols are loaded.
+ * WARNING: result lives in static storage and is overwritten by the
+ * next call -- not thread safe. */
+char * find_symbol(unsigned long long addr) {
+    static char name[128];
+    struct symbol_struct *chunk;
+    char *best_name="ZERO";
+    unsigned long long best_offset=addr;
+    int idx;
+
+    if(!G.symbols) {
+        name[0]=0;
+        return name;
+    }
+
+    /* Entries are sorted, so remember the last symbol at or below
+     * addr and stop at the first one above it. */
+    for(chunk=G.symbols; chunk; chunk=chunk->next) {
+        for(idx=0; idx<chunk->count; idx++) {
+            if(chunk->symbols[idx].addr > addr)
+                goto found;
+            best_name=chunk->symbols[idx].name;
+            best_offset=addr - chunk->symbols[idx].addr;
+        }
+    }
+ found:
+    snprintf(name, sizeof(name), "(%s +%llx)",
+             best_name, best_offset);
+    return name;
+}
+
+/* -- Eip list data -- */
+enum {
+    EIP_LIST_TYPE_NONE=0,
+    EIP_LIST_TYPE_MAX
+};
+
+struct eip_list_struct {
+    struct eip_list_struct *next;   /* singly-linked list of distinct eips */
+    unsigned long long eip;
+    struct event_cycle_summary summary;
+    int type;                       /* index into eip_list_type[] below */
+    void * extra;                   /* per-type payload, managed by the callbacks */
+};
+
+/* Per-type callback table: update an existing entry, initialize a new
+ * one, and dump it.  NULL callbacks are permitted (see TYPE_NONE). */
+struct {
+    void (*update)(struct eip_list_struct *, void *);
+    void (*new)(struct eip_list_struct *, void *);
+    void (*dump)(struct eip_list_struct *);
+} eip_list_type[EIP_LIST_TYPE_MAX] = {
+    [EIP_LIST_TYPE_NONE] = {
+        .update=NULL, 
+        .new=NULL,
+        .dump=NULL },
+};
+
+
+/* --- HVM class of events --- */
+
+/*
+ *  -- Algorithms -- 
+ *
+ * Interrupt Wake-to-halt detection
+ *
+ * Purpose: To correlate device interrupts to vcpu runtime.
+ *
+ * Diagram:
+ *  ...
+ *  blocked  -> runnable     <- set to waking
+ *  ...
+ *  runnable -> running
+ *  inj_virq A               <- Note "waking" interrupt
+ *  vmenter                  <- Start tsc of "wake-to-halt" interval.
+                                Turn off 'waking'.
+ *  ...
+ *  inj_virq B               <- Note alternate interrupt
+ *  vmenter                  <- Start tsc of "interrupt-to-halt" interval
+ *  ...
+ *  vmexit                   <- End tsc of "x-to-halt" interval
+ *  running -> blocked       <- Process
+ *
+ *  The "waking" interrupts we want to sub-classify into
+ *  "wake-only" (when interrupt was the only interrupt from wake to halt) and
+ *  "wake-all"  (whether this was the only interrupt or not). 
+ */
+
+/* VMX data */
+/* VM-exit basic reason numbers; gaps are reasons this tool does not use. */
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+#define EXIT_REASON_VMCLEAR             19
+#define EXIT_REASON_VMLAUNCH            20
+#define EXIT_REASON_VMPTRLD             21
+#define EXIT_REASON_VMPTRST             22
+#define EXIT_REASON_VMREAD              23
+#define EXIT_REASON_VMRESUME            24
+#define EXIT_REASON_VMWRITE             25
+#define EXIT_REASON_VMOFF               26
+#define EXIT_REASON_VMON                27
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION   40
+#define EXIT_REASON_MACHINE_CHECK       41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+
+#define HVM_VMX_EXIT_REASON_MAX (EXIT_REASON_TPR_BELOW_THRESHOLD+1)
+
+/* Printable names indexed by exit reason; unlisted indexes are NULL. */
+char * hvm_vmx_exit_reason_name[HVM_VMX_EXIT_REASON_MAX] = {
+    [0] = "NONE",
+       [EXIT_REASON_EXCEPTION_NMI]="EXCEPTION_NMI",
+       [EXIT_REASON_EXTERNAL_INTERRUPT]="EXTERNAL_INTERRUPT",
+       [EXIT_REASON_PENDING_INTERRUPT]="PENDING_INTERRUPT",
+       [EXIT_REASON_TASK_SWITCH]="TASK_SWITCH",
+       [EXIT_REASON_CPUID]="CPUID",
+       [EXIT_REASON_HLT]="HLT",
+       [EXIT_REASON_INVLPG]="INVLPG",
+       [EXIT_REASON_RDPMC]="RDPMC",
+       [EXIT_REASON_RDTSC]="RDTSC",
+       [EXIT_REASON_VMCALL]="VMCALL",
+       [EXIT_REASON_VMCLEAR]="VMCLEAR",
+       [EXIT_REASON_VMLAUNCH]="VMLAUNCH",
+       [EXIT_REASON_VMPTRLD]="VMPTRLD",
+       [EXIT_REASON_VMPTRST]="VMPTRST",
+       [EXIT_REASON_VMREAD]="VMREAD",
+       [EXIT_REASON_VMRESUME]="VMRESUME",
+       [EXIT_REASON_VMWRITE]="VMWRITE",
+       [EXIT_REASON_VMOFF]="VMOFF",
+       [EXIT_REASON_VMON]="VMON",
+       [EXIT_REASON_CR_ACCESS]="CR_ACCESS",
+       [EXIT_REASON_DR_ACCESS]="DR_ACCESS",
+       [EXIT_REASON_IO_INSTRUCTION]="IO_INSTRUCTION",
+       [EXIT_REASON_MSR_READ]="MSR_READ",
+       [EXIT_REASON_MSR_WRITE]="MSR_WRITE",
+       [EXIT_REASON_MWAIT_INSTRUCTION]="MWAIT_INSTRUCTION",
+       [EXIT_REASON_MONITOR_INSTRUCTION]="MONITOR_INSTRUCTION",
+       [EXIT_REASON_PAUSE_INSTRUCTION]="PAUSE_INSTRUCTION",
+       [EXIT_REASON_MACHINE_CHECK]="MACHINE_CHECK",
+       [EXIT_REASON_TPR_BELOW_THRESHOLD]="TPR_BELOW_THRESHOLD",
+};
+
+/* SVM data */
+/* AMD SVM #VMEXIT exit codes; values must match the hardware encoding,
+ * so the explicit numbering is load-bearing. */
+enum VMEXIT_EXITCODE
+{
+    /* control register read exitcodes */
+    VMEXIT_CR0_READ    =   0,
+    VMEXIT_CR1_READ    =   1,
+    VMEXIT_CR2_READ    =   2,
+    VMEXIT_CR3_READ    =   3,
+    VMEXIT_CR4_READ    =   4,
+    VMEXIT_CR5_READ    =   5,
+    VMEXIT_CR6_READ    =   6,
+    VMEXIT_CR7_READ    =   7,
+    VMEXIT_CR8_READ    =   8,
+    VMEXIT_CR9_READ    =   9,
+    VMEXIT_CR10_READ   =  10,
+    VMEXIT_CR11_READ   =  11,
+    VMEXIT_CR12_READ   =  12,
+    VMEXIT_CR13_READ   =  13,
+    VMEXIT_CR14_READ   =  14,
+    VMEXIT_CR15_READ   =  15,
+
+    /* control register write exitcodes */
+    VMEXIT_CR0_WRITE   =  16,
+    VMEXIT_CR1_WRITE   =  17,
+    VMEXIT_CR2_WRITE   =  18,
+    VMEXIT_CR3_WRITE   =  19,
+    VMEXIT_CR4_WRITE   =  20,
+    VMEXIT_CR5_WRITE   =  21,
+    VMEXIT_CR6_WRITE   =  22,
+    VMEXIT_CR7_WRITE   =  23,
+    VMEXIT_CR8_WRITE   =  24,
+    VMEXIT_CR9_WRITE   =  25,
+    VMEXIT_CR10_WRITE  =  26,
+    VMEXIT_CR11_WRITE  =  27,
+    VMEXIT_CR12_WRITE  =  28,
+    VMEXIT_CR13_WRITE  =  29,
+    VMEXIT_CR14_WRITE  =  30,
+    VMEXIT_CR15_WRITE  =  31,
+
+    /* debug register read exitcodes */
+    VMEXIT_DR0_READ    =  32,
+    VMEXIT_DR1_READ    =  33,
+    VMEXIT_DR2_READ    =  34,
+    VMEXIT_DR3_READ    =  35,
+    VMEXIT_DR4_READ    =  36,
+    VMEXIT_DR5_READ    =  37,
+    VMEXIT_DR6_READ    =  38,
+    VMEXIT_DR7_READ    =  39,
+    VMEXIT_DR8_READ    =  40,
+    VMEXIT_DR9_READ    =  41,
+    VMEXIT_DR10_READ   =  42,
+    VMEXIT_DR11_READ   =  43,
+    VMEXIT_DR12_READ   =  44,
+    VMEXIT_DR13_READ   =  45,
+    VMEXIT_DR14_READ   =  46,
+    VMEXIT_DR15_READ   =  47,
+
+    /* debug register write exitcodes */
+    VMEXIT_DR0_WRITE   =  48,
+    VMEXIT_DR1_WRITE   =  49,
+    VMEXIT_DR2_WRITE   =  50,
+    VMEXIT_DR3_WRITE   =  51,
+    VMEXIT_DR4_WRITE   =  52,
+    VMEXIT_DR5_WRITE   =  53,
+    VMEXIT_DR6_WRITE   =  54,
+    VMEXIT_DR7_WRITE   =  55,
+    VMEXIT_DR8_WRITE   =  56,
+    VMEXIT_DR9_WRITE   =  57,
+    VMEXIT_DR10_WRITE  =  58,
+    VMEXIT_DR11_WRITE  =  59,
+    VMEXIT_DR12_WRITE  =  60,
+    VMEXIT_DR13_WRITE  =  61,
+    VMEXIT_DR14_WRITE  =  62,
+    VMEXIT_DR15_WRITE  =  63,
+
+    /* processor exception exitcodes (VMEXIT_EXCP[0-31]) */
+    VMEXIT_EXCEPTION_DE  =  64, /* divide-by-zero-error */
+    VMEXIT_EXCEPTION_DB  =  65, /* debug */
+    VMEXIT_EXCEPTION_NMI =  66, /* non-maskable-interrupt */
+    VMEXIT_EXCEPTION_BP  =  67, /* breakpoint */
+    VMEXIT_EXCEPTION_OF  =  68, /* overflow */
+    VMEXIT_EXCEPTION_BR  =  69, /* bound-range */
+    VMEXIT_EXCEPTION_UD  =  70, /* invalid-opcode*/
+    VMEXIT_EXCEPTION_NM  =  71, /* device-not-available */
+    VMEXIT_EXCEPTION_DF  =  72, /* double-fault */
+    VMEXIT_EXCEPTION_09  =  73, /* unsupported (reserved) */
+    VMEXIT_EXCEPTION_TS  =  74, /* invalid-tss */
+    VMEXIT_EXCEPTION_NP  =  75, /* segment-not-present */
+    VMEXIT_EXCEPTION_SS  =  76, /* stack */
+    VMEXIT_EXCEPTION_GP  =  77, /* general-protection */
+    VMEXIT_EXCEPTION_PF  =  78, /* page-fault */
+    VMEXIT_EXCEPTION_15  =  79, /* reserved */
+    VMEXIT_EXCEPTION_MF  =  80, /* x87 floating-point exception-pending */
+    VMEXIT_EXCEPTION_AC  =  81, /* alignment-check */
+    VMEXIT_EXCEPTION_MC  =  82, /* machine-check */
+    VMEXIT_EXCEPTION_XF  =  83, /* simd floating-point */
+
+    /* exceptions 20-31 (exitcodes 84-95) are reserved */
+
+    /* ...and the rest of the #VMEXITs */
+    VMEXIT_INTR             =  96,
+    VMEXIT_NMI              =  97,
+    VMEXIT_SMI              =  98,
+    VMEXIT_INIT             =  99,
+    VMEXIT_VINTR            = 100,
+    VMEXIT_CR0_SEL_WRITE    = 101,
+    VMEXIT_IDTR_READ        = 102,
+    VMEXIT_GDTR_READ        = 103,
+    VMEXIT_LDTR_READ        = 104,
+    VMEXIT_TR_READ          = 105,
+    VMEXIT_IDTR_WRITE       = 106,
+    VMEXIT_GDTR_WRITE       = 107,
+    VMEXIT_LDTR_WRITE       = 108,
+    VMEXIT_TR_WRITE         = 109,
+    VMEXIT_RDTSC            = 110,
+    VMEXIT_RDPMC            = 111,
+    VMEXIT_PUSHF            = 112,
+    VMEXIT_POPF             = 113,
+    VMEXIT_CPUID            = 114,
+    VMEXIT_RSM              = 115,
+    VMEXIT_IRET             = 116,
+    VMEXIT_SWINT            = 117,
+    VMEXIT_INVD             = 118,
+    VMEXIT_PAUSE            = 119,
+    VMEXIT_HLT              = 120,
+    VMEXIT_INVLPG           = 121,
+    VMEXIT_INVLPGA          = 122,
+    VMEXIT_IOIO             = 123,
+    VMEXIT_MSR              = 124,
+    VMEXIT_TASK_SWITCH      = 125,
+    VMEXIT_FERR_FREEZE      = 126,
+    VMEXIT_SHUTDOWN         = 127,
+    VMEXIT_VMRUN            = 128,
+    VMEXIT_VMMCALL          = 129,
+    VMEXIT_VMLOAD           = 130,
+    VMEXIT_VMSAVE           = 131,
+    VMEXIT_STGI             = 132,
+    VMEXIT_CLGI             = 133,
+    VMEXIT_SKINIT           = 134,
+    VMEXIT_RDTSCP           = 135,
+    VMEXIT_ICEBP            = 136,
+    VMEXIT_WBINVD           = 137,
+    VMEXIT_MONITOR          = 138,
+    VMEXIT_MWAIT            = 139,
+    VMEXIT_MWAIT_CONDITIONAL= 140,
+    VMEXIT_NPF              = 1024, /* nested paging fault */
+    VMEXIT_INVALID          =  -1
+};
+
+#define HVM_SVM_EXIT_REASON_MAX 1025 /* VMEXIT_NPF (1024) + 1 */
+/* Printable names; positional entries cover exit codes 0-140 in order,
+ * then a designated entry for VMEXIT_NPF.  Indexes in between are NULL. */
+char * hvm_svm_exit_reason_name[HVM_SVM_EXIT_REASON_MAX] = {
+    /* 0-15 */
+    "VMEXIT_CR0_READ",
+    "VMEXIT_CR1_READ",
+    "VMEXIT_CR2_READ",
+    "VMEXIT_CR3_READ",
+    "VMEXIT_CR4_READ",
+    "VMEXIT_CR5_READ",
+    "VMEXIT_CR6_READ",
+    "VMEXIT_CR7_READ",
+    "VMEXIT_CR8_READ",
+    "VMEXIT_CR9_READ",
+    "VMEXIT_CR10_READ",
+    "VMEXIT_CR11_READ",
+    "VMEXIT_CR12_READ",
+    "VMEXIT_CR13_READ",
+    "VMEXIT_CR14_READ",
+    "VMEXIT_CR15_READ",
+    /* 16-31 */
+    "VMEXIT_CR0_WRITE",
+    "VMEXIT_CR1_WRITE",
+    "VMEXIT_CR2_WRITE",
+    "VMEXIT_CR3_WRITE",
+    "VMEXIT_CR4_WRITE",
+    "VMEXIT_CR5_WRITE",
+    "VMEXIT_CR6_WRITE",
+    "VMEXIT_CR7_WRITE",
+    "VMEXIT_CR8_WRITE",
+    "VMEXIT_CR9_WRITE",
+    "VMEXIT_CR10_WRITE",
+    "VMEXIT_CR11_WRITE",
+    "VMEXIT_CR12_WRITE",
+    "VMEXIT_CR13_WRITE",
+    "VMEXIT_CR14_WRITE",
+    "VMEXIT_CR15_WRITE",
+    /* 32-47 */
+    "VMEXIT_DR0_READ",
+    "VMEXIT_DR1_READ",
+    "VMEXIT_DR2_READ",
+    "VMEXIT_DR3_READ",
+    "VMEXIT_DR4_READ",
+    "VMEXIT_DR5_READ",
+    "VMEXIT_DR6_READ",
+    "VMEXIT_DR7_READ",
+    "VMEXIT_DR8_READ",
+    "VMEXIT_DR9_READ",
+    "VMEXIT_DR10_READ",
+    "VMEXIT_DR11_READ",
+    "VMEXIT_DR12_READ",
+    "VMEXIT_DR13_READ",
+    "VMEXIT_DR14_READ",
+    "VMEXIT_DR15_READ",
+    /* 48-63 */
+    "VMEXIT_DR0_WRITE",
+    "VMEXIT_DR1_WRITE",
+    "VMEXIT_DR2_WRITE",
+    "VMEXIT_DR3_WRITE",
+    "VMEXIT_DR4_WRITE",
+    "VMEXIT_DR5_WRITE",
+    "VMEXIT_DR6_WRITE",
+    "VMEXIT_DR7_WRITE",
+    "VMEXIT_DR8_WRITE",
+    "VMEXIT_DR9_WRITE",
+    "VMEXIT_DR10_WRITE",
+    "VMEXIT_DR11_WRITE",
+    "VMEXIT_DR12_WRITE",
+    "VMEXIT_DR13_WRITE",
+    "VMEXIT_DR14_WRITE",
+    "VMEXIT_DR15_WRITE",
+    /* 64-83 */
+    "VMEXIT_EXCEPTION_DE",
+    "VMEXIT_EXCEPTION_DB",
+    "VMEXIT_EXCEPTION_NMI",
+    "VMEXIT_EXCEPTION_BP",
+    "VMEXIT_EXCEPTION_OF",
+    "VMEXIT_EXCEPTION_BR",
+    "VMEXIT_EXCEPTION_UD",
+    "VMEXIT_EXCEPTION_NM",
+    "VMEXIT_EXCEPTION_DF",
+    "VMEXIT_EXCEPTION_09",
+    "VMEXIT_EXCEPTION_TS",
+    "VMEXIT_EXCEPTION_NP",
+    "VMEXIT_EXCEPTION_SS",
+    "VMEXIT_EXCEPTION_GP",
+    "VMEXIT_EXCEPTION_PF",
+    "VMEXIT_EXCEPTION_15",
+    "VMEXIT_EXCEPTION_MF",
+    "VMEXIT_EXCEPTION_AC",
+    "VMEXIT_EXCEPTION_MC",
+    "VMEXIT_EXCEPTION_XF",
+    /* 84-95 */
+    "VMEXIT_EXCEPTION_20",
+    "VMEXIT_EXCEPTION_21",
+    "VMEXIT_EXCEPTION_22",
+    "VMEXIT_EXCEPTION_23",
+    "VMEXIT_EXCEPTION_24",
+    "VMEXIT_EXCEPTION_25",
+    "VMEXIT_EXCEPTION_26",
+    "VMEXIT_EXCEPTION_27",
+    "VMEXIT_EXCEPTION_28",
+    "VMEXIT_EXCEPTION_29",
+    "VMEXIT_EXCEPTION_30",
+    "VMEXIT_EXCEPTION_31",
+    /* 96-99 */
+    "VMEXIT_INTR",
+    "VMEXIT_NMI",
+    "VMEXIT_SMI",
+    "VMEXIT_INIT",
+    /* 100-109 */
+    "VMEXIT_VINTR",
+    "VMEXIT_CR0_SEL_WRITE",
+    "VMEXIT_IDTR_READ",
+    "VMEXIT_GDTR_READ",
+    "VMEXIT_LDTR_READ",
+    "VMEXIT_TR_READ",
+    "VMEXIT_IDTR_WRITE",
+    "VMEXIT_GDTR_WRITE",
+    "VMEXIT_LDTR_WRITE",
+    "VMEXIT_TR_WRITE",
+    /* 110-119 */
+    "VMEXIT_RDTSC",
+    "VMEXIT_RDPMC",
+    "VMEXIT_PUSHF",
+    "VMEXIT_POPF",
+    "VMEXIT_CPUID",
+    "VMEXIT_RSM",
+    "VMEXIT_IRET",
+    "VMEXIT_SWINT",
+    "VMEXIT_INVD",
+    "VMEXIT_PAUSE",
+    /* 120-129 */
+    "VMEXIT_HLT",
+    "VMEXIT_INVLPG",
+    "VMEXIT_INVLPGA",
+    "VMEXIT_IOIO",
+    "VMEXIT_MSR",
+    "VMEXIT_TASK_SWITCH",
+    "VMEXIT_FERR_FREEZE",
+    "VMEXIT_SHUTDOWN",
+    "VMEXIT_VMRUN",
+    "VMEXIT_VMMCALL",
+    /* 130-139 */
+    "VMEXIT_VMLOAD",
+    "VMEXIT_VMSAVE",
+    "VMEXIT_STGI",
+    "VMEXIT_CLGI",
+    "VMEXIT_SKINIT",
+    "VMEXIT_RDTSCP",
+    "VMEXIT_ICEBP",
+    "VMEXIT_WBINVD",
+    "VMEXIT_MONITOR",
+    "VMEXIT_MWAIT",
+    /* 140 */
+    "VMEXIT_MWAIT_CONDITIONAL",
+    [VMEXIT_NPF] = "VMEXIT_NPF", /* nested paging fault */
+};
+
+
+#if ( HVM_VMX_EXIT_REASON_MAX > HVM_SVM_EXIT_REASON_MAX )
+# define HVM_EXIT_REASON_MAX HVM_VMX_EXIT_REASON_MAX
+# error - Strange!
+#else
+# define HVM_EXIT_REASON_MAX HVM_SVM_EXIT_REASON_MAX
+#endif
+
+/* General hvm information */
+#define SPURIOUS_APIC_VECTOR   0xff
+#define ERROR_APIC_VECTOR      0xfe
+#define INVALIDATE_TLB_VECTOR  0xfd
+#define EVENT_CHECK_VECTOR     0xfc
+#define CALL_FUNCTION_VECTOR   0xfb
+#define THERMAL_APIC_VECTOR    0xfa
+#define LOCAL_TIMER_VECTOR     0xf9
+
+#define EXTERNAL_INTERRUPT_MAX 256
+
+/* Printable names for the known external-interrupt vectors; all other
+ * entries are NULL. */
+char * hvm_extint_vector_name[EXTERNAL_INTERRUPT_MAX] = {
+    [SPURIOUS_APIC_VECTOR] = "SPURIOUS_APIC", /* fixed typo: was "SPURIOS_APIC" */
+    [ERROR_APIC_VECTOR] =    "ERROR_APIC",
+    [INVALIDATE_TLB_VECTOR]= "INVALIDATE_TLB",
+    [EVENT_CHECK_VECTOR]=    "EVENT_CHECK",
+    [CALL_FUNCTION_VECTOR]=  "CALL_FUNCTION",
+    [THERMAL_APIC_VECTOR]=   "THERMAL_APIC",
+    [LOCAL_TIMER_VECTOR] =   "LOCAL_TIMER",
+};
+
+#define HVM_TRAP_MAX 20 /* x86 exception vectors 0-19 */
+
+/* Printable names for x86 processor exceptions, indexed by vector. */
+char * hvm_trap_name[HVM_TRAP_MAX] = {
+       [0] =  "Divide",
+       [1] =  "RESERVED",
+       [2] =  "NMI",
+       [3] =  "Breakpoint",
+       [4] =  "Overflow",
+       [5] =  "BOUND",
+       [6] =  "Invalid Op",
+       [7] =  "Coprocessor not present",
+       [8] =  "Double Fault",
+       [9] =  "Coprocessor segment overrun",
+       [10] = "TSS",
+       [11] = "Segment not present",
+       [12] = "Stack-segment fault",
+       [13] = "GP",
+       [14] = "Page fault",
+       [15] = "RESERVED",
+       [16] = "FPU",
+       [17] = "Alignment check",
+       [18] = "Machine check",
+       [19] = "SIMD",
+};
+
+
+/* HVM handler trace minor event ids (starts at 1; 0 is unused). */
+enum {
+    HVM_EVENT_HANDLER_PF_XEN = 1,
+    HVM_EVENT_HANDLER_PF_INJECT,
+    HVM_EVENT_HANDLER_INJ_EXC,
+    HVM_EVENT_HANDLER_INJ_VIRQ,
+    HVM_EVENT_HANDLER_REINJ_VIRQ,
+    HVM_EVENT_HANDLER_IO_READ,
+    HVM_EVENT_HANDLER_IO_WRITE,
+    HVM_EVENT_HANDLER_CR_READ, /* 8 */
+    HVM_EVENT_HANDLER_CR_WRITE,
+    HVM_EVENT_HANDLER_DR_READ,
+    HVM_EVENT_HANDLER_DR_WRITE,
+    HVM_EVENT_HANDLER_MSR_READ,
+    HVM_EVENT_HANDLER_MSR_WRITE,
+    HVM_EVENT_HANDLER_CPUID,
+    HVM_EVENT_HANDLER_INTR,
+    HVM_EVENT_HANDLER_NMI, /* 16 */
+    HVM_EVENT_HANDLER_SMI,
+    HVM_EVENT_HANDLER_VMCALL,
+    HVM_EVENT_HANDLER_HLT,
+    HVM_EVENT_HANDLER_INVLPG,
+    HVM_EVENT_HANDLER_MCE,
+    HVM_EVENT_HANDLER_IO_ASSIST,
+    HVM_EVENT_HANDLER_MMIO_ASSIST,
+    HVM_EVENT_HANDLER_CLTS,
+    HVM_EVENT_HANDLER_LMSW,
+    HVM_EVENT_HANDLER_MAX
+};
+/* Positional names matching the enum above; index 0 is NULL. */
+char * hvm_event_handler_name[HVM_EVENT_HANDLER_MAX] = {
+    NULL,
+    "pf_xen",
+    "pf_inject",
+    "inj_exc",
+    "inj_virq",
+    "reinj_virq", 
+    "io_read",
+    "io_write",
+    "cr_read", /* 8 */
+    "cr_write",
+    "dr_read",
+    "dr_write",
+    "msr_read",
+    "msr_write",
+    "cpuid",
+    "intr",
+    "nmi", /* 16 */
+    "smi",
+    "vmcall",
+    "hlt",
+    "invlpg",
+    "mce",
+    "io_assist",
+    "mmio_assist",
+    "clts",
+    "lmsw",
+};
+
+/* Trace-volume accounting buckets for hvm records. */
+enum {
+    HVM_VOL_VMENTRY,
+    HVM_VOL_VMEXIT,
+    HVM_VOL_HANDLER,
+    HVM_VOL_MAX
+};
+
+/* Classification for the wake-to-halt algorithm (see diagram above). */
+enum {
+    GUEST_INTERRUPT_CASE_NONE,
+    /* This interrupt woke, no other interrupts until halt */
+    GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE,
+    /* This interrupt woke, maybe another interrupt before halt */
+    GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY,
+    /* Time from interrupt (running) to halt */
+    GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT,
+    GUEST_INTERRUPT_CASE_MAX,
+};
+
+/* Fixed-width labels (padded for column alignment); index NONE is NULL. */
+char *guest_interrupt_case_name[] = {
+    [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE]="wake to halt alone",
+    /* This interrupt woke, maybe another interrupt before halt */
+    [GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY]  ="wake to halt any  ",
+    /* Time from interrupt (running) to halt */
+    [GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT] ="intr to halt      ",
+};
+
+char *hvm_vol_name[HVM_VOL_MAX] = {
+    [HVM_VOL_VMENTRY]="vmentry",
+    [HVM_VOL_VMEXIT] ="vmexit",
+    [HVM_VOL_HANDLER]="handler",
+};
+
+/* Hypercall numbers; values must match the hypercall ABI, hence the
+ * explicit =12 after update_descriptor (slot 11 is unused here). */
+enum {
+    HYPERCALL_set_trap_table = 0,
+    HYPERCALL_mmu_update,
+    HYPERCALL_set_gdt,
+    HYPERCALL_stack_switch,
+    HYPERCALL_set_callbacks,
+    HYPERCALL_fpu_taskswitch,
+    HYPERCALL_sched_op_compat,
+    HYPERCALL_platform_op,
+    HYPERCALL_set_debugreg,
+    HYPERCALL_get_debugreg,
+    HYPERCALL_update_descriptor,
+    HYPERCALL_memory_op=12,
+    HYPERCALL_multicall,
+    HYPERCALL_update_va_mapping,
+    HYPERCALL_set_timer_op,
+    HYPERCALL_event_channel_op_compat,
+    HYPERCALL_xen_version,
+    HYPERCALL_console_io,
+    HYPERCALL_physdev_op_compat,
+    HYPERCALL_grant_table_op,
+    HYPERCALL_vm_assist,
+    HYPERCALL_update_va_mapping_otherdomain,
+    HYPERCALL_iret,
+    HYPERCALL_vcpu_op,
+    HYPERCALL_set_segment_base,
+    HYPERCALL_mmuext_op,
+    HYPERCALL_acm_op,
+    HYPERCALL_nmi_op,
+    HYPERCALL_sched_op,
+    HYPERCALL_callback_op,
+    HYPERCALL_xenoprof_op,
+    HYPERCALL_event_channel_op,
+    HYPERCALL_physdev_op,
+    HYPERCALL_hvm_op,
+    HYPERCALL_sysctl,
+    HYPERCALL_domctl,
+    HYPERCALL_kexec_op,
+    HYPERCALL_MAX
+};
+
+/* Printable hypercall names indexed by hypercall number. */
+char *hypercall_name[HYPERCALL_MAX] = {
+    [HYPERCALL_set_trap_table]="set_trap_table",
+    [HYPERCALL_mmu_update]="mmu_update",
+    [HYPERCALL_set_gdt]="set_gdt",
+    [HYPERCALL_stack_switch]="stack_switch",
+    [HYPERCALL_set_callbacks]="set_callbacks",
+    [HYPERCALL_fpu_taskswitch]="fpu_taskswitch",
+    [HYPERCALL_sched_op_compat]="sched_op(compat)",
+    [HYPERCALL_platform_op]="platform_op",
+    [HYPERCALL_set_debugreg]="set_debugreg",
+    [HYPERCALL_get_debugreg]="get_debugreg",
+    [HYPERCALL_update_descriptor]="update_descriptor",
+    [HYPERCALL_memory_op]="memory_op",
+    [HYPERCALL_multicall]="multicall",
+    [HYPERCALL_update_va_mapping]="update_va_mapping",
+    [HYPERCALL_set_timer_op]="set_timer_op",
+    [HYPERCALL_event_channel_op_compat]="evtchn_op(compat)",
+    [HYPERCALL_xen_version]="xen_version",
+    [HYPERCALL_console_io]="console_io",
+    [HYPERCALL_physdev_op_compat]="physdev_op(compat)",
+    [HYPERCALL_grant_table_op]="grant_table_op",
+    [HYPERCALL_vm_assist]="vm_assist",
+    [HYPERCALL_update_va_mapping_otherdomain]="update_va_mapping_otherdomain",
+    [HYPERCALL_iret]="iret",
+    [HYPERCALL_vcpu_op]="vcpu_op",
+    [HYPERCALL_set_segment_base]="set_segment_base",
+    [HYPERCALL_mmuext_op]="mmuext_op",
+    [HYPERCALL_acm_op]="acm_op",
+    [HYPERCALL_nmi_op]="nmi_op",
+    [HYPERCALL_sched_op]="sched_op",
+    [HYPERCALL_callback_op]="callback_op",
+    [HYPERCALL_xenoprof_op]="xenoprof_op",
+    [HYPERCALL_event_channel_op]="evtchn_op",
+    [HYPERCALL_physdev_op]="physdev_op",
+    [HYPERCALL_hvm_op]="hvm_op",
+    [HYPERCALL_sysctl]="sysctl",
+    [HYPERCALL_domctl]="domctl",
+    [HYPERCALL_kexec_op]="kexec_op"
+};
+
+/* Sub-classification of shadow-pagetable emulation page faults. */
+enum {
+    PF_XEN_EMUL_LVL_0,
+    PF_XEN_EMUL_LVL_1,
+    PF_XEN_EMUL_LVL_2,
+    PF_XEN_EMUL_LVL_3,
+    PF_XEN_EMUL_LVL_4,
+    PF_XEN_EMUL_EARLY_UNSHADOW,
+    PF_XEN_EMUL_SET_CHANGED,
+    PF_XEN_EMUL_SET_UNCHANGED,
+    PF_XEN_EMUL_SET_FLUSH,
+    PF_XEN_EMUL_SET_ERROR,
+    PF_XEN_EMUL_PROMOTE,
+    PF_XEN_EMUL_DEMOTE,
+    PF_XEN_EMUL_PREALLOC_UNPIN,
+    PF_XEN_EMUL_PREALLOC_UNHOOK,
+    PF_XEN_EMUL_MAX,
+};
+
+/* Printable names for the PF_XEN_EMUL_* classification above. */
+char * pf_xen_emul_name[PF_XEN_EMUL_MAX] = {
+    [PF_XEN_EMUL_LVL_0]="non-linmap",
+    [PF_XEN_EMUL_LVL_1]="linmap l1",
+    [PF_XEN_EMUL_LVL_2]="linmap l2",
+    [PF_XEN_EMUL_LVL_3]="linmap l3",
+    [PF_XEN_EMUL_LVL_4]="linmap l4",
+    [PF_XEN_EMUL_EARLY_UNSHADOW]="early unshadow",
+    [PF_XEN_EMUL_SET_UNCHANGED]="set unchanged",
+    [PF_XEN_EMUL_SET_CHANGED]="set changed",
+    [PF_XEN_EMUL_SET_FLUSH]="set flush",   /* was "set changed" (copy-paste error) */
+    [PF_XEN_EMUL_SET_ERROR]="set error",   /* was "set changed" (copy-paste error) */
+    [PF_XEN_EMUL_PROMOTE]="promote",
+    [PF_XEN_EMUL_DEMOTE]="demote",
+    [PF_XEN_EMUL_PREALLOC_UNPIN]="unpin",
+    [PF_XEN_EMUL_PREALLOC_UNHOOK]="unhook",
+};
+
+/* Rio only */
+/* Sub-classification of non-emulated faults by address space. */
+enum {
+    PF_XEN_NON_EMUL_VA_USER,
+    PF_XEN_NON_EMUL_VA_KERNEL,
+    PF_XEN_NON_EMUL_EIP_USER,
+    PF_XEN_NON_EMUL_EIP_KERNEL,
+    PF_XEN_NON_EMUL_MAX,
+};
+
+char * pf_xen_non_emul_name[PF_XEN_NON_EMUL_MAX] = {
+    [PF_XEN_NON_EMUL_VA_USER]="va user",
+    [PF_XEN_NON_EMUL_VA_KERNEL]="va kernel",
+    [PF_XEN_NON_EMUL_EIP_USER]="eip user",
+    [PF_XEN_NON_EMUL_EIP_KERNEL]="eip kernel",
+};
+
+/* Sub-classification of shadow "fixup" page faults. */
+enum {
+    PF_XEN_FIXUP_PREALLOC_UNPIN,
+    PF_XEN_FIXUP_PREALLOC_UNHOOK,
+    PF_XEN_FIXUP_UNSYNC,
+    PF_XEN_FIXUP_OOS_ADD,
+    PF_XEN_FIXUP_OOS_EVICT,
+    PF_XEN_FIXUP_PROMOTE,
+    PF_XEN_FIXUP_UPDATE_ONLY,
+    PF_XEN_FIXUP_WRMAP,
+    PF_XEN_FIXUP_BRUTE_FORCE,
+    PF_XEN_FIXUP_MAX,
+};
+
+char * pf_xen_fixup_name[PF_XEN_FIXUP_MAX] = {
+    [PF_XEN_FIXUP_PREALLOC_UNPIN] = "unpin",
+    [PF_XEN_FIXUP_PREALLOC_UNHOOK] = "unhook",
+    [PF_XEN_FIXUP_UNSYNC] = "unsync",
+    [PF_XEN_FIXUP_OOS_ADD] = "oos-add",
+    [PF_XEN_FIXUP_OOS_EVICT] = "oos-evict",
+    [PF_XEN_FIXUP_PROMOTE] = "promote",
+    [PF_XEN_FIXUP_UPDATE_ONLY] = "update",
+    [PF_XEN_FIXUP_WRMAP] = "wrmap",
+    [PF_XEN_FIXUP_BRUTE_FORCE] = "wrmap-bf",
+};
+
+/* Top-level classification of guest page faults (starts at 1). */
+enum {
+    PF_XEN_NOT_SHADOW = 1,
+    PF_XEN_FAST_PROPAGATE,
+    PF_XEN_FAST_MMIO,
+    PF_XEN_FALSE_FAST_PATH,
+    PF_XEN_MMIO,
+    PF_XEN_FIXUP,
+    PF_XEN_DOMF_DYING,
+    PF_XEN_EMULATE,
+    PF_XEN_EMULATE_UNSHADOW_USER,
+    PF_XEN_EMULATE_UNSHADOW_EVTINJ,
+    PF_XEN_EMULATE_UNSHADOW_UNHANDLED,
+    PF_XEN_EMULATE_UNSHADOW_HEURISTIC,
+    PF_XEN_LAST_FAULT=PF_XEN_EMULATE_UNSHADOW_HEURISTIC, /* last "real" fault class */
+    PF_XEN_NON_EMULATE,
+    PF_XEN_NO_HANDLER,
+    PF_XEN_MAX,
+};
+
+#define SHADOW_PREALLOC_UNPIN 13
+#define SHADOW_RESYNC_FULL 24
+#define SHADOW_RESYNC_ONLY 25
+
+/* Printable names for the PF_XEN_* classification above. */
+char * pf_xen_name[PF_XEN_MAX] = {
+    [PF_XEN_NOT_SHADOW]="propagate",
+    [PF_XEN_FAST_PROPAGATE]="fast propagate",
+    [PF_XEN_FAST_MMIO]="fast mmio",
+    [PF_XEN_FALSE_FAST_PATH]="false fast path",
+    [PF_XEN_MMIO]="mmio",
+    [PF_XEN_FIXUP]="fixup",
+    [PF_XEN_DOMF_DYING]="dom dying",
+    [PF_XEN_EMULATE]="emulate",
+    [PF_XEN_EMULATE_UNSHADOW_USER]="unshadow:user-mode",
+    [PF_XEN_EMULATE_UNSHADOW_EVTINJ]="unshadow:evt inj",
+    [PF_XEN_EMULATE_UNSHADOW_UNHANDLED]="unshadow:unhandled instr",
+    [PF_XEN_EMULATE_UNSHADOW_HEURISTIC]="unshadow:heuristic", /* was missing: printed as NULL */
+    [PF_XEN_NON_EMULATE]="fixup|mmio",
+    [PF_XEN_NO_HANDLER]="(no handler)",
+};
+
#define CORR_VA_INVALID (0ULL-1) /* sentinel: no corresponding VA known */

/* Scratch state for one in-flight guest pagefault, accumulated from
 * the successive trace records that describe how Xen handled it. */
struct pf_xen_extra {
    unsigned long long va;       /* faulting virtual address */
    union {
        unsigned flags;          /* raw flag word from the trace record */
        struct {
            /* bit-for-bit decoding of 'flags' above */
            unsigned flag_set_ad:1,
                flag_set_a:1,
                flag_shadow_l1_get_ref:1,
                flag_shadow_l1_put_ref:1,
                flag_l2_propagate:1,
                flag_set_changed:1,
                flag_set_flush:1,
                flag_set_error:1,
                flag_demote:1,
                flag_promote:1,
                flag_wrmap:1,
                flag_wrmap_guess_found:1,
                flag_wrmap_brute_force:1,
                flag_early_unshadow:1,
                flag_emulation_2nd_pt_written:1,
                flag_emulation_last_failed:1,
                flag_emulate_full_pt:1,
                flag_prealloc_unhook:1,
                flag_unsync:1,
                flag_oos_fixup_add:1,
                flag_oos_fixup_evict:1;
        };
    }; /* Miami + ; fixup & emulate */
    unsigned long error_code; /* Rio only */

    /* Calculated */
    int pf_case; /* Rio */

    /* MMIO only */
    unsigned long long gpa;
    unsigned long data;

    /* Emulate only */
    unsigned long long gl1e; /* Miami + */
    unsigned long long wval; /* Miami */
    unsigned long long corresponding_va;
    unsigned int pt_index[5], pt_is_lo;
    int pt_level;

    /* Other */
    unsigned long long gfn;

    /* Flags */
    unsigned corr_valid:1,
        corr_is_kernel:1,
        va_is_kernel:1,
        mmio_data_valid:1,
        mmio_is_write:1;
};

struct pcpu_info;

/* Sizing limits for the per-vcpu summary arrays below */
#define GUEST_INTERRUPT_MAX 350
#define CR_MAX 9
#define RESYNCS_MAX 17
#define PF_XEN_FIXUP_UNSYNC_RESYNC_MAX 2

/* All HVM state for one vcpu: cumulative summary statistics plus the
 * 'inflight' scratch union for the vmexit currently being processed. */
struct hvm_data {
    /* Summary information */
    int init;                /* set once by init_hvm_data() */
    int vmexit_valid;
    int summary_info;
    struct vcpu_data *v; /* up-pointer */

    /* SVM / VMX compatibility. FIXME - should be global */
    char ** exit_reason_name;    /* points at SVM or VMX name table */
    int exit_reason_max;
    void (* exit_reason_summary_handler[HVM_EXIT_REASON_MAX])(struct hvm_data *);

    /* Information about particular exit reasons */
    struct {
        struct event_cycle_summary exit_reason[HVM_EXIT_REASON_MAX];
        int extint[EXTERNAL_INTERRUPT_MAX+1];
        int *extint_histogram;   /* lazily allocated in init_hvm_data() */
        struct event_cycle_summary trap[HVM_TRAP_MAX];
        struct event_cycle_summary pf_xen[PF_XEN_MAX];
        struct event_cycle_summary pf_xen_emul[PF_XEN_EMUL_MAX];
        struct event_cycle_summary pf_xen_emul_early_unshadow[5];
        struct event_cycle_summary pf_xen_non_emul[PF_XEN_NON_EMUL_MAX];
        struct event_cycle_summary pf_xen_fixup[PF_XEN_FIXUP_MAX];
        struct event_cycle_summary pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX+1];
        struct event_cycle_summary cr_write[CR_MAX];
        struct event_cycle_summary cr3_write_resyncs[RESYNCS_MAX+1];
        struct hvm_gi_struct {
            int count;
            struct cycle_summary runtime[GUEST_INTERRUPT_CASE_MAX];
            /* OK, not summary info, but still... */
            int is_wake;
            tsc_t start_tsc;
        } guest_interrupt[GUEST_INTERRUPT_MAX + 1];
        /* IPI Latency */
        struct event_cycle_summary ipi_latency;
        int ipi_count[256];
        struct io_address *mmio, *pio;
    } summary;

    /* In-flight accumulation information: one arm per kind of exit
     * currently being decoded. */
    union {
        struct {
            unsigned port:16,size:16;
            unsigned long val;
        } io;
        struct pf_xen_extra pf_xen;
        struct {
            unsigned cr;
            unsigned long long val;
            int repromote;
        } cr_write;
        struct {
            unsigned addr;
            unsigned long long val;
        } msr;
        struct {
            unsigned long event;
            uint32_t d[4];
        } generic;
    } inflight;
    int resyncs;
    void (*post_process)(struct hvm_data *);
    tsc_t exit_tsc, arc_cycles, entry_tsc;
    unsigned long long rip;
    unsigned exit_reason, event_handler;
    char dump_header[256];
    int short_summary_done:1, prealloc_unpin:1;

    /* Immediate processing */
    void *d;

    /* Wake-to-halt detection.  See comment above. */
    struct {
        unsigned waking:1;
        /* Wake vector: keep track of time from vmentry until:
           next halt, or next interrupt */
        int vector, interrupts, interrupts_wanting_tsc;
    } w2h;
};
+
/* Buckets for the condensed per-cr3 / per-domain HVM time summary. */
enum {
    HVM_SHORT_SUMMARY_EMULATE,
    HVM_SHORT_SUMMARY_UNSYNC,
    HVM_SHORT_SUMMARY_FIXUP,
    HVM_SHORT_SUMMARY_MMIO,
    HVM_SHORT_SUMMARY_PROPAGATE,
    HVM_SHORT_SUMMARY_CR3,
    HVM_SHORT_SUMMARY_VMCALL,
    HVM_SHORT_SUMMARY_INTERRUPT,
    HVM_SHORT_SUMMARY_HLT,
    HVM_SHORT_SUMMARY_OTHER,
    HVM_SHORT_SUMMARY_MAX,
};

/* Human-readable labels for the short-summary buckets above. */
char *hvm_short_summary_name[HVM_SHORT_SUMMARY_MAX] = {
    [HVM_SHORT_SUMMARY_EMULATE]  ="emulate",
    [HVM_SHORT_SUMMARY_UNSYNC]   ="unsync",
    [HVM_SHORT_SUMMARY_FIXUP]    ="fixup",
    [HVM_SHORT_SUMMARY_MMIO]     ="mmio",
    [HVM_SHORT_SUMMARY_PROPAGATE]="propagate",
    [HVM_SHORT_SUMMARY_CR3]      ="cr3",
    [HVM_SHORT_SUMMARY_VMCALL]   ="vmcall",
    [HVM_SHORT_SUMMARY_INTERRUPT]="intr",
    [HVM_SHORT_SUMMARY_HLT]      ="hlt",
    [HVM_SHORT_SUMMARY_OTHER]    ="other",
};

/* One cycle summary per short-summary bucket. */
struct hvm_short_summary_struct {
    struct cycle_summary s[HVM_SHORT_SUMMARY_MAX];
};
+
+void init_hvm_data(struct hvm_data *h, struct vcpu_data *v) {
+    int i;
+
+    if(h->init)
+        return;
+
+    h->v = v;
+
+    h->init = 1;
+
+    if(opt.svm_mode) {
+        h->exit_reason_max = HVM_SVM_EXIT_REASON_MAX;
+        h->exit_reason_name = hvm_svm_exit_reason_name;
+    } else {
+        h->exit_reason_max = HVM_VMX_EXIT_REASON_MAX;
+        h->exit_reason_name = hvm_vmx_exit_reason_name;
+    }
+
+    if(opt.histogram_interrupt_eip) {
+        int count = ((1ULL<<ADDR_SPACE_BITS)/opt.histogram_interrupt_increment);
+        size_t size = count * sizeof(int);
+        h->summary.extint_histogram = malloc(size);
+        if(h->summary.extint_histogram)
+            bzero(h->summary.extint_histogram, size);
+        else {
+            fprintf(stderr, "FATAL: Could not allocate %d bytes for interrupt histogram!\n",
+                    size);
+            exit(1);
+        }
+            
+    }
+    for(i=0; i<GUEST_INTERRUPT_MAX+1; i++)
+        h->summary.guest_interrupt[i].count=0;
+}
+
/* PV data */
/* Classification of PV-guest trace events.  NB: values are sparse --
 * PF_HYPERCALL=1 and PV_TRAP=3 leave gaps for the trace encoding. */
enum {
    PV_HYPERCALL=1,
    PV_TRAP=3,
    PV_PAGE_FAULT,
    PV_FORCED_INVALID_OP,
    PV_EMULATE_PRIVOP,
    PV_EMULATE_4GB,
    PV_MATH_STATE_RESTORE,
    PV_PAGING_FIXUP,
    PV_GDT_LDT_MAPPING_FAULT,
    PV_PTWR_EMULATION,
    PV_PTWR_EMULATION_PAE,
    PV_MAX
};

/* Human-readable labels, one per PV_* event.  Every valid event needs
 * an entry: these strings are printed directly in summaries. */
char *pv_name[PV_MAX] = {
    [PV_HYPERCALL]="hypercall",
    [PV_TRAP]="trap",
    [PV_PAGE_FAULT]="page_fault",
    [PV_FORCED_INVALID_OP]="forced_invalid_op",
    [PV_EMULATE_PRIVOP]="emulate privop",
    [PV_EMULATE_4GB]="emulate 4g",
    [PV_MATH_STATE_RESTORE]="math state restore",
    [PV_PAGING_FIXUP]="paging fixup",
    [PV_GDT_LDT_MAPPING_FAULT]="gdt/ldt mapping fault",
    [PV_PTWR_EMULATION]="writable pt emulation",
    /* Entry was missing: a NULL here could reach a "%s" format. */
    [PV_PTWR_EMULATION_PAE]="writable pt emulation pae"
};
+
/* Sizing for per-vcpu PV counters (hypercall numbers / trap vectors). */
#define PV_HYPERCALL_MAX 56
#define PV_TRAP_MAX 20

/* Per-vcpu PV event counters. */
struct pv_data {
    unsigned summary_info:1;
    int count[PV_MAX];                     /* per PV_* event class */
    int hypercall_count[PV_HYPERCALL_MAX]; /* per hypercall number */
    int trap_count[PV_TRAP_MAX];           /* per trap vector */
};
+
/* Sched data */

/* Scheduler trace minor types.  NOTE(review): presumably these match
 * Xen's sched trace sub-event numbering -- confirm against trace.h. */
enum {
    SCHED_DOM_ADD=1,
    SCHED_DOM_REM,
    SCHED_SLEEP,
    SCHED_WAKE,
    SCHED_YIELD,
    SCHED_BLOCK,
    SCHED_SHUTDOWN,
    SCHED_CTL,
    SCHED_ADJDOM,
    SCHED_SWITCH,
    SCHED_S_TIMER_FN,
    SCHED_T_TIMER_FN,
    SCHED_DOM_TIMER_FN,
    SCHED_SWITCH_INFPREV,
    SCHED_SWITCH_INFNEXT,
    SCHED_SHUTDOWN_CODE,
    SCHED_MAX
};

/* Vcpu runstates tracked by the analyzer; RUNSTATE_LOST / _QUEUED /
 * _INIT are synthetic states used during analysis. */
enum {
    RUNSTATE_RUNNING=0,
    RUNSTATE_RUNNABLE,
    RUNSTATE_BLOCKED,
    RUNSTATE_OFFLINE,
    RUNSTATE_LOST,
    RUNSTATE_QUEUED,
    RUNSTATE_INIT,
    RUNSTATE_MAX
};

/* Human-readable labels for the runstates above. */
char * runstate_name[RUNSTATE_MAX]={
    [RUNSTATE_RUNNING]= "running",
    [RUNSTATE_RUNNABLE]="runnable",
    [RUNSTATE_BLOCKED]= "blocked", /* to be blocked */
    [RUNSTATE_OFFLINE]= "offline",
    [RUNSTATE_QUEUED]=  "queued",
    [RUNSTATE_INIT]=    "init",
    [RUNSTATE_LOST]=    "lost",
};

/* Memory data */
/* Memory-op trace minor types. */
enum {
    MEM_PAGE_GRANT_MAP = 1,
    MEM_PAGE_GRANT_UNMAP,
    MEM_PAGE_GRANT_TRANSFER,
    MEM_MAX
};

/* Human-readable labels for the memory ops above. */
char *mem_name[MEM_MAX] = {
    [MEM_PAGE_GRANT_MAP]      = "grant-map",
    [MEM_PAGE_GRANT_UNMAP]    = "grant-unmap",
    [MEM_PAGE_GRANT_TRANSFER] = "grant-transfer",
};
+
/* Per-unit information. */

/* Everything tracked about one guest cr3 (pagetable base) value:
 * linked both into a per-domain list (next) and a global list (gnext). */
struct cr3_value_struct {
    struct cr3_value_struct * next;   /* per-domain chain */
    struct cr3_value_struct * gnext;  /* global chain */
    unsigned long long gmfn;          /* machine frame of the pagetable */
    int cr3_id;
    unsigned long long first_time, last_time, run_time;
    struct cycle_summary total_time, guest_time, hv_time;
    int switch_count, flush_count;

    struct hvm_short_summary_struct hvm;

    struct {
        int now;
        int count;
    } prealloc_unpin;

    /* Counters snapshotted when the cr3 is destroyed */
    struct {
        unsigned callback:1;
        unsigned flush_count, switch_count;
        unsigned fixup_user, emulate_corr_user;
    } destroy;
};

#define MAX_CPUS 32
typedef uint32_t cpu_mask_t;  /* one bit per pcpu; limits MAX_CPUS to 32 */

/* Synthetic domain ids for the idle domain and the pre-discovery
 * "default" domain. */
#define IDLE_DOMAIN 32767
#define DEFAULT_DOMAIN 32768

/* Per-vcpu IPI latency tracking state. */
struct vlapic_struct {
    int outstanding_ipis;
    tsc_t first_ipi_tsc;
    int ipi_virq_injected;
};

/* Everything tracked about one virtual cpu. */
struct vcpu_data {
    int vid;
    struct domain_data *d; /* up-pointer */
    unsigned activated:1;

    int guest_paging_levels;

    /* Schedule info */
    struct {
        int state;   /* RUNSTATE_* */
        tsc_t tsc;   /* when this state was entered */
        /* TSC skew detection/correction */
        struct last_oldstate_struct {
            int wrong, actual, pid;
            tsc_t tsc;
        } last_oldstate;
        /* Performance counters */
        unsigned long long p1_start, p2_start;
    } runstate;
    struct pcpu_info *p;  /* pcpu currently running this vcpu */
    tsc_t pcpu_tsc;

    /* Hardware tracking */
    struct {
        long long val;
        tsc_t start_time;
        struct cr3_value_struct *data;
    } cr3;

    /* IPI latency tracking */
    struct vlapic_struct vlapic;

    /* Summary info */
    struct cycle_framework f;
    struct cycle_summary runstates[RUNSTATE_MAX];
    struct weighted_cpi_summary cpi;
    struct cycle_summary cpu_affinity;
    /* Discriminator for the hvm/pv union below */
    enum {
        VCPU_DATA_NONE=0,
        VCPU_DATA_HVM,
        VCPU_DATA_PV
    } data_type;
    union {
        struct hvm_data hvm;
        struct pv_data pv;
    };
};

/* Aggregate "how runnable was this domain" states, derived from the
 * runstates of all its vcpus. */
enum {
    DOMAIN_RUNSTATE_BLOCKED=0,
    DOMAIN_RUNSTATE_PARTIAL_RUN,
    DOMAIN_RUNSTATE_FULL_RUN,
    DOMAIN_RUNSTATE_PARTIAL_CONTENTION,
    DOMAIN_RUNSTATE_CONCURRENCY_HAZARD,
    DOMAIN_RUNSTATE_FULL_CONTENTION,
    DOMAIN_RUNSTATE_LOST,
    DOMAIN_RUNSTATE_MAX
};

/* Human-readable labels for the domain runstates above. */
char * domain_runstate_name[] = {
    [DOMAIN_RUNSTATE_BLOCKED]="blocked",
    [DOMAIN_RUNSTATE_PARTIAL_RUN]="partial run",
    [DOMAIN_RUNSTATE_FULL_RUN]="full run",
    [DOMAIN_RUNSTATE_PARTIAL_CONTENTION]="partial contention",
    [DOMAIN_RUNSTATE_CONCURRENCY_HAZARD]="concurrency_hazard",
    [DOMAIN_RUNSTATE_FULL_CONTENTION]="full_contention",
    [DOMAIN_RUNSTATE_LOST]="lost",
};

/* Everything tracked about one domain; kept on the domain_list chain. */
struct domain_data {
    struct domain_data *next;
    int did;
    struct vcpu_data *vcpu[MAX_CPUS];

    int max_vid;  /* highest vcpu id seen */

    int runstate;        /* current DOMAIN_RUNSTATE_* */
    tsc_t runstate_tsc;  /* when it was entered */
    struct cycle_summary total_time;
    struct cycle_summary runstates[DOMAIN_RUNSTATE_MAX];
    struct cr3_value_struct *cr3_value_head;
    struct eip_list_struct *emulate_eip_list;

    int guest_interrupt[GUEST_INTERRUPT_MAX+1];
    struct hvm_short_summary_struct hvm_short;
    /* Grant-op counters: cumulative and per-interval, both as the
     * acting domain (done) and as the target domain (done_for). */
    struct {
        int done[MEM_MAX];
        int done_interval[MEM_MAX];

        int done_for[MEM_MAX];
        int done_for_interval[MEM_MAX];
    } memops;
};

struct domain_data * domain_list=NULL;

/* Catch-all domain used before records identify a real domain. */
struct domain_data default_domain;
+
/* Top-level trace record classes.  TOPLEVEL_MAX=12 leaves room for
 * classes this version does not decode. */
enum {
    TOPLEVEL_GEN=0,
    TOPLEVEL_SCHED,
    TOPLEVEL_DOM0OP,
    TOPLEVEL_HVM,
    TOPLEVEL_MEM,
    TOPLEVEL_PV,
    TOPLEVEL_SHADOW,
    TOPLEVEL_MAX=12
};

/* Labels for the classes above.  NB: sparse -- entries 7..11 are NULL. */
char * toplevel_name[TOPLEVEL_MAX] = {
    [TOPLEVEL_GEN]="gen",
    [TOPLEVEL_SCHED]="sched",
    [TOPLEVEL_DOM0OP]="dom0op",
    [TOPLEVEL_HVM]="hvm",
    [TOPLEVEL_MEM]="mem",
    [TOPLEVEL_PV]="pv",
    [TOPLEVEL_SHADOW]="shadow",
};

/* Trace-volume counters per class (also declares the global 'volume'). */
struct trace_volume {
    unsigned long long toplevel[TOPLEVEL_MAX];
    unsigned long long sched_verbose;
    unsigned long long hvm[HVM_VOL_MAX];
} volume;

/* Add _s to counter _x in both the cumulative and the last-buffer
 * trace_volume of pcpu _p. */
#define UPDATE_VOLUME(_p,_x,_s) \
    do {                        \
        (_p)->volume.total._x += _s;          \
        (_p)->volume.last_buffer._x += _s;    \
    } while(0)
+
+void volume_clear(struct trace_volume *vol)
+{
+    bzero(vol, sizeof(*vol));
+}
+
+void volume_summary(struct trace_volume *vol)
+{
+    int j, k;
+    for(j=0; j<TOPLEVEL_MAX; j++)
+        if(vol->toplevel[j]) {
+            printf(" %-6s: %10lld\n",
+                   toplevel_name[j], vol->toplevel[j]);
+            switch(j) {
+            case TOPLEVEL_SCHED:
+                if(vol->sched_verbose)
+                    printf(" +-verbose: %10lld\n",
+                           vol->sched_verbose);
+                break;
+            case TOPLEVEL_HVM:
+                for(k=0; k<HVM_VOL_MAX; k++) {
+                    if(vol->hvm[k])
+                        printf(" +-%-7s: %10lld\n",
+                               hvm_vol_name[k], vol->hvm[k]);
+                }
+                
+                break;
+            }
+        }
+}
+
/* Per-physical-cpu state: position while scanning the trace file,
 * tsc-skew correction data, the vcpu currently on this pcpu, and
 * record-volume accounting. */
struct pcpu_info {
    /* Information about this pcpu */
    unsigned active:1, summary:1;
    int pid;

    /* Information related to scanning thru the file */
    tsc_t first_tsc, last_tsc, order_tsc;
    loff_t file_offset;
    loff_t next_cpu_change_offset;
    struct record_info ri;   /* current record for this pcpu */
    int last_cpu_change_pid;

    /* Information related to tsc skew detection / correction */
    struct {
        tsc_t offset;
        cpu_mask_t downstream; /* To detect cycles in dependencies */
    } tsc_skew;

    /* Information related to domain tracking */
    struct vcpu_data * current;  /* vcpu now running here */
    struct {
        unsigned active:1,
            domain_valid:1;
        unsigned did:16,vid:16;
        tsc_t tsc;
    } lost_record;

    /* Record volume */
    struct {
        tsc_t buffer_first_tsc,
            buffer_dom0_runstate_tsc,
            buffer_dom0_runstate_cycles[RUNSTATE_MAX];
        int buffer_dom0_runstate;
        unsigned buffer_size;
        struct trace_volume total, last_buffer;
    } volume;
};
+
+void __fill_in_record_info(struct pcpu_info *p);
+
#define INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX 10

/* Global analyzer state "P": per-pcpu scan state, the global cycle
 * framework, the cr3 registry, and interval-output bookkeeping. */
struct {
    int max_active_pcpu;
    loff_t last_epoch_offset;
    int early_eof;
    int lost_cpus;
    tsc_t now;
    struct cycle_framework f;
    tsc_t buffer_trace_virq_tsc;
    struct pcpu_info pcpu[MAX_CPUS];

    /* Global registry of cr3 values seen in the trace */
    struct {
        int id;
        /* Invariant: head null => tail null; head !null => tail valid */
        struct cr3_value_struct *head, **tail;
    } cr3;

    struct {
        tsc_t start_tsc;
        /* Information about specific interval output types */
        union {
            struct {
                struct cr3_value_struct ** array;
            } cr3;
            struct {
                struct domain_data *d;
                int guest_vector[INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX];
            } domain;
        };
    } interval;
} P = { 0 };
+
+char * pcpu_string(int pcpu);
+void process_generic(struct record_info *ri);
+void dump_generic(FILE *f, struct record_info *ri);
+ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset);
+
+void cpumask_init(cpu_mask_t *c) {
+    *c = 0UL;
+}
+
+void cpumask_clear(cpu_mask_t *c, int cpu) {
+    *c &= ~(1UL << cpu);
+}
+
+void cpumask_set(cpu_mask_t *c, int cpu) {
+    *c |= (1UL << cpu);
+}
+
+int cpumask_isset(const cpu_mask_t *c, int cpu) {
+    if(*c & (1UL<<cpu))
+        return 1;
+    else
+        return 0;
+}
+
+void cpumask_union(cpu_mask_t *d, const cpu_mask_t *s) {
+    *d |= *s;
+}
+
/* -- Time code -- */

/* Convert a relative cycle count into a time_struct: total elapsed
 * nanoseconds plus a seconds / nanoseconds split.  Scaling by
 * 1000 / (hz / 1e6) keeps precision in integer arithmetic. */
void cycles_to_time(unsigned long long c, struct time_struct *t) {
    t->time = ((c) * 1000) / (opt.cpu_hz / 1000000 );
    t->s = t->time / 1000000000;
    t->ns = t->time % 1000000000;
}
+
/* Like cycles_to_time(), but for an absolute tsc value: reports time
 * since the trace epoch (P.f.first_tsc).  Timestamps at or before the
 * epoch yield zero. */
void abs_cycles_to_time(unsigned long long ac, struct time_struct *t) {
    if(ac > P.f.first_tsc) {
        t->time = ((ac - P.f.first_tsc) * 1000) / (opt.cpu_hz / 1000000 );
        t->s = t->time / 1000000000;
        t->ns = t->time % 1000000000;
    } else {
        t->time = t->s = t->ns = 0;
    }
}
+
+tsc_t abs_cycles_to_global(unsigned long long ac) {
+    if(ac > P.f.first_tsc)
+        return ac - P.f.first_tsc;
+    else
+        return 0;
+}
+
/* Emit one "seconds.nanoseconds value" line, suitable for plotting y
 * against trace time. */
void scatterplot_vs_time(tsc_t atsc, long long y) {
    struct time_struct t;

    abs_cycles_to_time(atsc, &t);

    /* NOTE(review): %lu assumes time_struct's s/ns fields are
     * unsigned long -- confirm against analyze.h */
    printf("%lu.%09lu %lld\n", t.s, t.ns, y);
}
+
/* -- Summary Code -- */

/* Return the ple'th percentile of the N values in A -- i.e. the
 * element that would sit at index N*ple/100 if A were sorted.
 * Implemented as a quickselect (partial quicksort): A is partially
 * reordered in place, but its contents (as a multiset) are unchanged.
 *
 * With compliments to "Numerical Recipes in C", which provided the
 * algorithm and basic template for this function. */
long long percentile(long long * A, int N, int ple) {
    /* Index of the order statistic we want */
    int target = N * ple / 100;
    /* Current search window [left, right] */
    int left = 0, right = N - 1;

    while (left < right) {
        /* Partition the window around the value currently at target */
        long long pivot = A[target];
        int i = left, j = right;

        do {
            /* Skip values already on the correct (low) side... */
            while (A[i] < pivot)
                i++;
            /* ...and values already on the correct (high) side */
            while (pivot < A[j])
                j--;

            if (i <= j) {
                /* Out-of-order pair: exchange, then move inward */
                long long tmp = A[i];
                A[i] = A[j];
                A[j] = tmp;
                i++;
                j--;
            }
        } while (i <= j); /* until the scan pointers meet or pass */

        /* Keep only the half of the window containing the target */
        if (j < target)
            left = i;
        if (target < i)
            right = j;
    }

    return A[target];
}
+
/* Return the ple'th weighted percentile of the N values in A, where
 * w[I] is the weight of A[I]: the value at which the cumulative weight
 * of smaller elements reaches ple percent of the total weight.  Both A
 * and w are partially reordered in place, kept in step with each other.
 *
 * Quickselect-style: partition the current window [L,R] around a
 * middle element while tracking cumulative weights, then narrow to the
 * side containing the target weight.
 *
 * NOTE(review): the weight accumulators are unsigned and J_weight can
 * wrap below zero during partitioning; certain small inputs may fail
 * to narrow the window -- confirm expected input shapes with callers. */
float weighted_percentile(float * A,              /* values */
                          unsigned long long * w, /* weights */
                          int N,                  /* total */
                          int ple)                /* percentile */
{
    int L, R, I, J, K;
    unsigned long long L_weight, R_weight, I_weight, J_weight,
        K_weight, N_weight;

    float X, t1;
    unsigned long long t2;

    /* Calculate total weight (every weight must be non-zero) */
    N_weight=0;

    for(I=0; I<N; I++) {
        assert(w[I]!=0);
        N_weight += w[I];
    }

    /* Find K_weight, the target weight we want */
    K_weight = N_weight * ple / 100;

    /* Set the left and right boundaries of the current search space */
    L=0;
    L_weight = 0;
    R=N-1;
    R_weight = N_weight - w[R];

    /* Search between L and R, narrowing down until we're done */
    while(L < R) {
        /* Chose an ordering value from right in the middle */
        K = (L + R) >> 1;
        /* X: The value to order everything higher / lower than */
        X=A[K];

        /* Starting at the left and the right... */
        I=L; I_weight = L_weight;
        J=R; J_weight = R_weight;

        do {
            /* Find the first element on the left that is out-of-order w/ X */
            while(A[I]<X) {
                I_weight += w[I];
                I++;
            }
            /* Find the first element on the right that is out-of-order w/ X */
            while(X<A[J]) {
                J_weight -= w[J];
                J--;
            }

            /* If we actually found something... */
            if(I<=J) {
                /* Switch the values (and their weights, in step) */
                t1=A[I];
                A[I]=A[J];
                A[J]=t1;

                t2=w[I];
                w[I]=w[J];
                w[J]=t2;

                /* And move in */
                I_weight += w[I];
                I++;

                J_weight -= w[J];
                J--;
            }
        } while (I <= J); /* Keep going until our pointers meet or pass */

        /* Re-adjust L and R, based on which side holds the target
         * weight.  (A 'progress' flag that was set here but never
         * read has been removed: it trips -Wunused-but-set-variable
         * under the -Werror build.) */
        if(J_weight<K_weight) {
            L=I; L_weight = I_weight;
        }
        if(K_weight<I_weight) {
            R=J; R_weight = J_weight;
        }
    }

    return A[L];
}
+
+long long self_weighted_percentile(long long * A,
+                                   int N,            /* total */
+                                   int ple)          /* percentile */
+{
+    int L, R, I, J, K;
+    long long L_weight, R_weight, I_weight, J_weight,
+        K_weight, N_weight;
+
+    long long X, t1;
+
+    int progress;
+
+    /* Calculate total weight */
+    N_weight=0;
+
+    for(I=0; I<N; I++) {
+        if(A[I] < 0)
+            fprintf(warn, "%s: Value %lld less than zero!\n",
+                    __func__, A[I]);
+        assert(A[I]!=0);
+        N_weight += A[I];
+    }
+
+    /* Find K_weight, the target weight we want */
+    K_weight = N_weight * ple / 100;
+
+    /* Set the left and right boundaries of the current search space */
+    L=0;
+    L_weight = 0;
+    R=N-1;
+    R_weight = N_weight - A[R];
+
+    /* Search between L and R, narrowing down until we're done */
+    while(L < R) {
+        /* Chose an ordering value from right in the middle */
+        K = (L + R) >> 1;
+        /* X: The value to order everything higher / lower than */
+        X=A[K];
+
+        /* Starting at the left and the right... */
+        I=L; I_weight = L_weight;
+        J=R; J_weight = R_weight;
+
+        do {
+            /* Find the first element on the left that is out-of-order w/ X */
+            while(A[I]<X) {
+                I_weight += A[I];
+                I++;
+            }
+            /* Find the first element on the right that is out-of-order w/ X */
+            while(X<A[J]) {
+                J_weight -= A[J];
+                J--;
+            }
+
+            /* If we actually found something... */
+            if(I<=J) {
+                /* Switch the values */
+                t1=A[I];
+                A[I]=A[J];
+                A[J]=t1;
+
+                /* And move in */
+                I_weight += A[I];
+                I++;
+
+                J_weight -= A[J];
+                J--;
+            }
+        } while (I <= J); /* Keep going until our pointers meet or pass */
+
+        progress = 0;
+    
+        /* Re-adjust L and R, based on which element we're looking for */
+        if(J_weight<K_weight) {
+            progress = 1;
+            L=I; L_weight = I_weight;
+        }
+        if(K_weight<I_weight) {
+            progress = 1;
+            R=J; R_weight = J_weight;
+        }
+    }
+
+    return A[L];
+}
+
/* Percentage of 'total' accounted for by 'cycles'. */
static inline double __cycles_percent(long long cycles, long long total) {
    return (double)(cycles*100) / total;
}

/* Percentage of framework f's total cycles spent in summary s. */
static inline double __summary_percent(struct event_cycle_summary *s,
                                       struct cycle_framework *f) {
    return __cycles_percent(s->cycles, f->total_cycles);
}

/* Percentage of the whole trace (global framework P.f) spent in s. */
static inline double summary_percent_global(struct event_cycle_summary *s) {
    return __summary_percent(s, &P.f);
}
+
/* Fold one event occurrence into an event-cycle summary: always bump
 * the event count; when it carried a cycle cost c, accumulate into the
 * cumulative and current-interval totals and (optionally) the sample
 * array.  c == 0 counts the event but contributes no cycles. */
static inline void update_summary(struct event_cycle_summary *s, long long c) {
/* We don't know ahead of time how many samples there are, and working
 * with dynamic stuff is a pain, and unnecessary.  This algorithm will
 * generate a sample set that approximates an even sample.  We can
 * then take the percentiles on this, and get an approximate value. */
    if(c) {
        if(opt.sample_size) {
            /* lap: how many times we've cycled through the sample
             * buffer; overwrite a slot only every lap'th pass, so
             * kept samples spread roughly evenly over the run. */
            int lap = (s->cycles_count/opt.sample_size)+1,
                index =s->cycles_count % opt.sample_size;
            if((index - (lap/3))%lap == 0) {
                if(!s->cycles_sample) {
                    /* Allocated lazily: many summaries never see an event. */
                    s->cycles_sample = malloc(sizeof(*s->cycles_sample) * opt.sample_size);
                    if(!s->cycles_sample) {
                        fprintf(stderr, "%s: malloc failed!\n", __func__);
                        exit(1);
                    }
                }
                s->cycles_sample[index]=c;
            }
        }
        s->cycles_count++;
        s->cycles += c;

        s->interval_count++;
        s->interval_cycles += c;
    }
    s->count++;
}
+
+static inline void clear_interval_summary(struct event_cycle_summary *s) {
+    s->interval_count = 0;
+    s->interval_cycles = 0;
+}
+
/* Fold one cycle measurement into a cycle summary.  c must be
 * non-zero; negative values are accumulated by magnitude into the
 * totals, while the sample array keeps the signed value. */
static inline void update_cycles(struct cycle_summary *s, long long c) {
/* We don't know ahead of time how many samples there are, and working
 * with dynamic stuff is a pain, and unnecessary.  This algorithm will
 * generate a sample set that approximates an even sample.  We can
 * then take the percentiles on this, and get an approximate value. */
    int lap, index;

    assert(c!=0);

    if ( opt.sample_size ) {
        /* Overwrite a sample slot only every lap'th pass so kept
         * samples spread roughly evenly over the whole run. */
        lap = (s->count/opt.sample_size)+1;
        index =s->count % opt.sample_size;

        if((index - (lap/3))%lap == 0) {
            if(!s->sample) {
                /* Allocated lazily: many summaries stay empty. */
                s->sample = malloc(sizeof(*s->sample) * opt.sample_size);
                if(!s->sample) {
                    fprintf(stderr, "%s: malloc failed!\n", __func__);
                    exit(1);
                }
            }
            s->sample[index] = c;
        }
    }

    /* Accumulate the absolute value into cumulative + interval totals */
    if(c > 0) {
        s->cycles += c;
        s->interval_cycles += c;
    } else {
        s->cycles += -c;
        s->interval_cycles += -c;
    }
    s->count++;
    s->interval_count++;
}
+
+static inline void clear_interval_cycles(struct cycle_summary *s) {
+    s->interval_cycles = 0;
+    s->interval_count = 0;
+}
+
/* Fold one (instructions i, cycles c) measurement into a weighted-CPI
 * summary: accumulate cumulative and interval totals, and optionally
 * record a (cpi, weight) sample pair for percentile estimation. */
static inline void update_cpi(struct weighted_cpi_summary *s,
                              unsigned long long i,
                              unsigned long long c) {
/* We don't know ahead of time how many samples there are, and working
 * with dynamic stuff is a pain, and unnecessary.  This algorithm will
 * generate a sample set that approximates an even sample.  We can
 * then take the percentiles on this, and get an approximate value. */
    int lap, index;

    if ( opt.sample_size ) {
        /* Overwrite a sample slot only every lap'th pass so kept
         * samples spread roughly evenly over the whole run. */
        lap = (s->count/opt.sample_size)+1;
        index =s->count % opt.sample_size;

        if((index - (lap/3))%lap == 0) {
            if(!s->cpi) {
                /* cpi and cpi_weight are allocated (and used) in step */
                assert(!s->cpi_weight);

                s->cpi = malloc(sizeof(*s->cpi) * opt.sample_size);
                s->cpi_weight = malloc(sizeof(*s->cpi_weight) * opt.sample_size);
                if(!s->cpi || !s->cpi_weight) {
                    fprintf(stderr, "%s: malloc failed!\n", __func__);
                    exit(1);
                }
            }
            assert(s->cpi_weight);

            /* cycles-per-instruction, weighted by the cycle count */
            s->cpi[index] = (float) c / i;
            s->cpi_weight[index]=c;
        }
    }

    s->instructions += i;
    s->cycles += c;
    s->count++;

    s->interval_instructions += i;
    s->interval_cycles += c;
    s->interval_count++;
}
+
+static inline void clear_interval_cpi(struct weighted_cpi_summary *s) {
+    s->interval_cycles = 0;
+    s->interval_count = 0;
+    s->interval_instructions = 0;
+}
+
+static inline void print_cpu_affinity(struct cycle_summary *s, char *p) {
+    if(s->count) {
+        long long avg;
+        avg = s->cycles / s->count;
+
+        if ( opt.sample_size ) {
+            long long  p5, p50, p95;
+            int data_size = s->count;
+           if(data_size > opt.sample_size)
+                data_size = opt.sample_size;
+
+            p50 = percentile(s->sample, data_size, 50);
+            p5 = percentile(s->sample, data_size, 5);
+            p95 = percentile(s->sample, data_size, 95);
+
+            printf("%s: %7d %6lld {%6lld|%6lld|%6lld}\n",
+                   p, s->count, avg, p5, p50, p95);
+        } else {
+            printf("%s: %7d %6lld\n",
+                   p, s->count, avg);
+        }
+    }
+}
+
+static inline void print_cpi_summary(struct weighted_cpi_summary *s) {
+    if(s->count) {
+        float avg;
+
+        avg = (float)s->cycles / s->instructions;
+
+        if ( opt.sample_size ) {
+            float p5, p50, p95;
+            int data_size = s->count;
+
+            if(data_size > opt.sample_size)
+                data_size = opt.sample_size;
+
+            p50 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 50);
+            p5 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 5);
+            p95 = weighted_percentile(s->cpi, s->cpi_weight, data_size, 95);
+
+            printf("  CPI summary: %2.2f {%2.2f|%2.2f|%2.2f}\n",
+                   avg, p5, p50, p95);
+        } else {
+            printf("  CPI summary: %2.2f\n", avg);
+        }
+    }
+}
+
+static inline void print_cycle_percent_summary(struct cycle_summary *s,
+                                               tsc_t total, char *p) {
+    if(s->count) {
+        long long avg;
+        double percent, seconds;
+
+        avg = s->cycles / s->count;
+
+        seconds = ((double)s->cycles) / opt.cpu_hz;
+
+        percent = ((double)(s->cycles * 100)) / total;
+
+        if ( opt.sample_size ) {
+            long long p5, p50, p95;
+            int data_size = s->count;
+
+            if(data_size > opt.sample_size)
+                data_size = opt.sample_size;
+
+            p50 = self_weighted_percentile(s->sample, data_size, 50);
+            p5 = self_weighted_percentile(s->sample, data_size, 5);
+            p95 = self_weighted_percentile(s->sample, data_size, 95);
+
+            printf("%s: %7d %5.2lfs %5.2lf%% %6lld {%6lld|%6lld|%6lld}\n",
+                   p, s->count,
+                   seconds,
+                   percent,
+                   avg, p5, p50, p95);
+        } else {
+            printf("%s: %7d %5.2lfs %5.2lf%% %6lld\n",
+                   p, s->count, 
+                   seconds,
+                   percent,
+                   avg);
+        }
+    }
+}
+
+static inline void print_cycle_summary(struct cycle_summary *s, char *p) {
+    if(s->count) {
+        long long avg;
+
+        avg = s->cycles / s->count;
+            
+        if ( opt.sample_size ) {
+            long long p5, p50, p95;
+            int data_size = s->count;
+
+            if(data_size > opt.sample_size)
+                data_size = opt.sample_size;
+
+            p50 = self_weighted_percentile(s->sample, data_size, 50);
+            p5 = self_weighted_percentile(s->sample, data_size, 5);
+            p95 = self_weighted_percentile(s->sample, data_size, 95);
+
+            printf("%s: %7d %5.2lfs %6lld {%6lld|%6lld|%6lld}\n",
+                   p, s->count, ((double)s->cycles)/opt.cpu_hz,
+                   avg, p5, p50, p95);
+        } else {
+            printf("%s: %7d %5.2lfs %6lld\n",
+                   p, s->count, ((double)s->cycles)/opt.cpu_hz, avg);
+        }
+    }
+}
+
/* Print a summary line for an event summary struct _s, prefixed by the
 * printf-style arguments in _p.  Columns: event count, total seconds,
 * percent of global cycles (via summary_percent_global()), average
 * cycles per event, and -- when sampling is enabled -- the 5th/50th/95th
 * percentile cycle counts as {p5|p50|p95}.  Prints nothing when
 * (_s).count is zero.
 *
 * NOTE(review): _s is expanded (and evaluated) many times; pass only a
 * side-effect-free lvalue.  The average divides (_s).cycles by
 * (_s).cycles_count, not (_s).count -- presumably cycles_count tracks
 * only events that carried cycle data; confirm against update_summary(). */
#define PRINT_SUMMARY(_s, _p...)                                        \
    do {                                                                \
        if((_s).count) {                                                \
            if ( opt.sample_size ) {                                    \
                unsigned long long p5, p50, p95;                        \
                int data_size=(_s).cycles_count;                        \
                if(data_size > opt.sample_size)                         \
                    data_size=opt.sample_size;                          \
                p50=percentile((_s).cycles_sample, data_size, 50);      \
                p5=percentile((_s).cycles_sample, data_size, 5);        \
                p95=percentile((_s).cycles_sample, data_size, 95);      \
                printf(_p);                                             \
                printf(" %7d %5.2lfs %5.2lf%% %5lld cyc {%5lld|%5lld|%5lld}\n", \
                       (_s).count,                                      \
                       ((double)(_s).cycles)/opt.cpu_hz,                \
                       summary_percent_global(&(_s)),                   \
                       (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0, \
                       p5, p50, p95);                                   \
            } else {                                                    \
                printf(_p);                                             \
                printf(" %7d %5.2lfs %5.2lf%% %5lld cyc\n",             \
                       (_s).count,                                      \
                       ((double)(_s).cycles)/opt.cpu_hz,                \
                       summary_percent_global(&(_s)),                   \
                       (_s).cycles_count ? (_s).cycles / (_s).cycles_count:0); \
            }                                                           \
        }                                                               \
    } while(0)
+
+void __interval_cycle_percent_output(struct cycle_summary *s, tsc_t cycles) {
+    printf(" %.02lf",
+           __cycles_percent(s->interval_cycles, cycles));
+    clear_interval_cycles(s);
+}
+
+void interval_cycle_percent_output(struct cycle_summary *s) {
+    __interval_cycle_percent_output(s, opt.interval.cycles);
+}
+
+void interval_time_output(void) {
+    struct time_struct t;
+    abs_cycles_to_time(P.interval.start_tsc, &t);
+
+    printf("%lu.%09lu", t.s, t.ns);
+}
+
+void interval_cr3_schedule_time_header(void) {
+    if( opt.interval.cr3.count ) {
+        int i;
+
+        printf("time");
+        for(i=0; i<opt.interval.cr3.count; i++) {
+            printf(" %llx", opt.interval.cr3.values[i]);
+        }
+        printf("\n");
+    }
+    /* Can't see into the future, so no header if cr3 values are
+       not specified. */
+}
+
+void interval_cr3_schedule_time_tail(void) {
+    if( !opt.interval.cr3.count ) {
+        struct cr3_value_struct *cr3;
+
+        printf("time");
+        for(cr3=P.cr3.head; cr3; cr3 = cr3->gnext) {
+            printf(" %llx", cr3->gmfn);
+        }
+        printf("\n");
+    }
+}
+
+void interval_cr3_value_check(struct cr3_value_struct *cr3) {
+    if( opt.interval.cr3.count ) {
+        int i;
+
+        for(i=0; i<opt.interval.cr3.count; i++) {
+            if(cr3->gmfn == opt.interval.cr3.values[i]) {
+                if(P.interval.cr3.array[i]) {
+                    fprintf(stderr, "Fatal: duplicate cr3 value %llx!\n",
+                            cr3->gmfn);
+                    exit(1);
+                }
+                fprintf(stderr, "%s: found gmfn %llx\n",
+                        __func__, cr3->gmfn);
+                P.interval.cr3.array[i] = cr3;
+            }
+        }
+    }
+}
+
+void interval_cr3_schedule_time_output(void) {
+    struct cr3_value_struct *cr3;
+    int i;
+
+    interval_time_output();
+
+    if(opt.interval.cr3.count) {
+        for(i=0; i<opt.interval.cr3.count; i++) {
+            cr3 = P.interval.cr3.array[i];
+            if(cr3) {
+                printf(" %.02lf",
+                       __cycles_percent(cr3->total_time.interval_cycles,
+                                        opt.interval.cycles));
+                clear_interval_cycles(&cr3->total_time);
+            } else {
+                printf(" 0.0");
+            }
+        }
+    } else {
+        struct cr3_value_struct *cr3;
+        for(cr3 = P.cr3.head; cr3; cr3 = cr3->gnext) {
+            printf(" %.02lf",
+                   __cycles_percent(cr3->total_time.interval_cycles,
+                                    opt.interval.cycles));
+            clear_interval_cycles(&cr3->total_time);
+        }
+    }
+    printf("\n");
+}
+
+void interval_cr3_schedule_ordered_output(void) {
+    struct cr3_value_struct *p;
+    int i;
+
+    struct cr3_value_struct **qsort_array;
+    int N=0;
+
+    int cr3_time_compare(const void *_a, const void *_b) {
+        struct cr3_value_struct *a=*(typeof(&a))_a;
+        struct cr3_value_struct *b=*(typeof(&a))_b;
+
+        if(a->total_time.interval_cycles < b->total_time.interval_cycles)
+            return 1;
+        else if(b->total_time.interval_cycles == a->total_time.interval_cycles) {
+            if(a->total_time.interval_count < b->total_time.interval_count)
+                return 1;
+            else if(a->total_time.interval_count == b->total_time.interval_count)
+                return 0;
+            else
+                return -1;
+        } else
+            return -1;
+    }
+
+    for(p=P.cr3.head; p; p=p->gnext)
+        N++;
+
+    if(!N)
+        return;
+
+    qsort_array = malloc(N * sizeof(struct eip_list_struct *));
+
+    for(i=0, p=P.cr3.head; p; p=p->gnext, i++)
+        qsort_array[i]=p;
+
+    qsort(qsort_array, N, sizeof(struct eip_list_struct *),
+          cr3_time_compare);
+
+    interval_time_output();
+
+    for(i=0; i<N; i++) {
+        p = qsort_array[i];
+        /* Rounding down means this will get ..1]% */
+        if(p->total_time.interval_cycles > 0) {
+            printf(" %8llx: %.02lf %c\n",
+                   p->gmfn,
+                   __cycles_percent(p->total_time.interval_cycles,
+                                    opt.interval.cycles),
+                   (p->first_time > P.interval.start_tsc)?'*':' ');
+        }
+        clear_interval_cycles(&p->total_time);
+    }
+
+    free(qsort_array);
+}
+
+void interval_cr3_short_summary_header(void) {
+    int i;
+
+    printf("time guest");
+    for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++)
+        printf(" %s", hvm_short_summary_name[i]);
+    printf("\n");
+}
+
+void interval_cr3_short_summary_output(void) {
+    struct cycle_summary *hss_array;
+    int i;
+
+    if(P.interval.cr3.array[0]) {
+        struct cr3_value_struct *p = P.interval.cr3.array[0];
+
+        interval_time_output();
+    
+        hss_array = p->hvm.s;
+
+        printf(" %.02lf",
+               __cycles_percent(p->total_time.interval_cycles,
+                                opt.interval.cycles));
+
+        for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) 
+            __interval_cycle_percent_output(hss_array + i,
+                                            p->total_time.interval_cycles);
+
+        clear_interval_cycles(&p->total_time);
+
+        printf("\n");
+    }
+}
+
+void interval_domain_short_summary_header(void) {
+    int i;
+
+    printf("time running");
+    for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++)
+        printf(" %s", hvm_short_summary_name[i]);
+    printf("\n");
+}
+
+void interval_domain_short_summary_output(void) {
+
+    if(P.interval.domain.d) {
+        struct domain_data *d;
+        tsc_t interval_cycles;
+        int i;
+
+        d=P.interval.domain.d;
+
+        interval_cycles = d->total_time.interval_cycles;
+
+        interval_time_output();
+
+        interval_cycle_percent_output(&d->total_time);
+
+        for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) 
+            __interval_cycle_percent_output(d->hvm_short.s + i,
+                                            interval_cycles);
+
+        printf("\n");
+    }
+}
+
+void interval_domain_guest_interrupt(struct hvm_data *h, int vector) {
+    struct domain_data *d = h->v->d;
+    int i;
+
+    /* Check to see if this vector is in the "print list" */
+    for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) {
+        if(P.interval.domain.guest_vector[i] == 0) {
+            P.interval.domain.guest_vector[i] = vector;
+            break;
+        }
+        if(P.interval.domain.guest_vector[i] == vector)
+            break;
+    }
+
+    if(i == INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX) {
+        fprintf(stderr, "FATAL: used up all %d guest interrupt slots!\n",
+                INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX);
+        exit(1);
+    }
+
+    d->guest_interrupt[vector]++;
+}
+
+void interval_domain_guest_interrupt_tail(void) {
+    int i;
+
+    printf("time running");
+    for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) {
+        if(P.interval.domain.guest_vector[i] == 0)
+            break;
+        printf(" %d", P.interval.domain.guest_vector[i]);
+    }
+    printf("\n");
+}
+
+void interval_domain_guest_interrupt_output(void) {
+
+    if(P.interval.domain.d) {
+        struct domain_data *d;
+        int i;
+
+        d=P.interval.domain.d;
+
+        interval_time_output();
+
+        for(i=0; i<INTERVAL_DOMAIN_GUEST_INTERRUPT_MAX; i++) {
+            int v = P.interval.domain.guest_vector[i];
+
+            if(v == 0)
+                break;
+
+            printf(" %d", d->guest_interrupt[v]);
+
+            d->guest_interrupt[v]=0;
+        }
+
+        printf("\n");
+    }
+        
+}
+
+void interval_domain_grant_maps_output(void) {
+
+    if(P.interval.domain.d) {
+        struct domain_data *d;
+
+        d=P.interval.domain.d;
+
+        interval_time_output();
+
+        printf(" %d", d->memops.done_for_interval[MEM_PAGE_GRANT_MAP]);
+
+        d->memops.done_for_interval[MEM_PAGE_GRANT_MAP] = 0;
+
+        printf("\n");
+    }
+}
+         
+/* General interval gateways */
+
+void interval_callback(void) {
+    switch(opt.interval.output) {
+    case INTERVAL_CR3_SCHEDULE_TIME:
+        interval_cr3_schedule_time_output();
+        break;
+    case INTERVAL_CR3_SCHEDULE_ORDERED:
+        interval_cr3_schedule_ordered_output();
+        break;
+    case INTERVAL_CR3_SHORT_SUMMARY:
+        interval_cr3_short_summary_output();
+        break;
+    case INTERVAL_DOMAIN_SHORT_SUMMARY:
+        interval_domain_short_summary_output();
+        break;
+    case INTERVAL_DOMAIN_GUEST_INTERRUPT:
+        interval_domain_guest_interrupt_output();
+        break;
+    case INTERVAL_DOMAIN_GRANT_MAPS:
+        interval_domain_grant_maps_output();
+        break;
+    }
+}
+
+void interval_header(void) {
+    switch(opt.interval.output) {
+    case INTERVAL_CR3_SCHEDULE_TIME:
+        interval_cr3_schedule_time_header();
+        break;
+    case INTERVAL_CR3_SHORT_SUMMARY:
+        interval_cr3_short_summary_header();
+        break;
+    case INTERVAL_DOMAIN_SHORT_SUMMARY:
+        interval_domain_short_summary_header();
+        break;
+    default:
+        break;
+    }
+}
+
+void interval_tail(void) {
+    switch(opt.interval.output) {
+    case INTERVAL_CR3_SCHEDULE_TIME:
+        interval_cr3_schedule_time_tail();
+        break;
+    case INTERVAL_DOMAIN_GUEST_INTERRUPT:
+        interval_domain_guest_interrupt_tail();
+        break;
+    default:
+        break;
+    }
+}
+
+/* -- Eip list data -- */
+
+void update_eip(struct eip_list_struct **head, unsigned long long eip,
+                unsigned long long cycles, int type, void * extra) {
+    struct eip_list_struct *p, **last=head;
+    
+    for(p=*head; p; last = (&p->next), p=p->next)
+        if(p->eip >= eip)
+            break;
+    
+    if(!p || p->eip != eip) {
+        p=malloc(sizeof(*p));
+        if(!p) {
+            perror("malloc failed");
+            exit(1);
+        }
+
+        bzero(p, sizeof(*p));
+
+        p->eip=eip;
+        p->type = type;
+
+        if(eip_list_type[type].new) {
+            eip_list_type[type].new(p, extra);
+        }
+        p->next = *last;
+        *last=p;
+    } else if(p->type != type) {
+        fprintf(stderr, "WARNING, mixed types! %d %d\n", p->type, type);
+    } else if(eip_list_type[type].update) {
+        eip_list_type[type].update(p, extra);
+    }
+
+    update_summary(&p->summary, cycles);
+}
+
+void dump_eip(struct eip_list_struct *head) {
+    struct eip_list_struct *p;
+    int i;
+
+    struct eip_list_struct **qsort_array;
+    int N=0;
+
+    int eip_compare(const void *_a, const void *_b) {
+        struct eip_list_struct *a=*(typeof(&a))_a;
+        struct eip_list_struct *b=*(typeof(&a))_b;
+
+        if(a->summary.cycles < b->summary.cycles)
+            return 1;
+        else if(b->summary.cycles == a->summary.cycles) {
+            if(a->summary.count < b->summary.count)
+                return 1;
+            else if(a->summary.count == b->summary.count)
+                return 0;
+            else
+                return -1;
+        } else
+            return -1;
+    }
+
+    for(p=head; p; p=p->next)
+        N++;
+
+    if(!N)
+        return;
+
+    qsort_array = malloc(N * sizeof(struct eip_list_struct *));
+
+    for(i=0, p=head; p; p=p->next, i++)
+        qsort_array[i]=p;
+
+    qsort(qsort_array, N, sizeof(struct eip_list_struct *),
+          eip_compare);
+
+    /* WARNING: don't use N after this point unless you copy this variable */
+#if 0
+    if(opt.summary_eip_limit && opt.summary_eip_limit < N)
+        N=opt.summary_eip_limit;
+#endif
+
+    for(i=0; i<N; i++) {
+        p = qsort_array[i];
+        PRINT_SUMMARY(p->summary, "   %8llx%-45s: ",
+                      p->eip,
+                      find_symbol(p->eip));
+        if(eip_list_type[p->type].dump) {
+            eip_list_type[p->type].dump(p);
+        }
+    }
+
+    free(qsort_array);
+}
+
+/* -- HVM code -- */
/* Wire layout of an HVM pagefault-to-Xen trace record: the faulting
 * virtual address plus the hardware error code, in either 64-bit or
 * 32-bit guest form.  Which union member is valid depends on the event
 * type (TRC_HVM_PF_XEN64 vs TRC_HVM_PF_XEN) -- see
 * hvm_pf_xen_preprocess().  The union is anonymous so callers access
 * r->x64 / r->x32 directly. */
struct hvm_pf_xen_record {
    //unsigned vcpu:16, domain:16;
    union {
        struct {
            unsigned long long va;
            unsigned long error_code;
        } x64;
        struct {
            unsigned long va;
            unsigned long error_code;
        } x32;
    };
};
+
+void hvm_update_short_summary(struct hvm_data *h, int element) {
+    struct vcpu_data *v = h->v;
+
+    if(v->cr3.data) 
+        update_cycles(&v->cr3.data->hvm.s[element], h->arc_cycles);
+
+    update_cycles(&v->d->hvm_short.s[element], h->arc_cycles);
+
+    h->short_summary_done=1;
+}
+
+void hvm_short_summary(struct hvm_short_summary_struct *hss,
+                       tsc_t total, char *prefix) {
+    char desc[80];
+    int i;
+
+    for(i=0; i<HVM_SHORT_SUMMARY_MAX; i++) {
+        snprintf(desc, 80, "%s%s", prefix, hvm_short_summary_name[i]);
+        print_cycle_percent_summary(hss->s + i, total, desc);
+    }
+}
+
+void hvm_set_summary_handler(struct hvm_data *h, void (*s)(struct hvm_data *h)) {
+    /* Set summary handler */
+    if(h->exit_reason < h->exit_reason_max)
+    {
+        if(h->exit_reason_summary_handler[h->exit_reason])
+        {
+            if(h->exit_reason_summary_handler[h->exit_reason]!= s)
+                fprintf(warn, "Strange, unexpected summary handler for exit_reason %s (%d!)\n",
+                        h->exit_reason_name[h->exit_reason], h->exit_reason);
+        }
+        else
+        {
+            h->exit_reason_summary_handler[h->exit_reason] = s;
+            fprintf(warn, "Setting exit_reason %s (%d) summary handler\n",
+                    h->exit_reason_name[h->exit_reason], h->exit_reason);
+        }
+    }
+}
+
#define SIGN_EXTENDED_BITS (~((1ULL<<48)-1))
#define HIGH_BIT(_v) ((_v) & (1ULL<<47))
/* A 64-bit virtual address is canonical iff bits 48-63 all replicate
 * bit 47.  Returns 1 for canonical, 0 otherwise. */
static inline int is_valid_addr64(unsigned long long va)
{
    unsigned long long top = va & SIGN_EXTENDED_BITS;

    return HIGH_BIT(va) ? (top == SIGN_EXTENDED_BITS) : (top == 0);
}
+
+void hvm_pf_xen_summary(struct hvm_data *h) {
+    int i,j, k;
+    
+    for(i=0; i<PF_XEN_MAX; i++)
+    {
+        PRINT_SUMMARY(h->summary.pf_xen[i],
+                      "   %-25s ", pf_xen_name[i]);
+        switch(i){
+        case PF_XEN_NON_EMULATE:
+            for(j=0; j<PF_XEN_NON_EMUL_MAX; j++)
+                PRINT_SUMMARY(h->summary.pf_xen_non_emul[j],
+                              "    *%-13s ", pf_xen_non_emul_name[j]);
+            break;
+        case PF_XEN_EMULATE:
+            for(j=0; j<PF_XEN_EMUL_MAX; j++) {
+                PRINT_SUMMARY(h->summary.pf_xen_emul[j],
+                              "    *%-13s ", pf_xen_emul_name[j]);
+                if(j == PF_XEN_EMUL_EARLY_UNSHADOW) {
+                    int k;
+                    for(k=0; k<5; k++) {
+                        PRINT_SUMMARY(h->summary.pf_xen_emul_early_unshadow[k],
+                                      "      +[%d] ", k);
+                    }
+                }
+            }
+            break;
+        case PF_XEN_FIXUP:
+            for(j=0; j<PF_XEN_FIXUP_MAX; j++) {
+                PRINT_SUMMARY(h->summary.pf_xen_fixup[j],
+                              "    *%-13s ", pf_xen_fixup_name[j]);
+                if(j == PF_XEN_FIXUP_UNSYNC ) {
+                    for(k=0; k<PF_XEN_FIXUP_UNSYNC_RESYNC_MAX; k++) {
+                        PRINT_SUMMARY(h->summary.pf_xen_fixup_unsync_resync[k],
+                                      "      +[%3d] ", k);
+                    }
+                    PRINT_SUMMARY(h->summary.pf_xen_fixup_unsync_resync[k],
+                                  "      +[max] ");
+                }
+            }
+            break;
+        }
+    }
+}
+
/* Break a faulting VA down into its pagetable indices for the guest's
 * paging mode, then look for accesses to Windows-style linear
 * (self-mapped) pagetables; when found, record the level of the
 * pagetable being accessed in e->pt_level and reconstruct the VA that
 * the touched pagetable entry maps into e->corresponding_va (low bits
 * are filled with ones, per the "rounding down" convention used by the
 * callers).  guest_paging_levels == 0 means "unknown"; pt_level stays
 * 0 when the access is not a linear-pagetable access. */
void pf_preprocess(struct pf_xen_extra *e, int guest_paging_levels)
{
    switch(guest_paging_levels) {
        /* Select a subfield of _bits bits starting at bit _shift from _x */
#define _SUBFIELD(_bits, _shift, _x)                \
        (((_x)>>(_shift)) & ((1ULL<<(_bits))-1))
    case 4:
        /* Verify sign-extension */
        if((HIGH_BIT(e->va)
            &&((e->va & SIGN_EXTENDED_BITS) != SIGN_EXTENDED_BITS))
           || (!HIGH_BIT(e->va)
               && ((e->va & SIGN_EXTENDED_BITS) != 0))) {
            fprintf(warn, "Strange, va %llx not properly sign extended for 4-level pagetables\n",
                    e->va);
        }
        /* 9-bit indices at bits 39/30/21/12 for levels 4..1. */
        e->pt_index[4]=_SUBFIELD(9,39,e->va);
        e->pt_index[3]=_SUBFIELD(9,30,e->va);
        e->pt_index[2]=_SUBFIELD(9,21,e->va);
        e->pt_index[1]=_SUBFIELD(9,12,e->va);
        /* These are only useful for the linear-pagetable code */
        e->pt_index[0]=_SUBFIELD(9,3,e->va);
        if(e->va & 0x4)
            e->pt_is_lo=0;
        break;
    case 3:
        /* PAE: 2-bit top level at bit 30, 9-bit levels below. */
        e->pt_index[3]=_SUBFIELD(2,30,e->va);
        e->pt_index[2]=_SUBFIELD(9,21,e->va);
        e->pt_index[1]=_SUBFIELD(9,12,e->va);
        /* These are only useful for the linear-pagetable code */
        e->pt_index[0]=_SUBFIELD(9,3,e->va);
        if(e->va & 0x4)
            e->pt_is_lo=0;
        break;
    case 2:
        /* Non-PAE 32-bit: two 10-bit levels, 4-byte entries. */
        e->pt_index[2]=_SUBFIELD(10,22,e->va);
        e->pt_index[1]=_SUBFIELD(10,12,e->va);
        /* This is only useful for the linear pagetable code */
        e->pt_index[0]=_SUBFIELD(10,2,e->va);
        break;
    case 0:
        break;
    default:
        fprintf(warn, "Don't know how to handle %d-level pagetables\n",
                guest_paging_levels);
    }

    e->corresponding_va = CORR_VA_INVALID;
    e->pt_level = 0;
    
    /* Detect accesses to Windows linear pagetables */
    switch(guest_paging_levels)
    {
    case 2:
        /* Self-map slot 768 (768<<22 == 0xC0000000). */
        if(e->pt_index[2] == 768) {
            if(e->pt_index[1] == 768)
            {
                e->pt_level = 2;
                e->corresponding_va=((1UL<<22)-1)
                    | e->pt_index[0]<<22;
            }
            else
            {
                e->pt_level = 1;
                e->corresponding_va = ((1UL<<12)-1)
                    | e->pt_index[1]<<22
                    | e->pt_index[0]<<12;
            }
        } 
        break;
    case 3:
        /* PAE self-map: top slot 3, next index < 4. */
        if(e->pt_index[3]==3 && (e->pt_index[2]>>2==0))
        {
            if(e->pt_index[2]==3 && e->pt_index[1]>>2==0)
            {
                if(e->pt_index[1] == 3 && e->pt_index[0]>>2==0)
                {
                    e->pt_level = 3;
                    e->corresponding_va=((1UL<<30)-1)
                        | e->pt_index[0]<<30;
                }
                else
                {
                    /* NOTE(review): this combines pt_index[1]<<30 and
                     * pt_index[2]<<21, but pt_index[2] is known to be 3
                     * here; the level-1 case below uses pt_index[0]<<12
                     * | [1]<<21 | [2]<<30, which suggests this was
                     * probably meant to be [0]<<21 | [1]<<30 -- verify. */
                    e->pt_level = 2;
                    e->corresponding_va=((1UL<<21)-1)
                        | e->pt_index[1]<<30
                        | e->pt_index[2]<<21;
                }
            }
            else
            {
                e->pt_level = 1;
                e->corresponding_va = ((1UL<<12)-1)
                    | e->pt_index[0]<<12
                    | e->pt_index[1]<<21
                    | e->pt_index[2]<<30;
            }
        } 
        break;
    case 4:
        /* 64-bit self-map slot 0x1ed at each level. */
        if(e->pt_index[4] == 0x1ed)
        {
            if(e->pt_index[3] == 0x1ed)
            {
                if(e->pt_index[2] == 0x1ed)
                {
                    if(e->pt_index[1] == 0x1ed)
                    {
                        e->pt_level = 4;
                        e->corresponding_va = ((1ULL<<39)-1)
                            | (unsigned long long)e->pt_index[0]<<39;
                    }
                    else
                    {
                        e->pt_level = 3;
                        e->corresponding_va = ((1ULL<<30)-1)
                            | (unsigned long long)e->pt_index[0]<<30
                            | (unsigned long long)e->pt_index[1]<<39;
                    }
                }
                else
                { 
                    e->pt_level = 2;
                    e->corresponding_va = ((1ULL<<21)-1)
                        | (unsigned long long)e->pt_index[0]<<21
                        | (unsigned long long)e->pt_index[1]<<30
                        | (unsigned long long)e->pt_index[2]<<39;
                }
            }
            else
            {
                e->pt_level = 1;
                e->corresponding_va = ((1ULL<<12)-1)
                    | (unsigned long long)e->pt_index[0]<<12
                    | (unsigned long long)e->pt_index[1]<<21
                    | (unsigned long long)e->pt_index[2]<<30
                    | (unsigned long long)e->pt_index[3]<<39;
            }
            
            /* Re-canonicalize the reconstructed address. */
            if(HIGH_BIT(e->corresponding_va))
                e->corresponding_va |= SIGN_EXTENDED_BITS;
        }
        break;
    default:
        break;
    }
}
+
+void hvm_pf_xen_preprocess(unsigned event, struct hvm_data *h) {
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+    struct hvm_pf_xen_record *r = (typeof(r))h->d;
+
+    if(event == TRC_HVM_PF_XEN64)
+    {
+        if(!is_valid_addr64(r->x64.va))
+            fprintf(warn, "%s: invalid va %llx",
+                    __func__, r->x64.va);
+        e->va = r->x64.va;
+        e->error_code = r->x64.error_code;
+    }
+    else
+    {
+        e->va = r->x32.va;
+        e->error_code = r->x32.error_code;
+    }
+
+    if(e->mmio_data_valid)
+        e->pf_case = PF_XEN_MMIO;
+    else
+    {
+        pf_preprocess(e, h->v->guest_paging_levels);
+
+        /* On rio traces, we try to infer emulation by looking for accesses
+           in the linear pagetable */
+        if(e->pt_level > 0)
+            e->pf_case = PF_XEN_EMULATE;
+        else
+            e->pf_case = PF_XEN_NON_EMULATE;
+    }
+}
+
/* Classify an address as kernel (1) or user (0): 2- and 3-level guests
 * split at 2GB (bit 31); 4-level guests at the canonical boundary
 * (bit 47).  Unknown paging levels are treated as user. */
static inline int is_kernel(int paging_levels, unsigned long long va) {
    switch(paging_levels) {
    case 2:
    case 3:
        return (va & 0x80000000) ? 1 : 0;
    case 4:
        return HIGH_BIT(va) ? 1 : 0;
    default:
        return 0;
    }
}
+
+void hvm_pf_xen_postprocess(struct hvm_data *h) {
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+    if(opt.summary_info) {
+        update_summary(&h->summary.pf_xen[e->pf_case],
+                       h->arc_cycles);
+        switch(e->pf_case)
+        {
+        case PF_XEN_EMULATE:
+            update_eip(&h->v->d->emulate_eip_list,
+                       h->rip,
+                       h->arc_cycles,
+                       0, NULL);
+            break;
+        case PF_XEN_NON_EMULATE:
+            if(is_kernel(h->v->guest_paging_levels, h->rip))
+                update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_KERNEL],
+                               h->arc_cycles);
+            else
+                update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_EIP_USER],
+                               h->arc_cycles);
+            if(is_kernel(h->v->guest_paging_levels, e->va))
+                update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_KERNEL],
+                               h->arc_cycles);
+
+            else
+                update_summary(&h->summary.pf_xen_non_emul[PF_XEN_NON_EMUL_VA_USER],
+                               h->arc_cycles);
+        }
+
+        /* Set summary handler */
+        hvm_set_summary_handler(h, hvm_pf_xen_summary);
+    }
+
+    if(opt.dump_cooked)
+    {
+        switch(e->pf_case)
+        {
+        case PF_XEN_EMULATE:
+            printf(" %s pf_xen:emulate va %llx ec %lx eip %llx%s lvl %d corr %llx\n",
+                   h->dump_header, e->va, e->error_code,
+                   h->rip, find_symbol(h->rip),
+                   e->pt_level, e->corresponding_va);
+            break;
+        case PF_XEN_MMIO:
+            printf(" %s pf_xen:mmio va %llx ec %lx eip %llx%s data %lx\n",
+                   h->dump_header, e->va, e->error_code,
+                   h->rip, find_symbol(h->rip),
+                   e->data);
+            break;
+        default:
+            printf(" %s pf_xen va %llx ec %lx eip %llx%s\n",
+                   h->dump_header, e->va, e->error_code,
+                   h->rip, find_symbol(h->rip));
+            break;
+        }
+    }
+}
+
+void hvm_pf_xen_process(struct record_info *ri, struct hvm_data *h) {
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+    if(ri->event == TRC_HVM_PF_XEN64
+        && h->v->guest_paging_levels != 4)
+        fprintf(warn, "Strange, PF_XEN64 but guest_paging_levels %d!\n",
+                h->v->guest_paging_levels);
+    else if(ri->event == TRC_HVM_PF_XEN
+            && h->v->guest_paging_levels == 4)
+        fprintf(warn, "Strange, PF_XEN but guest_paging_levels %d!\n",
+                h->v->guest_paging_levels);
+
+    hvm_pf_xen_preprocess(ri->event, h);
+    
+    if(opt.dump_all)
+    {
+        if(e->pf_case == PF_XEN_EMULATE)
+            printf("]%s pf_xen:emulate va %llx ec %lx level %d corr %llx e->pt_index[%d %d %d %d %d]\n",
+                   ri->dump_header, e->va, e->error_code,
+                   e->pt_level, e->corresponding_va,
+                   e->pt_index[0], e->pt_index[1], e->pt_index[2],
+                   e->pt_index[3],
+                   e->pt_index[4]);
+        else
+            printf("]%s pf_xen va %llx ec %lx e->pt_index[%d %d %d %d %d]\n",
+                   ri->dump_header, e->va, e->error_code,
+                   e->pt_index[0], e->pt_index[1], e->pt_index[2],
+                   e->pt_index[3],
+                   e->pt_index[4]);
+    }
+
+    h->post_process = hvm_pf_xen_postprocess;
+}
+
/* Names for the ICR destination-shorthand field (bits 18:19 of the
 * register -- see the bitfield union in hvm_vlapic_icr_handler()):
 * 0 = use destination field, 1 = self, 2 = all including self,
 * 3 = all excluding self. */
char * hvm_vlapic_icr_dest_shorthand_name[4] = {
    "dest_field", "self", "all-inc", "all-exc"
};
+
+void clear_vlapic(struct vlapic_struct *vla) {
+        vla->outstanding_ipis = 0;
+        vla->first_ipi_tsc = 0;
+        vla->ipi_virq_injected = 0;
+}
+
/* Handle a guest access to the local APIC ICR (0xfee00300).  Writes
 * send an IPI: when the write is an all-excluding-self broadcast of
 * vector 0xd1 (NOTE(review): presumably a guest-specific broadcast
 * vector -- confirm), one outstanding IPI is recorded against every
 * other vcpu of the domain, along with the tsc of its first pending
 * IPI.  Reads just report the delivery status.  Requires e->data to
 * have been filled in by the mmio-assist record. */
void hvm_vlapic_icr_handler(struct hvm_data *h)
{
    struct pf_xen_extra *e = &h->inflight.pf_xen;
    /* Low-dword ICR layout: vector in bits 0-7, delivery mode 8-10,
     * destination mode 11, delivery status 12, level 14, trigger 15,
     * destination shorthand 18-19. */
    union {
        unsigned long val;
        struct {
            unsigned vec:8,
                delivery_mode:3,
                dest_mode:1,
                delivery_status:1,
                _res1:1,
                level:1,
                trigger:1,
                _res2:2,
                dest_shorthand:2;
        };
    } icr = { .val = e->data };

    if(e->mmio_is_write) {
        if(opt.dump_all || opt.dump_cooked) {
            printf("              vlapic icr vec %d %s\n",
                   icr.vec,
                   hvm_vlapic_icr_dest_shorthand_name[icr.dest_shorthand]);
        }

        /* shorthand 3 == all-excluding-self broadcast. */
        if(icr.dest_shorthand == 3 && icr.vec == 0xd1)
        {
            struct vcpu_data *ov, *v = h->v;
            struct vlapic_struct *vla;
            struct domain_data *d = v->d;
            int i;

            /* Charge one pending IPI to every other vcpu in the domain. */
            for(i=0; i<MAX_CPUS; i++)
            {
                ov = d->vcpu[i];
                if(!ov || ov == v)
                    continue;

                /* Can't track an IPI to a vcpu whose runstate we lost. */
                if(ov->runstate.state == RUNSTATE_LOST) {
                    if(opt.dump_all)
                        fprintf(warn, "%s: v%d in state RUNSTATE_LOST, not counting ipi\n",
                                __func__, ov->vid);
                    continue;
                }

                vla = &ov->vlapic;

                /* Remember when the first still-pending IPI was sent
                 * (current record's tsc). */
                if(!vla->first_ipi_tsc)
                {
                    struct record_info *ri;
                    ri = &v->p->ri;
                    vla->first_ipi_tsc = ri->tsc;
                }

                vla->outstanding_ipis++;

                if((opt.dump_all || opt.dump_cooked) &&
                   (ov->runstate.state != RUNSTATE_RUNNING))
                    printf(" v%d in state %s (outstanding ipis %d)\n",
                           ov->vid, runstate_name[ov->runstate.state],
                           vla->outstanding_ipis);
            }
        }
    } else {
        /* Read */
        if(opt.dump_all || opt.dump_cooked) {
            printf("              vlapic icr status %s\n",
                   icr.delivery_status?"pending":"idle");
        }
    }

}
+
+void hvm_vlapic_eoi_handler(struct hvm_data *h) {
+    if(opt.dump_all || opt.dump_cooked)
+        printf("              vlapic eoi\n");
+}
+
+void hvm_vlapic_handler(struct hvm_data *h)
+{
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+    switch(e->gpa) {
+    case 0xfee00300:
+        hvm_vlapic_icr_handler(h);
+        break;
+    case 0xfee000b0:
+        hvm_vlapic_eoi_handler(h);
+        break;
+    }
+
+}
+
+
/* Write bit (bit 9) of an mmio-assist event number; the 'write'
 * bitfield in mevt below covers bits 9-10. */
#define HVM_MMIO_ASSIST_WRITE 0x200
/* Decode an MMIO-assist trace record: extract the guest physical
 * address, the data value (when the record is long enough to carry
 * one), and the read/write direction into h->inflight.pf_xen, then
 * hand accesses within the local APIC page to the vlapic decoder. */
void hvm_mmio_assist_process(struct record_info *ri, struct hvm_data *h)
{
    struct pf_xen_extra *e = &h->inflight.pf_xen;

    /* Record payload: gpa (32- or 64-bit) followed, optionally, by the
     * data word.  Which layout applies is encoded in the event bits. */
    union {
        struct {
            unsigned long gpa;
            unsigned long data;
        } x32;
        struct {
            unsigned long long gpa;
            unsigned long data;
        } x64;
    } *r = (typeof(r))h->d;

    /* Split the event number: minor id in bits 0-7, 64-bitness in
     * bit 8, write flag in bits 9-10. */
    union {
        unsigned event;
        struct {
            unsigned minor:8,
                x64:1,
                write:2;
        };
    } mevt = { .event = ri->event };

    if(mevt.x64) {
        e->gpa = r->x64.gpa;
        e->data = r->x64.data;
        /* Data is only present if the record is big enough to hold it. */
        if(ri->extra_words*(sizeof(unsigned long))==sizeof(r->x64))
            e->mmio_data_valid=1;
    } else {
        e->gpa = r->x32.gpa;
        e->data = r->x32.data;
        if(ri->extra_words*(sizeof(unsigned long))==sizeof(r->x32))
            e->mmio_data_valid=1;
    }

    e->mmio_is_write = mevt.write;

    if(opt.dump_all)
    {
        if(e->mmio_data_valid)
            printf("]%s mmio_assist %c gpa %llx data %lx\n", h->dump_header,
                   mevt.write?'w':'r', e->gpa, e->data);
        else
            printf("]%s mmio_assist %c gpa %llx (no data)\n", h->dump_header,
                   mevt.write?'w':'r', e->gpa);
    }

    /* Accesses to the local APIC page (0xfee00xxx) get decoded further. */
    if((e->gpa & 0xfffff000) == 0xfee00000)
        hvm_vlapic_handler(h);
}
+
+void hvm_inj_virq_process(struct record_info *ri, struct hvm_data *h) {
+    struct {
+        int vector, fake;
+    } *r = (typeof(r))h->d;
+
+    if(opt.dump_cooked | opt.dump_all) {
+        printf(" %3lu.%09lu %s inj_virq vec %u  %s\n",
+               ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+               r->vector, r->fake?"fake":"real");
+    }
+    
+    if(opt.summary_info)
+    {
+        int vector = r->vector;
+
+        if(vector >= GUEST_INTERRUPT_MAX)
+            vector = GUEST_INTERRUPT_MAX;
+        h->summary.guest_interrupt[vector].count++;
+
+        if(opt.interval.output == INTERVAL_DOMAIN_GUEST_INTERRUPT)
+            interval_domain_guest_interrupt(h, vector);
+    }
+
+    /* If we're waking, make this the wake vector */
+    if(r->vector < GUEST_INTERRUPT_MAX ) {
+        int vector = r->vector;
+        if ( h->w2h.waking && h->w2h.vector == 0 ) {
+            if(h->summary.guest_interrupt[vector].start_tsc) {
+                fprintf(warn, "Strange, d%dv%d waking && wake_vector 0 but vec %d start_tsc %lld!\n",
+                        h->v->d->did, h->v->vid,
+                        vector,
+                        h->summary.guest_interrupt[vector].start_tsc);
+                exit(1);
+            }
+            if(h->w2h.interrupts)
+                fprintf(warn, "Strange, waking && wake_vector 0 but interrupts_this_wait_to_halt %d!\n",
+                        h->w2h.interrupts);
+
+            if(opt.dump_all)
+                printf(" [w2h] d%dv%d Setting wake_vector %d\n",
+                       h->v->d->did, h->v->vid, vector);
+
+            h->w2h.vector = vector;
+            h->summary.guest_interrupt[vector].is_wake = 1;
+        }
+            
+        if( h->summary.guest_interrupt[vector].start_tsc == 0 ) {
+            /* Note that we want start_tsc set at the next vmentry */
+            h->summary.guest_interrupt[vector].start_tsc = 1;
+            h->w2h.interrupts_wanting_tsc++;
+            h->w2h.interrupts++;
+
+            if(opt.dump_all)
+                printf(" [w2h] d%dv%d Starting vec %d\n",
+                       h->v->d->did, h->v->vid, vector);
+        }
+    }
+
+
+    /* Vista bug */
+    if(r->vector == 0xd1 && h->v->vlapic.outstanding_ipis)
+        h->v->vlapic.ipi_virq_injected = 1;
+}
+
/* I/O Handling */
/* Node in a singly-linked list of I/O addresses, kept sorted by pa.
 * summary[0] accumulates read stats, summary[1] write stats. */
struct io_address {
    struct io_address *next;
    unsigned long pa;
    struct event_cycle_summary summary[2];
};
+
+void update_io_address(struct io_address ** list, unsigned long pa, int dir,
+                       tsc_t arc_cycles) {
+    struct io_address *p, *q=NULL;
+
+    /* Keep list in order */
+    for(p=*list; p && (p->pa != pa) && (p->pa < pa); q=p, p=p->next);
+    
+    /* If we didn't find it, make a new element. */
+    if(!p || (p->pa != pa)) {
+        if((p=malloc(sizeof(*p)))==NULL) {
+            fprintf(stderr, "malloc() failed.\n");
+            exit(1);
+        }
+        
+        bzero(p, sizeof(*p));
+
+        p->pa=pa;
+    
+        /* If we stopped in the middle or at the end, add it in */
+        if(q) {
+            p->next=q->next;
+            q->next=p;
+        } else {
+            /* Otherwise, we stopped after the first element; put it at the beginning */
+            p->next = *list;
+            *list = p;
+        }
+    }
+    update_summary(&p->summary[dir], arc_cycles);
+}
+
+void hvm_io_address_summary(struct io_address *list, char * s) {
+    if(!list)
+        return;
+
+    printf("%s\n", s);
+
+    for(; list; list=list->next) {
+        PRINT_SUMMARY(list->summary[0], "%8lx:[r] ", list->pa);
+        PRINT_SUMMARY(list->summary[1], "%8lx:[w] ", list->pa);
+    }
+}
+
+void hvm_io_write_postprocess(struct hvm_data *h)
+{
+    if(opt.dump_cooked)
+    {
+        printf(" %s io_write port %x size %u val %lx\n",
+               h->dump_header, h->inflight.io.port,
+               h->inflight.io.size,
+               h->inflight.io.val);
+    }
+    if(opt.with_pio_enumeration)
+        update_io_address(&h->summary.pio, h->inflight.io.port, 1, h->arc_cycles);
+}
+
+void hvm_io_write_process(struct hvm_data *h)
+{
+    struct {
+        unsigned long port;
+        int size;
+        unsigned long val;
+    } *r = (typeof(r))h->d;
+
+    unsigned long long mask = ((1ULL<<(r->size<<3))-1);
+
+    h->inflight.io.port = r->port;
+    h->inflight.io.size = r->size;
+    h->inflight.io.val = r->val & mask;
+
+    if(opt.dump_all)
+    {
+        printf(" %s io_write port %lx size %d val %lx\n",
+               h->dump_header,
+               r->port, r->size, r->val);
+    }
+
+    h->post_process = hvm_io_write_postprocess;
+}
+
+void hvm_io_read_postprocess(struct hvm_data *h)
+{
+    if(opt.dump_cooked)
+    {
+        printf(" %s io_read port %x size %u val %lx\n",
+               h->dump_header, h->inflight.io.port,
+               h->inflight.io.size,
+               h->inflight.io.val);
+    }
+    if(opt.with_pio_enumeration)
+        update_io_address(&h->summary.pio, h->inflight.io.port, 0, h->arc_cycles);
+    if(opt.scatterplot_io && h->inflight.io.port == opt.scatterplot_io_port)
+        scatterplot_vs_time(h->exit_tsc, P.now - h->exit_tsc);
+}
+
+void hvm_io_assist_process(struct hvm_data *h)
+{
+    struct {
+        unsigned long val;
+    } *r = (typeof(r))h->d;
+
+    int size = h->inflight.io.size;
+    unsigned long long mask = ((1ULL<<(size<<3))-1);
+
+    h->inflight.io.val = r->val & mask;
+
+    if(opt.dump_all)
+    {
+        printf(" %s io_assist val %lx\n",
+               h->dump_header,
+               r->val);
+    }
+
+
+    if(!h->post_process)
+        fprintf(warn, "Strange, io_assist but post_process not set!\n");
+}
+
+void hvm_io_read_process(struct hvm_data *h)
+{
+    struct {
+        unsigned long port;
+        int size;
+    } *r = (typeof(r))h->d;
+
+    h->inflight.io.port = r->port;
+    h->inflight.io.size = r->size;
+    h->inflight.io.val = 0;
+
+    if(opt.dump_all)
+    {
+        printf(" %s io_read port %lx size %d\n",
+               h->dump_header,
+               r->port, r->size);
+    }
+
+    h->post_process = hvm_io_read_postprocess;
+}
+
+/* cr_write */
+/* CR3 list */
+void cr3_switch(unsigned long long val, struct hvm_data *h) {
+    struct vcpu_data *v = h->v;
+    /* Really only need absolute tsc here.  Later change to global time. */
+    unsigned long long now = P.now;
+    unsigned long long gmfn = val >> 12;
+
+    if(opt.with_cr3_enumeration) {
+        if(v->cr3.data) {
+            struct cr3_value_struct *cur = v->cr3.data;
+            unsigned long long cycles = now - v->cr3.start_time;
+
+            if(opt.summary_info)
+                update_cycles(&cur->total_time, cycles);
+
+            cur->last_time = now;
+        }
+
+        if(gmfn) {
+            struct cr3_value_struct *p, **last=&v->d->cr3_value_head;
+        
+            /* Always add to tail, so that we get consistent interval
+               ouptut as the number of cr3s grow */
+            for(p=*last; p; last = (&p->next), p=p->next)
+                if(p->gmfn == gmfn)
+                    break;
+
+            if(!p) {
+                if((p=malloc(sizeof(*p)))==NULL) {
+                    fprintf(stderr, "malloc() failed.\n");
+                    exit(1);
+                }
+
+                bzero(p, sizeof(*p));
+
+                p->gmfn = gmfn;
+                p->cr3_id = P.cr3.id;
+                p->first_time = now;
+
+                p->next=*last;
+                *last=p;
+
+                p->gnext = NULL;
+                if(P.cr3.head)
+                    *P.cr3.tail = p;
+                else
+                    P.cr3.head = p;
+                P.cr3.tail = &p->gnext;
+                
+                P.cr3.id++;
+
+                /* Add to the interval list if appropriate */
+                if(v->d->did != DEFAULT_DOMAIN)
+                    interval_cr3_value_check(p);
+            }
+
+            if(p->prealloc_unpin.now) {
+                fprintf(warn, "Re-promoting previously unpinned cr3 %llx!\n",
+                        p->gmfn);
+                p->prealloc_unpin.now = 0;
+                h->inflight.cr_write.repromote = 1;
+            }
+
+            /* Accounting for new toplevel */
+            v->cr3.start_time = now;
+            p->switch_count++;
+            if(p->destroy.callback)
+                p->destroy.switch_count++;
+            v->cr3.data = p;
+        } else {
+            v->cr3.data = NULL;
+        }
+
+        if (opt.scatterplot_cr3_switch) {
+            scatterplot_vs_time(h->exit_tsc,
+                                v->cr3.data ? (v->cr3.data->cr3_id) : 0);
+        }
+    } else {
+        if (opt.scatterplot_cr3_switch)
+            scatterplot_vs_time(h->exit_tsc, gmfn);
+    }
+
+    v->cr3.val = val;
+};
+
+void cr3_prealloc_unpin(struct vcpu_data *v, unsigned long long gmfn) {
+    struct cr3_value_struct *cr3;
+
+    /* Look for it in the list */
+    for(cr3 = v->d->cr3_value_head; cr3; cr3=cr3->next) 
+        if(cr3->gmfn == gmfn)
+            break;
+
+    if(!cr3)
+        return;
+
+    if(cr3->prealloc_unpin.now) 
+        fprintf(warn, "Strange, gmfn %llx multiple unpins w/o access!\n",
+                gmfn);
+
+    cr3->prealloc_unpin.now = 1;
+    cr3->prealloc_unpin.count++;
+    
+    if(opt.dump_all)
+        printf(" cr3 %llx unpinned %d times\n",
+               gmfn, cr3->prealloc_unpin.count);
+}
+
+void cr3_dump_list(struct cr3_value_struct *head){
+    struct cr3_value_struct *p;
+    struct cr3_value_struct **qsort_array;
+    int i, N=0;
+
+    int cr3_compare(const void *_a, const void *_b) {
+        struct cr3_value_struct *a=*(typeof(&a))_a;
+        struct cr3_value_struct *b=*(typeof(&a))_b;
+
+        if(a->total_time.cycles < b->total_time.cycles)
+            return 1;
+        else if(b->total_time.cycles == a->total_time.cycles) {
+            if(a->total_time.count < b->total_time.count)
+                return 1;
+            else if(a->total_time.count == b->total_time.count)
+                return 0;
+            else
+                return -1;
+        } else
+            return -1;
+    }
+
+    if(!head)
+        return;
+
+    /* Count the number of elements */
+    for(p=head; p; p=p->next)
+        N++;
+
+    if(!N)
+        return;
+
+    /* Alloc a struct of the right size */
+    qsort_array = malloc(N * sizeof(struct eip_list_struct *));
+
+    /* Point the array into it */
+    for(i=0, p=head; p; p=p->next, i++)
+        qsort_array[i]=p;
+
+    /* Sort the array by time */
+    qsort(qsort_array, N, sizeof(struct eip_list_struct *),
+          cr3_compare);
+
+    /* WARNING: don't use N after this point unless you copy this variable */
+#if 0
+    if(opt.summary_eip_limit && opt.summary_eip_limit < N)
+        N=opt.summary_eip_limit;
+#endif
+
+    /* Now print the results */
+    printf("    cr3 values:\n");
+    for(i=0; i<N; i++) {
+        char desc[30];
+        struct time_struct first, last;
+
+        p = qsort_array[i];
+
+        abs_cycles_to_time(p->first_time, &first);
+        abs_cycles_to_time(p->last_time, &last);
+
+        
+        snprintf(desc, 30, "  %8llx (id %d)", p->gmfn, p->cr3_id);
+        print_cycle_summary(&p->total_time, desc);
+        snprintf(desc, 30, "          guest");
+        print_cycle_percent_summary(&p->guest_time, p->run_time, desc);
+        snprintf(desc, 30, "          hv   ");
+        print_cycle_percent_summary(&p->hv_time, p->run_time, desc);
+
+        hvm_short_summary(&p->hvm, p->run_time, "           + ");
+#if 0
+        printf("            Seen: %4lu.%09lu-%4lu.%09lu switch %d flush %d\n",
+               first.s, first.ns,
+               last.s, last.ns,
+               p->switch_count, p->flush_count);
+        if(p->destroy.callback)
+            printf("          destroy: flush %u switch %u fixup %u emulate %u\n",
+                   p->destroy.flush_count,
+                   p->destroy.switch_count,
+                   p->destroy.fixup_user,
+                   p->destroy.emulate_corr_user);
+#endif
+    }
+
+    free(qsort_array);
+}
+
/* Print cr3-write summary lines broken down by how many resyncs each
 * write triggered, plus an overflow bucket. */
void hvm_cr3_write_summary(struct hvm_data *h) {
    int j;

    for(j=0; j<RESYNCS_MAX; j++)
        PRINT_SUMMARY(h->summary.cr3_write_resyncs[j],
                      "     *[%3d] ", j);
    /* j == RESYNCS_MAX here: the overflow bucket.  Assumes the array
     * has RESYNCS_MAX+1 entries -- TODO confirm at the declaration. */
    PRINT_SUMMARY(h->summary.cr3_write_resyncs[j],
                  "     *[MAX] ");
}
+
+void hvm_cr_write_summary(struct hvm_data *h)
+{
+    int i;
+
+    for(i=0; i<CR_MAX; i++)
+    {
+        PRINT_SUMMARY(h->summary.cr_write[i],
+                      "   cr%d ", i);
+        switch(i) {
+        case 3:
+            hvm_cr3_write_summary(h);
+            break;
+        }
+    }
+}
+
/* Postprocess a cr_write once its vmexit arc closes: classify cr3
 * writes as a "flush" (same value) or a switch, update per-cr and
 * per-resync summaries, run cr3 switch accounting, and register the
 * appropriate summary handler for this exit reason. */
void hvm_cr_write_postprocess(struct hvm_data *h)
{
    if(h->inflight.cr_write.cr == 3) {
        struct vcpu_data *v = h->v;
        unsigned long long new_val = h->inflight.cr_write.val;
        unsigned long long oval;
        int flush=0;

        if(v->cr3.val) {
            oval = v->cr3.val; 

            /* Re-writing the current cr3 is a TLB flush, not a switch */
            if(new_val == oval) {
                if(v->cr3.data) {
                    v->cr3.data->flush_count++;
                    if(v->cr3.data->destroy.callback)
                        v->cr3.data->destroy.flush_count++;
                }
                flush=1;
            }

            if(opt.dump_cooked)
            {
                printf(" %s cr_write cr3 val %llx oval %llx (%d resyncs) %s\n",
                       h->dump_header, 
                       new_val,
                       oval,
                       h->resyncs,
                       flush?"flush":"");
            }
        } else {
            /* No previous cr3 known for this vcpu */
            if(opt.dump_cooked)
            {
                printf(" %s cr_write cr3 val %llx (%d resyncs)\n",
                       h->dump_header, 
                       h->inflight.cr_write.val,
                       h->resyncs);
            }
        }

        if(opt.summary_info) {
            /* Clamp to the overflow bucket; assumes the array has
             * RESYNCS_MAX+1 entries -- TODO confirm at declaration */
            int resyncs = h->resyncs;

            if(resyncs > RESYNCS_MAX)
                resyncs = RESYNCS_MAX;

            update_summary(&h->summary.cr3_write_resyncs[resyncs],
                           h->arc_cycles);

            update_summary(&h->summary.cr_write[3],
                           h->arc_cycles);

            hvm_update_short_summary(h, HVM_SHORT_SUMMARY_CR3);
        }

        /* A flush keeps the same address space; only switch otherwise */
        if(!flush)
            cr3_switch(new_val, h);
    } else {
        if(opt.dump_cooked)
        {
            printf(" %s cr_write cr%d val %llx\n",
                   h->dump_header, 
                   h->inflight.cr_write.cr,
                   h->inflight.cr_write.val);
        }

        if(opt.summary_info)
        {
            if(h->inflight.cr_write.cr < CR_MAX)
                update_summary(&h->summary.cr_write[h->inflight.cr_write.cr],
                               h->arc_cycles);

        }
    }

    /* Set summary handler */
    /* FIXME - deal with cr_read_summary */
    if(h->exit_reason < h->exit_reason_max)
    {
        if ( opt.svm_mode ) {
            /* For svm, only need a summary for cr3 */
            if ( h->exit_reason == VMEXIT_CR3_WRITE )
                hvm_set_summary_handler(h, hvm_cr3_write_summary);
        } else {
            hvm_set_summary_handler(h, hvm_cr3_write_summary);
        }
    }

}
+
+void hvm_cr_write_process(struct hvm_data *h)
+{
+    struct {
+        /* FIXME -- 64-bit values */
+        unsigned cr;
+        unsigned val;
+    } *r = (typeof(r))h->d;
+
+    h->inflight.cr_write.cr = r->cr;
+    h->inflight.cr_write.val = r->val;
+
+    h->post_process = hvm_cr_write_postprocess;
+
+    if(opt.dump_all)
+    {
+        if(r->cr == 3 && h->v->cr3.val) {
+            printf("]%s cr_write cr3 val %llx oval %llx %s\n",
+                   h->dump_header, 
+                   (unsigned long long)r->val,
+                   h->v->cr3.val,
+                   (h->v->cr3.val == r->val)?"flush":"switch");
+        } else {
+            printf(" %s cr_write cr%d val %llx\n",
+                   h->dump_header, 
+                   r->cr, (unsigned long long)r->val);
+
+        }
+    }
+
+}
+
/* msr_write */
/* Summary stub for msr_write: registered so the summary machinery has
 * a handler; no per-msr stats are currently collected. */
void hvm_msr_write_summary(struct hvm_data *h)
{
}
+
/* Dump the cooked msr_write line once the vmexit arc is complete and
 * register the (stub) summary handler.
 * NOTE(review): "%d" is used for msr.addr, which is assigned from an
 * unsigned long in hvm_msr_write_process -- verify the field type. */
void hvm_msr_write_postprocess(struct hvm_data *h)
{
    if(opt.dump_cooked)
    {
        printf(" %s msr_write msr %d val %llx\n",
               h->dump_header, 
               h->inflight.msr.addr,
               h->inflight.msr.val);
    }

    if(opt.summary_info) {
        /* Nothing collected yet; see hvm_msr_write_summary */
    }

    /* Set summary handler */
    hvm_set_summary_handler(h, hvm_msr_write_summary);
}
+
+void hvm_msr_write_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        unsigned long long val;
+        unsigned long addr;
+    } *r = (typeof(r))h->d;
+
+    if(ri->extra_words != (sizeof(*r)/sizeof(unsigned long)))
+    {
+        fprintf(warn, "FATAL: msr_write extra_words %d, expected %d!\n",
+                ri->extra_words, sizeof(*r)/sizeof(unsigned long));
+        exit(1);
+    } 
+
+    h->inflight.msr.addr = r->addr;
+    h->inflight.msr.val = r->val;
+
+    if(opt.dump_all)
+    {
+        printf(" %s msr_write addr %lx val %llx\n",
+               h->dump_header,
+               r->addr, r->val);
+    }
+
+    h->post_process = hvm_msr_write_postprocess;
+}
+
/* msr_read */
/* Summary stub for msr_read: registered so the summary machinery has
 * a handler; no per-msr stats are currently collected. */
void hvm_msr_read_summary(struct hvm_data *h)
{
}
+
/* Dump the cooked msr_read line once the vmexit arc is complete and
 * register the (stub) summary handler.
 * NOTE(review): "%d" is used for msr.addr, which is assigned from an
 * unsigned long in hvm_msr_read_process -- verify the field type. */
void hvm_msr_read_postprocess(struct hvm_data *h)
{
    if(opt.dump_cooked)
    {
        printf(" %s msr_read msr %d val %llx\n",
               h->dump_header, 
               h->inflight.msr.addr,
               h->inflight.msr.val);
    }

    if(opt.summary_info) {
        /* Nothing collected yet; see hvm_msr_read_summary */
    }

    /* Set summary handler */
    hvm_set_summary_handler(h, hvm_msr_read_summary);
}
+
+void hvm_msr_read_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        unsigned long long val;
+        unsigned long addr;
+    } *r = (typeof(r))h->d;
+
+    if(ri->extra_words != (sizeof(*r)/sizeof(unsigned long)))
+    {
+        fprintf(warn, "FATAL: msr_read extra_words %d, expected %d!\n",
+                ri->extra_words, sizeof(*r)/sizeof(unsigned long));
+        exit(1);
+    } 
+
+    h->inflight.msr.addr = r->addr;
+    h->inflight.msr.val = r->val;
+
+    if(opt.dump_all)
+    {
+        printf(" %s msr_read addr %lx val %llx\n",
+               h->dump_header,
+               r->addr, r->val);
+    }
+
+    h->post_process = hvm_msr_read_postprocess;
+}
+
+void hvm_vmcall_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        unsigned long eax;
+    } *r = (typeof(r))h->d;
+
+    if(opt.dump_all) {
+        if(r->eax < HYPERCALL_MAX)
+            printf(" %s vmcall %2lx (%s)\n",
+                   h->dump_header,
+                   r->eax,
+                   hypercall_name[r->eax]);
+        else
+            printf(" %s vmcall %2lx\n",
+                   h->dump_header,
+                   r->eax);
+    }
+
+#if 0
+    if(opt.summary)
+        hvm_set_summary_handler(h, hvm_vmcall_summary);
+#endif
+}
+
+void hvm_inj_exc_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        unsigned vec, ec;
+    } *r = (typeof(r))h->d;
+
+    if ( opt.dump_cooked || opt.dump_all )
+    {
+        if(r->vec < HVM_TRAP_MAX)
+            printf(" %3lu.%09lu %s inj_exc trap %s ec %x\n",
+                   ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+                   hvm_trap_name[r->vec], r->ec);
+        else
+            printf(" %3lu.%09lu %s inj_exc trap %u ec %x\n",
+                   ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+                   r->vec, r->ec);
+    }
+    
+}
+
+void hvm_intr_summary(struct hvm_data *h)
+{
+    int i;
+
+    for(i=0; i<EXTERNAL_INTERRUPT_MAX; i++)
+        if(h->summary.extint[i])
+        {
+            if(hvm_extint_vector_name[i])
+                printf("  %10s(%3d): %d\n",
+                       hvm_extint_vector_name[i],
+                       i,
+                       h->summary.extint[i]);
+            else
+                printf("            [%3d]: %d\n",
+                       i,
+                       h->summary.extint[i]);
+        }
+    if(h->summary.extint[EXTERNAL_INTERRUPT_MAX])
+        printf("  Other:         : %d\n",
+               h->summary.extint[EXTERNAL_INTERRUPT_MAX]);
+}
+
+
/* Handle an external-interrupt vmexit record: sanity-check the guest
 * rip, dump, and update scatterplot / histogram / summary stats. */
void hvm_intr_process(struct hvm_data *h)
{
    unsigned vec = *(unsigned *)h->d;

    /* rip must be canonical: bits above ADDR_SPACE_BITS all zero or
     * all one.  ("00" is simply octal zero.) */
    if( (h->rip >> ADDR_SPACE_BITS) != 00
        && (h->rip >> ADDR_SPACE_BITS) != ((0ULL-1)>> ADDR_SPACE_BITS) ) {
        fprintf(stderr, "Unexpected rip %llx (shift %llx)\n",
                h->rip,
                h->rip >> ADDR_SPACE_BITS);
        exit(1);
    }

    if ( opt.dump_cooked || opt.dump_all )
    {
        if ( vec < EXTERNAL_INTERRUPT_MAX &&
             hvm_extint_vector_name[vec] )
            printf(" %s intr vec %s(%x)\n",
                   h->dump_header,
                   hvm_extint_vector_name[vec],
                   vec);
        else
            printf(" %s intr vec %x\n",
                   h->dump_header, vec);
    }

    if(opt.scatterplot_interrupt_eip
       && vec == opt.scatterplot_interrupt_vector)
    {
        /* Truncate to the canonical ADDR_SPACE_BITS (48) bits */
        unsigned long long rip = h->rip & ((1ULL << ADDR_SPACE_BITS)-1);

        /* Want absolute tsc to global tsc */
        scatterplot_vs_time(h->exit_tsc, rip);
    }

    if(opt.histogram_interrupt_eip
       && vec == opt.histogram_interrupt_vector)
    {
        /* Truncate to the canonical ADDR_SPACE_BITS (48) bits */
        unsigned long long rip = h->rip & ((1ULL << ADDR_SPACE_BITS)-1);
        unsigned index = rip / opt.histogram_interrupt_increment;
        
        /* NOTE(review): index is not bounds-checked here -- confirm
         * the increment guarantees it fits extint_histogram[] */
        h->summary.extint_histogram[index]++;
    }

    if(opt.summary_info) {
        if(opt.summary)
            hvm_set_summary_handler(h, hvm_intr_summary);

        /* Out-of-range vectors share the final catch-all bucket */
        if(vec < EXTERNAL_INTERRUPT_MAX)
            h->summary.extint[vec]++;
        else
            h->summary.extint[EXTERNAL_INTERRUPT_MAX]++;
    }
}
+
+void hvm_pf_inject_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        unsigned cr2, ec;
+    } *r = (typeof(r))h->d;
+
+    if ( opt.dump_cooked || opt.dump_all )
+    {
+            printf(" %3lu.%09lu %s pf_inject guest_cr2 %x  guest_ec %x\n",
+                   ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
+                   r->cr2, r->ec);
+    }
+}
+
+void hvm_exception_nmi_generic_postprocess(struct hvm_data *h)
+{
+    static int warned = 0;
+    if(opt.dump_cooked)
+    {
+        printf(" %s exnmi no_handler\n",
+               h->dump_header);
+    }
+
+    if(opt.summary_info)
+        update_summary(&h->summary.pf_xen[PF_XEN_NO_HANDLER],
+                       h->arc_cycles);
+
+    if(!warned) {
+        warned++;
+        fprintf(warn, "Strange, no handler for exnmi!\n");
+    }
+}
+
+void hvm_generic_postprocess(struct hvm_data *h)
+{
+    if(opt.summary_info) {
+        /* HLT checked at hvm_vmexit_close() */
+    }
+
+    if(opt.dump_cooked)
+    {
+        int i, evt = h->event_handler;
+
+        if(evt < HVM_EVENT_HANDLER_MAX)
+            printf(" %s %s [",
+                   h->dump_header,
+                   hvm_event_handler_name[evt]);
+        else
+            printf(" %s %d [",
+                   h->dump_header,
+                   evt);
+        
+        for(i=0; i<4; i++) {
+            printf(" %x", h->inflight.generic.d[i]);
+        }
+
+        printf(" ]\n");
+    }
+}
+
+void hvm_generic_dump(struct record_info *ri, char * prefix)
+{
+    struct {
+        unsigned vcpu:16, domain:16;
+        unsigned d[4];
+    } *cr = (typeof(cr))ri->d;
+
+    char *evt_string, evt_number[256];
+    unsigned *d;
+    int i, evt = ri->event - TRC_HVM_HANDLER;
+
+    if(evt < HVM_EVENT_HANDLER_MAX)
+    {
+        evt_string = hvm_event_handler_name[evt];
+    }
+    else
+    {
+        snprintf(evt_number, 256, "hvm_handler %d", evt);
+        evt_string = evt_number;
+    }
+
+    d = ri->d;
+    printf("%s%s %s [",
+           prefix,
+           ri->dump_header,
+           evt_string);
+        
+    for(i=0; i<4; i++) 
+    {
+        printf(" %x", d[i]);
+    }
+
+    printf(" ]\n");
+}
+
/* Dispatch an hvm handler trace record.  Records that describe guest
 * state changes (injections, destroy) are handled immediately; records
 * that annotate a vmexit are only processed when a valid vmexit is
 * open, and stash a postprocessor to run when the arc closes. */
void hvm_handler_process(struct record_info *ri, struct hvm_data *h) {
    /* Wait for first vmexit to initialize */
    if(!h->init)
    {
        if(opt.dump_all)
            hvm_generic_dump(ri,"!");
        return;
    }

    h->d = ri->d;

    /* Handle things that don't need a vmexit */
    switch(ri->event) {
    default:
        goto needs_vmexit;
        /* Records about changing guest state */
    case TRC_HVM_PF_INJECT:
        hvm_pf_inject_process(ri, h);
        break;
    case TRC_HVM_REINJ_VIRQ:
        if ( opt.dump_cooked || opt.dump_all )
        {
            printf(" %3lu.%09lu %s inj_virq vec %u\n",
                   ri->t.s, ri->t.ns, pcpu_string(ri->cpu),
                   *(unsigned*)h->d);
        }
        break;
    case TRC_HVM_INJ_EXC:
        hvm_inj_exc_process(ri, h);
        break;
    case TRC_HVM_INJ_VIRQ:
        hvm_inj_virq_process(ri, h);
        break;
    case TRC_HVM_OP_DESTROY_PROC:
        /* Guest is tearing down the process owning the current cr3:
         * mark it so teardown activity can be attributed to it */
        if(h->v->cr3.data) {
            struct cr3_value_struct *cur = h->v->cr3.data;
            if(cur->destroy.callback)
                fprintf(warn, "Strange, double callback for cr3 gmfn %llx!\n",
                    cur->gmfn);
            cur->destroy.callback = 1;
        } else if(opt.with_cr3_enumeration) {
            fprintf(warn, "Warning: destroy_proc: don't know current cr3\n");
        }
        if ( opt.dump_cooked || opt.dump_all )
        {
            printf(" %3lu.%09lu %s destroy_proc cur_cr3 %llx\n",
                   ri->t.s, ri->t.ns, pcpu_string(ri->cpu), h->v->cr3.val);
        }
        break;
    }

    return;

needs_vmexit:    
    /* Wait for the next vmexit */
    if(!h->vmexit_valid)
    {
        if(opt.dump_all)
            hvm_generic_dump(ri,"!");
        return;
    }

    /* Keep generic "event handler" info */
    h->event_handler = ri->event - TRC_HVM_HANDLER;

    switch(ri->event) {
        /* Records adding to the vmexit reason */
    case TRC_HVM_INTR:
        hvm_intr_process(h);
        break;
    case TRC_HVM_PF_XEN:
    case TRC_HVM_PF_XEN64:
        hvm_pf_xen_process(ri, h);
        break;
    case TRC_HVM_IO_READ:
        hvm_io_read_process(h);
        break;
    case TRC_HVM_IO_WRITE:
        hvm_io_write_process(h);
        break;
    case TRC_HVM_IO_ASSIST:
        hvm_io_assist_process(h);
        break;
    case TRC_HVM_MMIO_ASSIST:
    case TRC_HVM_MMIO_ASSIST|HVM_MMIO_ASSIST_WRITE:
        hvm_mmio_assist_process(ri, h);
        break;
    case TRC_HVM_CR_WRITE:
        hvm_cr_write_process(h);
        break;
    case TRC_HVM_MSR_WRITE:
      hvm_msr_write_process(ri, h);
      break;
    case TRC_HVM_MSR_READ:
        hvm_msr_read_process(ri, h);
      break;
    case TRC_HVM_VMMCALL:
        hvm_vmcall_process(ri, h);
        break;
        /* Events with no dedicated decoder: save the raw words and
         * dump them generically at arc close */
    case TRC_HVM_CR_READ:
    case TRC_HVM_DR_READ:
    case TRC_HVM_DR_WRITE:
    case TRC_HVM_CPUID:
    case TRC_HVM_NMI:
    case TRC_HVM_SMI:
    case TRC_HVM_HLT:
    case TRC_HVM_INVLPG:
    case TRC_HVM_MCE:
    case TRC_HVM_CLTS:
    case TRC_HVM_LMSW:
        if ( h->post_process != NULL )
            fprintf(warn, "Strange, h->postprocess already set!\n");
        h->inflight.generic.event = ri->event;
        bcopy(h->d, h->inflight.generic.d, sizeof(unsigned int) * 4); 
        h->post_process = hvm_generic_postprocess;
        if(opt.dump_all)
        {
            hvm_generic_dump(ri, "]");
        }
        break;
    }
}
+
/* Forward declarations: scheduling/runstate helpers defined later in
 * this file, needed by the runstate code below. */
void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc);
void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev,
                      tsc_t tsc, int new_runstate);
struct vcpu_data * vcpu_find(int did, int vid);
void lose_vcpu(struct vcpu_data *v, tsc_t tsc);
+
/* Classify domain d's aggregate runstate from the runstates of its
 * vcpus: e.g. all vcpus running => FULL_RUN; running + runnable =>
 * CONCURRENCY_HAZARD.  LOST information trumps everything.  Exits the
 * program on an unclassifiable combination.  The default domain is
 * always reported as 0. */
int domain_runstate(struct domain_data *d) {
    int i;
    int runstates[RUNSTATE_MAX];
    int ret=-1;
    
    if(d->did == DEFAULT_DOMAIN)
        return 0;

    /* Histogram the per-vcpu runstates */
    for(i=0; i<RUNSTATE_MAX; i++)
        runstates[i]=0;

    for(i=0; i<=d->max_vid; i++)
        if(d->vcpu[i])
            runstates[d->vcpu[i]->runstate.state]++;

    /* Classify by the "most active" state present, in priority order */
    if(runstates[RUNSTATE_LOST])
        ret=DOMAIN_RUNSTATE_LOST;
    else if(runstates[RUNSTATE_RUNNING])
    {
        if(runstates[RUNSTATE_RUNNABLE])
            ret=DOMAIN_RUNSTATE_CONCURRENCY_HAZARD;
        else if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE])
            ret= DOMAIN_RUNSTATE_PARTIAL_RUN;
        else
            ret= DOMAIN_RUNSTATE_FULL_RUN;
    }
    else if(runstates[RUNSTATE_RUNNABLE])
    {
        if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE])
            ret= DOMAIN_RUNSTATE_PARTIAL_CONTENTION;
        else
            ret= DOMAIN_RUNSTATE_FULL_CONTENTION;
    }
    else if(runstates[RUNSTATE_BLOCKED]||runstates[RUNSTATE_OFFLINE])
    {
        ret= DOMAIN_RUNSTATE_BLOCKED;
    } else {
        fprintf(warn, "Strange, no meaningful runstates for d%d!\n",
                d->did);
    }

#if 0
    printf(" Max vid: %d\n", d->max_vid);
    for(i=0; i<=d->max_vid; i++)
        if(d->vcpu[i])
            fprintf(warn, " v%d: %s\n",
                    i, runstate_name[d->vcpu[i]->runstate]);

    for(i=0; i<RUNSTATE_MAX; i++)
        fprintf(warn, " %s: %d\n",
                runstate_name[i], runstates[i]);
#endif     
                   
    /* An unclassifiable combination (ret still -1) is fatal */
    if(ret >= 0)
        return ret;
    else
        exit(1);
}
+
/* Close out vcpu v's current runstate interval at 'tsc', charging the
 * elapsed cycles to per-vcpu stats (and, for dom0, to each pcpu's
 * buffer-volume accounting), then enter 'new_runstate' and refresh the
 * derived domain runstate. */
static inline void runstate_update(struct vcpu_data *v, int new_runstate,
                                   tsc_t tsc)
{
    struct domain_data *d = v->d;
    /* Only account if the previous timestamp is sane: non-zero and
     * strictly in the past */
    if(v->runstate.tsc > 0 && v->runstate.tsc < tsc) {
        update_cycles(v->runstates + v->runstate.state, tsc - v->runstate.tsc);

        if(v->runstate.state == RUNSTATE_RUNNING)
            update_cycles(&v->d->total_time, tsc - v->runstate.tsc);

        /* How much did dom0 run this buffer? */
        if(v->d->did == 0) {
            int i;
            for(i=0; i<MAX_CPUS; i++) {
                struct pcpu_info * p = P.pcpu + i;
                tsc_t start_tsc;
                if(!p->active)
                    continue;
                /* Clip the interval to the start of this buffer */
                start_tsc = (p->volume.buffer_first_tsc > v->runstate.tsc) ?
                    p->volume.buffer_first_tsc :
                    v->runstate.tsc;
                p->volume.buffer_dom0_runstate_cycles[v->runstate.state]
                    += tsc - start_tsc;
#if 0
                printf(" - updated p%d dom0_runstate %s to %lld cycles (+%lld)\n",
                       p->pid, runstate_name[v->runstate.state],
                       p->volume.buffer_dom0_runstate_cycles[v->runstate.state],
                       tsc - start_tsc);
#endif
                p->volume.buffer_dom0_runstate = new_runstate;
                p->volume.buffer_dom0_runstate_tsc = tsc;
            }
        }
    }
    v->runstate.state = new_runstate;
    v->runstate.tsc = tsc;

    /* Determine the domain runstate */
    if(d->runstate_tsc > 0 && d->runstate_tsc < tsc) 
        update_cycles(d->runstates + d->runstate, tsc - d->runstate_tsc);

    d->runstate = domain_runstate(d);

    d->runstate_tsc = tsc;
}
+
+void hvm_vmexit_process(struct record_info *ri, struct hvm_data *h,
+                        struct vcpu_data *v) {
+    struct {
+        union {
+            struct {
+                unsigned long long rip;
+                unsigned long exit_reason;
+            } x64;
+            struct {
+                unsigned long eip;
+                unsigned long exit_reason;
+            } x32;
+        };
+    } *r;
+
+    if(ri->extra_words != 2 && ri->extra_words != 3)
+    {
+        fprintf(warn, "FATAL: vmexit has unexpected extra words %d!\n",
+                ri->extra_words);
+        dump_generic(warn, ri);
+        exit(1);
+    }
+    
+
+    r = (typeof(r))ri->d;
+
+    if(!h->init)
+        init_hvm_data(h, v);
+
+    h->vmexit_valid=1;
+
+    if(ri->event == TRC_HVM_VMEXIT64) {
+        if(v->guest_paging_levels != 4)
+        {
+            fprintf(warn, "%s: VMEXIT64, but guest_paging_levels %d.  Switching to 4.\n",
+                    __func__, v->guest_paging_levels);
+            v->guest_paging_levels = 4;
+        }
+        if(!is_valid_addr64(r->x64.rip))
+            fprintf(warn, "%s: invalid va %llx",
+                    __func__, r->x64.rip);
+        h->rip = r->x64.rip;
+        h->exit_reason = r->x64.exit_reason;
+    } else {
+        if(v->guest_paging_levels == 4)
+        {
+            int new_paging_levels = opt.default_guest_paging_levels;
+            if(new_paging_levels == 4)
+                new_paging_levels = 2; /* Wild guess */
+            fprintf(warn, "%s: VMEXIT, but guest_paging_levels %d.  Switching to %d(default).\n",
+                    __func__, v->guest_paging_levels, new_paging_levels);
+            v->guest_paging_levels = new_paging_levels;
+        }
+        h->rip = r->x32.eip;
+        h->exit_reason = r->x32.exit_reason;
+    }
+
+    if(h->exit_reason > h->exit_reason_max)
+    {
+        fprintf(warn, "h->exit_reason %lx > exit_reason_max %lx!\n",
+                (unsigned long)h->exit_reason,
+                (unsigned long)h->exit_reason_max);
+        exit(1);
+    }
+
+    if(opt.dump_all) {
+        if ( h->exit_reason < h->exit_reason_max ) 
+            printf("]%s vmexit exit_reason %s eip %llx%s\n",
+                   ri->dump_header,
+                   h->exit_reason_name[h->exit_reason],
+                   h->rip,
+                   find_symbol(h->rip));
+        else
+            printf("]%s vmexit exit_reason %x eip %llx%s\n",
+                   ri->dump_header,
+                   h->exit_reason,
+                   h->rip,
+                   find_symbol(h->rip));
+    }
+
+    if(h->v->cr3.data && h->entry_tsc) {
+        update_cycles(&h->v->cr3.data->guest_time,
+                      ri->tsc - h->entry_tsc);
+        h->v->cr3.data->run_time += (ri->tsc - h->entry_tsc);
+    }
+
+    h->exit_tsc = ri->tsc;
+    h->entry_tsc = 0;
+    h->resyncs = 0;
+    h->prealloc_unpin = 0;
+    h->short_summary_done = 0;
+
+    if(!opt.svm_mode && h->exit_reason == EXIT_REASON_EXCEPTION_NMI)
+    {
+        bzero(&h->inflight.pf_xen, sizeof(h->inflight.pf_xen));
+        h->post_process = hvm_exception_nmi_generic_postprocess;
+    } else {
+        h->post_process = NULL;
+    }
+  
+    {
+        struct time_struct t;
+
+        abs_cycles_to_time(h->exit_tsc, &t);
+
+        if(t.time)
+        {
+            snprintf(h->dump_header,
+                     DUMP_HEADER_MAX,
+                     "%3lu.%09lu %s", t.s, t.ns, pcpu_string(ri->cpu));
+        }
+        else
+        {
+            snprintf(h->dump_header,
+                     DUMP_HEADER_MAX,
+                     "               %s", pcpu_string(ri->cpu));
+        }
+    }
+}
+
+void hvm_close_vmexit(struct hvm_data *h, tsc_t tsc) {
+
+    if(h->exit_tsc) {
+        if(h->exit_tsc > tsc)
+            h->arc_cycles = 0;
+        else {
+            h->arc_cycles = tsc - h->exit_tsc;
+
+            if(opt.summary_info) {
+                update_summary(&h->summary.exit_reason[h->exit_reason],
+                               h->arc_cycles);
+                h->summary_info = 1;
+            }
+        }
+    }
+
+    if(h->post_process)
+        (h->post_process)(h);
+
+    if(h->arc_cycles) {
+        if(opt.summary_info && !h->short_summary_done) {
+            switch(h->event_handler) {
+            case HVM_EVENT_HANDLER_VMCALL:
+                hvm_update_short_summary(h, HVM_SHORT_SUMMARY_VMCALL);
+                break;
+            case HVM_EVENT_HANDLER_INTR:
+                hvm_update_short_summary(h, HVM_SHORT_SUMMARY_INTERRUPT);
+                break;
+            case HVM_EVENT_HANDLER_HLT:
+                hvm_update_short_summary(h, HVM_SHORT_SUMMARY_HLT);
+                break;
+            default:
+                hvm_update_short_summary(h, HVM_SHORT_SUMMARY_OTHER);
+                break;
+            }
+        }
+        
+
+        if(h->v->cr3.data) {
+            h->v->cr3.data->run_time += h->arc_cycles;
+
+            if(opt.summary_info) 
+                update_cycles(&h->v->cr3.data->hv_time,
+                              h->arc_cycles);
+        }
+    }
+
+    h->exit_tsc = 0;
+    h->vmexit_valid = 0;
+    h->post_process = NULL;
+
+}
+
/* Process a vmentry record: finish arc-level bookkeeping started at the
 * matching vmexit (IPI latency, wake-to-halt interrupt timestamps),
 * then close the arc via hvm_close_vmexit() and begin guest-time
 * accounting from this entry. */
void hvm_vmentry_process(struct record_info *ri, struct hvm_data *h) {
    if(!h->init)
    {
        /* No state for this vcpu yet; '!' marks an unmatched record. */
        if(opt.dump_all)
            printf("!%s vmentry\n",
                   ri->dump_header);
        return;
    }

    /* Vista bug 
     * This has to be done here because irqs are injected on the path out
     * to vmexit. */
    if(h->v->vlapic.ipi_virq_injected)
    {
        struct vcpu_data *v = h->v;
        struct vlapic_struct *vla = &v->vlapic;
        unsigned long long lat=0;

        /* Latency from the first pending IPI to this vmentry. */
        if(ri->tsc > vla->first_ipi_tsc)
            lat = ri->tsc - vla->first_ipi_tsc;
        else
            fprintf(stderr, "Strange, first_ipi_tsc %lld > ri->tsc %lld!\n",
                    vla->first_ipi_tsc, ri->tsc);

        if((opt.dump_all || opt.dump_cooked) && vla->outstanding_ipis > 1)
        {
            struct time_struct t;
            cycles_to_time(lat, &t);
            printf(" d%dv%d received %d ipis, latency %lld (%lu.%09lu s)\n",
                   v->d->did, v->vid, vla->outstanding_ipis, lat,
                   t.s, t.ns);
        }

        if(opt.summary_info)
        {
            update_summary(&h->summary.ipi_latency, lat);
            h->summary.ipi_count[vla->outstanding_ipis]++;
            
        }

        if(opt.dump_cooked || opt.dump_ipi_latency) {
            struct time_struct t;
            abs_cycles_to_time(ri->tsc, &t);
            printf("Ipis: %d latency: %lld (time %lu.%09lu s)\n",
                   vla->outstanding_ipis, lat, t.s, t.ns);
        }

        clear_vlapic(vla);
    }

    /* A wake-to-halt "waking" phase, if any, ends at vmentry. */
    if(h->w2h.waking && opt.dump_all) 
        printf(" [w2h] d%dv%d Finishing waking\n",
               h->v->d->did, h->v->vid);

    h->w2h.waking = 0;

    /* Interrupts injected on the way out get their real start
     * timestamp here; start_tsc==1 is the "wants a tsc" sentinel. */
    if ( h->w2h.interrupts_wanting_tsc ) {
        int i;
        for(i=0; i<GUEST_INTERRUPT_MAX; i++)
        {
            if ( h->summary.guest_interrupt[i].start_tsc == 1 )
            {
                if(opt.dump_all)
                    printf(" [w2h] d%dv%d Setting vec %d tsc to %lld\n",
                           h->v->d->did, h->v->vid, i, ri->tsc);
                h->summary.guest_interrupt[i].start_tsc = ri->tsc;
                h->w2h.interrupts_wanting_tsc--;
                if ( h->w2h.interrupts_wanting_tsc == 0 )
                    break;
            }
        }
    }

    if(!h->vmexit_valid)
    {
        /* Entry with no matching exit (e.g. start of the trace). */
        if(opt.dump_all)
            printf("!%s vmentry\n",
                   ri->dump_header);
        return;
    }

    if(opt.dump_all) {
        printf("]%s vmentry\n",
               ri->dump_header);
    }

    hvm_close_vmexit(h, ri->tsc);
    h->entry_tsc = ri->tsc;
}
+
+void hvm_process(struct pcpu_info *p)
+{
+    struct record_info *ri = &p->ri;
+    struct vcpu_data *v = p->current;
+    struct hvm_data *h = &v->hvm;
+
+    if(v->d->did == IDLE_DOMAIN) {
+        fprintf(warn, "%s: Strange, hvm record for idle domain!\n",
+                __func__);
+        return;
+    }
+
+    if(!v->data_type)
+        v->data_type = VCPU_DATA_HVM;
+    else if(v->data_type != VCPU_DATA_HVM) {
+        fprintf(warn, "Non-hvm data type %d for d%dv%d on proc %d, skipping\n",
+                v->data_type, v->d->did, v->vid, p->pid);
+        dump_generic(warn, ri);
+        return;
+    }
+        
+    
+    if(ri->evt.sub == 2)
+    {
+        UPDATE_VOLUME(p, hvm[HVM_VOL_HANDLER], ri->size);
+        hvm_handler_process(ri, h);
+    }
+    else
+    {
+        switch(ri->event) {
+            /* HVM */
+        case TRC_HVM_VMEXIT:
+        case TRC_HVM_VMEXIT64:
+            UPDATE_VOLUME(p, hvm[HVM_VOL_VMEXIT], ri->size);
+            hvm_vmexit_process(ri, h, v);
+            break;
+        case TRC_HVM_VMENTRY:
+            UPDATE_VOLUME(p, hvm[HVM_VOL_VMENTRY], ri->size);
+            hvm_vmentry_process(ri, &p->current->hvm);
+            break;
+        default:
+            fprintf(warn, "Unknown hvm event: %x", ri->event);
+        }
+    }
+}
+
/* Print the per-vcpu HVM summary: exit-reason cycle stats, guest
 * interrupt counts/runtimes, the optional interrupt-eip histogram,
 * and IPI latency counts. */
void hvm_summary(struct hvm_data *h) {
   int i;

   if(!h->summary_info)
       return;

   printf("Exit reasons:\n");
   for(i=0; i<h->exit_reason_max; i++) {
       PRINT_SUMMARY(h->summary.exit_reason[i],
                     " %-20s ", h->exit_reason_name[i]);
       if(h->exit_reason_summary_handler[i])
           h->exit_reason_summary_handler[i](h);
   }

   printf("Guest interrupt counts:\n");
   for(i=0; i<GUEST_INTERRUPT_MAX; i++)
       if(h->summary.guest_interrupt[i].count) {
           int j;
           printf("  [%3d] %d\n",
                  i, h->summary.guest_interrupt[i].count);
           for(j=1; j<GUEST_INTERRUPT_CASE_MAX; j++) {
               char desc[80];
               snprintf(desc, 80, "   * %s", guest_interrupt_case_name[j]);
               print_cycle_summary(h->summary.guest_interrupt[i].runtime+j, desc);
           }
       }
   /* Deliberate: i == GUEST_INTERRUPT_MAX here, reading the final
    * "overflow" bucket for vectors beyond the tracked range.  NOTE
    * (review): assumes guest_interrupt[] is declared with
    * GUEST_INTERRUPT_MAX+1 entries -- verify against the declaration. */
   if(h->summary.guest_interrupt[i].count)
       printf("  [%d+] %d\n",
              i, h->summary.guest_interrupt[i].count);

   if(opt.histogram_interrupt_eip)
   {
       /* NOTE(review): signed 'i' compared against unsigned 'max';
        * fine for sane increment values, but worth confirming. */
       unsigned max = ((1ULL<<ADDR_SPACE_BITS)/opt.histogram_interrupt_increment);
       printf("Interrupt eip histogram:\n");
       for(i=0; i<max; i++)
           if(h->summary.extint_histogram[i])
           {
               printf("[%llx-%llx]: %d\n",
                      opt.histogram_interrupt_increment * i,
                      (opt.histogram_interrupt_increment * (i+1)) - 1,
                      h->summary.extint_histogram[i]);
           }
   }

   PRINT_SUMMARY(h->summary.ipi_latency,
                 "IPI latency \n");
   for(i=0; i<256; i++)
       if(h->summary.ipi_count[i])
           printf("    [%3d] %10d\n",
                  i, h->summary.ipi_count[i]);
   hvm_io_address_summary(h->summary.pio, "IO address summary:");
}
+
/* ---- Shadow records ---- */
/* Decomposition of a shadow trace event id: the low 8 bits hold the
 * pf-case minor number; the next 4 encode the guest paging levels
 * (as stored in the trace; see shadow_process, which adds 2). */
union shadow_event 
{
    unsigned event;
    struct {
        unsigned minor:8,
            paging_levels:4;
    };
};
+
+/* WARNING - not thread safe */
+#define FLAGSTRING(_name, _char) \
+    if(e->flag_ ## _name)        \
+        flagstring[i] = _char; \
+    i++;
+
+char * flag_string(struct pf_xen_extra *e)
+{
+    static char flagstring[32];
+    int i=0;
+
+    for(i=0; i<32; i++)
+        flagstring[i]='-';
+
+    i=0;
+    
+    if(e->flag_set_ad)
+        flagstring[i]='d';
+    else if(e->flag_set_a)
+        flagstring[i]='a';
+    i++;
+
+    FLAGSTRING(shadow_l1_get_ref,  'g');
+    FLAGSTRING(shadow_l1_put_ref,  'p');
+    //FLAGSTRING(l2_propagate,      '2');
+    FLAGSTRING(demote,             'D');
+    FLAGSTRING(promote,            'P');
+    FLAGSTRING(wrmap,              'w');
+    FLAGSTRING(wrmap_guess_found,  'G');
+    //FLAGSTRING(wrmap_brute_force, 'b');
+    FLAGSTRING(early_unshadow,     'e');
+    FLAGSTRING(prealloc_unhook,    'H');
+    FLAGSTRING(unsync,             'u');
+    FLAGSTRING(oos_fixup_add,      'a');
+    FLAGSTRING(oos_fixup_evict,    'x');
+
+    flagstring[i]=0;
+
+    return flagstring;
+}
+
/* Post-processor for PF_XEN_EMULATE arcs: once the arc's cycle count is
 * known, fold it into the eip list, the per-pt-level and per-sub-case
 * emulation summaries, and (optionally) the unpin/promote scatterplot. */
void shadow_emulate_postprocess(struct hvm_data *h)
{
    struct pf_xen_extra *e = &h->inflight.pf_xen;

    if ( opt.summary_info )
    {
        update_eip(&h->v->d->emulate_eip_list,
                   h->rip,
                   h->arc_cycles,
                   0, NULL);
        update_summary(&h->summary.pf_xen[PF_XEN_EMULATE], h->arc_cycles);
        update_summary(&h->summary.pf_xen_emul[e->pt_level], h->arc_cycles);
        /* Each flag observed during the arc gets its own bucket. */
        if(h->prealloc_unpin)
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNPIN], h->arc_cycles);
        if(e->flag_prealloc_unhook)
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PREALLOC_UNHOOK], h->arc_cycles);
        if(e->flag_early_unshadow)
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_EARLY_UNSHADOW], h->arc_cycles);
        if(e->flag_set_changed)
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_CHANGED], h->arc_cycles);
        else
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_UNCHANGED], h->arc_cycles);
        if(e->flag_set_flush)
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_FLUSH], h->arc_cycles);
        if(e->flag_set_error)
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_SET_ERROR], h->arc_cycles);
        if(e->flag_promote)
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_PROMOTE], h->arc_cycles);
        if(e->flag_demote)
            update_summary(&h->summary.pf_xen_emul[PF_XEN_EMUL_DEMOTE], h->arc_cycles);
        /* more summary info */

        hvm_update_short_summary(h, HVM_SHORT_SUMMARY_EMULATE);
    }

    if ( opt.dump_cooked )
    {
        printf(" %s emulate va %llx eip %llx%s lvl %d/%d corr %llx wval %llx flags %s\n",
               h->dump_header, e->va,
               h->rip, find_symbol(h->rip),
               e->pt_level, h->v->guest_paging_levels,
               e->corresponding_va,
               e->wval,
               flag_string(e));
    }


    /* Scatterplot y-values: -10 early-unshadow, 0 unpin, cr3-id (or 2)
     * for promotes. */
    if(opt.scatterplot_unpin_promote) {
        if(e->flag_early_unshadow)
            scatterplot_vs_time(h->exit_tsc, -10);
        if(h->prealloc_unpin)
            scatterplot_vs_time(h->exit_tsc, 0);
        if(e->flag_promote) {
            if(opt.with_cr3_enumeration) {
                if(h->v->cr3.data)
                    scatterplot_vs_time(h->exit_tsc, h->v->cr3.data->cr3_id);
            } else
                scatterplot_vs_time(h->exit_tsc, 2);
        }
    }


}
+
+void shadow_emulate_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+    union {
+        /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+           so put it first for alignment sake. */
+        struct {
+            unsigned gl1e, write_val;
+            unsigned va;
+            unsigned flags:29, emulation_count:3;
+        } gpl2;
+        struct {
+            unsigned long long gl1e, write_val;
+            unsigned va;
+            unsigned flags:29, emulation_count:3;
+        } gpl3;
+        struct {
+            unsigned long long gl1e, write_val;
+            unsigned long long va;
+            unsigned flags:29, emulation_count:3;
+        } gpl4;
+    } *r = (typeof(r))ri->d;
+    int gpl = h->v->guest_paging_levels;
+
+    /* Fill in extended information */
+    switch(gpl)
+    {
+    case 2:
+        if(sizeof(r->gpl2) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl2), h->v->guest_paging_levels,
+                    ri->extra_words * 4);
+            break;
+        }
+        e->va = r->gpl2.va;
+        e->flags = r->gpl2.flags;
+        e->gl1e = r->gpl2.gl1e;
+        e->wval = r->gpl2.write_val;
+        break;
+    case 3:
+        if(sizeof(r->gpl3) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl3), h->v->guest_paging_levels,
+                    ri->extra_words * 4);
+            break;
+        }
+        e->va = r->gpl3.va;
+        e->flags = r->gpl3.flags;
+        e->gl1e = r->gpl3.gl1e;
+        e->wval = r->gpl3.write_val;
+        break;
+    case 4:
+        if(sizeof(r->gpl4) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl4), h->v->guest_paging_levels,
+                    ri->extra_words * 4);
+            break;
+        }
+        e->va = r->gpl4.va;
+        e->flags = r->gpl4.flags;
+        e->gl1e = r->gpl4.gl1e;
+        e->wval = r->gpl4.write_val;
+        break;
+    }
+
+    pf_preprocess(e,gpl);
+
+    if(opt.dump_all)
+        printf("]%s emulate va %llx gl1e %8llx wval %8llx flags %s(%x) pt_level %d corr %8llx\n",
+               ri->dump_header,
+               e->va,
+               e->gl1e, e->wval,
+               flag_string(e), e->flags,
+               e->pt_level, e->corresponding_va);
+
+    h->post_process = shadow_emulate_postprocess;
+
+}
+
/* Decoded gfn/va payload shared by the "emulate other" family of
 * shadow records (unshadow user/evtinj/unhandled). */
struct shadow_emulate_other {
    unsigned long long gfn, va;
};

/* Set: 2-level records log a 64-bit gfn rather than a gmfn -- see the
 * "D'OH" note inside shadow_parse_other. */
#define SHADOW_OTHER_LOGS_GFN_NOT_GMFN 1
+
+void shadow_parse_other(struct record_info *ri,
+                        struct shadow_emulate_other *o,
+                        int gpl) {
+    union {
+        /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+           so put it first for alignment sake. */
+#if SHADOW_OTHER_LOGS_GFN_NOT_GMFN
+        /* D'OH!  Accidentally used mfn_t in the struct, so gmfns are always
+           64-bit... :-/ */
+        struct {
+            unsigned long gfn, va;
+        } gpl2;
+#endif
+        struct {
+            unsigned long long gfn;
+            unsigned long va;
+        } gpl3;
+        struct {
+            unsigned long long gfn, va;
+        } gpl4;
+    } *r = (typeof(r))ri->d;
+
+
+    switch(gpl)
+    {
+#if SHADOW_OTHER_LOGS_GFN_NOT_GMFN
+    case 2:
+        if(sizeof(r->gpl2) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl2), gpl,
+                    ri->extra_words * 4);
+            break;
+        }
+        o->va = r->gpl2.va;
+        o->gfn = r->gpl2.gfn;
+        break;
+#else
+    case 2:
+        /* FALLTHRU */
+#endif
+    case 3:
+        if(sizeof(r->gpl3) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl3), gpl,
+                    ri->extra_words * 4);
+            break;
+        }
+        o->va = r->gpl3.va;
+        o->gfn = r->gpl3.gfn;
+        break;
+    case 4:
+        if(sizeof(r->gpl4) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl4), gpl,
+                    ri->extra_words * 4);
+            break;
+        }
+        o->va = r->gpl4.va;
+        o->gfn = r->gpl4.gfn;
+        break;
+    }
+}
+
#if 0
/* Disabled: the unsync decode/postprocess path.  NOTE(review): this
 * code reads r.gmfn / e->gmfn, but struct shadow_emulate_other only
 * declares 'gfn', so it would not compile as-is if re-enabled. */
void shadow_unsync_postprocess(struct hvm_data *h)
{
    struct pf_xen_extra *e = &h->inflight.pf_xen;
    
    if(h->resyncs > 1)
        fprintf(warn, "Strange, %d resyncs for an unsync!\n",
                h->resyncs);

    if(opt.summary_info) {
        update_summary(&h->summary.pf_xen[PF_XEN_EMULATE_UNSYNC],
                       h->arc_cycles);
        if(h->resyncs <= 1)
            update_summary(&h->summary.pf_xen_unsync[h->resyncs],
                           h->arc_cycles);
    }

    if(opt.dump_cooked)
    {
        printf(" %s unsync gfn %llx %s (%d resyncs)\n",
               h->dump_header, e->gfn,
               h->resyncs?"(resync)":"",
               h->resyncs);
    }
}


void shadow_unsync_process(struct record_info *ri, struct hvm_data *h)
{
    struct pf_xen_extra *e = &h->inflight.pf_xen;
    struct shadow_emulate_other r;

    shadow_parse_other(ri, &r, h->v->guest_paging_levels);

    e->gmfn = r.gmfn;
    e->va = r.va;

    pf_preprocess(e, h->v->guest_paging_levels);

    if(opt.dump_all)
        printf("]%s shadow unsync gmfn %llx va %llx pt_level %d corr %llx\n",
               h->dump_header,
               e->gmfn,
               e->va,
               e->pt_level,
               e->corresponding_va);

    h->post_process = shadow_unsync_postprocess;
}
#endif

void shadow_fault_generic_postprocess(struct hvm_data *h);
+
+void shadow_emulate_other_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+    struct shadow_emulate_other r;
+    union shadow_event sevt = { .event = ri->event };
+
+    shadow_parse_other(ri, &r, h->v->guest_paging_levels);
+
+    e->gfn = r.gfn;
+    e->va = r.va;
+
+    pf_preprocess(e, h->v->guest_paging_levels);
+
+    if(opt.dump_all)
+        printf("]%s shadow %s gfn %llx va %llx\n",
+               h->dump_header,
+               pf_xen_name[sevt.minor],
+               e->gfn,
+               e->va);
+
+    h->post_process = shadow_fault_generic_postprocess;
+}
+
/* Post-processor for PF_XEN_FIXUP arcs: attribute the arc's cycles to
 * the matching fixup sub-buckets (unsync/resync, oos fixups, promote/
 * wrmap, or plain update), then do cooked dump / scatterplot output. */
void shadow_fixup_postprocess(struct hvm_data *h)
{
    struct pf_xen_extra *e = &h->inflight.pf_xen;

    if ( opt.summary_info )
    {
        update_summary(&h->summary.pf_xen[PF_XEN_FIXUP], h->arc_cycles);
        if(h->prealloc_unpin) {
            update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PREALLOC_UNPIN], h->arc_cycles);
        }
        if(e->flag_unsync) {
            update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UNSYNC], h->arc_cycles);
            /* Resync counts above the max share one overflow bucket. */
            if(h->resyncs < PF_XEN_FIXUP_UNSYNC_RESYNC_MAX)
                update_summary(&h->summary.pf_xen_fixup_unsync_resync[h->resyncs],
                               h->arc_cycles);
            else
                update_summary(&h->summary.pf_xen_fixup_unsync_resync[PF_XEN_FIXUP_UNSYNC_RESYNC_MAX],
                               h->arc_cycles);
        }
        if(e->flag_oos_fixup_add)
            update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_ADD], h->arc_cycles);
        if(e->flag_oos_fixup_evict)
            update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_OOS_EVICT], h->arc_cycles);
        if(e->flag_promote) {
            update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_PROMOTE], h->arc_cycles);
            /* wrmap only makes sense within a promote. */
            if(e->flag_wrmap) {
                update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_WRMAP], h->arc_cycles);
                if(e->flag_wrmap_brute_force)
                    update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_BRUTE_FORCE], h->arc_cycles);
            } else if(e->flag_wrmap_brute_force) {
                fprintf(warn, "Strange: wrmap_bf but not wrmap!\n");
                
            }

        } else if(e->flag_wrmap)
            fprintf(warn, "Strange, wrmap but not promote!\n");

        /* No flags at all: a pure pte update. */
        if(!(e->flag_promote || h->prealloc_unpin || e->flag_unsync)) 
            update_summary(&h->summary.pf_xen_fixup[PF_XEN_FIXUP_UPDATE_ONLY], h->arc_cycles);
        /* more summary info */

        if(e->flag_unsync)
            hvm_update_short_summary(h, HVM_SHORT_SUMMARY_UNSYNC);
        else
            hvm_update_short_summary(h, HVM_SHORT_SUMMARY_FIXUP);
    }

    if ( opt.dump_cooked )
    {
        printf(" %s fixup%s va %llx eip %llx%s gl1e %llx flags %s\n",
               h->dump_header,
               e->flag_unsync?":unsync":"",
               e->va,
               h->rip, find_symbol(h->rip),
               e->gl1e,
               flag_string(e));
    }

    /* Scatterplot y-values: 0 unpin, cr3-id (or 2) for promotes. */
    if(opt.scatterplot_unpin_promote) {
        if(h->prealloc_unpin)
            scatterplot_vs_time(h->exit_tsc, 0);
        if(e->flag_promote) {
            if(opt.with_cr3_enumeration) {
                if(h->v->cr3.data)
                    scatterplot_vs_time(h->exit_tsc, h->v->cr3.data->cr3_id);
            } else
                scatterplot_vs_time(h->exit_tsc, 2);
        }
    }
}
+
+void shadow_fixup_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+    union {
+        /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+           so put it first for alignment sake. */
+        struct {
+            unsigned long gl1e, va, flags;
+        } gpl2;
+        struct {
+            unsigned long long gl1e;
+            unsigned long va, flags;
+        } gpl3;
+        struct {
+            unsigned long long gl1e, va;
+            unsigned long flags;
+        } gpl4;
+    } *r = (typeof(r))ri->d;
+    int gpl = h->v->guest_paging_levels;
+
+    switch(gpl)
+    {
+    case 2:
+        if(sizeof(r->gpl2) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl2), h->v->guest_paging_levels,
+                    ri->extra_words * 4);
+            break;
+        }
+        e->va = r->gpl2.va;
+        e->flags = r->gpl2.flags;
+        e->gl1e = r->gpl2.gl1e;
+        break;
+    case 3:
+        if(sizeof(r->gpl3) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl3), h->v->guest_paging_levels,
+                    ri->extra_words * 4);
+            break;
+        }
+        e->va = r->gpl3.va;
+        e->flags = r->gpl3.flags;
+        e->gl1e = r->gpl3.gl1e;
+        break;
+    case 4:
+        if(sizeof(r->gpl4) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl4), h->v->guest_paging_levels,
+                    ri->extra_words * 4);
+            break;
+        }
+        e->va = r->gpl4.va;
+        e->flags = r->gpl4.flags;
+        e->gl1e = r->gpl4.gl1e;
+        break;
+    }
+
+    if(opt.dump_all)
+        printf("]%s fixup%s va %llx gl1e %llx flags (%x)%s\n",
+               h->dump_header,
+               e->flag_unsync?":unsync":"",
+               e->va, e->gl1e, e->flags,
+               flag_string(e));
+
+    h->post_process = shadow_fixup_postprocess;
+}
+
+void shadow_mmio_postprocess(struct hvm_data *h)
+{
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+
+    if ( opt.summary_info )
+    {
+        update_summary(&h->summary.pf_xen[e->pf_case], h->arc_cycles);
+
+        hvm_update_short_summary(h, HVM_SHORT_SUMMARY_MMIO);
+    }
+
+    if ( opt.dump_cooked )
+    {
+        if(e->mmio_data_valid)
+            printf(" %s %smmio %s va %llx eip %llx%s gpa %llx data %lx\n",
+                   h->dump_header,
+                   (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
+                   e->mmio_is_write?"write":"read",
+                   e->va,
+                   h->rip, find_symbol(h->rip),
+                   e->gpa,
+                   e->data);
+        else
+            printf(" %s %smmio %s va %llx eip %llx%s gpa %llx (no data)\n",
+                   h->dump_header,
+                   (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
+                   e->mmio_is_write?"write":"read",
+                   e->va,
+                   h->rip, find_symbol(h->rip),
+                   e->gpa);
+    }
+
+}
+
+void shadow_mmio_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct pf_xen_extra *e = &h->inflight.pf_xen;
+    union {
+        /* for PAE, guest_l1e may be 64 while guest_va may be 32;
+           so put it first for alignment sake. */
+        struct {
+            unsigned long va;
+        } gpl2;
+        struct {
+            unsigned long long va;
+        } gpl4;
+    } *r = (typeof(r))ri->d;
+    int gpl = h->v->guest_paging_levels;
+
+    switch(gpl)
+    {
+    case 2:
+    case 3:
+        if(sizeof(r->gpl2) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl2), h->v->guest_paging_levels,
+                    ri->extra_words * 4);
+            break;
+        }
+        e->va = r->gpl2.va;
+        break;
+    case 4:
+        if(sizeof(r->gpl4) != ri->extra_words * 4)
+        {
+            fprintf(warn, "%s: expected %d bytes for %d-level guest, got %d!\n",
+                    __func__, sizeof(r->gpl4), h->v->guest_paging_levels,
+                    ri->extra_words * 4);
+            break;
+        }
+        e->va = r->gpl4.va;
+        break;
+    }
+
+    if(opt.dump_all)
+        printf("]%s %smmio va %llx\n",
+                h->dump_header,
+                (e->pf_case==PF_XEN_FAST_MMIO)?"fast ":"",
+                e->va);
+
+    h->post_process = shadow_mmio_postprocess;
+}
+
+void shadow_fault_generic_dump(unsigned long event, uint32_t *d, char *prefix,
+                         char * dump_header)
+{
+    char *evt_string, evt_number[10];
+    union shadow_event sevt = { .event = event };
+    int i;
+
+    if(sevt.minor < PF_XEN_MAX)
+    {
+        evt_string = pf_xen_name[sevt.minor];
+    }
+    else
+    {
+        snprintf(evt_number, 10, "%d", sevt.minor);
+        evt_string = evt_number;
+    }
+
+    printf("%s%s shadow %s gl %d [",
+           prefix,
+           dump_header,
+           evt_string,
+           sevt.paging_levels);
+        
+    for(i=0; i<4; i++) 
+    {
+        printf(" %x", d[i]);
+    }
+
+    printf(" ]\n");
+}
+
+void shadow_fault_generic_postprocess(struct hvm_data *h)
+{
+    union shadow_event sevt = { .event = h->inflight.generic.event };
+    if(opt.summary_info) {
+        update_summary(&h->summary.pf_xen[sevt.minor],
+                       h->arc_cycles);
+
+        hvm_update_short_summary(h, HVM_SHORT_SUMMARY_PROPAGATE);
+    }
+
+    if(opt.dump_cooked)
+    {
+        shadow_fault_generic_dump(h->inflight.generic.event,
+                            h->inflight.generic.d,
+                            " ", h->dump_header);
+    }
+}
+
+void shadow_fault_generic_process(struct record_info *ri, struct hvm_data *h)
+{
+    union shadow_event sevt = { .event = ri->event };
+
+    /* pf-case traces, vs others */
+    h->inflight.generic.event = ri->event;
+    bcopy(ri->d, h->inflight.generic.d, sizeof(unsigned int) * 4); 
+
+    if(opt.dump_all)
+        shadow_fault_generic_dump(h->inflight.generic.event,
+                                  h->inflight.generic.d,
+                                  "]", ri->dump_header);
+
+    h->inflight.pf_xen.pf_case = sevt.minor;
+    h->post_process = shadow_fault_generic_postprocess;
+}
+
+void shadow_resync_process(struct record_info *ri, struct hvm_data *h)
+{
+    struct {
+        unsigned long long gfn;
+    } *r = (typeof(r))ri->d;
+
+    if(opt.dump_all || opt.dump_cooked)
+        printf(" %s oos resync %s gfn %llx\n",
+               ri->dump_header,
+               (ri->event == TRC_SHADOW_RESYNC_FULL)?"full":"only",
+               r->gfn);
+
+    h->resyncs++;
+}
+
+void shadow_prealloc_unpin_process(struct record_info *ri, struct hvm_data *h) {
+    struct {
+        unsigned long long gfn;
+    } *r = (typeof(r))ri->d;
+
+    if(opt.dump_all || opt.dump_cooked)
+        printf(" %s prealloc-unpin gfn %llx\n",
+               ri->dump_header, r->gfn);
+
+    if(h->prealloc_unpin)
+        fprintf(warn, "Strange, more than one prealloc_unpin per arc!\n");
+
+    h->prealloc_unpin = 1;
+
+    if(opt.with_cr3_enumeration)
+        cr3_prealloc_unpin(h->v, r->gfn);
+}
+
/* Dispatcher for shadow pagetable trace records on pcpu p: syncs the
 * guest paging-level from the event id, then routes to the per-case
 * decoder. */
void shadow_process(struct pcpu_info *p)
{
    struct record_info *ri = &p->ri;
    struct hvm_data *h = &p->current->hvm;
    
    union shadow_event sevt = { .event = ri->event };

    /* Trace encodes (levels - 2) in the paging_levels bits. */
    int gpl = sevt.paging_levels + 2;

    if(!h->init || !h->vmexit_valid)
    {
        /* No open vmexit arc to attach this record to. */
        if(opt.dump_all)
            shadow_fault_generic_dump(ri->event,
                                      ri->d,
                                      "!", ri->dump_header);
        return;
    }

    /* Low-minor records carry authoritative paging-level info. */
    if(sevt.minor <= PF_XEN_NOT_SHADOW) {
        if(p->current->guest_paging_levels != gpl)
        {
            fprintf(warn, "%s: Changing guest paging levels to %d\n",
                    __func__, gpl);
            p->current->guest_paging_levels = gpl;
        }
    }

    if(sevt.minor <= PF_XEN_LAST_FAULT)  {
        h->inflight.pf_xen.pf_case = sevt.minor;
        if(opt.summary) {
            hvm_set_summary_handler(h, hvm_pf_xen_summary);
        }
    }

    /* FIXME - mask out paging levels */
    switch(sevt.minor)
    {
    case PF_XEN_EMULATE:
        shadow_emulate_process(ri, h);
        break;
    case PF_XEN_FIXUP:
        shadow_fixup_process(ri, h);
        break;
    case PF_XEN_MMIO:
    case PF_XEN_FAST_MMIO:
        shadow_mmio_process(ri, h);
        break;
    case PF_XEN_EMULATE_UNSHADOW_USER:
    case PF_XEN_EMULATE_UNSHADOW_EVTINJ:
    case PF_XEN_EMULATE_UNSHADOW_UNHANDLED:
        shadow_emulate_other_process(ri, h);
        break;
#if 0
    case PF_XEN_EMULATE_UNSYNC:
        shadow_unsync_process(ri, h);
        break;
#endif
    case SHADOW_RESYNC_FULL:
    case SHADOW_RESYNC_ONLY:
        shadow_resync_process(ri, h);
        break;
    case SHADOW_PREALLOC_UNPIN:
        shadow_prealloc_unpin_process(ri, h);
        break;
    default:
        /* Remaining fault minors get the generic decoder; anything
         * else isn't a recognized shadow record at all. */
        if(sevt.minor <= PF_XEN_LAST_FAULT) {
            shadow_fault_generic_process(ri, h);
        } else {
            fprintf(warn, "Warning: processing shadow as generic\n");
            process_generic(ri);
        }
        break;
    }
}
+
+/* ---- PV guests ---- */
/* Decomposition of a PV trace event word: the low byte is the PV event
 * subtype, bit 8 flags a 64-bit guest, and the upper bits carry the
 * trace subclass / main class per the Xen trace ABI. */
union pv_event {
    unsigned event;             /* raw 32-bit event id */
    struct {
        unsigned minor:8,       /* PV_* subtype */
            x64:1,              /* set if guest was in 64-bit mode */
            unused1:3,
            sub:4,              /* trace subclass */
            main:12,            /* trace main class */
            unused:4;
    };
};
+
/* Decode a PV hypercall record (layout differs for 32- and 64-bit
 * guests), bump the per-hypercall counter, and optionally dump it. */
void pv_hypercall_process(struct record_info *ri, struct pv_data *pv) {
    union {
        struct {
            uint32_t eip, eax;
        } x32;
        struct {
            uint64_t eip;
            uint32_t eax;
        } x64;
    } * r = (typeof(r)) ri->d;
    union pv_event pevt = { .event = ri->event };
    unsigned long long eip;
    unsigned long eax;  /* eax holds the hypercall number */

    if(pevt.x64) {
        eip = r->x64.eip;
        eax = r->x64.eax;
    } else {
        eip = r->x32.eip;
        eax = r->x32.eax;
    }

    if(opt.summary_info) {
        if(eax < PV_HYPERCALL_MAX) 
            pv->hypercall_count[eax]++;
    }

    /* NOTE(review): counting is bounded by PV_HYPERCALL_MAX but naming by
     * HYPERCALL_MAX -- presumably hypercall_name[] is sized HYPERCALL_MAX
     * >= PV_HYPERCALL_MAX; confirm against the table definitions. */
    if(opt.dump_cooked || opt.dump_all) {
        if(eax < HYPERCALL_MAX)
            printf(" %s hypercall %2lx (%s) eip %llx\n",
                   ri->dump_header, eax,
                   hypercall_name[eax], eip);
        else
            printf(" %s hypercall %lx eip %llx\n",
                   ri->dump_header, eax, eip);
    }
}
+
+void pv_trap_process(struct record_info *ri, struct pv_data *pv) {
+    union {
+        struct {
+            unsigned long eip;
+            unsigned trapnr:15,
+                use_error_code:1,
+                error_code:16;
+        } x32;
+        struct {
+            unsigned long long eip;
+            unsigned trapnr:15,
+                use_error_code:1,
+                error_code:16;
+        } x64;
+    } * r = (typeof(r)) ri->d;
+    union pv_event pevt = { .event = ri->event };
+    unsigned long long eip;
+    unsigned trapnr, use_ec, ec;
+
+    if(pevt.x64) {
+        eip = r->x64.eip;
+        trapnr = r->x64.trapnr;
+        use_ec = r->x64.use_error_code;
+        ec = r->x64.error_code;
+    } else {
+        eip = r->x32.eip;
+        trapnr = r->x32.trapnr;
+        use_ec = r->x32.use_error_code;
+        ec = r->x32.error_code;
+    }
+
+    if(opt.summary_info) {
+        if(trapnr < PV_TRAP_MAX) 
+            pv->trap_count[trapnr]++;
+    }
+
+    if(opt.dump_cooked || opt.dump_all) {
+        printf(" %s trap %x eip %llx",
+               ri->dump_header, trapnr, eip);
+        if(use_ec)
+            printf(" ec %x\n", ec);
+        else
+            printf("\n");
+    }
+}
+
+void pv_generic_process(struct record_info *ri, struct pv_data *pv) {
+    union pv_event pevt = { .event = ri->event };
+    if ( opt.dump_cooked || opt.dump_all ) {
+        if(pevt.minor < PV_MAX && pv_name[pevt.minor])
+            printf(" %s %s",
+                   ri->dump_header,
+                   pv_name[pevt.minor]);
+        else
+            printf(" %s PV-%d ",
+                   ri->dump_header, pevt.minor);
+        if(ri->extra_words) {
+            int i;
+            printf("[ ");
+            for(i=0; i<ri->extra_words; i++) {
+                printf("%x ", (unsigned)ri->d[i]);
+            }
+            printf("]\n");
+                
+        }
+    }
+}
+
+void pv_summary(struct pv_data *pv) {
+    int i, j;
+
+    if(!pv->summary_info)
+        return;
+
+    printf("PV events:\n");
+    for(i=0; i<PV_MAX; i++) {
+        if(pv->count[i])
+            printf("  %s  %d\n", pv_name[i], pv->count[i]);
+        switch(i) {
+        case PV_HYPERCALL:
+            for(j=0; j<PV_HYPERCALL_MAX; j++) {
+                if(pv->hypercall_count[j])
+                    printf("    %-29s[%2d]: %6d\n",
+                           hypercall_name[j],
+                           j,
+                           pv->hypercall_count[j]);
+            }
+            break;
+        case PV_TRAP:
+            for(j=0; j<PV_TRAP_MAX; j++) {
+                if(pv->trap_count[j])
+                    printf("    [%d] %d\n",
+                           j, pv->trap_count[j]);
+            }
+            break;
+        }
+    }
+}
+
/* Top-level dispatcher for PV trace records: validates the current vcpu
 * is (or becomes) a PV vcpu, does summary accounting, then routes by
 * event subtype. */
void pv_process(struct pcpu_info *p)
{
    struct record_info *ri = &p->ri;
    struct vcpu_data *v = p->current;
    struct pv_data *pv = &v->pv;
    
    union pv_event pevt = { .event = ri->event };

    /* PV records should never be attributed to the idle domain. */
    if(v->d->did == IDLE_DOMAIN) {
        fprintf(warn, "%s: Strange, pv record for idle domain!\n",
                __func__);
        return;
    }

    /* First PV record locks the vcpu's data type; a conflicting type
     * indicates corrupt ordering and is fatal. */
    if(!v->data_type)
        v->data_type = VCPU_DATA_PV;
    else if(v->data_type != VCPU_DATA_PV) {
        fprintf(warn, "ERROR: non-pv data type %d for d%dv%d\n",
                v->data_type, v->d->did, v->vid);
        exit(1);
    }
        
    if(opt.summary_info) {
        pv->summary_info=1;

        /* PAE ptwr emulation is folded into the generic ptwr bucket. */
        if(pevt.minor == PV_PTWR_EMULATION_PAE)
            pv->count[PV_PTWR_EMULATION]++;
        else
            pv->count[pevt.minor]++;
    }

    switch(pevt.minor)
    {
    case PV_HYPERCALL:
        pv_hypercall_process(ri, pv);
        break;
    case PV_TRAP:
        pv_trap_process(ri, pv);
        break;
    case PV_PAGE_FAULT:
        //pv_pf_process(ri, pv);
        //break;
        /* fallthrough: dedicated handler not yet implemented */
    case PV_PTWR_EMULATION:
    case PV_PTWR_EMULATION_PAE:
        //pv_ptwr_emulation_process(ri, pv);
        //break;
        /* fallthrough: dedicated handler not yet implemented */
    default:
        pv_generic_process(ri, pv);
        break;
    }
}
+
+/* ---- Schedule ---- */
+struct vcpu_data * vcpu_create(struct domain_data *d, int vid)
+{
+    struct vcpu_data *v;
+
+    assert(d->vcpu[vid] == NULL);
+
+    fprintf(warn, "Creating vcpu %d for dom %d\n", vid, d->did);
+
+    if((v=malloc(sizeof(*v)))==NULL)
+    {
+        fprintf(stderr, "%s: malloc %d failed!\n", __func__, sizeof(*d));
+        exit(1);
+    }
+
+    bzero(v, sizeof(*v));
+
+    v->vid = vid;
+    v->d = d;
+    v->p = NULL;
+    v->runstate.state = RUNSTATE_INIT;
+    v->runstate.last_oldstate.wrong = RUNSTATE_INIT;
+
+    d->vcpu[vid] = v;
+
+    assert(v == v->d->vcpu[v->vid]);
+
+    if(vid > d->max_vid)
+        d->max_vid = vid;
+
+    return v;
+}
+
+/* Called by both domain_create and sched_default_domain_init */
+void domain_init(struct domain_data *d, int did)
+{
+    bzero(d, sizeof(*d));
+
+    d->did = did;
+    d->next = NULL;
+
+    if((opt.interval.output == INTERVAL_DOMAIN_SHORT_SUMMARY
+        || opt.interval.output == INTERVAL_DOMAIN_GUEST_INTERRUPT
+        || opt.interval.output == INTERVAL_DOMAIN_GRANT_MAPS)
+       && opt.interval.domain.did == did) {
+        P.interval.domain.d = d;
+    }
+}
+
+struct domain_data * domain_create(int did)
+{
+    struct domain_data *d;
+    
+    fprintf(warn, "Creating domain %d\n", did);
+
+    if((d=malloc(sizeof(*d)))==NULL)
+    {
+        fprintf(stderr, "%s: malloc %d failed!\n", __func__, sizeof(*d));
+        exit(1);
+    }
+
+    /* Initialize domain & vcpus */
+    domain_init(d, did);
+
+    return d;
+ }
+
+struct domain_data * domain_find(int did)
+{
+    struct domain_data *d, *n, **q;
+
+    /* Look for domain, keeping track of the last pointer so we can add
+       a domain if we need to. */
+    for ( d = domain_list, q=&domain_list ;
+          d && (d->did < did) ;
+          q = &d->next, d=d->next ) ;
+
+    if(d && d->did == did)
+        return d;
+
+    /* Make a new domain */
+    n = domain_create(did);
+    
+    /* Insert it into the list */
+    n->next = d;
+    *q = n;
+
+    return n;
+}
+
+struct vcpu_data * vcpu_find(int did, int vid)
+{
+    struct domain_data *d;
+    struct vcpu_data *v;
+
+    d = domain_find(did);
+
+    v = d->vcpu[vid];
+
+    if(!v)
+        v = vcpu_create(d, vid);
+
+    return v;
+}
+
/* Deschedule 'prev' from pcpu p at time tsc, moving it to new_runstate.
 * Each sanity check that fails zeroes prev's runstate tsc (so the
 * suspect interval is not accounted) before jumping to the common
 * update at 'set'; note the final check also falls through to 'set'. */
void vcpu_prev_update(struct pcpu_info *p, struct vcpu_data *prev,
                      tsc_t tsc, int new_runstate)
{
    assert(prev == prev->d->vcpu[prev->vid]);

    /* prev should believe it was running on this pcpu. */
    if(prev->p != p)
    {
        fprintf(warn, "Strange, sched_switch on pcpu %d, prev->pcpu %d!\n",
                p->pid, prev->p->pid);
        prev->runstate.tsc = 0;
        goto set;
    }

    assert(p->current);

    /* ...and this pcpu should believe prev was current. */
    if(p->current != prev)
    {
        fprintf(warn, "Strange, sched_switch prev d%dv%d, pcpu %d current d%dv%d!\n",
                prev->d->did, prev->vid,
                p->pid, p->current->d->did, p->current->vid);
        prev->runstate.tsc = 0;
        goto set;
    }
        
    /* ...and prev should have been in the RUNNING state. */
    if(prev->runstate.state != RUNSTATE_RUNNING)
    {
        fprintf(warn, "Strange, prev d%dv%d not running!\n",
                prev->d->did, prev->vid);
        prev->runstate.tsc = 0;
        goto set;
    }

set:
    /* Common exit: pcpu becomes idle and prev's runstate is updated. */
    p->current = NULL;
    runstate_update(prev, new_runstate, tsc);
}
+
/* Schedule 'next' onto pcpu p at time tsc.  On first activation the
 * vcpu inherits the default guest paging levels; thereafter a vcpu
 * already marked RUNNING (outside the default domain) indicates lost or
 * skewed records, so its runstate tsc is zeroed to avoid accounting
 * the suspect interval. */
void vcpu_next_update(struct pcpu_info *p, struct vcpu_data *next, tsc_t tsc)
{
    assert(next == next->d->vcpu[next->vid]);
    assert(p->current == NULL);

    if(next->activated)
    {
        /* We may get lost records at start-of-day, so ignore
           setting runstate of default vcpus */
        if(next->runstate.state == RUNSTATE_RUNNING
           && next->d->did != DEFAULT_DOMAIN)
        {
            fprintf(warn, "Strange, next d%dv%d already running on proc %d!\n",
                    next->d->did, next->vid,
                    next->p->pid);
            next->runstate.tsc = 0;
        }
    }
    else
    {
        /* First time we see this vcpu scheduled: activate it. */
        next->guest_paging_levels = opt.default_guest_paging_levels;
        next->activated = 1;
    }

    runstate_update(next, RUNSTATE_RUNNING, tsc);

    /* If we're moving from one pcpu to another, record change & update tsc */
    if(next->p != p) {
        if(next->pcpu_tsc) 
            update_cycles(&next->cpu_affinity, tsc - next->pcpu_tsc);
        next->pcpu_tsc = tsc;
    } 

    next->p = p;
    p->current = next;
}
+
/* If current is the default domain, we're fixing up from something
 * like start-of-day.  Update what we can.
 *
 * Moves the accounting from the placeholder default-domain vcpu to the
 * real vcpu v: the default domain is marked QUEUED as of the pcpu's
 * first timestamp, and v is treated as RUNNING since then. */
void vcpu_start(struct pcpu_info *p, struct vcpu_data *v) {
    /* Only valid when the placeholder default domain is current. */
    if( !p->current || p->current->d->did != DEFAULT_DOMAIN) {
        fprintf(stderr, "Strange, p->current not default domain!\n");
        exit(1);
    }

    if(!p->first_tsc) {
        fprintf(stderr, "Strange, p%d first_tsc 0!\n", p->pid);
        exit(1);
    }

    /* NOTE(review): the condition is '<=' but the message says '<';
     * confirm which comparison is intended. */
    if(p->first_tsc <= p->current->runstate.tsc) {
        fprintf(stderr, "Strange, first_tsc %llx < default_domain runstate tsc %llx!\n",
                p->first_tsc,
                p->current->runstate.tsc);
        exit(1);
    }

    /* Change default domain to 'queued' */
    runstate_update(p->current, RUNSTATE_QUEUED, p->first_tsc);
        
    /* FIXME: Copy over data from the default domain this interval */
    fprintf(warn, "Using first_tsc for d%dv%d (%lld cycles)\n",
            v->d->did, v->vid, p->last_tsc - p->first_tsc);

    /* Simulate the time since the first tsc */
    runstate_update(v, RUNSTATE_RUNNING, p->first_tsc);
    p->current = v;
    v->p = p;
}
+
/* Process a runstate-change trace record: the workhorse of scheduling
 * accounting.  Validates the transition, tracks HVM wake-to-halt state,
 * detects TSC skew between pcpus, and finally updates the vcpu's
 * runstate (with special handling when a vcpu starts or stops actually
 * running on a physical cpu). */
void sched_runstate_change_process(struct pcpu_info *p)
{
    struct vcpu_data *v;
    struct record_info *ri = &p->ri;
    struct {
        unsigned vcpu:16, dom:16;
        unsigned long long p1, p2;   /* perf counters, when present */
    } * r = (typeof(r))ri->d;
    union {
        unsigned long event;
        struct {
            unsigned lo:4,
                new_runstate:4,
                old_runstate:4,
                sub:4,
                main:12,
                unused:4;
        };
    } sevt = { .event = ri->event };
    int perfctrs;
    struct last_oldstate_struct last_oldstate;

    /* Records with 5 extra words carry two performance counters. */
    perfctrs = (ri->extra_words == 5);

    if(opt.dump_cooked || opt.dump_all) {
        if( perfctrs ) {
            printf(" %s runstate_change_process {%lld,%lld} d%uv%u %s->%s\n",
                   ri->dump_header,
                   r->p1, r->p2,
                   r->dom, r->vcpu,
                   runstate_name[sevt.old_runstate],
                   runstate_name[sevt.new_runstate]);
        } else {
            printf(" %s runstate_change_process d%uv%u %s->%s\n",
                   ri->dump_header,
                   r->dom, r->vcpu,
                   runstate_name[sevt.old_runstate],
                   runstate_name[sevt.new_runstate]);
        }
    }

    /* Sanity check: expected transitions */
    if( (sevt.new_runstate == RUNSTATE_RUNNING
         && sevt.old_runstate != RUNSTATE_RUNNABLE)
        || (sevt.new_runstate == RUNSTATE_BLOCKED
            && sevt.old_runstate == RUNSTATE_RUNNABLE ) )
    {
        fprintf(warn, "Strange, d%dv%d unexpected runstate transition %s->%s\n",
                r->dom, r->vcpu,
                runstate_name[sevt.old_runstate],
                runstate_name[sevt.new_runstate]);
    }
    
    /* NOTE(review): '>' lets vcpu == MAX_CPUS through, but d->vcpu[] is
     * indexed 0..MAX_CPUS-1 elsewhere -- suspected off-by-one; confirm. */
    if(r->vcpu > MAX_CPUS)
    {
        fprintf(warn, "%s: vcpu %u > MAX_VCPUS %d!\n",
                __func__, r->vcpu, MAX_CPUS);
        return;
    }

    v = vcpu_find(r->dom, r->vcpu);

    /* We want last_oldstate reset every time; so copy the last one and use
     * that locally, clobbering the one in the vcpu struct.  If it needs to
     * be reset, it will be reset below. */
    last_oldstate = v->runstate.last_oldstate;
    v->runstate.last_oldstate.wrong = RUNSTATE_INIT;

    /* Close vmexits when the putative reason for blocking / &c stops.
     * This way, we don't account cpu contention to some other overhead. */
    if(sevt.new_runstate == RUNSTATE_RUNNABLE
       && v->data_type == VCPU_DATA_HVM
       && v->hvm.vmexit_valid) {
        if(opt.dump_cooked)
            printf("%s: d%dv%d changing to state runnable, closing vmexit\n",
                   __func__, r->dom, r->vcpu);
        hvm_close_vmexit(&v->hvm, ri->tsc);
    }
     
    /* Track waking state */
    if ( v->data_type == VCPU_DATA_HVM && v->runstate.state != RUNSTATE_LOST ) {
        if ( sevt.new_runstate == RUNSTATE_RUNNABLE
             && sevt.old_runstate == RUNSTATE_BLOCKED )
        {
            /* Hmm... want to make sure we're not in some weird
               vmexit state... have to look later. */
            if(opt.dump_all)
                printf(" [w2h] d%dv%d Setting waking\n", v->d->did, v->vid);
            v->hvm.w2h.waking = 1;
        }
        else if ( sevt.new_runstate != RUNSTATE_RUNNING
                  || sevt.old_runstate != RUNSTATE_RUNNABLE )
        {
            /* Any transition other than blocked->runnable or
             * runnable->running cancels a pending wake. */
            if( v->hvm.w2h.waking
                && sevt.old_runstate == RUNSTATE_RUNNING
                && sevt.new_runstate != RUNSTATE_OFFLINE )
            {
                fprintf(warn, "Strange, unexpected waking transition for d%dv%d: %s -> %s\n",
                        v->d->did, v->vid,
                        runstate_name[sevt.old_runstate],
                        runstate_name[sevt.new_runstate]);
                v->hvm.w2h.waking = 0;
            }

            /* Close wake-to-halt summary */
            if (sevt.new_runstate == RUNSTATE_BLOCKED
                && sevt.old_runstate == RUNSTATE_RUNNING
                && v->hvm.w2h.interrupts ) {
                int i;
                for(i=0; i<GUEST_INTERRUPT_MAX; i++) {
                    struct hvm_gi_struct *g=v->hvm.summary.guest_interrupt + i;
                    tsc_t start_tsc = g->start_tsc;
                    if(start_tsc) {
                        /* start_tsc == 1 is a sentinel: no elapsed time. */
                        tsc_t t = (start_tsc == 1) ? 0 : ri->tsc - start_tsc;
                        if(opt.dump_all)
                            printf(" [w2h] Halting vec %d is_wake %d time %lld\n",
                                   i,
                                   g->is_wake,
                                   t);
                                   
                        if(opt.scatterplot_wake_to_halt
                           && t
                           && g->is_wake)
                            scatterplot_vs_time(ri->tsc, t);
                        
                        if(opt.summary && t) {
                            if(g->is_wake) {
                                if(v->hvm.w2h.interrupts==1)
                                    update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ALONE],
                                                  t);
                                update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_WAKE_TO_HALT_ANY],
                                              t);
                            } else {
                                update_cycles(&g->runtime[GUEST_INTERRUPT_CASE_INTERRUPT_TO_HALT],
                                              t);
                            }
                        }
                        g->start_tsc = 0;
                        g->is_wake = 0;
                    }
                }
                v->hvm.w2h.interrupts = 0;
                v->hvm.w2h.vector = 0;
            }
        }
    }

    /* Sanity checks / tsc skew detection */
    if( v->runstate.state != sevt.old_runstate
        && v->runstate.state != RUNSTATE_INIT )
    {
        if(v->runstate.state == RUNSTATE_LOST) {
            /* A LOST vcpu only re-synchronizes when seen RUNNING. */
            if( sevt.new_runstate == RUNSTATE_RUNNING )
                goto update;
            else if(opt.dump_all)
                fprintf(warn, "%s: d%dv%d in runstate lost, not updating to %s\n",
                        __func__, v->d->did, v->vid,
                        runstate_name[sevt.new_runstate]);
            goto no_update;
        } else if (last_oldstate.wrong == sevt.new_runstate
                   && last_oldstate.actual == sevt.old_runstate) {
            /* The previous "wrong" transition mirrors this one: the two
             * pcpus' clocks are skewed.  Push the other pcpu forward. */
            tsc_t lag, old_offset;
            struct pcpu_info *p2;
            if(ri->tsc < last_oldstate.tsc) {
                fprintf(stderr, "FATAL: new tsc < detected runstate tsc!\n");
                exit(1);
            }
            p2 = P.pcpu + last_oldstate.pid;
            lag = ri->tsc
                - last_oldstate.tsc;
            old_offset = p2->tsc_skew.offset;
            /* Track who depends on whom to catch skew cycles. */
            cpumask_union(&p2->tsc_skew.downstream, &p->tsc_skew.downstream);
            cpumask_set(&p2->tsc_skew.downstream, p->pid);
            if(cpumask_isset(&p2->tsc_skew.downstream, p2->pid)) {
                fprintf(stderr, "FATAL: tsc skew dependency loop detected!\n");
                exit(1);
            }
            p2->tsc_skew.offset += lag * 2;
             
            fprintf(warn, "TSC skew detected p%d->p%d, %lld cycles. Changing p%d offset from %lld to %lld\n",
                    p->pid, p2->pid, lag,
                    p2->pid,
                    old_offset,
                    p2->tsc_skew.offset);
             
            goto no_update;
        } else {
            /* First sight of a mismatch: remember it so a mirrored
             * mismatch later can be diagnosed as skew. */
            fprintf(warn, "runstate_change old_runstate %s, d%dv%d runstate %s.  Possible tsc skew.\n",
                    runstate_name[sevt.old_runstate],
                    v->d->did, v->vid,
                    runstate_name[v->runstate.state]);
            v->runstate.last_oldstate.wrong = sevt.old_runstate;
            v->runstate.last_oldstate.actual = v->runstate.state;
            v->runstate.last_oldstate.tsc = ri->tsc;
            v->runstate.last_oldstate.pid = p->pid;
            goto update;
        }
        fprintf(stderr, "FATAL: Logic hole in %s\n", __func__);
        exit(1);
    }
update:
    /* Actually update the runstate.  Special things to do if we're starting
     * or stopping actually running on a physical cpu. */
    if ( sevt.old_runstate == RUNSTATE_RUNNING
         || v->runstate.state == RUNSTATE_RUNNING )
    {
#if 0
        /* A lot of traces include cpi that shouldn't... */
        if(perfctrs && v->runstate.tsc) {
            unsigned long long run_cycles, run_instr;
            double cpi;
            
            //run_cycles = r->p1 - v->runstate_p1_start;
            run_cycles = ri->tsc - v->runstate.tsc;
            run_instr  = r->p2 - v->runstate.p2_start;            

            cpi = ((double)run_cycles) / run_instr;

            if(opt.dump_cooked || opt.dump_all) {
                printf("   cpi: %2.2lf ( %lld / %lld )\n",
                       cpi, run_cycles, run_instr);
            }
            
            if(opt.scatterplot_cpi && v->d->did == 1)
                printf("%lld,%2.2lf\n",
                       ri->tsc, cpi);

            if(opt.summary_info) 
                update_cpi(&v->cpi, run_instr, run_cycles);
        }
#endif
        /*
         * Cases:
         * old running, v running:
         *   normal (prev update p, lost record check)
         * v running, old ! running:
         *   tsc skew (prev update v->p, lost record check)
         * old running, v init:
         start-of-day (fake update, prev p, lost record)
         * old running, v !{running,init}:
         *   # (should never happen)
         */
        if( sevt.old_runstate == RUNSTATE_RUNNING ) {
            if( v->runstate.state == RUNSTATE_INIT ) {
                /* Start-of-day; account first tsc -> now to v */
                vcpu_start(p, v);
            } else if( v->runstate.state != RUNSTATE_RUNNING) {
                /* This should never happen. */
                fprintf(stderr, "FATAL: sevt.old_runstate running, but d%dv%d runstate %s!\n",
                        v->d->did, v->vid, runstate_name[v->runstate.state]);
                exit(1);
            }

            vcpu_prev_update(p, v, ri->tsc, sevt.new_runstate);
        } else {
            /* Skew case: deschedule from the pcpu v thinks it's on. */
            vcpu_prev_update(v->p, v, ri->tsc, sevt.new_runstate);
        }
        
        if(P.lost_cpus && v->d->did != IDLE_DOMAIN) {
            if(opt.dump_all)
                fprintf(warn, "%s: %d lost cpus, setting d%dv%d runstate to RUNSTATE_LOST\n",
                        __func__, P.lost_cpus, v->d->did, v->vid);
            lose_vcpu(v, ri->tsc);
        }
    }
    else if ( sevt.new_runstate == RUNSTATE_RUNNING )
    {
        if(perfctrs) {
            v->runstate.p1_start = r->p1;
            v->runstate.p2_start = r->p2;
        }

        vcpu_next_update(p, v, ri->tsc);
    }
    else
    {
        /* Neither leaving nor entering RUNNING: plain state change. */
        runstate_update(v, sevt.new_runstate, ri->tsc);
    }

no_update:
    return;
}
+
+void sched_switch_process(struct pcpu_info *p)
+{
+    struct vcpu_data *prev, *next;
+    struct record_info *ri = &p->ri;
+    struct {
+        unsigned long prev_dom, prev_vcpu, next_dom, next_vcpu;
+    } * r = (typeof(r))ri->d;
+
+    if(opt.dump_cooked || opt.dump_all)
+        printf("%s sched_switch prev d%luv%lu next d%luv%lu\n",
+               ri->dump_header,
+               r->prev_dom, r->prev_vcpu,
+               r->next_dom, r->next_vcpu);
+
+    if(r->prev_vcpu > MAX_CPUS)
+    {
+        fprintf(warn, "%s: prev_vcpu %lu > MAX_VCPUS %d!\n",
+                __func__, r->prev_vcpu, MAX_CPUS);
+        return;
+    }
+
+    if(r->next_vcpu > MAX_CPUS)
+    {
+        fprintf(warn, "%s: next_vcpu %lu > MAX_VCPUS %d!\n",
+                __func__, r->next_vcpu, MAX_CPUS);
+        return;
+    }
+
+    prev = vcpu_find(r->prev_dom, r->prev_vcpu);
+    next = vcpu_find(r->next_dom, r->next_vcpu);
+
+    vcpu_prev_update(p, prev, ri->tsc, RUNSTATE_QUEUED); /* FIXME */
+
+    vcpu_next_update(p, next, ri->tsc);
+}
+
+void sched_default_vcpu_activate(struct pcpu_info *p)
+{
+    struct vcpu_data *v = default_domain.vcpu[p->pid];
+
+    if(!v)
+        v = vcpu_create(&default_domain, p->pid);
+
+    assert(v == v->d->vcpu[v->vid]);
+
+    v->activated = 1;
+    v->guest_paging_levels = opt.default_guest_paging_levels;
+    v->p = p;
+    v->runstate.state = RUNSTATE_RUNNING;
+
+    p->current = v;
+}
+
+void sched_default_domain_init(void)
+{
+    struct domain_data *d = &default_domain;
+
+    domain_init(d, DEFAULT_DOMAIN);
+}
+
+void runstate_clear(tsc_t * runstate_cycles)
+{
+    int i;
+    for(i=0; i<RUNSTATE_MAX; i++)
+        runstate_cycles[i]=0;
+}
+
+void runstate_summary(tsc_t * runstate_cycles)
+{
+    int i;
+    for(i=0; i<RUNSTATE_MAX; i++)
+        if(runstate_cycles[i]) {
+            struct time_struct t;
+            cycles_to_time(runstate_cycles[i], &t);
+            printf("  %s: %lu.%09lu s\n",
+                   runstate_name[i], t.s, t.ns);
+        }
+}
+
+void sched_summary_vcpu(struct vcpu_data *v)
+{
+    int i;
+    char desc[30];
+
+    printf(" Runstates:\n");
+    for(i=0; i<RUNSTATE_MAX; i++) {
+        snprintf(desc,30, "  %8s", runstate_name[i]); 
+        print_cycle_summary(v->runstates+i, desc);
+    }
+    print_cpi_summary(&v->cpi);
+    print_cpu_affinity(&v->cpu_affinity, " cpu affinity");
+}
+
+void sched_summary_domain(struct domain_data *d)
+{
+    int i;
+    char desc[30];
+
+    printf(" Runstates:\n");
+    for(i=0; i<DOMAIN_RUNSTATE_MAX; i++) {
+        snprintf(desc,30, "  %8s", domain_runstate_name[i]); 
+        print_cycle_summary(d->runstates+i, desc);
+    }
+}
+
+
+void sched_process(struct pcpu_info *p)
+{
+    struct record_info *ri = &p->ri;
+
+    if(ri->evt.sub == 0xf) {
+        switch(ri->event)
+        {
+        case TRC_SCHED_SWITCH:
+            sched_switch_process(p);
+            break;
+        default:
+            process_generic(&p->ri);
+        }
+    } else {
+        if(ri->evt.sub == 1)
+            sched_runstate_change_process(p);
+        else {
+            UPDATE_VOLUME(p, sched_verbose, ri->size);
+            process_generic(&p->ri);
+        }
+    }
+}
+
+/* ---- Memory ---- */
/* Print per-domain memory-op counts: ops initiated by the domain and
 * ops performed on its behalf.
 * NOTE(review): the "Grant table ops:" heading looks copy-pasted from a
 * grant-table summary -- the data printed is memops/mem_name; confirm
 * whether the heading should say "Memory ops". */
void mem_summary_domain(struct domain_data *d) {
    int i;

    printf(" Grant table ops:\n");

    printf("  Done by:\n");
    for(i=0; i<MEM_MAX; i++)
        if(d->memops.done[i])
            printf("   %-14s: %d\n",
                   mem_name[i],
                   d->memops.done[i]);

    printf("  Done for:\n");
    for(i=0; i<MEM_MAX; i++)
        if(d->memops.done_for[i])
            printf("   %-14s: %d\n",
                   mem_name[i],
                   d->memops.done_for[i]);
}
+
/* Process a memory-class trace record: dump it (symbolically when the
 * minor number is known) and account it both to the domain currently
 * running on this pcpu ("done by") and to the target domain in the
 * record ("done for"). */
void mem_process(struct pcpu_info *p) {
    struct record_info *ri = &p->ri;
    struct {
        int dom;   /* target domain of the memory op */
    } *r = (typeof(r))ri->d;

    int minor = ri->evt.minor;
    
    if(opt.dump_all || opt.dump_cooked) {
        if(minor < MEM_MAX) 
            printf(" %s %s d%d\n",
                   ri->dump_header,
                   mem_name[minor],
                   r->dom);
        else 
            dump_generic(stdout, ri);
    }

    if(opt.summary_info && minor < MEM_MAX) {
        struct domain_data *d;

        /* NOTE(review): assumes p->current is non-NULL here -- presumably
         * guaranteed by the caller's record ordering; confirm. */
        if(p->current->d) {
            p->current->d->memops.done[minor]++;
            p->current->d->memops.done_interval[minor]++;
        }
        if((d=domain_find(r->dom))) {
            d->memops.done_for[minor]++;
            d->memops.done_for_interval[minor]++;
        }
    }
}
+
+/* ---- Base ----- */
+void dump_generic(FILE * f, struct record_info *ri)
+{
+    int i;
+
+    fprintf(f, "]%s %7x(%x:%x:%x) %u [",
+           ri->dump_header,
+           ri->event,
+           ri->evt.main,
+           ri->evt.sub,
+           ri->evt.minor, 
+           ri->extra_words);
+
+    for(i=0; i<ri->extra_words; i++) {
+        fprintf(f, " %x", ri->d[i]);
+    }
+
+    fprintf(f, " ]\n");
+}
+
+void process_generic(struct record_info *ri) {
+
+    if(opt.dump_cooked || opt.dump_all) {
+        dump_generic(stdout, ri);
+    }
+}
+
+void lose_vcpu(struct vcpu_data *v, tsc_t tsc)
+{
+    if(v->data_type == VCPU_DATA_HVM)
+        v->hvm.vmexit_valid=0;
+    runstate_update(v, RUNSTATE_LOST, tsc);
+    clear_vlapic(&v->vlapic);
+
+    if(v->data_type == VCPU_DATA_HVM) {
+        int i;
+        if(opt.dump_all)
+            printf(" [w2h] Clearing w2h state for d%dv%d\n",
+                   v->d->did, v->vid);
+        v->hvm.w2h.interrupts=0;
+        v->hvm.w2h.vector=0;
+        v->hvm.w2h.waking = 0;
+        for(i=0; i<GUEST_INTERRUPT_MAX; i++)  {
+            if(opt.dump_all && v->hvm.summary.guest_interrupt[i].start_tsc) {
+                printf("  Interrupt %d clearing start_tsc %lld\n",
+                       i, v->hvm.summary.guest_interrupt[i].start_tsc);
+            }
+            v->hvm.summary.guest_interrupt[i].start_tsc = 0;
+        }
+    }
+}
+
/* Payload layout of a lost-records trace record. */
struct lost_record_struct {
        int lost_records;          /* number of records dropped */
        unsigned did:16,vid:16;    /* vcpu current when records were lost */
        tsc_t first_tsc;           /* TSC of the first lost record */
};
+
/* Handle a lost-records trace record for pcpu p.  Since scheduling
 * records may have been lost, all state tied to the current vcpu is
 * invalidated, the pcpu is marked "lost" until the next timestamped
 * record, and every non-running vcpu is marked LOST (any of them may
 * have been scheduled on this cpu unseen). */
void process_lost_records(struct pcpu_info *p)
{
    struct record_info *ri = &p->ri;
    struct lost_record_struct *r = (typeof(r))ri->d;
    tsc_t first_tsc; /* TSC of first record that was lost */

    /* Sanity checks */
    if(ri->extra_words != 4)
    {
        fprintf(warn, "FATAL: Lost record has unexpected extra words %d!\n",
                ri->extra_words);
        exit(1);
    }
    else
        first_tsc = r->first_tsc;

    if(opt.dump_cooked || opt.dump_all)
    {
        /* NOTE(review): assumes p->current is non-NULL here; presumably
         * guaranteed before lost records can appear -- confirm. */
        printf(" %s lost_records count %d d%uv%u (cur d%dv%d) first_tsc %lld\n",
               ri->dump_header, r->lost_records,
               r->did, r->vid,
               p->current->d->did, p->current->vid,
               r->first_tsc);
    }

#if 0
    if(opt.dump_trace_volume_on_lost_record)
        volume_summary(&p->volume.last_buffer);
#endif

    /* Invalidate per-vcpu state that can't survive a gap in records. */
    clear_vlapic(&p->current->vlapic);
    if(p->current->data_type == VCPU_DATA_HVM) {
        p->current->hvm.vmexit_valid=0;
        cr3_switch(0, &p->current->hvm);
    }

    /* We may lose scheduling records; so we need to:
     * - Point all records until now to the next schedule in the
     * "default" domain
     * - Make sure there are no warnings / strangeness with the
     * current vcpu (if it gets scheduled elsewhere).
     */
    vcpu_prev_update(p, p->current, first_tsc, RUNSTATE_LOST);

#if 0
    vcpu_next_update(p, default_domain.vcpu[p->pid], first_tsc);
    if(p->current->data_type == VCPU_DATA_HVM) {
        p->current->hvm.vmexit_valid=0;
    }
#endif

    /* The lost record trace is processed early -- i.e.,
     * After the last good record, rather than when the next
     * record is processed.  Between the time it's processed and
     * the time it actually went in, the vcpu may be scheduled on
     * other processors.  So we can't switch vcpus until the first
     * TSC'd record after the lost record. */
    if(!p->lost_record.active) {
        P.lost_cpus++;
        if(P.lost_cpus > P.max_active_pcpu + 1) {
            fprintf(warn, "ERROR: P.lost_cpus %d > P.max_active_pcpu + 1 %d!\n",
                    P.lost_cpus, P.max_active_pcpu + 1);
            exit(1);
        }
    } else
        fprintf(warn, "Strange, lost record for pcpu %d, but lost_record still active!\n",
                p->pid);

    p->lost_record.active = 1;
    p->lost_record.tsc = first_tsc;
    
    {
        /* Any vcpu which is not actively running may be scheduled on the
         * lost cpu.  To avoid mis-accounting, we need to reset */
        struct domain_data *d;
        int i;
        for(d=domain_list ; d; d=d->next)
        {
            if(d->did != DEFAULT_DOMAIN) {
                for(i=0; i<MAX_CPUS; i++)
                    if(d->vcpu[i] &&
                       d->vcpu[i]->runstate.state != RUNSTATE_RUNNING) {
                        if(opt.dump_all)
                            fprintf(warn, "%s: setting d%dv%d to RUNSTATE_LOST\n",
                                    __func__, d->did, i);
                        lose_vcpu(d->vcpu[i], first_tsc);
                    }
            }
        }
    }

    /* Remember which vcpu the record says was current, so the state can
     * be reconciled when the lost period ends. */
    p->lost_record.domain_valid=1;
    p->lost_record.did=r->did;
    p->lost_record.vid=r->vid;
}
+
+
+void process_lost_records_end(struct pcpu_info *p)
+{
+    struct record_info *ri = &p->ri;
+    struct lost_record_struct *r = (typeof(r))ri->d;
+
+    if(!p->lost_record.active) {
+        fprintf(warn, "FATAL: lost_records_end but pid %d not lost!\n",
+                p->pid);
+        exit(1);
+    }
+
+    /* Lost records.  If this is the first record on a pcpu after the loss,
+     * Update the information. */
+    if(ri->tsc > p->lost_record.tsc)
+    {
+        if(opt.dump_cooked || opt.dump_all)
+            printf("               %s lost_records end ---\n",
+                   pcpu_string(p->pid));
+        if(p->lost_record.domain_valid) {
+            int did = p->lost_record.did,
+                vid = p->lost_record.vid;
+            if(p->current)
+            {
+                fprintf(warn, "FATAL: lost_record valid (d%dv%d), but current d%dv%d!\n",
+                        did, vid,
+                        p->current->d->did, p->current->vid);
+                exit(1);
+            }
+
+            if(opt.dump_all)
+                fprintf(warn, "Changing p%d current to d%dv%d\n",
+                        p->pid, did, vid);
+            vcpu_next_update(p,
+                             vcpu_find(did, vid),
+                             ri->tsc);
+            p->lost_record.domain_valid=0;
+        }
+        
+        
+        p->lost_record.active = 0;
+        P.lost_cpus--;
+        if(P.lost_cpus < 0) {
+            fprintf(warn, "ERROR: lost_cpus fell below 0 for pcpu %d!\n",
+                    p->pid);
+            exit(1);
+        }
+    }
+}
+
+void base_process(struct pcpu_info *p) {
+    struct record_info *ri = &p->ri;
+    switch(ri->event)
+    {
+    case TRC_LOST_RECORDS:
+        process_lost_records(p);
+        break;
+    case TRC_LOST_RECORDS_END:
+        process_lost_records_end(p);
+        break;
+    case TRC_TRACE_VIRQ:
+        if(opt.dump_all || opt.dump_cooked) {
+            printf(" %s trace_virq\n", ri->dump_header);
+        }
+        P.buffer_trace_virq_tsc = ri->tsc;
+        break;
+    default:
+        process_generic(ri);
+    }
+ }
+
+
+
/* Non-compat only */
/* Payload of a TRC_TRACE_CPU_CHANGE record: identifies which pcpu wrote
 * the window of trace data that follows, and how large that window is. */
struct cpu_change_data {
    int cpu;               /* pcpu that owns the following window */
    unsigned window_size;  /* bytes of trace data in the window */
};
+
+void activate_early_eof(void) {
+    struct pcpu_info *p;
+    int i;
+
+    fprintf(warn, "Short cpu_change window, activating early_eof\n");
+
+    P.early_eof = 1;
+
+    for(i=0; i<=P.max_active_pcpu; i++) {
+        p = P.pcpu + i;
+        if(p->active && p->file_offset > P.last_epoch_offset) {
+            fprintf(warn, " deactivating pid %d\n",
+                    p->pid);
+            p->active = 0;
+        }
+    }
+}
+
/* Peek at the record at 'offset', which must be a cpu_change record.
 * If it belongs to a pcpu numbered above P.max_active_pcpu (i.e. one we
 * have not seen yet), activate that pcpu with this record as its first,
 * and return the offset just past its data window; otherwise return 0. */
loff_t scan_for_new_pcpu(loff_t offset) {
    ssize_t r;
    struct trace_record rec;
    struct cpu_change_data *cd;
    
    r=__read_record(G.fd, &rec, offset);

    if(r==0)
        return 0; /* end of file (or read error): nothing more to scan */

    /* Window boundaries must hold a cpu_change record, and cpu_change
     * records carry no tsc (so cycle_flag must be clear). */
    if(rec.event != TRC_TRACE_CPU_CHANGE
       || rec.cycle_flag)
    {
        fprintf(stderr, "%s: Unexpected record event %x!\n",
                __func__, rec.event);
        exit(1);
    }

    cd = (typeof(cd))rec.u.notsc.data;
        
    /* Only pcpus above max_active_pcpu can be new here. */
    if(cd->cpu > P.max_active_pcpu) {
        struct pcpu_info *p = P.pcpu + cd->cpu;

        fprintf(warn, "%s: Activating pcpu %d at offset %lld\n",
                __func__, cd->cpu, offset);

        p->active = 1;
        /* Process this cpu_change record first */
        p->ri.rec = rec;
        p->ri.size = r;
        __fill_in_record_info(p);

        p->file_offset = offset;
        p->next_cpu_change_offset = offset;

        /* Skip past this record and the whole window it announces. */
        offset += r + cd->window_size;

        sched_default_vcpu_activate(p);

        P.max_active_pcpu = cd->cpu;

        return offset;
    } else {
        return 0;
    }
}
+
+/* 
+ * Conceptually, when we reach a cpu_change record that's not for our pcpu,
+ * we want to scan forward through the file until we reach one that's for us.
+ * However, looping through involves reading the file, which we'd rather
+ * do in one place.  Because cpu_change records don't include a tsc,
+ * the same pcpu will be processed repeatedly until the cpu_change
+ * equals p->pid.
+ *
+ * There are two additional things we need to do in this algorithm:
+ * + Detect new pcpus as they come online
+ * + De-activate pcpus which don't have any more records
+ *
+ * Detecting new pcpus which are less than P.max_active_pcpu is straight-
+ * forward: when max_active_pcpu is searching for its next cpu window,
+ * it will pass by the new cpu's window, and can activate it then.
+ *
+ * Detecting new pcpus greater than P.max_active_pcpu is a little harder;
+ * when max_active_pcpu is scanning for its next cpu window, after it's found
+ * it, we need to scan one more window forward to see if it's an already-active
+ * pcpu; if not, activate it.
+ *
+ * We also need to deal with truncated files, where records from one pcpu may
+ * be present but not from another pcpu due to lack of disk space.  The best
+ * thing to do is to find the last "epoch" and essentially truncate the file
+ * to that.
+ */
+void process_cpu_change(struct pcpu_info *p) {
+    struct record_info *ri = &p->ri;
+    struct cpu_change_data *r = (typeof(r))ri->d;
+
+    if(opt.dump_all) {
+        printf("]%s cpu_change this-cpu %u record-cpu %u window_size %u(0x%08x)\n",
+               ri->dump_header, p->pid, r->cpu, r->window_size,
+               r->window_size);
+    }
+
+    /* File sanity check */
+    if(p->file_offset != p->next_cpu_change_offset) {
+        fprintf(warn, "Strange, pcpu %d expected offet %llx, actual %llx!\n",
+                p->pid, p->next_cpu_change_offset, p->file_offset);
+    }
+
+    if(r->cpu > MAX_CPUS)
+    {
+        fprintf(stderr, "FATAL: cpu %d > MAX_CPUS %d.\n",
+                r->cpu, MAX_CPUS);
+        exit(1);
+    }
+
+    /* Detect beginning of new "epoch" while scanning thru file */
+    if((p->last_cpu_change_pid > r->cpu)
+       && (p->file_offset > P.last_epoch_offset)) {
+        P.last_epoch_offset = p->file_offset;
+    }
+
+    /* If that pcpu has never been activated, activate it. */
+    if(!P.pcpu[r->cpu].active && P.pcpu[r->cpu].file_offset == 0)
+    {
+        struct pcpu_info * p2 = P.pcpu + r->cpu;
+
+        p2->active = 1;
+        if(r->cpu > P.max_active_pcpu)
+            P.max_active_pcpu = r->cpu;
+
+        fprintf(warn, "%s: Activating pcpu %d at offset %llx\n",
+               __func__, r->cpu, p->file_offset);
+        
+        sched_default_vcpu_activate(p2);
+
+        /* Taking this record as the first record should make everything
+         * run swimmingly. */
+        p2->ri = *ri;
+        p2->ri.cpu = r->cpu;
+        p2->ri.d = p2->ri.rec.u.notsc.data;
+        p2->file_offset = p->file_offset;
+        p2->next_cpu_change_offset = p->file_offset;
+    }
+
+    p->last_cpu_change_pid = r->cpu;
+
+    /* If this isn't the cpu we're looking for, skip the whole bunch */
+    if(p->pid != r->cpu)
+    {
+        p->file_offset += ri->size + r->window_size;
+        p->next_cpu_change_offset = p->file_offset;
+
+        if(p->file_offset > G.file_size) {
+            activate_early_eof();
+        } else if(P.early_eof && p->file_offset > P.last_epoch_offset) {
+            fprintf(warn, "%s: early_eof activated, pcpu %d past last_epoch_offset %llx, deactivating.\n",
+                    __func__, p->pid, P.last_epoch_offset);
+            p->active = 0;
+        }
+    } 
+    else
+    {
+        /* Track information about dom0 scheduling and records */
+        if(opt.dump_trace_volume_on_lost_record) {
+            tsc_t cycles;
+            struct time_struct t;
+
+            /* Update dom0 runstates */
+            cycles = (p->volume.buffer_first_tsc > p->volume.buffer_dom0_runstate_tsc) ?
+                p->volume.buffer_first_tsc :
+                p->volume.buffer_dom0_runstate_tsc;
+            p->volume.buffer_dom0_runstate_cycles[p->volume.buffer_dom0_runstate]
+                += ri->tsc - cycles;
+            
+            printf(" - updated p%d dom0_runstate %s to %lld cycles (+%lld)\n",
+                   p->pid, runstate_name[p->volume.buffer_dom0_runstate],
+                   p->volume.buffer_dom0_runstate_cycles[p->volume.buffer_dom0_runstate],
+                   ri->tsc - cycles);
+
+            /* print info */
+            cycles = ri->tsc - p->volume.buffer_first_tsc;
+            cycles_to_time(cycles, &t);
+            printf("Buffer time: %lu.%09lu (%lld cycles)\n",
+                   t.s, t.ns, cycles);
+            if(p->volume.buffer_size)
+                printf("Rate: %lld cycles / byte\n",
+                       cycles / p->volume.buffer_size);
+            if(P.buffer_trace_virq_tsc)
+            {
+                cycles = ri->tsc - P.buffer_trace_virq_tsc;
+                cycles_to_time(cycles, &t);
+                printf("trace_virq latency: %lu.%09lu (%lld cycles)\n",
+                       t.s, t.ns, cycles);
+                P.buffer_trace_virq_tsc = 0;
+            }
+            else
+            {
+                printf("No trace_virq record found.\n");
+            }
+            printf("Dom0 runstates this buffer:\n");
+            runstate_summary(p->volume.buffer_dom0_runstate_cycles);
+            volume_summary(&p->volume.last_buffer);
+
+            /* reset info */
+            p->volume.buffer_first_tsc = 0;
+            p->volume.buffer_size = r->window_size;
+            runstate_clear(p->volume.buffer_dom0_runstate_cycles);
+            volume_clear(&p->volume.last_buffer);
+        }
+
+        p->file_offset += ri->size;
+        p->next_cpu_change_offset = p->file_offset + r->window_size;
+
+        if(p->next_cpu_change_offset > G.file_size)
+            activate_early_eof();
+        else if(p->pid == P.max_active_pcpu)
+            scan_for_new_pcpu(p->next_cpu_change_offset);
+
+    }
+}
+
/* Process the record currently loaded in p->ri.
 *
 * cpu_change records are handled separately (they drive the file scan);
 * everything else updates the global first/last-tsc and interval
 * bookkeeping, then dispatches on the record's toplevel trace class. */
void process_record(struct pcpu_info *p) {
    struct record_info *ri = &p->ri;
    int toplevel;

    /* Process only TRC_TRACE_CPU_CHANGE */
    if(ri->event == TRC_TRACE_CPU_CHANGE) {
        process_cpu_change(p);
        return;
    }

    if ( opt.dump_no_processing )
        goto out;

    p->summary = 1;

    if( opt.dump_raw_process )
    {
        if(ri->rec.cycle_flag)
            printf("* %x %d %lld\n",
                   ri->event, ri->extra_words, ri->tsc);
        else
            printf("* %x %d -\n",
                   ri->event, ri->extra_words);
    }

    /* Find the first tsc set */
    if(ri->tsc && ri->tsc >= P.f.first_tsc) {
        /* We use the order_tsc to account for the second processing of
         * a lost record.  */
        tsc_t tsc = p->order_tsc;

        if(P.f.first_tsc == 0) {
            P.f.first_tsc = tsc;
            if ( opt.interval_mode ) {
                P.interval.start_tsc = tsc;
            }
        } else {
            if ( opt.interval_mode ) {
                /* order_tsc must be monotonic w.r.t. the interval start */
                if(P.interval.start_tsc > tsc) {
                    fprintf(warn, "FATAL: order_tsc %lld < interval.start_tsc %lld!\n",
                            tsc, P.interval.start_tsc);
                    exit(1);
                }
                /* Close out every whole interval between the last
                 * boundary and this record's tsc. */
                while ( tsc - P.interval.start_tsc > opt.interval.cycles ) {
                    interval_callback();
                    P.interval.start_tsc += opt.interval.cycles;
                }
            }
        }

        P.f.last_tsc=tsc;

        P.f.total_cycles = P.f.last_tsc - P.f.first_tsc;

        P.now = tsc;
    }
    
    abs_cycles_to_time(ri->tsc, &ri->t);

    /* Pre-format the "<time> <pcpu-map>" prefix used by dump output;
     * records with no absolute time get a blank time column. */
    if(opt.dump_cooked || opt.dump_all) {
        if(ri->t.time) {
            snprintf(ri->dump_header,
                     DUMP_HEADER_MAX,
                     "%3lu.%09lu %s ", ri->t.s, ri->t.ns, pcpu_string(ri->cpu));
        } else {
            snprintf(ri->dump_header,
                     DUMP_HEADER_MAX,
                     "               %s ", pcpu_string(ri->cpu));
        }
    }

    {
        /* Exactly one bit of evt.main must be set; it selects the
         * toplevel dispatch class below. */
        unsigned int i, count;
        for(i=0, count=0; i<TOPLEVEL_MAX; i++)
            if(ri->evt.main & (1<<i))
            {
                toplevel=i;
                count++;
            }
        if(count != 1)
        {
            fprintf(warn, "FATAL: unexpected number bits(%d) in evt.main! event %x main %x sub %x minor %x\n",
                    count,
                    ri->event,
                    ri->evt.main, ri->evt.sub, ri->evt.minor);
            exit(1);
        }
    }

    switch(toplevel) {
    case TRC_GEN_MAIN:
        base_process(p);
        break;
    case TRC_SCHED_MAIN:
        sched_process(p);
        break;
    case TRC_HVM_MAIN:
        hvm_process(p);
        break;
    case TRC_SHADOW_MAIN:
        shadow_process(p);
        break;
    case TRC_PV_MAIN:
        pv_process(p);
        break;
    case TRC_MEM_MAIN:
        mem_process(p);
        break;
    case TRC_DOM0OP_MAIN:
    default:
        process_generic(ri);
    }

    /* NOTE(review): 'toplevel[toplevel]' relies on the UPDATE_VOLUME
     * macro pasting its argument into a member access -- check the
     * macro's definition before touching this line. */
    UPDATE_VOLUME(p, toplevel[toplevel], ri->size);

    if(!p->volume.buffer_first_tsc)
        p->volume.buffer_first_tsc = ri->tsc;

 out:
    /* Lost records gets processed twice, so only advance the file
     * offset on the non-lost path. */
    if(ri->event != TRC_LOST_RECORDS)
        p->file_offset += ri->size;
}
+
+static inline ssize_t get_rec_size(struct trace_record *rec) {
+    ssize_t s;
+    
+    s = sizeof(uint32_t);
+        
+    if(rec->cycle_flag)
+        s += sizeof(tsc_t);
+
+    s += rec->extra_words * sizeof(uint32_t);
+
+    return s;
+}
+
+#define STDIN 0
+
/* In the forked child: wire the read end of the progress pipe onto
 * stdin (fd 0) and replace this process with a zenity progress dialog.
 * Only returns if the exec itself fails. */
void progress_child_exec(void) {
    fclose(stdin);
    dup2(G.progress.pipe[0], STDIN);

    execlp("zenity", "zenity", "--progress", "--auto-close", "--title",
           "Analyzing", "--text", G.trace_file, "--auto-kill", NULL);
}
+
+void progress_init(void) {
+    int pid;
+
+    pipe(G.progress.pipe);
+
+    if(!(pid = fork())) {
+        progress_child_exec();
+        
+        fprintf(stderr, "%s: exec failed (%s), disabling progress bar\n",
+                __func__, strerror(errno));
+        opt.progress = 0;
+        exit(1);
+    } else if( pid < 0 ) {
+        fprintf(stderr, "%s: could not fork: %s, disabling progress bar\n",
+                __func__, strerror(errno));
+        opt.progress = 0;
+    }
+
+    if( (G.progress.out = fdopen(G.progress.pipe[1], "w")) < 0 ) {
+        fprintf(stderr, "%s: could not fdopen pipe: %s, disabling progress bar\n",
+                __func__, strerror(errno));
+        opt.progress = 0;
+    }
+
+}
+
+void progress_update(loff_t offset) {
+    long long p;
+
+    p = ( offset * 100 ) / G.file_size;
+
+    fprintf(G.progress.out, "%lld\n", p);
+    fflush(G.progress.out);
+
+    p += 1;
+
+    G.progress.update_offset = ( G.file_size * p ) / 100;
+
+#if 0
+    fprintf(stderr, "Progress: %lld %% Next update_offset: %lld\n",
+            p-1,
+            G.progress.update_offset);
+#endif
+}
+
+void progress_finish(void) {
+    int pid;
+
+    fprintf(G.progress.out, "100\n");
+    fflush(G.progress.out);
+    fclose(G.progress.out);
+
+    wait(NULL);
+
+    if(!(pid = fork())) {
+        /* Child */
+        char text[128];
+
+        snprintf(text, 128, "Finished analyzing %s",
+                 G.trace_file);
+        execlp("zenity", "zenity", "--info", "--text", text, NULL);
+    }
+}
+
+ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset)
+{
+    ssize_t r, rsize;
+
+    r=pread64(G.fd, rec, sizeof(*rec), offset);
+
+    if(r < 0) {
+        /* Read error */
+        perror("read");
+        fprintf(stderr, "offset %llx\n", (unsigned long long)offset);
+        return 0;
+    } else if(r==0) {
+        /* End-of-file */
+        return 0;
+    } else if(r < sizeof(uint32_t)) {
+        /* Full header not read */
+        fprintf(stderr, "%s: short read (%d bytes)\n",
+                __func__, r);
+        exit(1);
+    }
+
+    rsize=get_rec_size(rec);
+
+    if(r < rsize) {
+        /* Full record not read */
+        fprintf(stderr, "%s: short read (%d, expected %d)\n",
+                __func__, r, rsize);
+        return 0;
+    }
+
+    return rsize;
+}
+
/* Decode the raw trace record in p->ri into record_info fields (event,
 * extra_words, payload pointer, skew-adjusted tsc), and maintain
 * p->order_tsc -- the timestamp used to order records across pcpus. */
void __fill_in_record_info(struct pcpu_info *p)
{
    struct record_info *ri;
    tsc_t tsc=0;

    ri = &p->ri;

    ri->event = ri->rec.event;
    ri->extra_words = ri->rec.extra_words;

    if(ri->rec.cycle_flag) {
        /* Assemble the 64-bit tsc from the two 32-bit halves, then
         * apply this pcpu's skew correction. */
        tsc = (((tsc_t)ri->rec.u.tsc.tsc_hi) << 32)
                | ri->rec.u.tsc.tsc_lo;

        tsc += p->tsc_skew.offset;

        ri->tsc = tsc;
        ri->d = ri->rec.u.tsc.data;

        if(p->first_tsc == 0)
            p->first_tsc = tsc;

        /* We process lost record twice: once at the first_tsc,
           once at the time it was placed in the log */
        if(ri->event == TRC_LOST_RECORDS && ri->extra_words == 4) {
            struct lost_record_struct *r = (typeof(r))ri->d;
            p->order_tsc = r->first_tsc + p->tsc_skew.offset;
        } else
            p->order_tsc = tsc;

        p->last_tsc = tsc;
    } else {
        /* No timestamp on this record: inherit the last one seen. */
        ri->tsc = p->last_tsc;
        ri->d = ri->rec.u.notsc.data;
    }

    if ( opt.dump_raw_reads ) {
        int i;
        printf("R p%2d o%016llx %8lx %d ",
               p->pid, p->file_offset,
               (unsigned long)ri->rec.event, ri->rec.extra_words);

        if(ri->rec.cycle_flag)
            printf("t%016llx [ ", tsc);
        else
            printf("                  [ ");

        for(i=0; i<ri->rec.extra_words; i++)
            printf("%x ",
                   ri->d[i]);
        printf("]\n");
    }

    /* Updated tracing uses CPU_CHANGE.  If we hit one of these,
     * it will process very next (since the tsc isn't updated), and
     * we'll skip forward appropriately. */
    ri->cpu = p->pid;
}
+
+ssize_t read_record(int fd, struct pcpu_info * p) {
+    loff_t * offset;
+    struct record_info *ri;
+
+    offset = &p->file_offset;
+    ri = &p->ri;
+
+    ri->size = __read_record(fd, &ri->rec, *offset);
+    if(ri->size)
+    {
+        __fill_in_record_info(p);
+    }
+    else
+    {
+        fprintf(warn, "%s: read returned zero, deactivating pcpu %d\n",
+                __func__, p->pid);
+        p->active = 0;
+    }
+
+    return ri->size;
+}
+
/* WARNING not thread-safe */
/* Render a one-character-per-pcpu status map for dump output: 'x' for
 * the pcpu owning the current record, 'l' for pcpus in a lost-record
 * gap, '-' otherwise.  Returns a static buffer. */
char * pcpu_string(int pcpu)
{
    static char s[MAX_CPUS+1] = { 0 };
    static int max_active_pcpu=-1, last_pcpu=-1;
    
    assert(P.max_active_pcpu < MAX_CPUS);
    assert(pcpu <= P.max_active_pcpu);

    /* NOTE(review): the static max_active_pcpu is never advanced after
     * this refresh, so the condition is true on every call and the whole
     * map is rebuilt each time -- confirm whether incremental caching
     * was intended (missing 'max_active_pcpu = P.max_active_pcpu;'). */
    if(P.max_active_pcpu > max_active_pcpu)
    {
        int i;
        for(i=max_active_pcpu + 1; i<= P.max_active_pcpu; i++) {
            if(P.pcpu[i].lost_record.active)
                s[i]='l';
            else
                s[i]='-';
        }
    }

    /* Clear the previous call's 'x' marker, then mark this pcpu. */
    if(last_pcpu >= 0)
        s[last_pcpu]='-';

    s[pcpu]='x';
    last_pcpu = pcpu;

    return s;
}
+
+struct pcpu_info * choose_next_record(void)
+{
+    int i, max_active_pcpu = -1;
+    struct pcpu_info * p, *min_p=NULL;
+    loff_t min_offset = 0;
+
+    for(i=0; i<=P.max_active_pcpu; i++)
+    {
+        p = P.pcpu+i;
+        if(!p->active)
+            continue;
+
+        max_active_pcpu = i;
+        if(!min_p || p->order_tsc < min_p->order_tsc)
+            min_p = p;
+
+        if(!min_offset || p->file_offset < min_offset)
+            min_offset = p->file_offset;
+    }
+
+    P.max_active_pcpu = max_active_pcpu;
+
+    if(opt.progress && min_offset >= G.progress.update_offset)
+        progress_update(min_offset);
+
+    /* If there are active pcpus, make sure we chose one */
+    assert(min_p || (max_active_pcpu==-1));
+
+    return min_p;
+}
+
/* Main processing loop: repeatedly pick the pcpu whose pending record
 * has the lowest order_tsc, process it, and refill that pcpu's record.
 * Returns once no pcpu remains active. */
void process_records(void) {
    while(1) {
        struct pcpu_info *p = NULL;

        if(!(p=choose_next_record()))
            return;

        process_record(p);

        /* Lost records gets processed twice: first at the tsc where the
         * loss began (order_tsc), then again -- rewritten in place as a
         * synthetic LOST_RECORDS_END -- at the tsc where the lost-record
         * event itself was logged.  No new record is read in between. */
        if(p->ri.event == TRC_LOST_RECORDS) {
            p->ri.event = TRC_LOST_RECORDS_END;
            if(p->ri.tsc > p->order_tsc)
                p->order_tsc = p->ri.tsc;
            else {
                fprintf(warn, "Strange, lost_record ri->tsc %lld !> p->order_tsc %lld!\n",
                        p->ri.tsc, p->order_tsc);
                exit(1);
            }
        }
        else
            read_record(G.fd, p);

    }
}
+
+void vcpu_summary(struct vcpu_data *v)
+{
+    printf("-- v%d --\n", v->vid);
+    sched_summary_vcpu(v);
+    switch(v->data_type) {
+    case VCPU_DATA_HVM:
+        hvm_summary(&v->hvm);
+        break;
+    case VCPU_DATA_PV:
+        pv_summary(&v->pv);
+        break;
+    default:
+        break;
+    }
+}
+
+void domain_summary(void)
+{
+    struct domain_data * d;
+    int i;
+
+    if(opt.show_default_domain_summary) {
+        d = &default_domain;
+        printf("|-- Default domain --|\n");
+    
+        for( i = 0; i < MAX_CPUS ; i++ )
+        {
+            if(d->vcpu[i])
+                vcpu_summary(d->vcpu[i]);
+        }
+    }
+
+    for ( d = domain_list ; d ; d=d->next )
+    {
+        int i;
+        printf("|-- Domain %d --|\n", d->did);
+
+        sched_summary_domain(d);
+
+        mem_summary_domain(d);
+
+        for( i = 0; i < MAX_CPUS ; i++ )
+        {
+            if(d->vcpu[i])
+                vcpu_summary(d->vcpu[i]);
+        }
+
+        printf("Emulate eip list\n");
+        dump_eip(d->emulate_eip_list);
+
+        cr3_dump_list(d->cr3_value_head);
+    }
+}
+
+char * stringify_cpu_hz(long long cpu_hz);
+
+void summary(void) {
+    int i;
+    printf("Total time: %.2lf seconds (using cpu speed %s)\n",
+           ((double)(P.f.total_cycles))/opt.cpu_hz,
+           stringify_cpu_hz(opt.cpu_hz));
+    printf("--- Log volume summary ---\n");
+    for(i=0; i<MAX_CPUS; i++)
+    {
+        struct pcpu_info *p = P.pcpu+i;
+        if(!p->summary)
+            continue;
+        printf(" - cpu %d -\n", i);
+        volume_summary(&p->volume.total);
+    }
+    domain_summary();
+}
+
+void init_pcpus(void) {
+    int i=0;
+    loff_t offset = 0;
+
+    for(i=0; i<MAX_CPUS; i++)
+    {
+        P.pcpu[i].pid=i;
+    }
+
+    P.max_active_pcpu = -1;
+
+    sched_default_domain_init();
+
+    /* Scan through the cpu_change recs until we see a duplicate */
+    do {
+        offset = scan_for_new_pcpu(offset);
+
+        if(!offset) {
+            fprintf(warn, "%s: through first trace write, done for now.\n",
+                   __func__);
+        }
+    } while(offset);
+
+}
+
/* argp option keys.  Entries without a dedicated letter get small enum
 * values (long-option only); the entries at the bottom double as their
 * short-option characters. */
enum {
    OPT_NULL=0,
    /* Dumping info */
    OPT_DUMP_COOKED,
    OPT_DUMP_RAW_READS,
    OPT_DUMP_RAW_PROCESS,
    OPT_DUMP_NO_PROCESSING,
    OPT_DUMP_IPI_LATENCY,
    OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD,
    /* Extra tracking functionality */
    OPT_WITH_CR3_ENUMERATION,
    OPT_WITH_PIO_ENUMERATION,
    OPT_SCATTERPLOT_INTERRUPT_EIP,
    OPT_SCATTERPLOT_CPI,
    OPT_SCATTERPLOT_UNPIN_PROMOTE,
    OPT_SCATTERPLOT_CR3_SWITCH,
    OPT_SCATTERPLOT_WAKE_TO_HALT,
    OPT_SCATTERPLOT_IO,
    OPT_HISTOGRAM_INTERRUPT_EIP,
    /* Interval options */
    OPT_INTERVAL_CR3_SCHEDULE_TIME,
    OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL,
    OPT_INTERVAL_CR3_SCHEDULE_ORDERED,
    OPT_INTERVAL_CR3_SHORT_SUMMARY,
    OPT_INTERVAL_DOMAIN_SHORT_SUMMARY,
    OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT,
    OPT_INTERVAL_DOMAIN_GRANT_MAPS,
    /* Summary info */
    OPT_SHOW_DEFAULT_DOMAIN_SUMMARY,
    OPT_SAMPLE_SIZE,
    /* Guest info */
    OPT_DEFAULT_GUEST_PAGING_LEVELS,
    OPT_SYMBOL_FILE,
    /* Hardware info */
    OPT_SVM_MODE,
    OPT_CPU_HZ,
    /* Misc */
    OPT_PROGRESS,
    /* Specific letters */
    OPT_DUMP_ALL='a',
    OPT_INTERVAL_LENGTH='i',
    OPT_SUMMARY='s',
};
+
/* argp option-group ids, controlling grouping/order in --help output. */
enum {
    OPT_GROUP_SUMMARY=1,
    OPT_GROUP_DUMP,
    OPT_GROUP_INTERVAL,
    OPT_GROUP_EXTRA,
    OPT_GROUP_GUEST,
    OPT_GROUP_HARDWARE
};
+
/* Stringification helpers (xstr expands its argument before quoting)
 * and frequency-suffix multipliers used when parsing/printing cpu
 * speeds. */
#define xstr(x) str(x)
#define str(x) #x

#define GHZ 1000000000LL
#define MHZ 1000000LL
#define KHZ 1000LL
+
+void parse_cpu_hz(char * arg) {
+    float hz_base;
+    char * next_ptr;
+    
+    hz_base=strtof(arg, &next_ptr);
+    if(next_ptr == arg) {
+        fprintf(stderr, "Invalid cpu_hz %s\n", arg);
+        exit(1);
+    }
+    switch(*next_ptr) {
+    case '\0':
+        opt.cpu_hz=(long long)hz_base;
+        break;
+    case 'G':
+        opt.cpu_hz= hz_base * GHZ;
+        break;
+    case 'M':
+        opt.cpu_hz=hz_base * MHZ;
+        break;
+    case 'K':
+        opt.cpu_hz=hz_base * KHZ;
+        break;
+    default:
+        fprintf(stderr, "Unknown suffix %c\n", *next_ptr);
+        exit(1);
+    }
+}
+
/* WARNING not thread-safe */
/* Render a cpu frequency with a human-readable unit suffix ("2.40 GHz").
 * Returns a static buffer, overwritten on each call.
 *
 * Change: use >= at each unit boundary so exact multiples pick the
 * larger unit (1000000000 now prints "1.00 GHz", not "1000.00 MHz").
 * Constants are local so the function is self-contained. */
char * stringify_cpu_hz(long long cpu_hz) {
    static char cpu_string[20], suffix;
    const long long giga = 1000000000LL, mega = 1000000LL, kilo = 1000LL;
    float hz;

    if(cpu_hz >= giga) {
        hz = (float)cpu_hz / giga;
        suffix = 'G';
    } else if(cpu_hz >= mega) {
        hz = (float)cpu_hz / mega;
        suffix = 'M';
    } else if(cpu_hz >= kilo) {
        hz = (float)cpu_hz / kilo;
        suffix = 'k';
    } else {
        hz = cpu_hz;
        suffix = ' ';
    }

    snprintf(cpu_string, 20, "%1.2lf %cHz", hz, suffix);

    return cpu_string;
}
+
+int parse_cr3_array(char *arg) {
+    char *p, *q;
+    int n=1, i;
+
+    /* Count the number of commas (and thus the number of elements) */
+    for(p=arg; *p; p++)
+        if(*p == ',')
+            n++;
+
+    opt.interval.cr3.count = n;
+    opt.interval.cr3.values = malloc(n * sizeof(unsigned long long));
+    P.interval.cr3.array = malloc(n * sizeof(struct cr3_value_struct *));
+
+    if(!opt.interval.cr3.values || !P.interval.cr3.array) {
+        fprintf(stderr, "Malloc failed!\n");
+        exit(1);
+    }
+
+    bzero(P.interval.cr3.array, n*sizeof(struct cr3_value_struct *));
+
+    /* Now parse the elements */
+    p = q = arg;
+    for(i=0; i<n; i++) {
+        opt.interval.cr3.values[i] = strtoull(p, &q, 0);
+        if(p == q) {
+            fprintf(stderr, "Bad format: %s\n", q);
+            return -1;
+        }
+        if(*q == ',')
+            q++;
+        else if(*q != '\0') {
+            fprintf(stderr, "Bad format: %s\n", q);
+            return -1;
+        }
+        p=q;
+    }
+
+    return n;
+}
+
+error_t cmd_parser(int key, char *arg, struct argp_state *state)
+{
+    switch (key)
+    {
+        /* Dump group */
+    case OPT_DUMP_COOKED:
+        opt.dump_cooked = 1;
+        G.output_defined = 1;
+        break;
+    case OPT_DUMP_ALL:
+        opt.dump_all = 1;
+        G.output_defined = 1;
+        break;
+    case OPT_DUMP_RAW_READS:
+        opt.dump_raw_reads = 1;
+        G.output_defined = 1;
+        break;
+    case OPT_DUMP_NO_PROCESSING:
+        opt.dump_no_processing = 1;
+        opt.dump_raw_reads = 1;
+        G.output_defined = 1;
+        break;
+    case OPT_DUMP_RAW_PROCESS:
+        opt.dump_raw_process = 1;
+        G.output_defined = 1;
+        break;
+    case OPT_DUMP_IPI_LATENCY:
+        opt.dump_ipi_latency = 1;
+        break;
+    case OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD:
+        opt.dump_trace_volume_on_lost_record = 1;
+        break;
+        /* Extra group */
+    case OPT_WITH_CR3_ENUMERATION:
+        opt.with_cr3_enumeration=1;
+        break;
+    case OPT_WITH_PIO_ENUMERATION:
+        opt.with_pio_enumeration=1;
+        break;
+    case OPT_SHOW_DEFAULT_DOMAIN_SUMMARY:
+        opt.show_default_domain_summary=1;
+        break;
+    case OPT_SAMPLE_SIZE:
+    {
+        char * inval;
+        opt.sample_size = (int)strtol(arg, &inval, 0);
+        if( inval == arg )
+            argp_usage(state);
+        break;
+    }
+    case OPT_SCATTERPLOT_INTERRUPT_EIP:
+    {
+        char * inval;
+        G.output_defined = 1;
+        opt.scatterplot_interrupt_eip=1;
+        opt.scatterplot_interrupt_vector = (int)strtol(arg, &inval, 0);
+        if( inval == arg )
+            argp_usage(state);
+    }
+    break;
+    case OPT_SCATTERPLOT_CPI:
+        G.output_defined = 1;
+        opt.scatterplot_cpi=1;
+        break;
+    case OPT_SCATTERPLOT_UNPIN_PROMOTE:
+        G.output_defined = 1;
+        opt.scatterplot_unpin_promote=1;
+        break;
+    case OPT_SCATTERPLOT_CR3_SWITCH:
+        G.output_defined = 1;
+        opt.scatterplot_cr3_switch=1;
+        break;
+    case OPT_SCATTERPLOT_WAKE_TO_HALT:
+        G.output_defined = 1;
+        opt.scatterplot_wake_to_halt=1;
+        break;
+    case OPT_SCATTERPLOT_IO:
+    {
+        char * inval;
+        G.output_defined = 1;
+        opt.scatterplot_io=1;
+        opt.scatterplot_io_port = (int)strtol(arg, &inval, 0);
+        if( inval == arg )
+            argp_usage(state);
+    }
+    break;
+    case OPT_HISTOGRAM_INTERRUPT_EIP:
+    {
+        char * inval, *p;
+
+        opt.histogram_interrupt_eip=1;
+        opt.histogram_interrupt_vector = (int)strtol(arg, &inval, 0);
+
+        if( inval == arg )
+            argp_usage(state);
+
+        p = inval;
+
+        if(*p == ',')
+            opt.histogram_interrupt_increment = (unsigned long long)strtoull(p+1, &inval, 0);
+        else
+            opt.histogram_interrupt_increment = 0x1000000;
+
+        printf("Making histogram of eips at interrupt %d, increment %llx\n",
+               opt.histogram_interrupt_vector,
+               opt.histogram_interrupt_increment);
+    }
+    break;
+
+    case OPT_INTERVAL_LENGTH:
+    {
+        char * inval;
+
+        opt.interval.msec = (unsigned) (strtof(arg, &inval) * 1000);
+
+        if ( inval == arg )
+            argp_usage(state);
+
+        break;
+    }
+
+    case OPT_INTERVAL_CR3_SCHEDULE_TIME:
+    {
+        if(parse_cr3_array(arg) < 0)
+            goto usage;
+        opt.interval.output = INTERVAL_CR3_SCHEDULE_TIME;
+        opt.interval_mode = 1;
+        opt.summary_info = 1;
+        G.output_defined = 1;
+        break;
+    usage:
+        fprintf(stderr, "Invalid input for cr3_schedule_time\n");
+        argp_usage(state);
+        break;
+    }
+
+    case OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL:
+        opt.interval.output = INTERVAL_CR3_SCHEDULE_TIME;
+        opt.interval_mode = 1;
+        opt.summary_info = 1;
+        G.output_defined = 1;
+        break;
+    case OPT_INTERVAL_CR3_SCHEDULE_ORDERED:
+        opt.interval.output = INTERVAL_CR3_SCHEDULE_ORDERED;
+        opt.interval_mode = 1;
+        opt.summary_info = 1;
+        G.output_defined = 1;
+        break;
+
+    case OPT_INTERVAL_CR3_SHORT_SUMMARY:
+    {
+        if(parse_cr3_array(arg) < 0 || opt.interval.cr3.count != 1)
+            goto usage;
+        opt.interval.output = INTERVAL_CR3_SHORT_SUMMARY;
+        opt.interval_mode = 1;
+        opt.summary_info = 1;
+        G.output_defined = 1;
+        break;
+    }
+
+    case OPT_INTERVAL_DOMAIN_SHORT_SUMMARY:
+    {
+        char *inval;
+
+        opt.interval.domain.did = (int)strtol(arg, &inval, 0);
+        if( inval == arg )
+            argp_usage(state);
+
+        opt.interval.output = INTERVAL_DOMAIN_SHORT_SUMMARY;
+        opt.interval_mode = 1;
+        opt.summary_info = 1;
+        G.output_defined = 1;
+        break;
+    }
+
+    case OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT:
+    {
+        char *inval;
+
+        opt.interval.domain.did = (int)strtol(arg, &inval, 0);
+        if( inval == arg )
+            argp_usage(state);
+
+        opt.interval.output = INTERVAL_DOMAIN_GUEST_INTERRUPT;
+        opt.interval_mode = 1;
+        opt.summary_info = 1;
+        G.output_defined = 1;
+        break;
+    }
+
+    case OPT_INTERVAL_DOMAIN_GRANT_MAPS:
+    {
+        char *inval;
+
+        opt.interval.domain.did = (int)strtol(arg, &inval, 0);
+        if( inval == arg )
+            argp_usage(state);
+
+        opt.interval.output = INTERVAL_DOMAIN_GRANT_MAPS;
+        opt.interval_mode = 1;
+        opt.summary_info = 1;
+        G.output_defined = 1;
+        break;
+    }
+
+        /* Summary group */
+    case OPT_SUMMARY:
+        opt.summary = 1;
+        opt.summary_info = 1;
+        G.output_defined = 1;
+        break;
+        /* Guest info group */
+    case OPT_DEFAULT_GUEST_PAGING_LEVELS:
+    {
+        char *inval;
+        opt.default_guest_paging_levels = (int)strtol(arg, &inval, 0);
+        if ( inval == arg )
+            argp_usage(state);        
+    }
+    break;
+    case OPT_SYMBOL_FILE:
+        /* FIXME - strcpy */
+        G.symbol_file = arg;
+        break;
+        /* Hardware info group */
+    case OPT_SVM_MODE:
+        opt.svm_mode = 1;
+        break;
+    case OPT_CPU_HZ:
+        parse_cpu_hz(arg);
+        break;
+        break;
+
+    case OPT_PROGRESS:
+        opt.progress = 1;
+        break;
+
+    case ARGP_KEY_ARG:
+    {
+        /* FIXME - strcpy */
+        if (state->arg_num == 0)
+            G.trace_file = arg;
+        else
+            argp_usage(state);
+    }
+    break;
+    case ARGP_KEY_END:
+    {
+        if(opt.interval_mode) {
+            opt.interval.cycles = ( opt.interval.msec * opt.cpu_hz ) / 1000 ;
+            interval_header();
+        }
+
+        if(!G.output_defined)
+        {
+            fprintf(stderr, "No output defined, using summary.\n");
+            opt.summary = 1;
+            opt.summary_info = 1;
+        }
+        fprintf(stderr, "Using %s hardware-assisted virtualization.\n",
+                opt.svm_mode?"SVM":"VMX");
+    }
+    break;
+
+    default:
+        return ARGP_ERR_UNKNOWN;
+    }
+
+    return 0;
+}
+
+const struct argp_option cmd_opts[] =  {
+    /* Dump group */
+    { .name = "dump-cooked",
+      .key = OPT_DUMP_COOKED,
+      .group = OPT_GROUP_DUMP,
+      .doc = "Dump a sanitized summary of vmexit/vmentry.", },
+
+    { .name = "dump-all",
+      .key = OPT_DUMP_ALL,
+      .group = OPT_GROUP_DUMP,
+      .doc = "Dump all records as they come in.", },
+
+    { .name = "dump-raw-reads",
+      .key = OPT_DUMP_RAW_READS,
+      .group = OPT_GROUP_DUMP,
+      .doc = "Dump raw data as it's read from disk.  Useful mainly for debugging the analysis tool.", },
+
+    { .name = "dump-no-processing",
+      .key = OPT_DUMP_NO_PROCESSING,
+      .group = OPT_GROUP_DUMP,
+      .doc = "Don't do any processing on records other than cpu changes.  Implies dump-raw-reads (or you wouldn't get anything).", },
+
+    { .name = "dump-raw-process",
+      .key = OPT_DUMP_RAW_PROCESS,
+      .group = OPT_GROUP_DUMP,
+      .doc = "Dump raw data as it's processed.  Useful mainly for debugging the analysis tool.", },
+
+    { .name = "dump-ipi-latency",
+      .key = OPT_DUMP_IPI_LATENCY,
+      .group = OPT_GROUP_DUMP,
+      .doc = "Dump IPI latency info as IPIs are delivered (vector 0xd1 only).", },
+
+    { .name = "dump-trace-volume-on-lost-record",
+      .key = OPT_DUMP_TRACE_VOLUME_ON_LOST_RECORD,
+      .group = OPT_GROUP_DUMP,
+      .doc = "Dump the volume of trace types in the previous cpu buffer when a lost record is created.", },
+
+    /* Extra processing group */
+    { .name = "with-cr3-enumeration",
+      .key = OPT_WITH_CR3_ENUMERATION,
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Keep track of per-cr3 values", },
+
+    { .name = "with-pio-enumeration",
+      .key = OPT_WITH_PIO_ENUMERATION,
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Report summary info on indiviaul IO addresses", },
+
+    { .name = "scatterplot-interrupt-eip",
+      .key = OPT_SCATTERPLOT_INTERRUPT_EIP,
+      .arg = "vector",
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Output scatterplot of eips as a function of time.", },
+
+    { .name = "scatterplot-cpi",
+      .key = OPT_SCATTERPLOT_CPI,
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Output scatterplot of cpi.", },
+
+    { .name = "scatterplot-unpin-promote",
+      .key = OPT_SCATTERPLOT_UNPIN_PROMOTE,
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Output scatterplot of unpins and promotions.  If " \
+      "--with-cr3-enumeration is included, promotions include current cr3.", },
+
+    { .name = "scatterplot-cr3-switch",
+      .key = OPT_SCATTERPLOT_CR3_SWITCH,
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Output scatterplot of cr3 switches.", },
+
+    { .name = "scatterplot-wake-to-halt",
+      .key = OPT_SCATTERPLOT_WAKE_TO_HALT,
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Output scatterplot of wake-to-halt.", },
+
+    { .name = "scatterplot-io",
+      .key = OPT_SCATTERPLOT_IO,
+      .arg = "port",
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Output scatterplot of io latencies for givein address as a function of time.", },
+
+    { .name = "histogram-interrupt-eip",
+      .key = OPT_HISTOGRAM_INTERRUPT_EIP,
+      .arg = "vector[,increment]",
+      .group = OPT_GROUP_EXTRA,
+      .doc = "Output histograms of eips.", },
+
+    { .name = "interval",
+      .key = OPT_INTERVAL_LENGTH,
+      .arg = "sec",
+      .group = OPT_GROUP_INTERVAL,
+      .doc = "Interval length to do time-based graphs, in seconds", },
+
+    { .name = "interval-cr3-schedule-time",
+      .key = OPT_INTERVAL_CR3_SCHEDULE_TIME,
+      .arg = "gmfn[,gmfn...]",
+      .group = OPT_GROUP_INTERVAL,
+      .doc = "Print a csv with the listed cr3 value(s) every interval.", },
+
+    { .name = "interval-cr3-schedule-time-all",
+      .key = OPT_INTERVAL_CR3_SCHEDULE_TIME_ALL,
+      .group = OPT_GROUP_INTERVAL,
+      .doc = "Print a csv with all cr3 values every interval.", },
+
+    { .name = "interval-cr3-schedule-ordered",
+      .key = OPT_INTERVAL_CR3_SCHEDULE_ORDERED,
+      .group = OPT_GROUP_INTERVAL,
+      .doc = "Print summary with the top 10 cr3 values every interval.", },
+
+    { .name = "interval-cr3-short-summary",
+      .key = OPT_INTERVAL_CR3_SHORT_SUMMARY,
+      .arg = "gmfn",
+      .group = OPT_GROUP_INTERVAL,
+      .doc = "Print a csv with the hvm short summary of cr3 value every interval.", },
+
+    { .name = "interval-domain-short-summary",
+      .key = OPT_INTERVAL_DOMAIN_SHORT_SUMMARY,
+      .arg = "domain-id",
+      .group = OPT_GROUP_INTERVAL,
+      .doc = "Print a csv with the hvm short summary of given domain every interval.", },
+
+    { .name = "interval-domain-guest-interrupt",
+      .key = OPT_INTERVAL_DOMAIN_GUEST_INTERRUPT,
+      .arg = "domain-id",
+      .group = OPT_GROUP_INTERVAL,
+      .doc = "Print a csv with the guest interrupt count of given domain every interval.", },
+
+    { .name = "interval-domain-grant-maps",
+      .key = OPT_INTERVAL_DOMAIN_GRANT_MAPS,
+      .arg = "domain-id",
+      .group = OPT_GROUP_INTERVAL,
+      .doc = "Print a csv with the grant maps done on behalf of a given domain every interval.", },
+
+    /* Summary group */
+    { .name = "show-default-domain-summary",
+      .key = OPT_SHOW_DEFAULT_DOMAIN_SUMMARY,
+      .group = OPT_GROUP_SUMMARY,
+      .doc = "Show default domain information on summary", },
+
+    { .name = "sample-size",
+      .key = OPT_SAMPLE_SIZE,
+      .arg = "size",
+      .group = OPT_GROUP_SUMMARY,
+      .doc = "Keep [size] samples for percentile purposes.  Enter 0 to " \
+      "disable.  Default 10240.", },
+
+    { .name = "summary",
+      .key = OPT_SUMMARY,
+      .group = OPT_GROUP_SUMMARY,
+      .doc = "Output a summary", },
+    
+    /* Guest info */
+    { .name = "default-guest-paging-levels",
+      .key = OPT_DEFAULT_GUEST_PAGING_LEVELS,
+      .group = OPT_GROUP_GUEST,
+      .arg = "L",
+      .doc = "Default guest paging levels.  Mainly necessary for Rio, as Miami traces include guest paging levels where appropriate.", },
+    
+    { .name = "symbol-file",
+      .key = OPT_SYMBOL_FILE,
+      .group = OPT_GROUP_GUEST,
+      .arg = "filename",
+      .doc = "A symbol file for interpreting guest eips.", },
+
+    /* Hardware info */
+    { .name = "cpu-hz",
+      .key = OPT_CPU_HZ,
+      .group = OPT_GROUP_HARDWARE,
+      .arg = "HZ",
+      .doc = "Cpu speed of the tracing host, used to convert tsc into seconds.", },
+
+    { .name = "svm-mode",
+      .key = OPT_SVM_MODE,
+      .group = OPT_GROUP_HARDWARE,
+      .doc = "Assume AMD SVM-style vmexit error codes.  (Default is Intel VMX.)", },
+
+    { .name = "progress",
+      .key = OPT_PROGRESS,
+      .doc = "Progress dialog.  Requires the zenity (GTK+) executable.", },
+
+    { 0 },
+};
+
/* argp parser definition: option table above, callback cmd_parser(),
 * and the usage strings shown by --help. */
const struct argp parser_def = {
    .options = cmd_opts,
    .parser = cmd_parser,
    .args_doc = "[trace file]",
    .doc = "",
};

/* Consumed by argp for --version output and the bug-report footer. */
const char *argp_program_version = "xenalyze - XenServer Trunk";
const char *argp_program_bug_address = "George Dunlap <george.dunlap@eu.citrix.com>";
+
+
+int main(int argc, char *argv[]) {
+
+    argp_parse(&parser_def, argc, argv, 0, NULL, NULL);
+
+    if (G.trace_file == NULL)
+        exit(1);
+
+    if ( (G.fd = open(G.trace_file, O_RDONLY|O_LARGEFILE)) < 0) {
+        perror("open");
+        exit(1);
+    } else {
+        struct stat s;
+        fstat(G.fd, &s);
+        G.file_size = s.st_size;
+    }
+    if (G.symbol_file != NULL)
+        parse_symbol_file(G.symbol_file);
+
+    if(opt.dump_cooked || opt.dump_all)
+        warn = stdout;
+    else
+        warn = stderr;
+        
+    init_pcpus();
+
+    if(opt.progress)
+        progress_init();
+
+    process_records();
+
+    if(opt.interval_mode)
+        interval_tail();
+
+    if(opt.summary)
+        summary();
+
+    if(opt.progress)
+        progress_finish();
+              
+    return 0;
+}
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/analyze.h b/analyze.h
new file mode 100644 (file)
index 0000000..a2d33c4
--- /dev/null
+++ b/analyze.h
@@ -0,0 +1,36 @@
+#ifndef __ANALYZE_H
+# define __ANALYZE_H
+#define TRC_GEN_MAIN     0
+#define TRC_SCHED_MAIN   1
+#define TRC_DOM0OP_MAIN  2
+#define TRC_HVM_MAIN     3
+#define TRC_MEM_MAIN     4
+#define TRC_PV_MAIN      5
+#define TRC_SHADOW_MAIN  6
+
+#define TRC_LOST_RECORDS_END    (TRC_GEN + 50)
+
/* Sub-classification flags for shadow-pagetable trace events.
 * NOTE(review): used as summary buckets by the analysis code; exact
 * per-value semantics come from the Xen shadow code — confirm there. */
enum {
    TRCE_SFLAG_SET_AD,
    TRCE_SFLAG_SET_A,
    TRCE_SFLAG_SHADOW_L1_GET_REF,
    TRCE_SFLAG_SHADOW_L1_PUT_REF,
    TRCE_SFLAG_L2_PROPAGATE,
    TRCE_SFLAG_SET_CHANGED,
    TRCE_SFLAG_SET_FLUSH,
    TRCE_SFLAG_SET_ERROR,
    TRCE_SFLAG_DEMOTE,
    TRCE_SFLAG_PROMOTE,
    TRCE_SFLAG_WRMAP,
    TRCE_SFLAG_WRMAP_GUESS_FOUND,
    TRCE_SFLAG_WRMAP_BRUTE_FORCE,
    TRCE_SFLAG_EARLY_UNSHADOW,
    TRCE_SFLAG_EMULATION_2ND_PT_WRITTEN,
    TRCE_SFLAG_EMULATION_LAST_FAILED,
    TRCE_SFLAG_EMULATE_FULL_PT,
    TRCE_SFLAG_PREALLOC_UNPIN,
    TRCE_SFLAG_PREALLOC_UNHOOK
};
+
+#define TRC_HVM_OP_DESTROY_PROC (TRC_HVM_HANDLER + 0x100)
+#endif
diff --git a/code-intro.txt b/code-intro.txt
new file mode 100644 (file)
index 0000000..991a2e4
--- /dev/null
@@ -0,0 +1,75 @@
+Analyze is designed to gather useful data from traces, mostly for tracking 
+down performance issues in hvm domains.
+
+There are two key things that it looks at: vmexits and runstate changes.
+
+For HVM domains, we take a trace with a timestamp (i.e. rdtsc) at
+every vmexit and vmenter.  We also take a trace in between about what
+the vmexit was about: why it happened, and what Xen did as a result.
+This allows us to track how much time we spent inside Xen vs inside
+the guest, and for what reason.
+
+The key data structure for storing this kind of "summary" information
+is struct event_cycle_summary.  It contains a total count, total
+number of cycles, and a pseudo-random sample of cycles.  Each vmexit
+gets classified into a certain number of non-exclusive buckets, with
+one summary struct for each classification.  For example, we may take
+a page fault for which Xen promotes a page to being a shadow and fills
+the entry in, and it takes 8995 cycles.  This sample gets added into
+the following event_cycle_summary structs:
+ * EXNMI vmexit (the vmexit that a page fault causes)
+ * page_fault
+ * shadow fixup
+ * shadow fixup promotion
+
+When the tracing is finished, we report the count, total time, average
+number of cycles per exit, and 5th, 50th, and 95th percentiles of
+vmexits from the sample.
+
+Runstate changes allow us to tell which domain and vcpu to attribute
+to a certain vmexit.  It also allows us to see how often a domain was
+in what state -- running, waiting for a cpu, blocked, &c.  We can
+collect similar kinds of samples for runstate changes as for vmexits:
+i.e., how long after waking up did it take, on average, for a vcpu to
+get scheduled?  And so on.
+
+There are two things that make this complicated.  First is that the
+files are generally so huge that we can't really afford to load the
+data into memory; we can only afford to pass through the file once.
+The second is that we are working in a multi-cpu world, where several
+things happen more or less concurrently -- however, the traces are
+written as "clumps" into the trace file by pcpu, and are therefore not
+in the file in the exact order that they were put into the trace buffer.
+
+Our first order of business, then, is to attempt to re-construct the
+order of traces as they were generated, and process them in that
+order.  This is the job of process_records().  For each host
+processor we keep a pointer into the file to the next record from that
+processor to be processed.
+one to process (usually based on the tsc of the record), and call
+process_record().  Then, we search for the next record of that
+processor and write it into the struct, repeating until the file is
+empty.
+
+Process_record looks at the record, and classifies it according to its
+contents.  The processor struct also contains a pointer to "current",
+a record of what domain is currently running on the pcpu.  We
+determine this using the runstate change traces.
+
+Because we can only make one pass, we must build information about
+vmexits as we go.  The hvm data struct in the vcpu has "in-flight"
+information about the current vmexit which it fills in as it goes.  We
+don't know everything about a vmexit until the next vmenter* -- it's
+not until the vmenter that we know how many cycles this vmexit took.
+
+* I said vmenter here for simplicity, but it's really the vmexit
+  "close".  The close is either on the next vmenter, or when the vcpu
+  is scheduled out.  
+
+One of the things it fills in is a pointer to a function to call when
+the vmexit is over (either on vmenter, or when it gets preempted by
+the scheduler).  That way we can act on all the information.  An
+example of this is when we keep lists of eips at which emulations
+happened, and we keep summary information on the individual eips in
+the list.
+
diff --git a/dump-raw.c b/dump-raw.c
new file mode 100644 (file)
index 0000000..00bc26b
--- /dev/null
@@ -0,0 +1,211 @@
+#define _XOPEN_SOURCE 600
+#include <stdio.h>
+#include <stdlib.h>
+#include <argp.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <strings.h>
+#include <assert.h>
+#include "trace.h"
+
typedef unsigned long long tsc_t;

/* Global state: the open trace file, the pcpu whose record window we
 * are currently inside, and the trace file path. */
struct {
    int fd;             /* trace file descriptor; -1 until opened */
    int cur_cpu;        /* current pcpu; -1 before the first cpu change */
    char * trace_file;  /* path from argv; not owned/allocated here */
} G = {
    .fd=-1,
    .cur_cpu = -1,
    .trace_file = NULL,
};
+
/* -- on-disk trace buffer definitions -- */
/* One trace record as laid out on disk: a 32-bit header word (event id,
 * count of extra data words, tsc-present flag) followed by an optional
 * 64-bit tsc and up to 7 payload words.  Mirrors struct t_rec in
 * trace.h. */
struct trace_record {
    unsigned event:28,
        extra_words:3,
        cycle_flag:1;     /* 1 => u.tsc layout, 0 => u.notsc layout */
    union {
        struct {
            uint32_t tsc_lo, tsc_hi;  /* 64-bit tsc split into halves */
            uint32_t data[7];
        } tsc;
        struct {
            uint32_t data[7];
        } notsc;
    } u;
};
+
/* Decoded view of a trace record, filled in by __fill_in_record_info()
 * from the raw copy in .rec. */
struct record_info {
    int cpu;
    tsc_t tsc;          /* 0 when the record carried no timestamp */
    union {
        unsigned event;
        struct {
            /* The event id split into its bit-fields for easy matching */
            unsigned minor:12,
                sub:4,
                main:12,
                unused:4;
        } evt;
    };
    int extra_words;
    int size;           /* on-disk size in bytes (from get_rec_size()) */
    uint32_t *d;        /* points at the payload words inside .rec */

    struct trace_record rec;  /* raw record as read from disk */
};
+
/* Per-file read state: current offset, where the next cpu_change record
 * is expected, the pcpu owning the current window, and the record being
 * decoded. */
struct data {
    loff_t file_offset, next_cpu_change_offset;
    int pid;    /* pcpu of the current window; -1 before the first
                 * cpu_change record is seen */
    struct record_info ri;
};
+
+static inline ssize_t get_rec_size(struct trace_record *rec) {
+    ssize_t s;
+    
+    s = sizeof(uint32_t);
+        
+    if(rec->cycle_flag)
+        s += sizeof(tsc_t);
+
+    s += rec->extra_words * sizeof(uint32_t);
+
+    return s;
+}
+
+ssize_t __read_record(int fd, struct trace_record *rec, loff_t offset)
+{
+    ssize_t r, rsize;
+
+    r=pread64(G.fd, rec, sizeof(*rec), offset);
+
+    if(r < 0) {
+        /* Read error */
+        perror("read");
+        fprintf(stderr, "offset %llx\n", (unsigned long long)offset);
+        return 0;
+    } else if(r==0) {
+        /* End-of-file */
+        return 0;
+    } else if(r < sizeof(uint32_t)) {
+        /* Full header not read */
+        fprintf(stderr, "%s: short read (%d bytes)\n",
+                __func__, r);
+        exit(1);
+    }
+
+    rsize=get_rec_size(rec);
+
+    if(r < rsize) {
+        /* Full record not read */
+        fprintf(stderr, "%s: short read (%d, expected %d)\n",
+                __func__, r, rsize);
+        return 0;
+    }
+
+    return rsize;
+}
+
+void __fill_in_record_info(struct record_info *ri)
+{
+    tsc_t tsc=0;
+
+    ri->event = ri->rec.event;
+    ri->extra_words = ri->rec.extra_words;
+
+    if(ri->rec.cycle_flag) {
+        tsc = (((tsc_t)ri->rec.u.tsc.tsc_hi) << 32)
+                | ri->rec.u.tsc.tsc_lo;
+
+        ri->tsc = tsc;
+        ri->d = ri->rec.u.tsc.data;
+    } else {
+        ri->tsc = 0;
+        ri->d = ri->rec.u.notsc.data;
+    }
+}
+
/* Payload of a TRC_TRACE_CPU_CHANGE record: the pcpu owning the window
 * of records that follows, and that window's size in bytes. */
struct cpu_change_data {
    int cpu;
    unsigned window_size;
};
+
+void process_cpu_change(struct data *p) {
+    struct record_info *ri = &p->ri;
+    struct cpu_change_data *r = (typeof(r))ri->d;
+
+    printf(" cpu_change this-cpu %u record-cpu %u window_size %u(0x%08x)\n",
+           p->pid, r->cpu, r->window_size,
+           r->window_size);
+
+    /* File sanity check */
+    if(p->file_offset != p->next_cpu_change_offset) {
+        printf("Strange, pcpu %d expected offet %llx, actual %llx!\n",
+                p->pid, p->next_cpu_change_offset, p->file_offset);
+    }
+
+    p->next_cpu_change_offset = p->file_offset + ri->size + r->window_size;
+
+    p->pid = r->cpu;
+}
+
+void print_raw_record(struct data *p) {
+    struct record_info *ri = &p->ri;
+    int i;
+    
+    printf("R p%2d o%016llx %8lx %d ",
+           p->pid, p->file_offset,
+           (unsigned long)ri->rec.event, ri->rec.extra_words);
+
+    if(ri->rec.cycle_flag)
+        printf("t%016llx [ ", ri->tsc);
+    else
+        printf("                  [ ");
+
+    for(i=0; i<ri->rec.extra_words; i++)
+        printf("%x ",
+               ri->d[i]);
+    printf("]\n");
+}
+
+void read_records(int fd) {
+    struct data p = {
+        .file_offset = 0,
+        .next_cpu_change_offset = 0,
+        .pid = -1,
+        .ri = { 0 }
+    };
+    struct record_info *ri=&p.ri;
+
+    do {
+        ri->size = __read_record(fd, &ri->rec, p.file_offset);
+
+        if(ri->size)
+        {
+            __fill_in_record_info(ri);
+
+            print_raw_record(&p);
+
+            if(ri->event == TRC_TRACE_CPU_CHANGE)
+                process_cpu_change(&p);
+
+            p.file_offset += ri->size;
+        }
+    } while ( ri->size );
+}
+
+int main(int argc, char * argv[]) {
+    if(argc != 2) {
+        fprintf(stderr, "Usage: %s tracefile\n", argv[0]);
+        exit(1);
+    }
+
+    G.fd = open(argv[1], O_RDONLY|O_LARGEFILE);
+
+    read_records(G.fd);
+}
diff --git a/trace.h b/trace.h
new file mode 100644 (file)
index 0000000..6f04260
--- /dev/null
+++ b/trace.h
@@ -0,0 +1,182 @@
+/******************************************************************************
+ * include/public/trace.h
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Mark Williamson, (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2005 Bin Ren
+ */
+
+#ifndef __XEN_PUBLIC_TRACE_H__
+#define __XEN_PUBLIC_TRACE_H__
+
+#define TRACE_EXTRA_MAX    7
+#define TRACE_EXTRA_SHIFT 28
+
+/* Trace classes */
+#define TRC_CLS_SHIFT 16
+#define TRC_GEN      0x0001f000    /* General trace            */
+#define TRC_SCHED    0x0002f000    /* Xen Scheduler trace      */
+#define TRC_DOM0OP   0x0004f000    /* Xen DOM0 operation trace */
+#define TRC_HVM      0x0008f000    /* Xen HVM trace            */
+#define TRC_MEM      0x0010f000    /* Xen memory trace         */
+#define TRC_PV       0x0020f000    /* Xen PV traces            */
+#define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
+#define TRC_ALL      0x0ffff000
+#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
+#define TRC_HD_CYCLE_FLAG (1UL<<31)
+#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
+#define TRC_HD_EXTRA(x)    (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
+
+/* Trace subclasses */
+#define TRC_SUBCLS_SHIFT 12
+
+/* trace subclasses for SVM */
+#define TRC_HVM_ENTRYEXIT 0x00081000   /* VMENTRY and #VMEXIT       */
+#define TRC_HVM_HANDLER   0x00082000   /* various HVM handlers      */
+
+/* Trace events per class */
+#define TRC_LOST_RECORDS        (TRC_GEN + 1)
+#define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
+#define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
+#define TRC_TRACE_VIRQ          (TRC_GEN + 4)
+
+#define TRC_SCHED_MIN 0x0081000
+
+#define TRC_SCHED_DOM_ADD       (TRC_SCHED +  1)
+#define TRC_SCHED_DOM_REM       (TRC_SCHED +  2)
+#define TRC_SCHED_SLEEP         (TRC_SCHED +  3)
+#define TRC_SCHED_WAKE          (TRC_SCHED +  4)
+#define TRC_SCHED_YIELD         (TRC_SCHED +  5)
+#define TRC_SCHED_BLOCK         (TRC_SCHED +  6)
+#define TRC_SCHED_SHUTDOWN      (TRC_SCHED +  7)
+#define TRC_SCHED_CTL           (TRC_SCHED +  8)
+#define TRC_SCHED_ADJDOM        (TRC_SCHED +  9)
+#define TRC_SCHED_SWITCH        (TRC_SCHED + 10)
+#define TRC_SCHED_S_TIMER_FN    (TRC_SCHED + 11)
+#define TRC_SCHED_T_TIMER_FN    (TRC_SCHED + 12)
+#define TRC_SCHED_DOM_TIMER_FN  (TRC_SCHED + 13)
+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED + 14)
+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED + 15)
+#define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED +  16)
+
+#define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
+#define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
+#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3)
+
+#define TRC_PV_HYPERCALL             (TRC_PV +  1)
+#define TRC_PV_TRAP                  (TRC_PV +  3)
+#define TRC_PV_PAGE_FAULT            (TRC_PV +  4)
+#define TRC_PV_FORCED_INVALID_OP     (TRC_PV +  5)
+#define TRC_PV_EMULATE_PRIVOP        (TRC_PV +  6)
+#define TRC_PV_EMULATE_4GB           (TRC_PV +  7)
+#define TRC_PV_MATH_STATE_RESTORE    (TRC_PV +  8)
+#define TRC_PV_PAGING_FIXUP          (TRC_PV +  9)
+#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV + 10)
+#define TRC_PV_PTWR_EMULATION        (TRC_PV + 11)
+#define TRC_PV_PTWR_EMULATION_PAE    (TRC_PV + 12)
+  /* Indicates that addresses in trace record are 64 bits */
+#define TRC_PV_64_FLAG               (0x100) 
+
+#define TRC_SHADOW_NOT_SHADOW                 (TRC_SHADOW +  1)
+#define TRC_SHADOW_FAST_PROPAGATE             (TRC_SHADOW +  2)
+#define TRC_SHADOW_FAST_MMIO                  (TRC_SHADOW +  3)
+#define TRC_SHADOW_FALSE_FAST_PATH            (TRC_SHADOW +  4)
+#define TRC_SHADOW_MMIO                       (TRC_SHADOW +  5)
+#define TRC_SHADOW_FIXUP                      (TRC_SHADOW +  6)
+#define TRC_SHADOW_DOMF_DYING                 (TRC_SHADOW +  7)
+#define TRC_SHADOW_EMULATE                    (TRC_SHADOW +  8)
+#define TRC_SHADOW_EMULATE_UNSHADOW_USER      (TRC_SHADOW +  9)
+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ    (TRC_SHADOW + 10)
+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
+#define TRC_SHADOW_EMULATE_UNSHADOW_HEURISTIC (TRC_SHADOW + 12)
+#define TRC_SHADOW_PREALLOC_UNPIN             (TRC_SHADOW + 13)
+#define TRC_SHADOW_RESYNC_FULL                (TRC_SHADOW + 24)
+#define TRC_SHADOW_RESYNC_ONLY                (TRC_SHADOW + 25)
+
+/* trace events per subclass */
+#define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
+#define TRC_HVM_VMEXIT          (TRC_HVM_ENTRYEXIT + 0x02)
+#define TRC_HVM_VMEXIT64        (TRC_HVM_ENTRYEXIT + 0x03)
+#define TRC_HVM_PF_XEN          (TRC_HVM_HANDLER + 0x01)
+#define TRC_HVM_PF_INJECT       (TRC_HVM_HANDLER + 0x02)
+#define TRC_HVM_INJ_EXC         (TRC_HVM_HANDLER + 0x03)
+#define TRC_HVM_INJ_VIRQ        (TRC_HVM_HANDLER + 0x04)
+#define TRC_HVM_REINJ_VIRQ      (TRC_HVM_HANDLER + 0x05)
+#define TRC_HVM_IO_READ         (TRC_HVM_HANDLER + 0x06)
+#define TRC_HVM_IO_WRITE        (TRC_HVM_HANDLER + 0x07)
+#define TRC_HVM_CR_READ         (TRC_HVM_HANDLER + 0x08)
+#define TRC_HVM_CR_WRITE        (TRC_HVM_HANDLER + 0x09)
+#define TRC_HVM_DR_READ         (TRC_HVM_HANDLER + 0x0A)
+#define TRC_HVM_DR_WRITE        (TRC_HVM_HANDLER + 0x0B)
+#define TRC_HVM_MSR_READ        (TRC_HVM_HANDLER + 0x0C)
+#define TRC_HVM_MSR_WRITE       (TRC_HVM_HANDLER + 0x0D)
+#define TRC_HVM_CPUID           (TRC_HVM_HANDLER + 0x0E)
+#define TRC_HVM_INTR            (TRC_HVM_HANDLER + 0x0F)
+#define TRC_HVM_NMI             (TRC_HVM_HANDLER + 0x10)
+#define TRC_HVM_SMI             (TRC_HVM_HANDLER + 0x11)
+#define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
+#define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
+#define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
+#define TRC_HVM_IO_ASSIST       (TRC_HVM_HANDLER + 0x16)
+#define TRC_HVM_MMIO_ASSIST     (TRC_HVM_HANDLER + 0x17)
+#define TRC_HVM_CLTS            (TRC_HVM_HANDLER + 0x18)
+#define TRC_HVM_LMSW            (TRC_HVM_HANDLER + 0x19)
+#define TRC_HVM_PF_XEN64        (TRC_HVM_HANDLER + 0x20)
+#define TRC_HVM_OP_DESTROY_PROC (TRC_HVM_HANDLER + 0x100)
+
+
/* This structure represents a single trace buffer record.
 * Same on-disk layout as struct trace_record in dump-raw.c: one 32-bit
 * header word, then an optional 64-bit cycle count and payload words. */
struct t_rec {
    uint32_t event:28;
    uint32_t extra_u32:3;         /* # entries in trailing extra_u32[] array */
    uint32_t cycles_included:1;   /* u.cycles or u.no_cycles? */
    union {
        struct {
            uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
            uint32_t extra_u32[7];         /* event data items */
        } cycles;
        struct {
            uint32_t extra_u32[7];         /* event data items */
        } nocycles;
    } u;
};
+
/*
 * This structure contains the metadata for a single trace buffer.  The head
 * field, indexes into an array of struct t_rec's.
 */
struct t_buf {
    uint32_t cons;   /* Offset of next item to be consumed by control tools. */
    uint32_t prod;   /* Offset of next item to be produced by Xen.           */
    /*  Records follow immediately after the meta-data header.    */
};
+
+#endif /* __XEN_PUBLIC_TRACE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */