ia64/xen-unstable

changeset 19201:c7cba853583d

merge with xen-unstable.hg
author       Isaku Yamahata <yamahata@valinux.co.jp>
date         Fri Feb 13 11:22:28 2009 +0900 (2009-02-13)
parents      af992824b5cf 32b154137492
children     af0da711bbdb
files        xen/arch/ia64/linux-xen/mca.c
     1.1 --- a/.hgignore	Fri Feb 13 10:56:01 2009 +0900
     1.2 +++ b/.hgignore	Fri Feb 13 11:22:28 2009 +0900
     1.3 @@ -256,6 +256,7 @@
     1.4  ^xen/arch/x86/asm-offsets\.s$
     1.5  ^xen/arch/x86/boot/mkelf32$
     1.6  ^xen/arch/x86/xen\.lds$
     1.7 +^xen/arch/x86/boot/reloc.S$
     1.8  ^xen/ddb/.*$
     1.9  ^xen/include/asm$
    1.10  ^xen/include/asm-.*/asm-offsets\.h$
    1.11 @@ -279,15 +280,6 @@
    1.12  ^xen/arch/ia64/asm-xsi-offsets\.s$
    1.13  ^xen/arch/ia64/map\.out$
    1.14  ^xen/arch/ia64/xen\.lds\.s$
    1.15 -^xen/arch/powerpc/dom0\.bin$
    1.16 -^xen/arch/powerpc/asm-offsets\.s$
    1.17 -^xen/arch/powerpc/firmware$
    1.18 -^xen/arch/powerpc/firmware.dbg$
    1.19 -^xen/arch/powerpc/firmware_image.bin$
    1.20 -^xen/arch/powerpc/xen\.lds$
    1.21 -^xen/arch/powerpc/\.xen-syms$
    1.22 -^xen/arch/powerpc/xen-syms\.S$
    1.23 -^xen/arch/powerpc/cmdline.dep$
    1.24  ^unmodified_drivers/linux-2.6/\.tmp_versions
    1.25  ^unmodified_drivers/linux-2.6/.*\.cmd$
    1.26  ^unmodified_drivers/linux-2.6/.*\.ko$
     2.1 --- a/Config.mk	Fri Feb 13 10:56:01 2009 +0900
     2.2 +++ b/Config.mk	Fri Feb 13 11:22:28 2009 +0900
     2.3 @@ -1,7 +1,7 @@
     2.4  # -*- mode: Makefile; -*-
     2.5  
     2.6 -# A debug build of Xen and tools?
     2.7 -debug ?= y ## TEMPORARILY ENABLED
     2.8 +# A debug build of Xen and tools? TEMPORARILY ENABLED
     2.9 +debug ?= y
    2.10  
    2.11  XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
    2.12                           -e s/i86pc/x86_32/ -e s/amd64/x86_64/)
     3.1 --- a/docs/man/xm.pod.1	Fri Feb 13 10:56:01 2009 +0900
     3.2 +++ b/docs/man/xm.pod.1	Fri Feb 13 11:22:28 2009 +0900
     3.3 @@ -67,6 +67,8 @@ The attached console will perform much l
     3.4  so running curses based interfaces over the console B<is not
     3.5  advised>.  Vi tends to get very odd when using it over this interface.
     3.6  
     3.7 +Use the key combination Ctrl+] to detach the domain console.
     3.8 +
     3.9  =item B<create> I<configfile> [I<OPTIONS>] [I<vars>]..
    3.10  
    3.11  The create subcommand requires a config file and can optionally take a
     4.1 --- a/extras/mini-os/arch/x86/mm.c	Fri Feb 13 10:56:01 2009 +0900
     4.2 +++ b/extras/mini-os/arch/x86/mm.c	Fri Feb 13 11:22:28 2009 +0900
     4.3 @@ -550,9 +550,15 @@ static void clear_bootstrap(void)
     4.4  
     4.5  void arch_init_p2m(unsigned long max_pfn)
     4.6  {
     4.7 +#ifdef __x86_64__
     4.8  #define L1_P2M_SHIFT    9
     4.9  #define L2_P2M_SHIFT    18    
    4.10  #define L3_P2M_SHIFT    27    
    4.11 +#else
    4.12 +#define L1_P2M_SHIFT    10
    4.13 +#define L2_P2M_SHIFT    20    
    4.14 +#define L3_P2M_SHIFT    30    
    4.15 +#endif
    4.16  #define L1_P2M_ENTRIES  (1 << L1_P2M_SHIFT)    
    4.17  #define L2_P2M_ENTRIES  (1 << (L2_P2M_SHIFT - L1_P2M_SHIFT))    
    4.18  #define L3_P2M_ENTRIES  (1 << (L3_P2M_SHIFT - L2_P2M_SHIFT))    
     5.1 --- a/tools/blktap/drivers/Makefile	Fri Feb 13 10:56:01 2009 +0900
     5.2 +++ b/tools/blktap/drivers/Makefile	Fri Feb 13 11:22:28 2009 +0900
     5.3 @@ -13,7 +13,7 @@ CFLAGS   += $(CFLAGS_libxenstore)
     5.4  CFLAGS   += -I $(LIBAIO_DIR)
     5.5  CFLAGS   += -D_GNU_SOURCE
     5.6  
     5.7 -ifeq ($(shell . ./check_gcrypt),"yes")
     5.8 +ifeq ($(shell . ./check_gcrypt $(CC)),yes)
     5.9  CFLAGS += -DUSE_GCRYPT
    5.10  CRYPT_LIB := -lgcrypt
    5.11  else
     6.1 --- a/tools/firmware/rombios/rombios.c	Fri Feb 13 10:56:01 2009 +0900
     6.2 +++ b/tools/firmware/rombios/rombios.c	Fri Feb 13 11:22:28 2009 +0900
     6.3 @@ -4609,6 +4609,10 @@ int15_function32(regs, ES, DS, FLAGS)
     6.4  {
     6.5    Bit32u  extended_memory_size=0; // 64bits long
     6.6    Bit16u  CX,DX;
     6.7 +#ifdef HVMASSIST
     6.8 +  Bit16u off, e820_table_size;
     6.9 +  Bit32u base, type, size;
    6.10 +#endif
    6.11  
    6.12  BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
    6.13  
    6.14 @@ -4625,8 +4629,10 @@ ASM_START
    6.15  
    6.16        ;; Get the count in eax
    6.17        mov  bx, sp
    6.18 +SEG SS
    6.19        mov  ax, _int15_function32.CX [bx]
    6.20        shl  eax, #16
    6.21 +SEG SS
    6.22        mov  ax, _int15_function32.DX [bx]
    6.23  
    6.24        ;; convert to numbers of 15usec ticks
    6.25 @@ -4660,8 +4666,7 @@ ASM_END
    6.26          {
    6.27  #ifdef HVMASSIST
    6.28         case 0x20: {
    6.29 -            Bit16u e820_table_size =
    6.30 -                read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
    6.31 +            e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
    6.32  
    6.33              if (regs.u.r32.edx != 0x534D4150) /* SMAP */
    6.34                  goto int15_unimplemented;
    6.35 @@ -4674,8 +4679,6 @@ ASM_END
    6.36                  if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size)
    6.37                      regs.u.r32.ebx = 0;
    6.38              } else if (regs.u.r16.bx == 1) {
    6.39 -                Bit32u base, type;
    6.40 -                Bit16u off;
    6.41                  for (off = 0; off < e820_table_size; off += 0x14) {
    6.42                      base = read_dword(E820_SEG, E820_OFFSET + off);
    6.43                      type = read_dword(E820_SEG, E820_OFFSET + 0x10 + off);
    6.44 @@ -4699,9 +4702,7 @@ ASM_END
    6.45          }
    6.46  
    6.47          case 0x01: {
    6.48 -            Bit16u off, e820_table_size =
    6.49 -                read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
    6.50 -            Bit32u base, type, size;
    6.51 +            e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
    6.52  
    6.53              // do we have any reason to fail here ?
    6.54              CLEAR_CF();
     7.1 --- a/tools/libxc/xc_domain.c	Fri Feb 13 10:56:01 2009 +0900
     7.2 +++ b/tools/libxc/xc_domain.c	Fri Feb 13 11:22:28 2009 +0900
     7.3 @@ -271,6 +271,38 @@ int xc_domain_hvm_getcontext(int xc_hand
     7.4      return (ret < 0 ? -1 : domctl.u.hvmcontext.size);
     7.5  }
     7.6  
     7.7 +/* Get just one element of the HVM guest context.
     7.8 + * size must be >= HVM_SAVE_LENGTH(type) */
     7.9 +int xc_domain_hvm_getcontext_partial(int xc_handle,
    7.10 +                                     uint32_t domid,
    7.11 +                                     uint16_t typecode,
    7.12 +                                     uint16_t instance,
    7.13 +                                     void *ctxt_buf,
    7.14 +                                     uint32_t size)
    7.15 +{
    7.16 +    int ret;
    7.17 +    DECLARE_DOMCTL;
    7.18 +
    7.19 +    if ( !ctxt_buf ) 
    7.20 +        return -EINVAL;
    7.21 +
    7.22 +    domctl.cmd = XEN_DOMCTL_gethvmcontext_partial;
    7.23 +    domctl.domain = (domid_t) domid;
    7.24 +    domctl.u.hvmcontext_partial.type = typecode;
    7.25 +    domctl.u.hvmcontext_partial.instance = instance;
    7.26 +    set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, ctxt_buf);
    7.27 +
    7.28 +    if ( (ret = lock_pages(ctxt_buf, size)) != 0 )
    7.29 +        return ret;
    7.30 +    
    7.31 +    ret = do_domctl(xc_handle, &domctl);
    7.32 +
    7.33 +    if ( ctxt_buf ) 
    7.34 +        unlock_pages(ctxt_buf, size);
    7.35 +
    7.36 +    return ret ? -1 : 0;
    7.37 +}
    7.38 +
    7.39  /* set info to hvm guest for restore */
    7.40  int xc_domain_hvm_setcontext(int xc_handle,
    7.41                               uint32_t domid,
    7.42 @@ -909,6 +941,32 @@ int xc_domain_update_msi_irq(
    7.43      return rc;
    7.44  }
    7.45  
    7.46 +int xc_domain_unbind_msi_irq(
    7.47 +    int xc_handle,
    7.48 +    uint32_t domid,
    7.49 +    uint32_t gvec,
    7.50 +    uint32_t pirq,
    7.51 +    uint32_t gflags)
    7.52 +{
    7.53 +    int rc;
    7.54 +    xen_domctl_bind_pt_irq_t *bind;
    7.55 +
    7.56 +    DECLARE_DOMCTL;
    7.57 +
    7.58 +    domctl.cmd = XEN_DOMCTL_unbind_pt_irq;
    7.59 +    domctl.domain = (domid_t)domid;
    7.60 +
    7.61 +    bind = &(domctl.u.bind_pt_irq);
    7.62 +    bind->hvm_domid = domid;
    7.63 +    bind->irq_type = PT_IRQ_TYPE_MSI;
    7.64 +    bind->machine_irq = pirq;
    7.65 +    bind->u.msi.gvec = gvec;
    7.66 +    bind->u.msi.gflags = gflags;
    7.67 +
    7.68 +    rc = do_domctl(xc_handle, &domctl);
    7.69 +    return rc;
    7.70 +}
    7.71 +
    7.72  /* Pass-through: binds machine irq to guests irq */
    7.73  int xc_domain_bind_pt_irq(
    7.74      int xc_handle,
     8.1 --- a/tools/libxc/xc_pagetab.c	Fri Feb 13 10:56:01 2009 +0900
     8.2 +++ b/tools/libxc/xc_pagetab.c	Fri Feb 13 11:22:28 2009 +0900
     8.3 @@ -4,50 +4,42 @@
     8.4   * Function to translate virtual to physical addresses.
     8.5   */
     8.6  #include "xc_private.h"
     8.7 +#include <xen/hvm/save.h>
     8.8  
     8.9  #define CR0_PG  0x80000000
    8.10  #define CR4_PAE 0x20
    8.11  #define PTE_PSE 0x80
    8.12 +#define EFER_LMA 0x400
    8.13 +
    8.14  
    8.15  unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
    8.16                                             int vcpu, unsigned long long virt)
    8.17  {
    8.18      xc_dominfo_t dominfo;
    8.19 -    vcpu_guest_context_any_t ctx;
    8.20      uint64_t paddr, mask, pte = 0;
    8.21      int size, level, pt_levels = 2;
    8.22      void *map;
    8.23  
    8.24      if (xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1 
    8.25 -        || dominfo.domid != dom
    8.26 -        || xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
    8.27 +        || dominfo.domid != dom)
    8.28          return 0;
    8.29  
    8.30      /* What kind of paging are we dealing with? */
    8.31      if (dominfo.hvm) {
    8.32 -        unsigned long cr0, cr3, cr4;
    8.33 -        xen_capabilities_info_t xen_caps = "";
    8.34 -        if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
    8.35 +        struct hvm_hw_cpu ctx;
    8.36 +        if (xc_domain_hvm_getcontext_partial(xc_handle, dom,
    8.37 +                                             HVM_SAVE_CODE(CPU), vcpu,
    8.38 +                                             &ctx, sizeof ctx) != 0)
    8.39              return 0;
    8.40 -        /* HVM context records are always host-sized */
    8.41 -        if (strstr(xen_caps, "xen-3.0-x86_64")) {
    8.42 -            cr0 = ctx.x64.ctrlreg[0];
    8.43 -            cr3 = ctx.x64.ctrlreg[3];
    8.44 -            cr4 = ctx.x64.ctrlreg[4];
    8.45 -        } else {
    8.46 -            cr0 = ctx.x32.ctrlreg[0];
    8.47 -            cr3 = ctx.x32.ctrlreg[3];
    8.48 -            cr4 = ctx.x32.ctrlreg[4];
    8.49 -        }
    8.50 -        if (!(cr0 & CR0_PG))
    8.51 +        if (!(ctx.cr0 & CR0_PG))
    8.52              return virt;
    8.53 -        if (0 /* XXX how to get EFER.LMA? */) 
    8.54 -            pt_levels = 4;
    8.55 -        else
    8.56 -            pt_levels = (cr4 & CR4_PAE) ? 3 : 2;
    8.57 -        paddr = cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
    8.58 +        pt_levels = (ctx.msr_efer&EFER_LMA) ? 4 : (ctx.cr4&CR4_PAE) ? 3 : 2;
    8.59 +        paddr = ctx.cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
    8.60      } else {
    8.61          DECLARE_DOMCTL;
    8.62 +        vcpu_guest_context_any_t ctx;
    8.63 +        if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
    8.64 +            return 0;
    8.65          domctl.domain = dom;
    8.66          domctl.cmd = XEN_DOMCTL_get_address_size;
    8.67          if ( do_domctl(xc_handle, &domctl) != 0 )
     9.1 --- a/tools/libxc/xenctrl.h	Fri Feb 13 10:56:01 2009 +0900
     9.2 +++ b/tools/libxc/xenctrl.h	Fri Feb 13 11:22:28 2009 +0900
     9.3 @@ -375,6 +375,25 @@ int xc_domain_hvm_getcontext(int xc_hand
     9.4                               uint8_t *ctxt_buf,
     9.5                               uint32_t size);
     9.6  
     9.7 +
     9.8 +/**
     9.9 + * This function returns one element of the context of a hvm domain
    9.10 + * @parm xc_handle a handle to an open hypervisor interface
    9.11 + * @parm domid the domain to get information from
    9.12 + * @parm typecode which type of element is required 
    9.13 + * @parm instance which instance of the type
    9.14 + * @parm ctxt_buf a pointer to a structure to store the execution context of
    9.15 + *            the hvm domain
    9.16 + * @parm size the size of ctxt_buf (must be >= HVM_SAVE_LENGTH(typecode))
    9.17 + * @return 0 on success, -1 on failure
    9.18 + */
    9.19 +int xc_domain_hvm_getcontext_partial(int xc_handle,
    9.20 +                                     uint32_t domid,
    9.21 +                                     uint16_t typecode,
    9.22 +                                     uint16_t instance,
    9.23 +                                     void *ctxt_buf,
    9.24 +                                     uint32_t size);
    9.25 +
    9.26  /**
    9.27   * This function will set the context for hvm domain
    9.28   *
    9.29 @@ -1075,6 +1094,12 @@ int xc_domain_update_msi_irq(
    9.30      uint32_t pirq,
    9.31      uint32_t gflags);
    9.32  
    9.33 +int xc_domain_unbind_msi_irq(int xc_handle,
    9.34 +                             uint32_t domid,
    9.35 +                             uint32_t gvec,
    9.36 +                             uint32_t pirq,
    9.37 +                             uint32_t gflags);
    9.38 +
    9.39  int xc_domain_bind_pt_irq(int xc_handle,
    9.40                            uint32_t domid,
    9.41                            uint8_t machine_irq,
    10.1 --- a/tools/python/xen/xend/XendAPIStore.py	Fri Feb 13 10:56:01 2009 +0900
    10.2 +++ b/tools/python/xen/xend/XendAPIStore.py	Fri Feb 13 11:22:28 2009 +0900
    10.3 @@ -33,7 +33,8 @@ def register(uuid, type, inst):
    10.4  
    10.5  def deregister(uuid, type):
    10.6      old = get(uuid, type)
    10.7 -    del __classes[(uuid, type)]
    10.8 +    if old is not None:
    10.9 +        del __classes[(uuid, type)]
   10.10      return old
   10.11  
   10.12  def get(uuid, type):
    11.1 --- a/tools/python/xen/xend/image.py	Fri Feb 13 10:56:01 2009 +0900
    11.2 +++ b/tools/python/xen/xend/image.py	Fri Feb 13 11:22:28 2009 +0900
    11.3 @@ -372,8 +372,6 @@ class ImageHandler:
    11.4              env['DISPLAY'] = self.display
    11.5          if self.xauthority:
    11.6              env['XAUTHORITY'] = self.xauthority
    11.7 -        if self.vncconsole:
    11.8 -            args = args + ([ "-vncviewer" ])
    11.9          unique_id = "%i-%i" % (self.vm.getDomid(), time.time())
   11.10          sentinel_path = sentinel_path_prefix + unique_id
   11.11          sentinel_path_fifo = sentinel_path + '.fifo'
   11.12 @@ -558,24 +556,30 @@ class ImageHandler:
   11.13                      os.kill(self.pid, signal.SIGHUP)
   11.14                  except OSError, exn:
   11.15                      log.exception(exn)
   11.16 -                try:
   11.17 -                    # Try to reap the child every 100ms for 10s. Then SIGKILL it.
   11.18 -                    for i in xrange(100):
   11.19 +                # Try to reap the child every 100ms for 10s. Then SIGKILL it.
   11.20 +                for i in xrange(100):
   11.21 +                    try:
   11.22                          (p, rv) = os.waitpid(self.pid, os.WNOHANG)
   11.23                          if p == self.pid:
   11.24                              break
   11.25 -                        time.sleep(0.1)
   11.26 -                    else:
   11.27 -                        log.warning("DeviceModel %d took more than 10s "
   11.28 -                                    "to terminate: sending SIGKILL" % self.pid)
   11.29 +                    except OSError:
   11.30 +                        # This is expected if Xend has been restarted within
   11.31 +                        # the life of this domain.  In this case, we can kill
   11.32 +                        # the process, but we can't wait for it because it's
   11.33 +                        # not our child. We continue this loop, and after it is
   11.34 +                        # terminated make really sure the process is going away
   11.35 +                        # (SIGKILL).
   11.36 +                        pass
   11.37 +                    time.sleep(0.1)
   11.38 +                else:
   11.39 +                    log.warning("DeviceModel %d took more than 10s "
   11.40 +                                "to terminate: sending SIGKILL" % self.pid)
   11.41 +                    try:
   11.42                          os.kill(self.pid, signal.SIGKILL)
   11.43                          os.waitpid(self.pid, 0)
   11.44 -                except OSError, exn:
   11.45 -                    # This is expected if Xend has been restarted within the
   11.46 -                    # life of this domain.  In this case, we can kill the process,
   11.47 -                    # but we can't wait for it because it's not our child.
   11.48 -                    # We just make really sure it's going away (SIGKILL) first.
   11.49 -                    os.kill(self.pid, signal.SIGKILL)
   11.50 +                    except OSError:
   11.51 +                        # This happens if the process doesn't exist.
   11.52 +                        pass
   11.53                  state = xstransact.Remove("/local/domain/0/device-model/%i"
   11.54                                            % self.vm.getDomid())
   11.55              finally:
    12.1 --- a/tools/python/xen/xend/server/pciquirk.py	Fri Feb 13 10:56:01 2009 +0900
    12.2 +++ b/tools/python/xen/xend/server/pciquirk.py	Fri Feb 13 11:22:28 2009 +0900
    12.3 @@ -123,7 +123,8 @@ class PCIQuirk:
    12.4              log.info("Config file does not exist: %s" % PERMISSIVE_CONFIG_FILE)
    12.5              self.pci_perm_dev_config = ['xend-pci-perm-devs']
    12.6  
    12.7 -        devices = child_at(child(pci_perm_dev_config, 'unconstrained_dev_ids'),0)
    12.8 +        devices = child_at(child(self.pci_perm_dev_config,
    12.9 +                                 'unconstrained_dev_ids'),0)
   12.10          if self.__matchPCIdev( devices ):
   12.11              log.debug("Permissive mode enabled for PCI device [%s]" %
   12.12                        self.devid)
    13.1 --- a/tools/python/xen/xm/create.py	Fri Feb 13 10:56:01 2009 +0900
    13.2 +++ b/tools/python/xen/xm/create.py	Fri Feb 13 11:22:28 2009 +0900
    13.3 @@ -1337,7 +1337,7 @@ def main(argv):
    13.4      elif not opts.is_xml:
    13.5          dom = make_domain(opts, config)
    13.6          
    13.7 -    if opts.vals.vncviewer:
    13.8 +    if opts.vals.vncconsole:
    13.9          domid = domain_name_to_domid(sxp.child_value(config, 'name', -1))
   13.10          vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False)
   13.11          console.runVncViewer(domid, vncviewer_autopass, True)
    14.1 --- a/tools/python/xen/xm/main.py	Fri Feb 13 10:56:01 2009 +0900
    14.2 +++ b/tools/python/xen/xm/main.py	Fri Feb 13 11:22:28 2009 +0900
    14.3 @@ -59,7 +59,11 @@ from xen.util.acmpolicy import ACM_LABEL
    14.4  import XenAPI
    14.5  
    14.6  import xen.lowlevel.xc
    14.7 -xc = xen.lowlevel.xc.xc()
    14.8 +try:
    14.9 +    xc = xen.lowlevel.xc.xc()
   14.10 +except Exception, ex:
   14.11 +    print >>sys.stderr, ("Is xen kernel running?")
   14.12 +    sys.exit(1)
   14.13  
   14.14  import inspect
   14.15  from xen.xend import XendOptions
   14.16 @@ -735,7 +739,7 @@ def xm_save(args):
   14.17          (options, params) = getopt.gnu_getopt(args, 'c', ['checkpoint'])
   14.18      except getopt.GetoptError, opterr:
   14.19          err(opterr)
   14.20 -        sys.exit(1)
   14.21 +        usage('save')
   14.22  
   14.23      checkpoint = False
   14.24      for (k, v) in options:
    15.1 --- a/tools/xentrace/xenctx.c	Fri Feb 13 10:56:01 2009 +0900
    15.2 +++ b/tools/xentrace/xenctx.c	Fri Feb 13 11:22:28 2009 +0900
    15.3 @@ -26,6 +26,7 @@
    15.4  #include "xenctrl.h"
    15.5  #include <xen/foreign/x86_32.h>
    15.6  #include <xen/foreign/x86_64.h>
    15.7 +#include <xen/hvm/save.h>
    15.8  
    15.9  int xc_handle = 0;
   15.10  int domid = 0;
   15.11 @@ -287,6 +288,35 @@ static void print_ctx_32(vcpu_guest_cont
   15.12      }
   15.13  }
   15.14  
   15.15 +static void print_ctx_32on64(vcpu_guest_context_x86_64_t *ctx)
   15.16 +{
   15.17 +    struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
   15.18 +
   15.19 +    printf("cs:eip: %04x:%08x ", regs->cs, (uint32_t)regs->eip);
   15.20 +    print_symbol((uint32_t)regs->eip);
   15.21 +    print_flags((uint32_t)regs->eflags);
   15.22 +    printf("ss:esp: %04x:%08x\n", regs->ss, (uint32_t)regs->esp);
   15.23 +
   15.24 +    printf("eax: %08x\t", (uint32_t)regs->eax);
   15.25 +    printf("ebx: %08x\t", (uint32_t)regs->ebx);
   15.26 +    printf("ecx: %08x\t", (uint32_t)regs->ecx);
   15.27 +    printf("edx: %08x\n", (uint32_t)regs->edx);
   15.28 +
   15.29 +    printf("esi: %08x\t", (uint32_t)regs->esi);
   15.30 +    printf("edi: %08x\t", (uint32_t)regs->edi);
   15.31 +    printf("ebp: %08x\n", (uint32_t)regs->ebp);
   15.32 +
   15.33 +    printf(" ds:     %04x\t", regs->ds);
   15.34 +    printf(" es:     %04x\t", regs->es);
   15.35 +    printf(" fs:     %04x\t", regs->fs);
   15.36 +    printf(" gs:     %04x\n", regs->gs);
   15.37 +
   15.38 +    if (disp_all) {
   15.39 +        print_special(ctx->ctrlreg, "cr", 0x1d, 4);
   15.40 +        print_special(ctx->debugreg, "dr", 0xcf, 4);
   15.41 +    }
   15.42 +}
   15.43 +
   15.44  static void print_ctx_64(vcpu_guest_context_x86_64_t *ctx)
   15.45  {
   15.46      struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
   15.47 @@ -335,6 +365,8 @@ static void print_ctx(vcpu_guest_context
   15.48  {
   15.49      if (ctxt_word_size == 4) 
   15.50          print_ctx_32(&ctx->x32);
   15.51 +    else if (guest_word_size == 4)
   15.52 +        print_ctx_32on64(&ctx->x64);
   15.53      else 
   15.54          print_ctx_64(&ctx->x64);
   15.55  }
   15.56 @@ -788,23 +820,29 @@ static void dump_ctx(int vcpu)
   15.57  
   15.58  #if defined(__i386__) || defined(__x86_64__)
   15.59      {
   15.60 -        struct xen_domctl domctl;
   15.61 -        memset(&domctl, 0, sizeof domctl);
   15.62 -        domctl.domain = domid;
   15.63 -        domctl.cmd = XEN_DOMCTL_get_address_size;
   15.64 -        if (xc_domctl(xc_handle, &domctl) == 0)
   15.65 -            ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
   15.66          if (dominfo.hvm) {
   15.67 +            struct hvm_hw_cpu cpuctx;
   15.68              xen_capabilities_info_t xen_caps = "";
   15.69 +            if (xc_domain_hvm_getcontext_partial(
   15.70 +                    xc_handle, domid, HVM_SAVE_CODE(CPU), 
   15.71 +                    vcpu, &cpuctx, sizeof cpuctx) != 0) {
   15.72 +                perror("xc_domain_hvm_getcontext_partial");
   15.73 +                exit(-1);
   15.74 +            }
   15.75 +            guest_word_size = (cpuctx.msr_efer & 0x400) ? 8 : 4;
   15.76 +            /* HVM guest context records are always host-sized */
   15.77              if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) {
   15.78                  perror("xc_version");
   15.79                  exit(-1);
   15.80              }
   15.81 -            /* HVM guest context records are always host-sized */
   15.82              ctxt_word_size = (strstr(xen_caps, "xen-3.0-x86_64")) ? 8 : 4;
   15.83 -            /* XXX For now we can't tell whether a HVM guest is in long
   15.84 -             * XXX mode; eventually fix this here and in xc_pagetab.c */
   15.85 -            guest_word_size = 4;
   15.86 +        } else {
   15.87 +            struct xen_domctl domctl;
   15.88 +            memset(&domctl, 0, sizeof domctl);
   15.89 +            domctl.domain = domid;
   15.90 +            domctl.cmd = XEN_DOMCTL_get_address_size;
   15.91 +            if (xc_domctl(xc_handle, &domctl) == 0)
   15.92 +                ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
   15.93          }
   15.94      }
   15.95  #endif
    16.1 --- a/xen/arch/ia64/linux-xen/iosapic.c	Fri Feb 13 10:56:01 2009 +0900
    16.2 +++ b/xen/arch/ia64/linux-xen/iosapic.c	Fri Feb 13 11:22:28 2009 +0900
    16.3 @@ -93,6 +93,16 @@
    16.4  #include <asm/ptrace.h>
    16.5  #include <asm/system.h>
    16.6  
    16.7 +#ifdef XEN
    16.8 +static inline int iosapic_irq_to_vector (int irq)
    16.9 +{
   16.10 +	return irq;
   16.11 +}
   16.12 +
   16.13 +#undef irq_to_vector
   16.14 +#define irq_to_vector(irq)      iosapic_irq_to_vector(irq)
   16.15 +#define AUTO_ASSIGN	AUTO_ASSIGN_IRQ
   16.16 +#endif
   16.17  
   16.18  #undef DEBUG_INTERRUPT_ROUTING
   16.19  
    17.1 --- a/xen/arch/ia64/linux-xen/irq_ia64.c	Fri Feb 13 10:56:01 2009 +0900
    17.2 +++ b/xen/arch/ia64/linux-xen/irq_ia64.c	Fri Feb 13 11:22:28 2009 +0900
    17.3 @@ -250,6 +250,7 @@ void
    17.4  register_percpu_irq (ia64_vector vec, struct irqaction *action)
    17.5  {
    17.6  	irq_desc_t *desc;
    17.7 +#ifndef XEN
    17.8  	unsigned int irq;
    17.9  
   17.10  	for (irq = 0; irq < NR_IRQS; ++irq)
   17.11 @@ -258,16 +259,19 @@ register_percpu_irq (ia64_vector vec, st
   17.12  			desc->status |= IRQ_PER_CPU;
   17.13  			desc->handler = &irq_type_ia64_lsapic;
   17.14  			if (action)
   17.15 -#ifdef XEN
   17.16 -				setup_vector(irq, action);
   17.17 +				setup_irq(irq, action);
   17.18 +		}
   17.19  #else
   17.20 -				setup_irq(irq, action);
   17.21 +	desc = irq_descp(vec);
   17.22 +	desc->status |= IRQ_PER_CPU;
   17.23 +	desc->handler = &irq_type_ia64_lsapic;
   17.24 +	if (action)
   17.25 +		setup_vector(vec, action);
   17.26  #endif
   17.27 -		}
   17.28  }
   17.29  
   17.30  #ifdef XEN
   17.31 -int request_irq(unsigned int irq,
   17.32 +int request_irq_vector(unsigned int vector,
   17.33  		void (*handler)(int, void *, struct cpu_user_regs *),
   17.34  		unsigned long irqflags, const char * devname, void *dev_id)
   17.35  {
   17.36 @@ -279,7 +283,7 @@ int request_irq(unsigned int irq,
   17.37  	 * otherwise we'll have trouble later trying to figure out
   17.38  	 * which interrupt is which (messes up the interrupt freeing logic etc).
   17.39  	 *                          */
   17.40 -	if (irq >= NR_IRQS)
   17.41 +	if (vector >= NR_VECTORS)
   17.42  		return -EINVAL;
   17.43  	if (!handler)
   17.44  		return -EINVAL;
   17.45 @@ -291,7 +295,7 @@ int request_irq(unsigned int irq,
   17.46  	action->handler = handler;
   17.47  	action->name = devname;
   17.48  	action->dev_id = dev_id;
   17.49 -	setup_vector(irq, action);
   17.50 +	setup_vector(vector, action);
   17.51  	if (retval)
   17.52  		xfree(action);
   17.53  
    18.1 --- a/xen/arch/ia64/linux-xen/mca.c	Fri Feb 13 10:56:01 2009 +0900
    18.2 +++ b/xen/arch/ia64/linux-xen/mca.c	Fri Feb 13 11:22:28 2009 +0900
    18.3 @@ -114,7 +114,6 @@ extern void			ia64_monarch_init_handler 
    18.4  extern void			ia64_slave_init_handler (void);
    18.5  #ifdef XEN
    18.6  extern void setup_vector (unsigned int vec, struct irqaction *action);
    18.7 -#define setup_irq(irq, action)	setup_vector(irq, action)
    18.8  #endif
    18.9  
   18.10  static ia64_mc_info_t		ia64_mc_info;
   18.11 @@ -1931,12 +1930,18 @@ ia64_mca_late_init(void)
   18.12  		if (cpe_vector >= 0) {
   18.13  			/* If platform supports CPEI, enable the irq. */
   18.14  			cpe_poll_enabled = 0;
   18.15 +#ifndef XEN
   18.16  			for (irq = 0; irq < NR_IRQS; ++irq)
   18.17  				if (irq_to_vector(irq) == cpe_vector) {
   18.18  					desc = irq_descp(irq);
   18.19  					desc->status |= IRQ_PER_CPU;
   18.20 -					setup_irq(irq, &mca_cpe_irqaction);
   18.21 +					setup_vector(irq, &mca_cpe_irqaction);
   18.22  				}
   18.23 +#else
   18.24 +			desc = irq_descp(cpe_vector);
   18.25 +			desc->status |= IRQ_PER_CPU;
   18.26 +			setup_vector(cpe_vector, &mca_cpe_irqaction);
   18.27 +#endif
   18.28  			ia64_mca_register_cpev(cpe_vector);
   18.29  			IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
   18.30  		} else {
    19.1 --- a/xen/arch/ia64/xen/hypercall.c	Fri Feb 13 10:56:01 2009 +0900
    19.2 +++ b/xen/arch/ia64/xen/hypercall.c	Fri Feb 13 11:22:28 2009 +0900
    19.3 @@ -543,7 +543,7 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
    19.4              break;
    19.5          irq_status_query.flags = 0;
    19.6          /* Edge-triggered interrupts don't need an explicit unmask downcall. */
    19.7 -        if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
    19.8 +        if ( !strstr(irq_descp(irq)->handler->typename, "edge") )
    19.9              irq_status_query.flags |= XENIRQSTAT_needs_eoi;
   19.10          ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
   19.11          break;
    20.1 --- a/xen/arch/ia64/xen/irq.c	Fri Feb 13 10:56:01 2009 +0900
    20.2 +++ b/xen/arch/ia64/xen/irq.c	Fri Feb 13 11:22:28 2009 +0900
    20.3 @@ -228,11 +228,11 @@ out:
    20.4   * disabled.
    20.5   */
    20.6  
    20.7 -int setup_vector(unsigned int irq, struct irqaction * new)
    20.8 +int setup_vector(unsigned int vector, struct irqaction * new)
    20.9  {
   20.10  	unsigned long flags;
   20.11  	struct irqaction *old, **p;
   20.12 -	irq_desc_t *desc = irq_descp(irq);
   20.13 +	irq_desc_t *desc = irq_descp(vector);
   20.14  
   20.15  	/*
   20.16  	 * The following block of code has to be executed atomically
   20.17 @@ -248,8 +248,8 @@ int setup_vector(unsigned int irq, struc
   20.18  
   20.19  	desc->depth = 0;
   20.20  	desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_GUEST);
   20.21 -	desc->handler->startup(irq);
   20.22 -	desc->handler->enable(irq);
   20.23 +	desc->handler->startup(vector);
   20.24 +	desc->handler->enable(vector);
   20.25  	spin_unlock_irqrestore(&desc->lock,flags);
   20.26  
   20.27  	return 0;
   20.28 @@ -258,13 +258,11 @@ int setup_vector(unsigned int irq, struc
   20.29  /* Vectors reserved by xen (and thus not sharable with domains).  */
   20.30  unsigned long ia64_xen_vector[BITS_TO_LONGS(NR_IRQS)];
   20.31  
   20.32 -int setup_irq(unsigned int irq, struct irqaction * new)
   20.33 +int setup_irq_vector(unsigned int vec, struct irqaction * new)
   20.34  {
   20.35 -	unsigned int vec;
   20.36  	int res;
   20.37  
   20.38 -	/* Get vector for IRQ.  */
   20.39 -	if (acpi_gsi_to_irq (irq, &vec) < 0)
   20.40 +	if ( vec == IA64_INVALID_VECTOR )
   20.41  		return -ENOSYS;
   20.42  	/* Reserve the vector (and thus the irq).  */
   20.43  	if (test_and_set_bit(vec, ia64_xen_vector))
   20.44 @@ -273,14 +271,12 @@ int setup_irq(unsigned int irq, struct i
   20.45  	return res;
   20.46  }
   20.47  
   20.48 -void free_irq(unsigned int irq)
   20.49 +void release_irq_vector(unsigned int vec)
   20.50  {
   20.51 -	unsigned int vec;
   20.52  	unsigned long flags;
   20.53  	irq_desc_t *desc;
   20.54  
   20.55 -	/* Get vector for IRQ.  */
   20.56 -	if (acpi_gsi_to_irq(irq, &vec) < 0)
   20.57 +	if ( vec == IA64_INVALID_VECTOR )
   20.58  		return;
   20.59  
   20.60  	desc = irq_descp(vec);
    21.1 --- a/xen/arch/x86/Makefile	Fri Feb 13 10:56:01 2009 +0900
    21.2 +++ b/xen/arch/x86/Makefile	Fri Feb 13 11:22:28 2009 +0900
    21.3 @@ -92,3 +92,4 @@ boot/mkelf32: boot/mkelf32.c
    21.4  clean::
    21.5  	rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
    21.6  	rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
    21.7 +	rm -f boot/reloc.S boot/reloc.lnk boot/reloc.bin
    22.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Feb 13 10:56:01 2009 +0900
    22.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri Feb 13 11:22:28 2009 +0900
    22.3 @@ -58,6 +58,9 @@ static struct acpi_cpufreq_data *drv_dat
    22.4  
    22.5  static struct cpufreq_driver acpi_cpufreq_driver;
    22.6  
    22.7 +static unsigned int __read_mostly acpi_pstate_strict;
    22.8 +integer_param("acpi_pstate_strict", acpi_pstate_strict);
    22.9 +
   22.10  static int check_est_cpu(unsigned int cpuid)
   22.11  {
   22.12      struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
   22.13 @@ -180,7 +183,7 @@ static void drv_read(struct drv_cmd *cmd
   22.14      ASSERT(cpus_weight(cmd->mask) == 1);
   22.15  
   22.16      /* to reduce IPI for the sake of performance */
   22.17 -    if (cpu_isset(smp_processor_id(), cmd->mask))
   22.18 +    if (likely(cpu_isset(smp_processor_id(), cmd->mask)))
   22.19          do_drv_read((void *)cmd);
   22.20      else
   22.21          on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1);
   22.22 @@ -196,15 +199,16 @@ static u32 get_cur_val(cpumask_t mask)
   22.23      struct cpufreq_policy *policy;
   22.24      struct processor_performance *perf;
   22.25      struct drv_cmd cmd;
   22.26 -    unsigned int cpu;
   22.27 +    unsigned int cpu = smp_processor_id();
   22.28  
   22.29      if (unlikely(cpus_empty(mask)))
   22.30          return 0;
   22.31  
   22.32 -    cpu = first_cpu(mask);
   22.33 +    if (!cpu_isset(cpu, mask))
   22.34 +        cpu = first_cpu(mask);
   22.35      policy = cpufreq_cpu_policy[cpu];
   22.36  
   22.37 -    if (!policy)
   22.38 +    if (cpu >= NR_CPUS || !policy || !drv_data[policy->cpu])
   22.39          return 0;    
   22.40  
   22.41      switch (drv_data[policy->cpu]->cpu_feature) {
   22.42 @@ -214,7 +218,7 @@ static u32 get_cur_val(cpumask_t mask)
   22.43          break;
   22.44      case SYSTEM_IO_CAPABLE:
   22.45          cmd.type = SYSTEM_IO_CAPABLE;
   22.46 -        perf = drv_data[first_cpu(mask)]->acpi_data;
   22.47 +        perf = drv_data[policy->cpu]->acpi_data;
   22.48          cmd.addr.io.port = perf->control_register.address;
   22.49          cmd.addr.io.bit_width = perf->control_register.bit_width;
   22.50          break;
   22.51 @@ -393,7 +397,7 @@ static int acpi_cpufreq_target(struct cp
   22.52  
   22.53      drv_write(&cmd);
   22.54  
   22.55 -    if (!check_freqs(cmd.mask, freqs.new, data)) {
   22.56 +    if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) {
   22.57          printk(KERN_WARNING "Fail transfer to new freq %d\n", freqs.new);
   22.58          return -EAGAIN;
   22.59      }
    23.1 --- a/xen/arch/x86/acpi/suspend.c	Fri Feb 13 10:56:01 2009 +0900
    23.2 +++ b/xen/arch/x86/acpi/suspend.c	Fri Feb 13 11:22:28 2009 +0900
    23.3 @@ -65,6 +65,9 @@ void restore_rest_processor_state(void)
    23.4      /* Reload FPU state on next FPU use. */
    23.5      stts();
    23.6  
    23.7 +    if (cpu_has_pat)
    23.8 +        wrmsrl(MSR_IA32_CR_PAT, host_pat);
    23.9 +
   23.10      mtrr_ap_init();
   23.11      mcheck_init(&boot_cpu_data);
   23.12  }
    24.1 --- a/xen/arch/x86/boot/Makefile	Fri Feb 13 10:56:01 2009 +0900
    24.2 +++ b/xen/arch/x86/boot/Makefile	Fri Feb 13 11:22:28 2009 +0900
    24.3 @@ -1,1 +1,7 @@
    24.4  obj-y += head.o
    24.5 +
    24.6 +head.o: reloc.S
    24.7 +
    24.8 +# NB. BOOT_TRAMPOLINE == 0x8c000
    24.9 +%.S: %.c
   24.10 +	RELOC=0x8c000 $(MAKE) -f build32.mk $@
    25.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    25.2 +++ b/xen/arch/x86/boot/build32.mk	Fri Feb 13 11:22:28 2009 +0900
    25.3 @@ -0,0 +1,24 @@
    25.4 +XEN_ROOT=../../../..
    25.5 +override XEN_TARGET_ARCH=x86_32
    25.6 +CFLAGS =
    25.7 +include $(XEN_ROOT)/Config.mk
    25.8 +
    25.9 +# Disable PIE/SSP if GCC supports them. They can break us.
   25.10 +$(call cc-option-add,CFLAGS,CC,-nopie)
   25.11 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
   25.12 +$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
   25.13 +
   25.14 +CFLAGS += -Werror -fno-builtin -msoft-float
   25.15 +
   25.16 +%.S: %.bin
   25.17 +	(od -v -t x $< | head -n -1 | \
   25.18 +	sed 's/ /,0x/g' | sed 's/^[0-9]*,/ .long /') >$@
   25.19 +
   25.20 +%.bin: %.lnk
   25.21 +	$(OBJCOPY) -O binary $< $@
   25.22 +
   25.23 +%.lnk: %.o
   25.24 +	$(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x8c000 -o $@ $<
   25.25 +
   25.26 +%.o: %.c
   25.27 +	$(CC) $(CFLAGS) -c $< -o $@
    26.1 --- a/xen/arch/x86/boot/head.S	Fri Feb 13 10:56:01 2009 +0900
    26.2 +++ b/xen/arch/x86/boot/head.S	Fri Feb 13 11:22:28 2009 +0900
    26.3 @@ -79,8 +79,11 @@ gdt_boot_descr:
    26.4          cmp     $0x2BADB002,%eax
    26.5          jne     not_multiboot
    26.6  
    26.7 -        /* Save the Multiboot info structure for later use. */
    26.8 -        mov     %ebx,sym_phys(multiboot_ptr)
    26.9 +        /* Save the Multiboot info struct (after relocation) for later use. */
   26.10 +        mov     $sym_phys(cpu0_stack)+1024,%esp
   26.11 +        push    %ebx
   26.12 +        call    reloc
   26.13 +        mov     %eax,sym_phys(multiboot_ptr)
   26.14  
   26.15          /* Initialize BSS (no nasty surprises!) */
   26.16          mov     $sym_phys(__bss_start),%edi
   26.17 @@ -192,6 +195,9 @@ 2:      cmp     $L1_PAGETABLE_ENTRIES,%e
   26.18  
   26.19  #include "cmdline.S"
   26.20  
   26.21 +reloc:
   26.22 +#include "reloc.S"
   26.23 +
   26.24          .align 16
   26.25          .globl trampoline_start, trampoline_end
   26.26  trampoline_start:
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/xen/arch/x86/boot/reloc.c	Fri Feb 13 11:22:28 2009 +0900
    27.3 @@ -0,0 +1,89 @@
    27.4 +/******************************************************************************
    27.5 + * reloc.c
    27.6 + * 
    27.7 + * 32-bit flat memory-map routines for relocating Multiboot structures
    27.8 + * and modules. This is most easily done early with paging disabled.
    27.9 + * 
   27.10 + * Copyright (c) 2009, Citrix Systems, Inc.
   27.11 + * 
   27.12 + * Authors:
   27.13 + *    Keir Fraser <keir.fraser@citrix.com>
   27.14 + */
   27.15 +
   27.16 +asm (
   27.17 +    "    .text                         \n"
   27.18 +    "    .globl _start                 \n"
   27.19 +    "_start:                           \n"
   27.20 +    "    mov  $_start,%edi             \n"
   27.21 +    "    call 1f                       \n"
   27.22 +    "1:  pop  %esi                     \n"
   27.23 +    "    sub  $1b-_start,%esi          \n"
   27.24 +    "    mov  $__bss_start-_start,%ecx \n"
   27.25 +    "    rep  movsb                    \n"
   27.26 +    "    xor  %eax,%eax                \n"
   27.27 +    "    mov  $_end,%ecx               \n"
   27.28 +    "    sub  %edi,%ecx                \n"
   27.29 +    "    rep  stosb                    \n"
   27.30 +    "    mov  $reloc,%eax              \n"
   27.31 +    "    jmp  *%eax                    \n"
   27.32 +    );
   27.33 +
   27.34 +typedef unsigned int u32;
   27.35 +#include "../../../include/xen/multiboot.h"
   27.36 +
   27.37 +extern char _start[];
   27.38 +
   27.39 +static void *memcpy(void *dest, const void *src, unsigned int n)
   27.40 +{
   27.41 +    char *s = (char *)src, *d = dest;
   27.42 +    while ( n-- )
   27.43 +        *d++ = *s++;
   27.44 +    return dest;
   27.45 +}
   27.46 +
   27.47 +static void *reloc_mbi_struct(void *old, unsigned int bytes)
   27.48 +{
   27.49 +    static void *alloc = &_start;
   27.50 +    alloc = (void *)(((unsigned long)alloc - bytes) & ~15ul);
   27.51 +    return memcpy(alloc, old, bytes);
   27.52 +}
   27.53 +
   27.54 +static char *reloc_mbi_string(char *old)
   27.55 +{
   27.56 +    char *p;
   27.57 +    for ( p = old; *p != '\0'; p++ )
   27.58 +        continue;
   27.59 +    return reloc_mbi_struct(old, p - old + 1);
   27.60 +}
   27.61 +
   27.62 +multiboot_info_t *reloc(multiboot_info_t *mbi_old)
   27.63 +{
   27.64 +    multiboot_info_t *mbi = reloc_mbi_struct(mbi_old, sizeof(*mbi));
   27.65 +    int i;
   27.66 +
   27.67 +    if ( mbi->flags & MBI_CMDLINE )
   27.68 +        mbi->cmdline = (u32)reloc_mbi_string((char *)mbi->cmdline);
   27.69 +
   27.70 +    if ( mbi->flags & MBI_MODULES )
   27.71 +    {
   27.72 +        module_t *mods = reloc_mbi_struct(
   27.73 +            (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
   27.74 +        mbi->mods_addr = (u32)mods;
   27.75 +        for ( i = 0; i < mbi->mods_count; i++ )
   27.76 +            if ( mods[i].string )
   27.77 +                mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
   27.78 +    }
   27.79 +
   27.80 +    if ( mbi->flags & MBI_MEMMAP )
   27.81 +        mbi->mmap_addr = (u32)reloc_mbi_struct(
   27.82 +            (memory_map_t *)mbi->mmap_addr, mbi->mmap_length);
   27.83 +
   27.84 +    /* Mask features we don't understand or don't relocate. */
   27.85 +    mbi->flags &= (MBI_MEMLIMITS |
   27.86 +                   MBI_DRIVES |
   27.87 +                   MBI_CMDLINE |
   27.88 +                   MBI_MODULES |
   27.89 +                   MBI_MEMMAP);
   27.90 +
   27.91 +    return mbi;
   27.92 +}
    28.1 --- a/xen/arch/x86/cpu/mcheck/amd_k8.c	Fri Feb 13 10:56:01 2009 +0900
    28.2 +++ b/xen/arch/x86/cpu/mcheck/amd_k8.c	Fri Feb 13 11:22:28 2009 +0900
    28.3 @@ -99,6 +99,8 @@ void k8_machine_check(struct cpu_user_re
    28.4  
    28.5  	mc_data = x86_mcinfo_getptr();
    28.6  	cpu_nr = smp_processor_id();
    28.7 +	BUG_ON(cpu_nr != vcpu->processor);
    28.8 +
    28.9  	curdom = vcpu->domain;
   28.10  
   28.11  	memset(&mc_global, 0, sizeof(mc_global));
   28.12 @@ -106,14 +108,12 @@ void k8_machine_check(struct cpu_user_re
   28.13  	mc_global.common.size = sizeof(mc_global);
   28.14  
   28.15  	mc_global.mc_domid = curdom->domain_id; /* impacted domain */
   28.16 -	mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
   28.17 -	BUG_ON(cpu_nr != vcpu->processor);
   28.18 -	mc_global.mc_core_threadid = 0;
   28.19 +
   28.20 +	x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid,
   28.21 +	    &mc_global.mc_coreid, &mc_global.mc_core_threadid,
   28.22 +	    &mc_global.mc_apicid, NULL, NULL, NULL);
   28.23 +
   28.24  	mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
   28.25 -#if 0 /* TODO: on which socket is this physical core?
   28.26 -         It's not clear to me how to figure this out. */
   28.27 -	mc_global.mc_socketid = ???;
   28.28 -#endif
   28.29  	mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
   28.30  	rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
   28.31  
    29.1 --- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c	Fri Feb 13 10:56:01 2009 +0900
    29.2 +++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c	Fri Feb 13 11:22:28 2009 +0900
    29.3 @@ -95,6 +95,7 @@ void mce_amd_checkregs(void *info)
    29.4  	mc_data = NULL;
    29.5  
    29.6  	cpu_nr = smp_processor_id();
    29.7 +	BUG_ON(cpu_nr != vcpu->processor);
    29.8  	event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
    29.9  	error_found = 0;
   29.10  
   29.11 @@ -103,14 +104,12 @@ void mce_amd_checkregs(void *info)
   29.12  	mc_global.common.size = sizeof(mc_global);
   29.13  
   29.14  	mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
   29.15 -	mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
   29.16 -	BUG_ON(cpu_nr != vcpu->processor);
   29.17 -	mc_global.mc_core_threadid = 0;
   29.18  	mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
   29.19 -#if 0 /* TODO: on which socket is this physical core?
   29.20 -         It's not clear to me how to figure this out. */
   29.21 -	mc_global.mc_socketid = ???;
   29.22 -#endif
   29.23 +
   29.24 +	x86_mc_get_cpu_info(cpu_nr, &mc_global.mc_socketid,
   29.25 +	    &mc_global.mc_coreid, &mc_global.mc_core_threadid,
   29.26 +	    &mc_global.mc_apicid, NULL, NULL, NULL);
   29.27 +
   29.28  	mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
   29.29  	rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
   29.30  
    30.1 --- a/xen/arch/x86/cpu/mcheck/mce.c	Fri Feb 13 10:56:01 2009 +0900
    30.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c	Fri Feb 13 11:22:28 2009 +0900
    30.3 @@ -443,6 +443,96 @@ next:
    30.4  
    30.5  
    30.6  
    30.7 +static void do_mc_get_cpu_info(void *v)
    30.8 +{
    30.9 +	int cpu = smp_processor_id();
   30.10 +	int cindex, cpn;
   30.11 +	struct cpuinfo_x86 *c;
   30.12 +	xen_mc_logical_cpu_t *log_cpus, *xcp;
   30.13 +	uint32_t junk, ebx;
   30.14 +
   30.15 +	log_cpus = v;
   30.16 +	c = &cpu_data[cpu];
   30.17 +	cindex = 0;
   30.18 +	cpn = cpu - 1;
   30.19 +
   30.20 +	/*
   30.21 +	 * Deal with sparse masks, condensed into a contig array.
   30.22 +	 */
   30.23 +	while (cpn >= 0) {
   30.24 +		if (cpu_isset(cpn, cpu_online_map))
   30.25 +			cindex++;
   30.26 +		cpn--;
   30.27 +	}
   30.28 +
   30.29 +	xcp = &log_cpus[cindex];
   30.30 +	c = &cpu_data[cpu];
   30.31 +	xcp->mc_cpunr = cpu;
   30.32 +	x86_mc_get_cpu_info(cpu, &xcp->mc_chipid,
   30.33 +	    &xcp->mc_coreid, &xcp->mc_threadid,
   30.34 +	    &xcp->mc_apicid, &xcp->mc_ncores,
   30.35 +	    &xcp->mc_ncores_active, &xcp->mc_nthreads);
   30.36 +	xcp->mc_cpuid_level = c->cpuid_level;
   30.37 +	xcp->mc_family = c->x86;
   30.38 +	xcp->mc_vendor = c->x86_vendor;
   30.39 +	xcp->mc_model = c->x86_model;
   30.40 +	xcp->mc_step = c->x86_mask;
   30.41 +	xcp->mc_cache_size = c->x86_cache_size;
   30.42 +	xcp->mc_cache_alignment = c->x86_cache_alignment;
   30.43 +	memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid);
   30.44 +	memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid);
   30.45 +	memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps);
   30.46 +
   30.47 +	/*
   30.48 +	 * This part needs to run on the CPU itself.
   30.49 +	 */
   30.50 +	xcp->mc_nmsrvals = __MC_NMSRS;
   30.51 +	xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
   30.52 +	rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
   30.53 +
   30.54 +	if (c->cpuid_level >= 1) {
   30.55 +		cpuid(1, &junk, &ebx, &junk, &junk);
   30.56 +		xcp->mc_clusterid = (ebx >> 24) & 0xff;
   30.57 +	} else
   30.58 +		xcp->mc_clusterid = hard_smp_processor_id();
   30.59 +}
   30.60 +
   30.61 +
   30.62 +void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid,
   30.63 +			 uint16_t *threadid, uint32_t *apicid,
   30.64 +			 unsigned *ncores, unsigned *ncores_active,
   30.65 +			 unsigned *nthreads)
   30.66 +{
   30.67 +	struct cpuinfo_x86 *c;
   30.68 +
   30.69 +	*apicid = cpu_physical_id(cpu);
   30.70 +	c = &cpu_data[cpu];
   30.71 +	if (c->apicid == BAD_APICID) {
   30.72 +		*chipid = cpu;
   30.73 +		*coreid = 0;
   30.74 +		*threadid = 0;
   30.75 +		if (ncores != NULL)
   30.76 +			*ncores = 1;
   30.77 +		if (ncores_active != NULL)
   30.78 +			*ncores_active = 1;
   30.79 +		if (nthreads != NULL)
   30.80 +			*nthreads = 1;
   30.81 +	} else {
   30.82 +		*chipid = phys_proc_id[cpu];
   30.83 +		if (c->x86_max_cores > 1)
   30.84 +			*coreid = cpu_core_id[cpu];
   30.85 +		else
   30.86 +			*coreid = 0;
   30.87 +		*threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
   30.88 +		if (ncores != NULL)
   30.89 +			*ncores = c->x86_max_cores;
   30.90 +		if (ncores_active != NULL)
   30.91 +			*ncores_active = c->booted_cores;
   30.92 +		if (nthreads != NULL)
   30.93 +			*nthreads = c->x86_num_siblings;
   30.94 +	}
   30.95 +}
   30.96 +
   30.97  /* Machine Check Architecture Hypercall */
   30.98  long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc)
   30.99  {
  30.100 @@ -452,6 +542,7 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
  30.101  	struct domain *domU;
  30.102  	struct xen_mc_fetch *mc_fetch;
  30.103  	struct xen_mc_notifydomain *mc_notifydomain;
  30.104 +	struct xen_mc_physcpuinfo *mc_physcpuinfo;
  30.105  	struct mc_info *mi;
  30.106  	uint32_t flags;
  30.107  	uint32_t fetch_idx;
  30.108 @@ -460,6 +551,8 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
  30.109  	 * a DomU to fetch mc data while Dom0 notifies another DomU. */
  30.110  	static DEFINE_SPINLOCK(mc_lock);
  30.111  	static DEFINE_SPINLOCK(mc_notify_lock);
  30.112 +	int nlcpu;
  30.113 +	xen_mc_logical_cpu_t *log_cpus = NULL;
  30.114  
  30.115  	if ( copy_from_guest(op, u_xen_mc, 1) )
  30.116  		return -EFAULT;
  30.117 @@ -580,6 +673,43 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
  30.118  
  30.119  		spin_unlock(&mc_notify_lock);
  30.120  		break;
  30.121 +
  30.122 +       case XEN_MC_physcpuinfo:
  30.123 +	       if ( !IS_PRIV(v->domain) )
  30.124 +		       return -EPERM;
  30.125 + 
  30.126 +	       mc_physcpuinfo = &op->u.mc_physcpuinfo;
  30.127 +	       nlcpu = num_online_cpus();
  30.128 + 
  30.129 +	       if (!guest_handle_is_null(mc_physcpuinfo->info)) {
  30.130 +		       if (mc_physcpuinfo->ncpus <= 0)
  30.131 +			       return -EINVAL;
  30.132 +		       nlcpu = min(nlcpu, (int)mc_physcpuinfo->ncpus);
  30.133 +		       log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu);
  30.134 +		       if (log_cpus == NULL)
  30.135 +			       return -ENOMEM;
  30.136 + 
  30.137 +		       if (on_each_cpu(do_mc_get_cpu_info, log_cpus,
  30.138 +			   1, 1) != 0) {
  30.139 +			       xfree(log_cpus);
  30.140 +			       return -EIO;
  30.141 +		       }
  30.142 +	       }
  30.143 + 
  30.144 +	       mc_physcpuinfo->ncpus = nlcpu;
  30.145 + 
  30.146 +	       if (copy_to_guest(u_xen_mc, op, 1)) {
  30.147 +		       if (log_cpus != NULL)
  30.148 +			       xfree(log_cpus);
  30.149 +		       return -EFAULT;
  30.150 +	       }
  30.151 + 
  30.152 +	       if (!guest_handle_is_null(mc_physcpuinfo->info)) {
  30.153 +		       if (copy_to_guest(mc_physcpuinfo->info,
  30.154 +			   log_cpus, nlcpu))
  30.155 +			       ret = -EFAULT;
  30.156 +		       xfree(log_cpus);
  30.157 +	       }
  30.158  	}
  30.159  
  30.160  	return ret;
    31.1 --- a/xen/arch/x86/cpu/mcheck/mce.h	Fri Feb 13 10:56:01 2009 +0900
    31.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h	Fri Feb 13 11:22:28 2009 +0900
    31.3 @@ -34,4 +34,5 @@ void x86_mcinfo_clear(struct mc_info *mi
    31.4  int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
    31.5  void x86_mcinfo_dump(struct mc_info *mi);
    31.6  void mc_panic(char *s);
    31.7 -
    31.8 +void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
    31.9 +			 uint32_t *, uint32_t *, uint32_t *, uint32_t *);
    32.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Feb 13 10:56:01 2009 +0900
    32.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Fri Feb 13 11:22:28 2009 +0900
    32.3 @@ -182,11 +182,9 @@ static struct mc_info *machine_check_pol
    32.4          mcg.mc_flags = MC_FLAG_POLLED;
    32.5      else if (calltype == MC_FLAG_CMCI)
    32.6          mcg.mc_flags = MC_FLAG_CMCI;
    32.7 -    mcg.mc_socketid = phys_proc_id[cpu];
    32.8 -    mcg.mc_coreid = cpu_core_id[cpu];
    32.9 -    mcg.mc_apicid = cpu_physical_id(cpu);
   32.10 -    mcg.mc_core_threadid =
   32.11 -        mcg.mc_apicid & ( 1 << (cpu_data[cpu].x86_num_siblings - 1)); 
   32.12 +    x86_mc_get_cpu_info(
   32.13 +        cpu, &mcg.mc_socketid, &mcg.mc_coreid,
   32.14 +        &mcg.mc_core_threadid, &mcg.mc_apicid, NULL, NULL, NULL);
   32.15      rdmsrl(MSR_IA32_MCG_STATUS, mcg.mc_gstatus);
   32.16  
   32.17      for ( i = 0; i < nr_mce_banks; i++ ) {
    33.1 --- a/xen/arch/x86/domain.c	Fri Feb 13 10:56:01 2009 +0900
    33.2 +++ b/xen/arch/x86/domain.c	Fri Feb 13 11:22:28 2009 +0900
    33.3 @@ -141,7 +141,7 @@ void dump_pageframe_info(struct domain *
    33.4      }
    33.5      else
    33.6      {
    33.7 -        list_for_each_entry ( page, &d->page_list, list )
    33.8 +        page_list_for_each ( page, &d->page_list )
    33.9          {
   33.10              printk("    DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
   33.11                     _p(page_to_mfn(page)),
   33.12 @@ -154,7 +154,7 @@ void dump_pageframe_info(struct domain *
   33.13          p2m_pod_dump_data(d);
   33.14      }
   33.15  
   33.16 -    list_for_each_entry ( page, &d->xenpage_list, list )
   33.17 +    page_list_for_each ( page, &d->xenpage_list )
   33.18      {
   33.19          printk("    XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
   33.20                 _p(page_to_mfn(page)),
   33.21 @@ -352,6 +352,8 @@ int vcpu_initialise(struct vcpu *v)
   33.22      v->arch.perdomain_ptes =
   33.23          d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
   33.24  
   33.25 +    spin_lock_init(&v->arch.shadow_ldt_lock);
   33.26 +
   33.27      return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
   33.28  }
   33.29  
   33.30 @@ -380,7 +382,7 @@ int arch_domain_create(struct domain *d,
   33.31      INIT_LIST_HEAD(&d->arch.pdev_list);
   33.32  
   33.33      d->arch.relmem = RELMEM_not_started;
   33.34 -    INIT_LIST_HEAD(&d->arch.relmem_list);
   33.35 +    INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
   33.36  
   33.37      pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
   33.38      d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order, 0);
   33.39 @@ -1655,9 +1657,8 @@ int hypercall_xlat_continuation(unsigned
   33.40  #endif
   33.41  
   33.42  static int relinquish_memory(
   33.43 -    struct domain *d, struct list_head *list, unsigned long type)
   33.44 +    struct domain *d, struct page_list_head *list, unsigned long type)
   33.45  {
   33.46 -    struct list_head *ent;
   33.47      struct page_info  *page;
   33.48      unsigned long     x, y;
   33.49      int               ret = 0;
   33.50 @@ -1665,17 +1666,13 @@ static int relinquish_memory(
   33.51      /* Use a recursive lock, as we may enter 'free_domheap_page'. */
   33.52      spin_lock_recursive(&d->page_alloc_lock);
   33.53  
   33.54 -    ent = list->next;
   33.55 -    while ( ent != list )
   33.56 +    while ( (page = page_list_remove_head(list)) )
   33.57      {
   33.58 -        page = list_entry(ent, struct page_info, list);
   33.59 -
   33.60          /* Grab a reference to the page so it won't disappear from under us. */
   33.61          if ( unlikely(!get_page(page, d)) )
   33.62          {
   33.63              /* Couldn't get a reference -- someone is freeing this page. */
   33.64 -            ent = ent->next;
   33.65 -            list_move_tail(&page->list, &d->arch.relmem_list);
   33.66 +            page_list_add_tail(page, &d->arch.relmem_list);
   33.67              continue;
   33.68          }
   33.69  
   33.70 @@ -1687,6 +1684,7 @@ static int relinquish_memory(
   33.71              break;
   33.72          case -EAGAIN:
   33.73          case -EINTR:
   33.74 +            page_list_add(page, list);
   33.75              set_bit(_PGT_pinned, &page->u.inuse.type_info);
   33.76              put_page(page);
   33.77              goto out;
   33.78 @@ -1723,6 +1721,7 @@ static int relinquish_memory(
   33.79                  case 0:
   33.80                      break;
   33.81                  case -EINTR:
   33.82 +                    page_list_add(page, list);
   33.83                      page->u.inuse.type_info |= PGT_validated;
   33.84                      if ( x & PGT_partial )
   33.85                          put_page(page);
   33.86 @@ -1730,6 +1729,7 @@ static int relinquish_memory(
   33.87                      ret = -EAGAIN;
   33.88                      goto out;
   33.89                  case -EAGAIN:
   33.90 +                    page_list_add(page, list);
   33.91                      page->u.inuse.type_info |= PGT_partial;
   33.92                      if ( x & PGT_partial )
   33.93                          put_page(page);
   33.94 @@ -1746,9 +1746,8 @@ static int relinquish_memory(
   33.95              }
   33.96          }
   33.97  
   33.98 -        /* Follow the list chain and /then/ potentially free the page. */
   33.99 -        ent = ent->next;
  33.100 -        list_move_tail(&page->list, &d->arch.relmem_list);
  33.101 +        /* Put the page on the list and /then/ potentially free it. */
  33.102 +        page_list_add_tail(page, &d->arch.relmem_list);
  33.103          put_page(page);
  33.104  
  33.105          if ( hypercall_preempt_check() )
  33.106 @@ -1758,7 +1757,12 @@ static int relinquish_memory(
  33.107          }
  33.108      }
  33.109  
  33.110 -    list_splice_init(&d->arch.relmem_list, list);
  33.111 +    /* list is empty at this point. */
  33.112 +    if ( !page_list_empty(&d->arch.relmem_list) )
  33.113 +    {
  33.114 +        *list = d->arch.relmem_list;
  33.115 +        INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
  33.116 +    }
  33.117  
  33.118   out:
  33.119      spin_unlock_recursive(&d->page_alloc_lock);
    34.1 --- a/xen/arch/x86/domain_build.c	Fri Feb 13 10:56:01 2009 +0900
    34.2 +++ b/xen/arch/x86/domain_build.c	Fri Feb 13 11:22:28 2009 +0900
    34.3 @@ -880,7 +880,7 @@ int __init construct_dom0(
    34.4      }
    34.5      si->first_p2m_pfn = pfn;
    34.6      si->nr_p2m_frames = d->tot_pages - count;
    34.7 -    list_for_each_entry ( page, &d->page_list, list )
    34.8 +    page_list_for_each ( page, &d->page_list )
    34.9      {
   34.10          mfn = page_to_mfn(page);
   34.11          if ( get_gpfn_from_mfn(mfn) >= count )
    35.1 --- a/xen/arch/x86/domctl.c	Fri Feb 13 10:56:01 2009 +0900
    35.2 +++ b/xen/arch/x86/domctl.c	Fri Feb 13 11:22:28 2009 +0900
    35.3 @@ -240,7 +240,7 @@ long arch_do_domctl(
    35.4          struct domain *d = rcu_lock_domain_by_id(domctl->domain);
    35.5          unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
    35.6          uint64_t mfn;
    35.7 -        struct list_head *list_ent;
    35.8 +        struct page_info *page;
    35.9  
   35.10          ret = -EINVAL;
   35.11          if ( d != NULL )
   35.12 @@ -259,19 +259,19 @@ long arch_do_domctl(
   35.13                  goto getmemlist_out;
   35.14              }
   35.15  
   35.16 -            ret = 0;
   35.17 -            list_ent = d->page_list.next;
   35.18 -            for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
   35.19 +            ret = i = 0;
   35.20 +            page_list_for_each(page, &d->page_list)
   35.21              {
   35.22 -                mfn = page_to_mfn(list_entry(
   35.23 -                    list_ent, struct page_info, list));
   35.24 +                if ( i >= max_pfns )
   35.25 +                    break;
   35.26 +                mfn = page_to_mfn(page);
   35.27                  if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
   35.28                                            i, &mfn, 1) )
   35.29                  {
   35.30                      ret = -EFAULT;
   35.31                      break;
   35.32                  }
   35.33 -                list_ent = mfn_to_page(mfn)->list.next;
   35.34 +                ++i;
   35.35              }
   35.36              
   35.37              spin_unlock(&d->page_alloc_lock);
   35.38 @@ -417,6 +417,34 @@ long arch_do_domctl(
   35.39      }
   35.40      break;
   35.41  
   35.42 +    case XEN_DOMCTL_gethvmcontext_partial:
   35.43 +    { 
   35.44 +        struct domain *d;
   35.45 +
   35.46 +        ret = -ESRCH;
   35.47 +        if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
   35.48 +            break;
   35.49 +
   35.50 +        ret = xsm_hvmcontext(d, domctl->cmd);
   35.51 +        if ( ret )
   35.52 +            goto gethvmcontext_partial_out;
   35.53 +
   35.54 +        ret = -EINVAL;
   35.55 +        if ( !is_hvm_domain(d) ) 
   35.56 +            goto gethvmcontext_partial_out;
   35.57 +
   35.58 +        domain_pause(d);
   35.59 +        ret = hvm_save_one(d, domctl->u.hvmcontext_partial.type,
   35.60 +                           domctl->u.hvmcontext_partial.instance,
   35.61 +                           domctl->u.hvmcontext_partial.buffer);
   35.62 +        domain_unpause(d);
   35.63 +
   35.64 +    gethvmcontext_partial_out:
   35.65 +        rcu_unlock_domain(d);
   35.66 +    }
   35.67 +    break;
   35.68 +
   35.69 +
   35.70      case XEN_DOMCTL_set_address_size:
   35.71      {
   35.72          struct domain *d;
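
The getmemlist rework above replaces manual list_ent chasing with a bounded page_list_for_each() walk under d->page_alloc_lock. A minimal sketch of that shape; the guest-handle type of the output buffer is an assumption (the real code uses domctl->u.getmemlist.buffer directly).

    /* Sketch only -- not part of the changeset.  The handle type is an
     * assumption; the locking and iteration pattern mirror the hunk above. */
    static long copy_mfns_to_guest(struct domain *d,
                                   XEN_GUEST_HANDLE_64(uint64) buffer,
                                   unsigned long max_pfns)
    {
        struct page_info *page;
        unsigned long i = 0;
        uint64_t mfn;
        long ret = 0;

        spin_lock(&d->page_alloc_lock);
        page_list_for_each ( page, &d->page_list )
        {
            if ( i >= max_pfns )
                break;
            mfn = page_to_mfn(page);
            if ( copy_to_guest_offset(buffer, i, &mfn, 1) )
            {
                ret = -EFAULT;
                break;
            }
            ++i;
        }
        spin_unlock(&d->page_alloc_lock);

        return ret ? ret : (long)i;   /* entries written, or -EFAULT */
    }
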
    36.1 --- a/xen/arch/x86/e820.c	Fri Feb 13 10:56:01 2009 +0900
    36.2 +++ b/xen/arch/x86/e820.c	Fri Feb 13 11:22:28 2009 +0900
    36.3 @@ -1,10 +1,10 @@
    36.4  #include <xen/config.h>
    36.5  #include <xen/init.h>
    36.6  #include <xen/lib.h>
    36.7 +#include <xen/mm.h>
    36.8  #include <xen/compat.h>
    36.9  #include <xen/dmi.h>
   36.10  #include <asm/e820.h>
   36.11 -#include <asm/mm.h>
   36.12  #include <asm/page.h>
   36.13  
   36.14  /* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
    37.1 --- a/xen/arch/x86/i8259.c	Fri Feb 13 10:56:01 2009 +0900
    37.2 +++ b/xen/arch/x86/i8259.c	Fri Feb 13 11:22:28 2009 +0900
    37.3 @@ -410,8 +410,8 @@ void __init init_IRQ(void)
    37.4      }
    37.5  
    37.6      /* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
    37.7 -    vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN;
    37.8 -    vector_irq[0x80] = NEVER_ASSIGN;
    37.9 +    vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN_IRQ;
   37.10 +    vector_irq[0x80] = NEVER_ASSIGN_IRQ;
   37.11  
   37.12      apic_intr_init();
   37.13  
    38.1 --- a/xen/arch/x86/io_apic.c	Fri Feb 13 10:56:01 2009 +0900
    38.2 +++ b/xen/arch/x86/io_apic.c	Fri Feb 13 11:22:28 2009 +0900
    38.3 @@ -49,7 +49,6 @@ atomic_t irq_mis_count;
    38.4  static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
    38.5  
    38.6  static DEFINE_SPINLOCK(ioapic_lock);
    38.7 -static DEFINE_SPINLOCK(vector_lock);
    38.8  
    38.9  int skip_ioapic_setup;
   38.10  
   38.11 @@ -89,9 +88,6 @@ static struct irq_pin_list {
   38.12  };
   38.13  static int irq_2_pin_free_entry = NR_IRQS;
   38.14  
   38.15 -int vector_irq[NR_VECTORS] __read_mostly = {
   38.16 -    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN};
   38.17 -
   38.18  /*
   38.19   * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
   38.20   * shared ISA-space IRQs, so we have to support them. We are super
   38.21 @@ -669,56 +665,6 @@ static inline int IO_APIC_irq_trigger(in
   38.22  /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
   38.23  u8 irq_vector[NR_IRQS] __read_mostly;
   38.24  
   38.25 -int free_irq_vector(int vector)
   38.26 -{
   38.27 -    int irq;
   38.28 -
   38.29 -    BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
   38.30 -
   38.31 -    spin_lock(&vector_lock);
   38.32 -    if ((irq = vector_irq[vector]) == AUTO_ASSIGN)
   38.33 -        vector_irq[vector] = FREE_TO_ASSIGN;
   38.34 -    spin_unlock(&vector_lock);
   38.35 -
   38.36 -    return (irq == AUTO_ASSIGN) ? 0 : -EINVAL;
   38.37 -}
   38.38 -
   38.39 -int assign_irq_vector(int irq)
   38.40 -{
   38.41 -    static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
   38.42 -    unsigned vector;
   38.43 -
   38.44 -    BUG_ON(irq >= NR_IRQS);
   38.45 -
   38.46 -    spin_lock(&vector_lock);
   38.47 -
   38.48 -    if ((irq != AUTO_ASSIGN) && (IO_APIC_VECTOR(irq) > 0)) {
   38.49 -        spin_unlock(&vector_lock);
   38.50 -        return IO_APIC_VECTOR(irq);
   38.51 -    }
   38.52 -
   38.53 -    vector = current_vector;
   38.54 -    while (vector_irq[vector] != FREE_TO_ASSIGN) {
   38.55 -        vector += 8;
   38.56 -        if (vector > LAST_DYNAMIC_VECTOR)
   38.57 -            vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
   38.58 -
   38.59 -        if (vector == current_vector) {
   38.60 -            spin_unlock(&vector_lock);
   38.61 -            return -ENOSPC;
   38.62 -        }
   38.63 -    }
   38.64 -
   38.65 -    current_vector = vector;
   38.66 -    vector_irq[vector] = irq;
   38.67 -    if (irq != AUTO_ASSIGN)
   38.68 -        IO_APIC_VECTOR(irq) = vector;
   38.69 -
   38.70 -    spin_unlock(&vector_lock);
   38.71 -
   38.72 -    return vector;
   38.73 -}
   38.74 -
   38.75  static struct hw_interrupt_type ioapic_level_type;
   38.76  static struct hw_interrupt_type ioapic_edge_type;
   38.77  
    39.1 --- a/xen/arch/x86/irq.c	Fri Feb 13 10:56:01 2009 +0900
    39.2 +++ b/xen/arch/x86/irq.c	Fri Feb 13 11:22:28 2009 +0900
    39.3 @@ -27,6 +27,11 @@ boolean_param("noirqbalance", opt_noirqb
    39.4  
    39.5  irq_desc_t irq_desc[NR_VECTORS];
    39.6  
    39.7 +static DEFINE_SPINLOCK(vector_lock);
    39.8 +int vector_irq[NR_VECTORS] __read_mostly = {
    39.9 +    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
   39.10 +};
   39.11 +
   39.12  static void __do_IRQ_guest(int vector);
   39.13  
   39.14  void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
   39.15 @@ -54,6 +59,56 @@ struct hw_interrupt_type no_irq_type = {
   39.16  
   39.17  atomic_t irq_err_count;
   39.18  
   39.19 +int free_irq_vector(int vector)
   39.20 +{
   39.21 +    int irq;
   39.22 +
   39.23 +    BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
   39.24 +
   39.25 +    spin_lock(&vector_lock);
   39.26 +    if ((irq = vector_irq[vector]) == AUTO_ASSIGN_IRQ)
   39.27 +        vector_irq[vector] = FREE_TO_ASSIGN_IRQ;
   39.28 +    spin_unlock(&vector_lock);
   39.29 +
   39.30 +    return (irq == AUTO_ASSIGN_IRQ) ? 0 : -EINVAL;
   39.31 +}
   39.32 +
   39.33 +int assign_irq_vector(int irq)
   39.34 +{
   39.35 +    static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
   39.36 +    unsigned vector;
   39.37 +
   39.38 +    BUG_ON(irq >= NR_IRQS);
   39.39 +
   39.40 +    spin_lock(&vector_lock);
   39.41 +
   39.42 +    if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) {
   39.43 +        spin_unlock(&vector_lock);
   39.44 +        return IO_APIC_VECTOR(irq);
   39.45 +    }
   39.46 +
   39.47 +    vector = current_vector;
   39.48 +    while (vector_irq[vector] != FREE_TO_ASSIGN_IRQ) {
   39.49 +        vector += 8;
   39.50 +        if (vector > LAST_DYNAMIC_VECTOR)
   39.51 +            vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
   39.52 +
   39.53 +        if (vector == current_vector) {
   39.54 +            spin_unlock(&vector_lock);
   39.55 +            return -ENOSPC;
   39.56 +        }
   39.57 +    }
   39.58 +
   39.59 +    current_vector = vector;
   39.60 +    vector_irq[vector] = irq;
   39.61 +    if (irq != AUTO_ASSIGN_IRQ)
   39.62 +        IO_APIC_VECTOR(irq) = vector;
   39.63 +
   39.64 +    spin_unlock(&vector_lock);
   39.65 +
   39.66 +    return vector;
   39.67 +}
   39.68 +
   39.69  asmlinkage void do_IRQ(struct cpu_user_regs *regs)
   39.70  {
   39.71      unsigned int      vector = regs->entry_vector;
   39.72 @@ -104,7 +159,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
   39.73      spin_unlock(&desc->lock);
   39.74  }
   39.75  
   39.76 -int request_irq(unsigned int irq,
   39.77 +int request_irq_vector(unsigned int vector,
   39.78          void (*handler)(int, void *, struct cpu_user_regs *),
   39.79          unsigned long irqflags, const char * devname, void *dev_id)
   39.80  {
   39.81 @@ -117,7 +172,7 @@ int request_irq(unsigned int irq,
   39.82       * which interrupt is which (messes up the interrupt freeing
   39.83       * logic etc).
   39.84       */
   39.85 -    if (irq >= NR_IRQS)
   39.86 +    if (vector >= NR_VECTORS)
   39.87          return -EINVAL;
   39.88      if (!handler)
   39.89          return -EINVAL;
   39.90 @@ -130,34 +185,32 @@ int request_irq(unsigned int irq,
   39.91      action->name = devname;
   39.92      action->dev_id = dev_id;
   39.93  
   39.94 -    retval = setup_irq(irq, action);
   39.95 +    retval = setup_irq_vector(vector, action);
   39.96      if (retval)
   39.97          xfree(action);
   39.98  
   39.99      return retval;
  39.100  }
  39.101  
  39.102 -void free_irq(unsigned int irq)
  39.103 +void release_irq_vector(unsigned int vector)
  39.104  {
  39.105 -    unsigned int  vector = irq_to_vector(irq);
  39.106 -    irq_desc_t   *desc = &irq_desc[vector];
  39.107 +    irq_desc_t *desc = &irq_desc[vector];
  39.108      unsigned long flags;
  39.109  
  39.110      spin_lock_irqsave(&desc->lock,flags);
  39.111      desc->action  = NULL;
  39.112      desc->depth   = 1;
  39.113      desc->status |= IRQ_DISABLED;
  39.114 -    desc->handler->shutdown(irq);
  39.115 +    desc->handler->shutdown(vector);
  39.116      spin_unlock_irqrestore(&desc->lock,flags);
  39.117  
  39.118      /* Wait to make sure it's not being used on another CPU */
  39.119      do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
  39.120  }
  39.121  
  39.122 -int setup_irq(unsigned int irq, struct irqaction *new)
  39.123 +int setup_irq_vector(unsigned int vector, struct irqaction *new)
  39.124  {
  39.125 -    unsigned int  vector = irq_to_vector(irq);
  39.126 -    irq_desc_t   *desc = &irq_desc[vector];
  39.127 +    irq_desc_t *desc = &irq_desc[vector];
  39.128      unsigned long flags;
  39.129   
  39.130      spin_lock_irqsave(&desc->lock,flags);
    40.1 --- a/xen/arch/x86/mm.c	Fri Feb 13 10:56:01 2009 +0900
    40.2 +++ b/xen/arch/x86/mm.c	Fri Feb 13 11:22:28 2009 +0900
    40.3 @@ -179,12 +179,6 @@ l2_pgentry_t *compat_idle_pg_table_l2 = 
    40.4  #define l3_disallow_mask(d) L3_DISALLOW_MASK
    40.5  #endif
    40.6  
    40.7 -static void queue_deferred_ops(struct domain *d, unsigned int ops)
    40.8 -{
    40.9 -    ASSERT(d == current->domain);
   40.10 -    this_cpu(percpu_mm_info).deferred_ops |= ops;
   40.11 -}
   40.12 -
   40.13  void __init init_frametable(void)
   40.14  {
   40.15      unsigned long nr_pages, page_step, i, mfn;
   40.16 @@ -333,7 +327,7 @@ void share_xen_page_with_guest(
   40.17          page->count_info |= PGC_allocated | 1;
   40.18          if ( unlikely(d->xenheap_pages++ == 0) )
   40.19              get_knownalive_domain(d);
   40.20 -        list_add_tail(&page->list, &d->xenpage_list);
   40.21 +        page_list_add_tail(page, &d->xenpage_list);
   40.22      }
   40.23  
   40.24      spin_unlock(&d->page_alloc_lock);
   40.25 @@ -464,14 +458,18 @@ void update_cr3(struct vcpu *v)
   40.26  }
   40.27  
   40.28  
   40.29 -static void invalidate_shadow_ldt(struct vcpu *v)
   40.30 +static void invalidate_shadow_ldt(struct vcpu *v, int flush)
   40.31  {
   40.32      int i;
   40.33      unsigned long pfn;
   40.34      struct page_info *page;
   40.35 -    
   40.36 +
   40.37 +    BUG_ON(unlikely(in_irq()));
   40.38 +
   40.39 +    spin_lock(&v->arch.shadow_ldt_lock);
   40.40 +
   40.41      if ( v->arch.shadow_ldt_mapcnt == 0 )
   40.42 -        return;
   40.43 +        goto out;
   40.44  
   40.45      v->arch.shadow_ldt_mapcnt = 0;
   40.46  
   40.47 @@ -486,11 +484,12 @@ static void invalidate_shadow_ldt(struct
   40.48          put_page_and_type(page);
   40.49      }
   40.50  
   40.51 -    /* Dispose of the (now possibly invalid) mappings from the TLB.  */
   40.52 -    if ( v == current )
   40.53 -        queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
   40.54 -    else
   40.55 -        flush_tlb_mask(v->domain->domain_dirty_cpumask);
   40.56 +    /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */
   40.57 +    if ( flush )
   40.58 +        flush_tlb_mask(v->vcpu_dirty_cpumask);
   40.59 +
   40.60 + out:
   40.61 +    spin_unlock(&v->arch.shadow_ldt_lock);
   40.62  }
   40.63  
   40.64  
   40.65 @@ -541,8 +540,10 @@ int map_ldt_shadow_page(unsigned int off
   40.66  
   40.67      nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
   40.68  
   40.69 +    spin_lock(&v->arch.shadow_ldt_lock);
   40.70      l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
   40.71      v->arch.shadow_ldt_mapcnt++;
   40.72 +    spin_unlock(&v->arch.shadow_ldt_lock);
   40.73  
   40.74      return 1;
   40.75  }
   40.76 @@ -989,7 +990,7 @@ void put_page_from_l1e(l1_pgentry_t l1e,
   40.77               (d == e) )
   40.78          {
   40.79              for_each_vcpu ( d, v )
   40.80 -                invalidate_shadow_ldt(v);
   40.81 +                invalidate_shadow_ldt(v, 1);
   40.82          }
   40.83          put_page(page);
   40.84      }
   40.85 @@ -2023,30 +2024,17 @@ int free_page_type(struct page_info *pag
   40.86      unsigned long gmfn;
   40.87      int rc;
   40.88  
   40.89 -    if ( likely(owner != NULL) )
   40.90 +    if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) )
   40.91      {
   40.92 -        /*
   40.93 -         * We have to flush before the next use of the linear mapping
   40.94 -         * (e.g., update_va_mapping()) or we could end up modifying a page
   40.95 -         * that is no longer a page table (and hence screw up ref counts).
   40.96 -         */
   40.97 -        if ( current->domain == owner )
   40.98 -            queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
   40.99 -        else
  40.100 -            flush_tlb_mask(owner->domain_dirty_cpumask);
  40.101 -
  40.102 -        if ( unlikely(paging_mode_enabled(owner)) )
  40.103 -        {
  40.104 -            /* A page table is dirtied when its type count becomes zero. */
  40.105 -            paging_mark_dirty(owner, page_to_mfn(page));
  40.106 -
  40.107 -            if ( shadow_mode_refcounts(owner) )
  40.108 -                return 0;
  40.109 -
  40.110 -            gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
  40.111 -            ASSERT(VALID_M2P(gmfn));
  40.112 -            shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
  40.113 -        }
  40.114 +        /* A page table is dirtied when its type count becomes zero. */
  40.115 +        paging_mark_dirty(owner, page_to_mfn(page));
  40.116 +
  40.117 +        if ( shadow_mode_refcounts(owner) )
  40.118 +            return 0;
  40.119 +
  40.120 +        gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
  40.121 +        ASSERT(VALID_M2P(gmfn));
  40.122 +        shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
  40.123      }
  40.124  
  40.125      if ( !(type & PGT_partial) )
  40.126 @@ -2366,8 +2354,8 @@ void cleanup_page_cacheattr(struct page_
  40.127  
  40.128  int new_guest_cr3(unsigned long mfn)
  40.129  {
  40.130 -    struct vcpu *v = current;
  40.131 -    struct domain *d = v->domain;
  40.132 +    struct vcpu *curr = current;
  40.133 +    struct domain *d = curr->domain;
  40.134      int okay;
  40.135      unsigned long old_base_mfn;
  40.136  
  40.137 @@ -2377,19 +2365,19 @@ int new_guest_cr3(unsigned long mfn)
  40.138          okay = paging_mode_refcounts(d)
  40.139              ? 0 /* Old code was broken, but what should it be? */
  40.140              : mod_l4_entry(
  40.141 -                    __va(pagetable_get_paddr(v->arch.guest_table)),
  40.142 +                    __va(pagetable_get_paddr(curr->arch.guest_table)),
  40.143                      l4e_from_pfn(
  40.144                          mfn,
  40.145                          (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
  40.146 -                    pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
  40.147 +                    pagetable_get_pfn(curr->arch.guest_table), 0, 0) == 0;
  40.148          if ( unlikely(!okay) )
  40.149          {
  40.150              MEM_LOG("Error while installing new compat baseptr %lx", mfn);
  40.151              return 0;
  40.152          }
  40.153  
  40.154 -        invalidate_shadow_ldt(v);
  40.155 -        write_ptbase(v);
  40.156 +        invalidate_shadow_ldt(curr, 0);
  40.157 +        write_ptbase(curr);
  40.158  
  40.159          return 1;
  40.160      }
  40.161 @@ -2403,14 +2391,14 @@ int new_guest_cr3(unsigned long mfn)
  40.162          return 0;
  40.163      }
  40.164  
  40.165 -    invalidate_shadow_ldt(v);
  40.166 -
  40.167 -    old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
  40.168 -
  40.169 -    v->arch.guest_table = pagetable_from_pfn(mfn);
  40.170 -    update_cr3(v);
  40.171 -
  40.172 -    write_ptbase(v);
  40.173 +    invalidate_shadow_ldt(curr, 0);
  40.174 +
  40.175 +    old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
  40.176 +
  40.177 +    curr->arch.guest_table = pagetable_from_pfn(mfn);
  40.178 +    update_cr3(curr);
  40.179 +
  40.180 +    write_ptbase(curr);
  40.181  
  40.182      if ( likely(old_base_mfn != 0) )
  40.183      {
  40.184 @@ -2440,6 +2428,10 @@ static void process_deferred_ops(void)
  40.185              flush_tlb_local();
  40.186      }
  40.187  
  40.188 +    /*
  40.189 +     * Do this after flushing TLBs, to ensure we see fresh LDT mappings
  40.190 +     * via the linear pagetable mapping.
  40.191 +     */
  40.192      if ( deferred_ops & DOP_RELOAD_LDT )
  40.193          (void)map_ldt_shadow_page(0);
  40.194  
  40.195 @@ -2565,8 +2557,8 @@ int do_mmuext_op(
  40.196      unsigned long mfn = 0, gmfn = 0, type;
  40.197      unsigned int done = 0;
  40.198      struct page_info *page;
  40.199 -    struct vcpu *v = current;
  40.200 -    struct domain *d = v->domain;
  40.201 +    struct vcpu *curr = current;
  40.202 +    struct domain *d = curr->domain;
  40.203  
  40.204      if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
  40.205      {
  40.206 @@ -2729,8 +2721,8 @@ int do_mmuext_op(
  40.207                  }
  40.208              }
  40.209  
  40.210 -            old_mfn = pagetable_get_pfn(v->arch.guest_table_user);
  40.211 -            v->arch.guest_table_user = pagetable_from_pfn(mfn);
  40.212 +            old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
  40.213 +            curr->arch.guest_table_user = pagetable_from_pfn(mfn);
  40.214  
  40.215              if ( old_mfn != 0 )
  40.216              {
  40.217 @@ -2750,7 +2742,7 @@ int do_mmuext_op(
  40.218      
  40.219          case MMUEXT_INVLPG_LOCAL:
  40.220              if ( !paging_mode_enabled(d) 
  40.221 -                 || paging_invlpg(v, op.arg1.linear_addr) != 0 )
  40.222 +                 || paging_invlpg(curr, op.arg1.linear_addr) != 0 )
  40.223                  flush_tlb_one_local(op.arg1.linear_addr);
  40.224              break;
  40.225  
  40.226 @@ -2773,7 +2765,7 @@ int do_mmuext_op(
  40.227          }
  40.228  
  40.229          case MMUEXT_TLB_FLUSH_ALL:
  40.230 -            flush_tlb_mask(d->domain_dirty_cpumask);
  40.231 +            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
  40.232              break;
  40.233      
  40.234          case MMUEXT_INVLPG_ALL:
  40.235 @@ -2809,13 +2801,14 @@ int do_mmuext_op(
  40.236                  okay = 0;
  40.237                  MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
  40.238              }
  40.239 -            else if ( (v->arch.guest_context.ldt_ents != ents) || 
  40.240 -                      (v->arch.guest_context.ldt_base != ptr) )
  40.241 +            else if ( (curr->arch.guest_context.ldt_ents != ents) || 
  40.242 +                      (curr->arch.guest_context.ldt_base != ptr) )
  40.243              {
  40.244 -                invalidate_shadow_ldt(v);
  40.245 -                v->arch.guest_context.ldt_base = ptr;
  40.246 -                v->arch.guest_context.ldt_ents = ents;
  40.247 -                load_LDT(v);
  40.248 +                invalidate_shadow_ldt(curr, 0);
  40.249 +                this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
  40.250 +                curr->arch.guest_context.ldt_base = ptr;
  40.251 +                curr->arch.guest_context.ldt_ents = ents;
  40.252 +                load_LDT(curr);
  40.253                  this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
  40.254                  if ( ents != 0 )
  40.255                      this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
  40.256 @@ -2931,8 +2924,7 @@ int do_mmu_update(
  40.257      struct page_info *page;
  40.258      int rc = 0, okay = 1, i = 0;
  40.259      unsigned int cmd, done = 0;
  40.260 -    struct vcpu *v = current;
  40.261 -    struct domain *d = v->domain;
  40.262 +    struct domain *d = current->domain;
  40.263      struct domain_mmap_cache mapcache;
  40.264  
  40.265      if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
  40.266 @@ -3042,7 +3034,8 @@ int do_mmu_update(
  40.267  #endif
  40.268                  case PGT_writable_page:
  40.269                      perfc_incr(writable_mmu_updates);
  40.270 -                    okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
  40.271 +                    okay = paging_write_guest_entry(
  40.272 +                        current, va, req.val, _mfn(mfn));
  40.273                      break;
  40.274                  }
  40.275                  page_unlock(page);
  40.276 @@ -3052,7 +3045,8 @@ int do_mmu_update(
  40.277              else if ( get_page_type(page, PGT_writable_page) )
  40.278              {
  40.279                  perfc_incr(writable_mmu_updates);
  40.280 -                okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
  40.281 +                okay = paging_write_guest_entry(
  40.282 +                    current, va, req.val, _mfn(mfn));
  40.283                  put_page_type(page);
  40.284              }
  40.285  
  40.286 @@ -3508,7 +3502,7 @@ int steal_page(
  40.287      /* Unlink from original owner. */
  40.288      if ( !(memflags & MEMF_no_refcount) )
  40.289          d->tot_pages--;
  40.290 -    list_del(&page->list);
  40.291 +    page_list_del(page, &d->page_list);
  40.292  
  40.293      spin_unlock(&d->page_alloc_lock);
  40.294      return 0;
  40.295 @@ -3567,34 +3561,40 @@ int do_update_va_mapping(unsigned long v
  40.296      if ( pl1e )
  40.297          guest_unmap_l1e(v, pl1e);
  40.298  
  40.299 -    process_deferred_ops();
  40.300 -
  40.301      switch ( flags & UVMF_FLUSHTYPE_MASK )
  40.302      {
  40.303      case UVMF_TLB_FLUSH:
  40.304          switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
  40.305          {
  40.306          case UVMF_LOCAL:
  40.307 -            flush_tlb_local();
  40.308 +            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
  40.309              break;
  40.310          case UVMF_ALL:
  40.311 -            flush_tlb_mask(d->domain_dirty_cpumask);
  40.312 +            this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
  40.313              break;
  40.314          default:
  40.315 +            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
  40.316 +                break;
  40.317              if ( unlikely(!is_pv_32on64_domain(d) ?
  40.318                            get_user(vmask, (unsigned long *)bmap_ptr) :
  40.319                            get_user(vmask, (unsigned int *)bmap_ptr)) )
  40.320 -                rc = -EFAULT;
  40.321 +                rc = -EFAULT, vmask = 0;
  40.322              pmask = vcpumask_to_pcpumask(d, vmask);
  40.323 +            if ( cpu_isset(smp_processor_id(), pmask) )
  40.324 +                this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
  40.325              flush_tlb_mask(pmask);
  40.326              break;
  40.327          }
  40.328          break;
  40.329  
  40.330      case UVMF_INVLPG:
  40.331 +        if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
  40.332 +            break;
  40.333          switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
  40.334          {
  40.335          case UVMF_LOCAL:
  40.336 +            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
  40.337 +                break;
  40.338              if ( !paging_mode_enabled(d) ||
  40.339                   (paging_invlpg(v, va) != 0) ) 
  40.340                  flush_tlb_one_local(va);
  40.341 @@ -3606,14 +3606,18 @@ int do_update_va_mapping(unsigned long v
  40.342              if ( unlikely(!is_pv_32on64_domain(d) ?
  40.343                            get_user(vmask, (unsigned long *)bmap_ptr) :
  40.344                            get_user(vmask, (unsigned int *)bmap_ptr)) )
  40.345 -                rc = -EFAULT;
  40.346 +                rc = -EFAULT, vmask = 0;
  40.347              pmask = vcpumask_to_pcpumask(d, vmask);
  40.348 +            if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
  40.349 +                cpu_clear(smp_processor_id(), pmask);
  40.350              flush_tlb_one_mask(pmask, va);
  40.351              break;
  40.352          }
  40.353          break;
  40.354      }
  40.355  
  40.356 +    process_deferred_ops();
  40.357 +
  40.358      return rc;
  40.359  }
  40.360  
    41.1 --- a/xen/arch/x86/mm/hap/hap.c	Fri Feb 13 10:56:01 2009 +0900
    41.2 +++ b/xen/arch/x86/mm/hap/hap.c	Fri Feb 13 11:22:28 2009 +0900
    41.3 @@ -45,11 +45,11 @@
    41.4  
    41.5  /* Override macros from asm/page.h to make them work with mfn_t */
    41.6  #undef mfn_to_page
    41.7 -#define mfn_to_page(_m) (frame_table + mfn_x(_m))
    41.8 +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
    41.9  #undef mfn_valid
   41.10 -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
   41.11 +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
   41.12  #undef page_to_mfn
   41.13 -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
   41.14 +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
   41.15  
   41.16  /************************************************/
   41.17  /*            HAP LOG DIRTY SUPPORT             */
   41.18 @@ -96,11 +96,10 @@ static struct page_info *hap_alloc(struc
   41.19  
   41.20      ASSERT(hap_locked_by_me(d));
   41.21  
   41.22 -    if ( unlikely(list_empty(&d->arch.paging.hap.freelist)) )
   41.23 +    pg = page_list_remove_head(&d->arch.paging.hap.freelist);
   41.24 +    if ( unlikely(!pg) )
   41.25          return NULL;
   41.26  
   41.27 -    pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list);
   41.28 -    list_del(&pg->list);
   41.29      d->arch.paging.hap.free_pages--;
   41.30  
   41.31      p = hap_map_domain_page(page_to_mfn(pg));
   41.32 @@ -118,7 +117,7 @@ static void hap_free(struct domain *d, m
   41.33      ASSERT(hap_locked_by_me(d));
   41.34  
   41.35      d->arch.paging.hap.free_pages++;
   41.36 -    list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
   41.37 +    page_list_add_tail(pg, &d->arch.paging.hap.freelist);
   41.38  }
   41.39  
   41.40  static struct page_info *hap_alloc_p2m_page(struct domain *d)
   41.41 @@ -210,15 +209,13 @@ hap_set_allocation(struct domain *d, uns
   41.42              }
   41.43              d->arch.paging.hap.free_pages++;
   41.44              d->arch.paging.hap.total_pages++;
   41.45 -            list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
   41.46 +            page_list_add_tail(pg, &d->arch.paging.hap.freelist);
   41.47          }
   41.48          else if ( d->arch.paging.hap.total_pages > pages )
   41.49          {
   41.50              /* Need to return memory to domheap */
   41.51 -            ASSERT(!list_empty(&d->arch.paging.hap.freelist));
   41.52 -            pg = list_entry(d->arch.paging.hap.freelist.next,
   41.53 -                            struct page_info, list);
   41.54 -            list_del(&pg->list);
   41.55 +            pg = page_list_remove_head(&d->arch.paging.hap.freelist);
   41.56 +            ASSERT(pg);
   41.57              d->arch.paging.hap.free_pages--;
   41.58              d->arch.paging.hap.total_pages--;
   41.59              pg->count_info = 0;
   41.60 @@ -393,7 +390,7 @@ static void hap_destroy_monitor_table(st
   41.61  void hap_domain_init(struct domain *d)
   41.62  {
   41.63      hap_lock_init(d);
   41.64 -    INIT_LIST_HEAD(&d->arch.paging.hap.freelist);
   41.65 +    INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
   41.66  
   41.67      /* This domain will use HAP for log-dirty mode */
   41.68      paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
    42.1 --- a/xen/arch/x86/mm/hap/p2m-ept.c	Fri Feb 13 10:56:01 2009 +0900
    42.2 +++ b/xen/arch/x86/mm/hap/p2m-ept.c	Fri Feb 13 11:22:28 2009 +0900
    42.3 @@ -63,7 +63,7 @@ static int ept_set_middle_entry(struct d
    42.4  
    42.5      pg->count_info = 1;
    42.6      pg->u.inuse.type_info = 1 | PGT_validated;
    42.7 -    list_add_tail(&pg->list, &d->arch.p2m->pages);
    42.8 +    page_list_add_tail(pg, &d->arch.p2m->pages);
    42.9  
   42.10      ept_entry->emt = 0;
   42.11      ept_entry->igmt = 0;
   42.12 @@ -116,12 +116,12 @@ static int ept_next_level(struct domain 
   42.13  }
   42.14  
   42.15  /*
   42.16 - * TODO: ept_set_entry() computes 'need_modify_vtd_table' for itself,
   42.17 + * ept_set_entry() computes 'need_modify_vtd_table' for itself,
   42.18   * by observing whether any gfn->mfn translations are modified.
   42.19   */
   42.20  static int
   42.21 -_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
   42.22 -              unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table)
   42.23 +ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
   42.24 +              unsigned int order, p2m_type_t p2mt)
   42.25  {
   42.26      ept_entry_t *table = NULL;
   42.27      unsigned long gfn_remainder = gfn, offset = 0;
   42.28 @@ -131,6 +131,7 @@ static int
   42.29      int walk_level = order / EPT_TABLE_ORDER;
   42.30      int direct_mmio = (p2mt == p2m_mmio_direct);
   42.31      uint8_t igmt = 0;
   42.32 +    int need_modify_vtd_table = 1;
   42.33  
   42.34      /* we only support 4k and 2m pages now */
   42.35  
   42.36 @@ -171,14 +172,23 @@ static int
   42.37  
   42.38              if ( ret == GUEST_TABLE_SUPER_PAGE )
   42.39              {
   42.40 -                ept_entry->mfn = mfn_x(mfn) - offset;
   42.41 +                if ( ept_entry->mfn == (mfn_x(mfn) - offset) )
   42.42 +                    need_modify_vtd_table = 0;  
   42.43 +                else                  
   42.44 +                    ept_entry->mfn = mfn_x(mfn) - offset;
   42.45 +
   42.46                  if ( ept_entry->avail1 == p2m_ram_logdirty &&
   42.47                    p2mt == p2m_ram_rw )
   42.48                      for ( i = 0; i < 512; i++ )
   42.49                          paging_mark_dirty(d, mfn_x(mfn)-offset+i);
   42.50              }
   42.51              else
   42.52 -                ept_entry->mfn = mfn_x(mfn);
   42.53 +            {
   42.54 +                if ( ept_entry->mfn == mfn_x(mfn) )
   42.55 +                    need_modify_vtd_table = 0;
   42.56 +                else
   42.57 +                    ept_entry->mfn = mfn_x(mfn);
   42.58 +            }
   42.59  
   42.60              ept_entry->avail1 = p2mt;
   42.61              ept_entry->rsvd = 0;
   42.62 @@ -239,7 +249,10 @@ static int
   42.63                                                  &igmt, direct_mmio);
   42.64          split_ept_entry->igmt = igmt;
   42.65  
   42.66 -        split_ept_entry->mfn = mfn_x(mfn);
   42.67 +        if ( split_ept_entry->mfn == mfn_x(mfn) )
   42.68 +            need_modify_vtd_table = 0;
   42.69 +        else
   42.70 +            split_ept_entry->mfn = mfn_x(mfn);
   42.71          split_ept_entry->avail1 = p2mt;
   42.72          ept_p2m_type_to_flags(split_ept_entry, p2mt);
   42.73  
   42.74 @@ -289,17 +302,6 @@ out:
   42.75      return rv;
   42.76  }
   42.77  
   42.78 -static int
   42.79 -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
   42.80 -              unsigned int order, p2m_type_t p2mt)
   42.81 -{
   42.82 -    /* ept_set_entry() are called from set_entry(),
   42.83 -     * We should always create VT-d page table acording 
   42.84 -     * to the gfn to mfn translations changes.
   42.85 -     */
   42.86 -    return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); 
   42.87 -}
   42.88 -
   42.89  /* Read ept p2m entries */
   42.90  static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t,
   42.91      p2m_query_t q)
   42.92 @@ -393,6 +395,21 @@ static mfn_t ept_get_entry_current(unsig
   42.93      return ept_get_entry(current->domain, gfn, t, q);
   42.94  }
   42.95  
    42.96 +/* Check whether the new emt/igmt values match the old ones;
    42.97 + * return 0 if they do (no need to reset the EPT entry), 1 otherwise.
    42.98 + */
   42.99 +static int need_modify_ept_entry(struct domain *d, unsigned long gfn,
  42.100 +                                    unsigned long mfn, uint8_t o_igmt,
  42.101 +                                    uint8_t o_emt, p2m_type_t p2mt)
  42.102 +{
  42.103 +    uint8_t igmt, emt;
  42.104 +    emt = epte_get_entry_emt(d, gfn, mfn, &igmt, 
  42.105 +                                (p2mt == p2m_mmio_direct));
  42.106 +    if ( (emt == o_emt) && (igmt == o_igmt) )
  42.107 +        return 0;
  42.108 +    return 1; 
  42.109 +}
  42.110 +
  42.111  void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
  42.112                   unsigned long end_gfn)
  42.113  {
  42.114 @@ -401,6 +418,7 @@ void ept_change_entry_emt_with_range(str
  42.115      uint64_t epte;
  42.116      int order = 0;
  42.117      unsigned long mfn;
  42.118 +    uint8_t o_igmt, o_emt;
  42.119  
  42.120      for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
  42.121      {
  42.122 @@ -410,7 +428,9 @@ void ept_change_entry_emt_with_range(str
  42.123          mfn = (epte & EPTE_MFN_MASK) >> PAGE_SHIFT;
  42.124          if ( !mfn_valid(mfn) )
  42.125              continue;
  42.126 -        p2mt = (epte & EPTE_AVAIL1_MASK) >> 8;
  42.127 +        p2mt = (epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT;
  42.128 +        o_igmt = (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT;
  42.129 +        o_emt = (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT;
  42.130          order = 0;
  42.131  
  42.132          if ( epte & EPTE_SUPER_PAGE_MASK )
  42.133 @@ -422,30 +442,26 @@ void ept_change_entry_emt_with_range(str
  42.134                   * Set emt for super page.
  42.135                   */
  42.136                  order = EPT_TABLE_ORDER;
  42.137 -                /* vmx_set_uc_mode() dont' touch the gfn to mfn
  42.138 -                 * translations, only modify the emt field of the EPT entries.
  42.139 -                 * so we need not modify the current VT-d page tables.
  42.140 -                 */
  42.141 -                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
  42.142 +                if ( need_modify_ept_entry(d, gfn, mfn, 
  42.143 +                                            o_igmt, o_emt, p2mt) )
  42.144 +                    ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
  42.145                  gfn += 0x1FF;
  42.146              }
  42.147              else
  42.148              {
  42.149 -                /* 1)change emt for partial entries of the 2m area.
  42.150 -                 * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
  42.151 -                 * translations, only modify the emt field of the EPT entries.
  42.152 -                 * so we need not modify the current VT-d page tables.
  42.153 -                 */
  42.154 -                _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0);
  42.155 +                /* change emt for partial entries of the 2m area. */
  42.156 +                if ( need_modify_ept_entry(d, gfn, mfn, 
  42.157 +                                            o_igmt, o_emt, p2mt) )
  42.158 +                    ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
  42.159                  gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
  42.160              }
  42.161          }
  42.162 -        else /* 1)gfn assigned with 4k
  42.163 -              * 2)vmx_set_uc_mode() dont' touch the gfn to mfn
  42.164 -              * translations, only modify the emt field of the EPT entries.
  42.165 -              * so we need not modify the current VT-d page tables.
  42.166 -             */
  42.167 -            _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0);
  42.168 +        else /* gfn assigned with 4k */
  42.169 +        {
  42.170 +            if ( need_modify_ept_entry(d, gfn, mfn, 
  42.171 +                                            o_igmt, o_emt, p2mt) )
  42.172 +                ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
  42.173 +        }
  42.174      }
  42.175  }
  42.176  
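
The p2m-ept.c hunks above make ept_set_entry() compute need_modify_vtd_table itself: the VT-d tables are only rewritten when a gfn->mfn translation actually changes, while emt-only updates (filtered by need_modify_ept_entry()) leave them alone. The comparison at the heart of that decision, as a hypothetical helper:

    /* Sketch only -- not part of the changeset. */
    static int update_translation(ept_entry_t *e, unsigned long new_mfn)
    {
        if ( e->mfn == new_mfn )
            return 0;          /* translation unchanged: leave VT-d alone */

        e->mfn = new_mfn;
        return 1;              /* gfn->mfn changed: VT-d must be updated too */
    }
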
    43.1 --- a/xen/arch/x86/mm/p2m.c	Fri Feb 13 10:56:01 2009 +0900
    43.2 +++ b/xen/arch/x86/mm/p2m.c	Fri Feb 13 11:22:28 2009 +0900
    43.3 @@ -89,11 +89,11 @@
    43.4  
    43.5  /* Override macros from asm/page.h to make them work with mfn_t */
    43.6  #undef mfn_to_page
    43.7 -#define mfn_to_page(_m) (frame_table + mfn_x(_m))
    43.8 +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
    43.9  #undef mfn_valid
   43.10 -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
   43.11 +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
   43.12  #undef page_to_mfn
   43.13 -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
   43.14 +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
   43.15  
   43.16  
   43.17  /* PTE flags for the various types of p2m entry */
   43.18 @@ -175,7 +175,7 @@ p2m_next_level(struct domain *d, mfn_t *
   43.19          struct page_info *pg = d->arch.p2m->alloc_page(d);
   43.20          if ( pg == NULL )
   43.21              return 0;
   43.22 -        list_add_tail(&pg->list, &d->arch.p2m->pages);
   43.23 +        page_list_add_tail(pg, &d->arch.p2m->pages);
   43.24          pg->u.inuse.type_info = type | 1 | PGT_validated;
   43.25          pg->count_info = 1;
   43.26  
   43.27 @@ -214,7 +214,7 @@ p2m_next_level(struct domain *d, mfn_t *
   43.28          struct page_info *pg = d->arch.p2m->alloc_page(d);
   43.29          if ( pg == NULL )
   43.30              return 0;
   43.31 -        list_add_tail(&pg->list, &d->arch.p2m->pages);
   43.32 +        page_list_add_tail(pg, &d->arch.p2m->pages);
   43.33          pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
   43.34          pg->count_info = 1;
   43.35          
   43.36 @@ -300,18 +300,18 @@ p2m_pod_cache_add(struct domain *d,
   43.37      for(i=0; i < 1 << order ; i++)
   43.38      {
   43.39          p = page + i;
   43.40 -        list_del(&p->list);
   43.41 +        page_list_del(p, &d->page_list);
   43.42      }
   43.43  
   43.44      /* Then add the first one to the appropriate populate-on-demand list */
   43.45      switch(order)
   43.46      {
   43.47      case 9:
   43.48 -        list_add_tail(&page->list, &p2md->pod.super); /* lock: page_alloc */
   43.49 +        page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
   43.50          p2md->pod.count += 1 << order;
   43.51          break;
   43.52      case 0:
   43.53 -        list_add_tail(&page->list, &p2md->pod.single); /* lock: page_alloc */
   43.54 +        page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
   43.55          p2md->pod.count += 1 ;
   43.56          break;
   43.57      default:
   43.58 @@ -334,54 +334,51 @@ static struct page_info * p2m_pod_cache_
   43.59      struct page_info *p = NULL;
   43.60      int i;
   43.61  
   43.62 -    if ( order == 9 && list_empty(&p2md->pod.super) )
   43.63 +    if ( order == 9 && page_list_empty(&p2md->pod.super) )
   43.64      {
   43.65          return NULL;
   43.66      }
   43.67 -    else if ( order == 0 && list_empty(&p2md->pod.single) )
   43.68 +    else if ( order == 0 && page_list_empty(&p2md->pod.single) )
   43.69      {
   43.70          unsigned long mfn;
   43.71          struct page_info *q;
   43.72  
   43.73 -        BUG_ON( list_empty(&p2md->pod.super) );
   43.74 +        BUG_ON( page_list_empty(&p2md->pod.super) );
   43.75  
   43.76          /* Break up a superpage to make single pages. NB count doesn't
   43.77           * need to be adjusted. */
   43.78          printk("%s: Breaking up superpage.\n", __func__);
   43.79 -        p = list_entry(p2md->pod.super.next, struct page_info, list);
   43.80 -        list_del(&p->list);
   43.81 +        p = page_list_remove_head(&p2md->pod.super);
   43.82          mfn = mfn_x(page_to_mfn(p));
   43.83  
   43.84          for ( i=0; i<(1<<9); i++ )
   43.85          {
   43.86              q = mfn_to_page(_mfn(mfn+i));
   43.87 -            list_add_tail(&q->list, &p2md->pod.single);
   43.88 +            page_list_add_tail(q, &p2md->pod.single);
   43.89          }
   43.90      }
   43.91  
   43.92      switch ( order )
   43.93      {
   43.94      case 9:
   43.95 -        BUG_ON( list_empty(&p2md->pod.super) );
   43.96 -        p = list_entry(p2md->pod.super.next, struct page_info, list); 
   43.97 +        BUG_ON( page_list_empty(&p2md->pod.super) );
   43.98 +        p = page_list_remove_head(&p2md->pod.super);
   43.99          p2md->pod.count -= 1 << order; /* Lock: page_alloc */
  43.100          break;
  43.101      case 0:
  43.102 -        BUG_ON( list_empty(&p2md->pod.single) );
  43.103 -        p = list_entry(p2md->pod.single.next, struct page_info, list);
  43.104 +        BUG_ON( page_list_empty(&p2md->pod.single) );
  43.105 +        p = page_list_remove_head(&p2md->pod.single);
  43.106          p2md->pod.count -= 1;
  43.107          break;
  43.108      default:
  43.109          BUG();
  43.110      }
  43.111  
  43.112 -    list_del(&p->list);
  43.113 -
  43.114      /* Put the pages back on the domain page_list */
  43.115      for ( i = 0 ; i < (1 << order) ; i++ )
  43.116      {
  43.117          BUG_ON(page_get_owner(p + i) != d);
  43.118 -        list_add_tail(&p[i].list, &d->page_list);
  43.119 +        page_list_add_tail(p + i, &d->page_list);
  43.120      }
  43.121  
  43.122      return p;
  43.123 @@ -425,7 +422,7 @@ p2m_pod_set_cache_target(struct domain *
  43.124          spin_lock(&d->page_alloc_lock);
  43.125  
  43.126          if ( (p2md->pod.count - pod_target) > (1>>9)
  43.127 -             && !list_empty(&p2md->pod.super) )
  43.128 +             && !page_list_empty(&p2md->pod.super) )
  43.129              order = 9;
  43.130          else
  43.131              order = 0;
  43.132 @@ -535,38 +532,27 @@ void
  43.133  p2m_pod_empty_cache(struct domain *d)
  43.134  {
  43.135      struct p2m_domain *p2md = d->arch.p2m;
  43.136 -    struct list_head *q, *p;
  43.137 +    struct page_info *page;
  43.138  
  43.139      spin_lock(&d->page_alloc_lock);
  43.140  
  43.141 -    list_for_each_safe(p, q, &p2md->pod.super) /* lock: page_alloc */
  43.142 +    while ( (page = page_list_remove_head(&p2md->pod.super)) )
  43.143      {
  43.144          int i;
  43.145 -        struct page_info *page;
  43.146              
  43.147 -        list_del(p);
  43.148 -            
  43.149 -        page = list_entry(p, struct page_info, list);
  43.150 -
  43.151          for ( i = 0 ; i < (1 << 9) ; i++ )
  43.152          {
  43.153              BUG_ON(page_get_owner(page + i) != d);
  43.154 -            list_add_tail(&page[i].list, &d->page_list);
  43.155 +            page_list_add_tail(page + i, &d->page_list);
  43.156          }
  43.157  
  43.158          p2md->pod.count -= 1<<9;
  43.159      }
  43.160  
  43.161 -    list_for_each_safe(p, q, &p2md->pod.single)
  43.162 +    while ( (page = page_list_remove_head(&p2md->pod.single)) )
  43.163      {
  43.164 -        struct page_info *page;
  43.165 -            
  43.166 -        list_del(p);
  43.167 -            
  43.168 -        page = list_entry(p, struct page_info, list);
  43.169 -
  43.170          BUG_ON(page_get_owner(page) != d);
  43.171 -        list_add_tail(&page->list, &d->page_list);
  43.172 +        page_list_add_tail(page, &d->page_list);
  43.173  
  43.174          p2md->pod.count -= 1;
  43.175      }
  43.176 @@ -952,7 +938,7 @@ p2m_pod_emergency_sweep_super(struct dom
  43.177           * NB that this is a zero-sum game; we're increasing our cache size
  43.178           * by increasing our 'debt'.  Since we hold the p2m lock,
  43.179           * (entry_count - count) must remain the same. */
  43.180 -        if ( !list_empty(&p2md->pod.super) &&  i < limit )
  43.181 +        if ( !page_list_empty(&p2md->pod.super) &&  i < limit )
  43.182              break;
  43.183      }
  43.184  
  43.185 @@ -1035,12 +1021,12 @@ p2m_pod_demand_populate(struct domain *d
  43.186      }
  43.187  
  43.188      /* If we're low, start a sweep */
  43.189 -    if ( order == 9 && list_empty(&p2md->pod.super) )
  43.190 +    if ( order == 9 && page_list_empty(&p2md->pod.super) )
  43.191          p2m_pod_emergency_sweep_super(d);
  43.192  
  43.193 -    if ( list_empty(&p2md->pod.single) &&
  43.194 +    if ( page_list_empty(&p2md->pod.single) &&
  43.195           ( ( order == 0 )
  43.196 -           || (order == 9 && list_empty(&p2md->pod.super) ) ) )
  43.197 +           || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
  43.198          p2m_pod_emergency_sweep(d);
  43.199  
  43.200      /* Keep track of the highest gfn demand-populated by a guest fault */
  43.201 @@ -1477,9 +1463,9 @@ int p2m_init(struct domain *d)
  43.202  
  43.203      memset(p2m, 0, sizeof(*p2m));
  43.204      p2m_lock_init(p2m);
  43.205 -    INIT_LIST_HEAD(&p2m->pages);
  43.206 -    INIT_LIST_HEAD(&p2m->pod.super);
  43.207 -    INIT_LIST_HEAD(&p2m->pod.single);
  43.208 +    INIT_PAGE_LIST_HEAD(&p2m->pages);
  43.209 +    INIT_PAGE_LIST_HEAD(&p2m->pod.super);
  43.210 +    INIT_PAGE_LIST_HEAD(&p2m->pod.single);
  43.211  
  43.212      p2m->set_entry = p2m_set_entry;
  43.213      p2m->get_entry = p2m_gfn_to_mfn;
  43.214 @@ -1540,7 +1526,6 @@ int p2m_alloc_table(struct domain *d,
  43.215  
  43.216  {
  43.217      mfn_t mfn = _mfn(INVALID_MFN);
  43.218 -    struct list_head *entry;
  43.219      struct page_info *page, *p2m_top;
  43.220      unsigned int page_count = 0;
  43.221      unsigned long gfn = -1UL;
  43.222 @@ -1566,7 +1551,7 @@ int p2m_alloc_table(struct domain *d,
  43.223          p2m_unlock(p2m);
  43.224          return -ENOMEM;
  43.225      }
  43.226 -    list_add_tail(&p2m_top->list, &p2m->pages);
  43.227 +    page_list_add_tail(p2m_top, &p2m->pages);
  43.228  
  43.229      p2m_top->count_info = 1;
  43.230      p2m_top->u.inuse.type_info =
  43.231 @@ -1587,11 +1572,8 @@ int p2m_alloc_table(struct domain *d,
  43.232          goto error;
  43.233  
  43.234      /* Copy all existing mappings from the page list and m2p */
  43.235 -    for ( entry = d->page_list.next;
  43.236 -          entry != &d->page_list;
  43.237 -          entry = entry->next )
  43.238 +    page_list_for_each(page, &d->page_list)
  43.239      {
  43.240 -        page = list_entry(entry, struct page_info, list);
  43.241          mfn = page_to_mfn(page);
  43.242          gfn = get_gpfn_from_mfn(mfn_x(mfn));
  43.243          page_count++;
  43.244 @@ -1621,19 +1603,14 @@ void p2m_teardown(struct domain *d)
  43.245  /* Return all the p2m pages to Xen.
  43.246   * We know we don't have any extra mappings to these pages */
  43.247  {
  43.248 -    struct list_head *entry, *n;
  43.249      struct page_info *pg;
  43.250      struct p2m_domain *p2m = d->arch.p2m;
  43.251  
  43.252      p2m_lock(p2m);
  43.253      d->arch.phys_table = pagetable_null();
  43.254  
  43.255 -    list_for_each_safe(entry, n, &p2m->pages)
  43.256 -    {
  43.257 -        pg = list_entry(entry, struct page_info, list);
  43.258 -        list_del(entry);
  43.259 +    while ( (pg = page_list_remove_head(&p2m->pages)) )
  43.260          p2m->free_page(d, pg);
  43.261 -    }
  43.262      p2m_unlock(p2m);
  43.263  }
  43.264  
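
The populate-on-demand cache now keeps its superpage and single-page pools on page_list heads; when the single list runs dry, one order-9 chunk is split into 512 singles without touching pod.count. A sketch of that break-up step, using the list helpers from this patch; the function name is hypothetical.

    /* Sketch only -- not part of the changeset. */
    static void pod_break_superpage(struct p2m_domain *p2md)
    {
        struct page_info *p = page_list_remove_head(&p2md->pod.super);
        unsigned long mfn;
        int i;

        BUG_ON(p == NULL);
        mfn = mfn_x(page_to_mfn(p));

        for ( i = 0; i < (1 << 9); i++ )
            page_list_add_tail(mfn_to_page(_mfn(mfn + i)), &p2md->pod.single);

        /* pod.count is unchanged: 512 singles replace one order-9 chunk. */
    }
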
    44.1 --- a/xen/arch/x86/mm/paging.c	Fri Feb 13 10:56:01 2009 +0900
    44.2 +++ b/xen/arch/x86/mm/paging.c	Fri Feb 13 11:22:28 2009 +0900
    44.3 @@ -47,11 +47,11 @@
    44.4  /************************************************/
    44.5  /* Override macros from asm/page.h to make them work with mfn_t */
    44.6  #undef mfn_to_page
    44.7 -#define mfn_to_page(_m) (frame_table + mfn_x(_m))
    44.8 +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
    44.9  #undef mfn_valid
   44.10 -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
   44.11 +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
   44.12  #undef page_to_mfn
   44.13 -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
   44.14 +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
   44.15  
   44.16  /* The log-dirty lock.  This protects the log-dirty bitmap from
   44.17   * concurrent accesses (and teardowns, etc).
    45.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri Feb 13 10:56:01 2009 +0900
    45.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Feb 13 11:22:28 2009 +0900
    45.3 @@ -48,9 +48,9 @@ void shadow_domain_init(struct domain *d
    45.4      int i;
    45.5      shadow_lock_init(d);
    45.6      for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
    45.7 -        INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
    45.8 -    INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
    45.9 -    INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
   45.10 +        INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
   45.11 +    INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
   45.12 +    INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
   45.13  
   45.14      /* Use shadow pagetables for log-dirty support */
   45.15      paging_log_dirty_init(d, shadow_enable_log_dirty, 
   45.16 @@ -1291,9 +1291,9 @@ static inline int space_is_available(
   45.17      for ( ; order <= shadow_max_order(d); ++order )
   45.18      {
   45.19          unsigned int n = count;
   45.20 -        const struct list_head *p;
   45.21 -
   45.22 -        list_for_each ( p, &d->arch.paging.shadow.freelists[order] )
   45.23 +        const struct page_info *sp;
   45.24 +
   45.25 +        page_list_for_each ( sp, &d->arch.paging.shadow.freelists[order] )
   45.26              if ( --n == 0 )
   45.27                  return 1;
   45.28          count = (count + 1) >> 1;
   45.29 @@ -1306,8 +1306,8 @@ static inline int space_is_available(
   45.30   * non-Xen mappings in this top-level shadow mfn */
   45.31  static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
   45.32  {
   45.33 -    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
   45.34 -    switch ( sp->type )
   45.35 +    struct page_info *sp = mfn_to_page(smfn);
   45.36 +    switch ( sp->u.sh.type )
   45.37      {
   45.38      case SH_type_l2_32_shadow:
   45.39          SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn);
   45.40 @@ -1322,7 +1322,7 @@ static void shadow_unhook_mappings(struc
   45.41          break;
   45.42  #endif
   45.43      default:
   45.44 -        SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->type);
   45.45 +        SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->u.sh.type);
   45.46          BUG();
   45.47      }
   45.48  }
   45.49 @@ -1334,7 +1334,7 @@ static inline void trace_shadow_prealloc
   45.50          /* Convert smfn to gfn */
   45.51          unsigned long gfn;
   45.52          ASSERT(mfn_valid(smfn));
   45.53 -        gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
   45.54 +        gfn = mfn_to_gfn(d, _mfn(mfn_to_page(smfn)->v.sh.back));
   45.55          __trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
   45.56                      sizeof(gfn), (unsigned char*)&gfn);
   45.57      }
   45.58 @@ -1350,8 +1350,7 @@ static void _shadow_prealloc(
   45.59      /* Need a vpcu for calling unpins; for now, since we don't have
   45.60       * per-vcpu shadows, any will do */
   45.61      struct vcpu *v, *v2;
   45.62 -    struct list_head *l, *t;
   45.63 -    struct shadow_page_info *sp;
   45.64 +    struct page_info *sp, *t;
   45.65      mfn_t smfn;
   45.66      int i;
   45.67  
   45.68 @@ -1365,10 +1364,9 @@ static void _shadow_prealloc(
   45.69  
   45.70      /* Stage one: walk the list of pinned pages, unpinning them */
   45.71      perfc_incr(shadow_prealloc_1);
   45.72 -    list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows)
   45.73 +    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
   45.74      {
   45.75 -        sp = list_entry(l, struct shadow_page_info, list);
   45.76 -        smfn = shadow_page_to_mfn(sp);
   45.77 +        smfn = page_to_mfn(sp);
   45.78  
   45.79          /* Unpin this top-level shadow */
   45.80          trace_shadow_prealloc_unpin(d, smfn);
   45.81 @@ -1427,8 +1425,7 @@ void shadow_prealloc(struct domain *d, u
   45.82   * this domain's shadows */
   45.83  static void shadow_blow_tables(struct domain *d) 
   45.84  {
   45.85 -    struct list_head *l, *t;
   45.86 -    struct shadow_page_info *sp;
   45.87 +    struct page_info *sp, *t;
   45.88      struct vcpu *v = d->vcpu[0];
   45.89      mfn_t smfn;
   45.90      int i;
   45.91 @@ -1436,10 +1433,9 @@ static void shadow_blow_tables(struct do
   45.92      ASSERT(v != NULL);
   45.93  
   45.94      /* Pass one: unpin all pinned pages */
   45.95 -    list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows)
   45.96 +    page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
   45.97      {
   45.98 -        sp = list_entry(l, struct shadow_page_info, list);
   45.99 -        smfn = shadow_page_to_mfn(sp);
  45.100 +        smfn = page_to_mfn(sp);
  45.101          sh_unpin(v, smfn);
  45.102      }
  45.103          
  45.104 @@ -1493,6 +1489,18 @@ static __init int shadow_blow_tables_key
  45.105  __initcall(shadow_blow_tables_keyhandler_init);
  45.106  #endif /* !NDEBUG */
  45.107  
  45.108 +static inline struct page_info *
  45.109 +next_shadow(const struct page_info *sp)
  45.110 +{
  45.111 +    return sp->next_shadow ? mfn_to_page(_mfn(sp->next_shadow)) : NULL;
  45.112 +}
  45.113 +
  45.114 +static inline void
  45.115 +set_next_shadow(struct page_info *sp, struct page_info *next)
  45.116 +{
  45.117 +    sp->next_shadow = next ? mfn_x(page_to_mfn(next)) : 0;
  45.118 +}
  45.119 +
  45.120  /* Allocate another shadow's worth of (contiguous, aligned) pages,
  45.121   * and fill in the type and backpointer fields of their page_infos. 
  45.122   * Never fails to allocate. */
  45.123 @@ -1500,7 +1508,7 @@ mfn_t shadow_alloc(struct domain *d,
  45.124                      u32 shadow_type,
  45.125                      unsigned long backpointer)
  45.126  {
  45.127 -    struct shadow_page_info *sp = NULL;
  45.128 +    struct page_info *sp = NULL;
  45.129      unsigned int order = shadow_order(shadow_type);
  45.130      cpumask_t mask;
  45.131      void *p;
  45.132 @@ -1515,7 +1523,7 @@ mfn_t shadow_alloc(struct domain *d,
  45.133  
  45.134      /* Find smallest order which can satisfy the request. */
  45.135      for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
  45.136 -        if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
  45.137 +        if ( (sp = page_list_remove_head(&d->arch.paging.shadow.freelists[i])) )
  45.138              goto found;
  45.139      
  45.140      /* If we get here, we failed to allocate. This should never happen.
  45.141 @@ -1526,16 +1534,12 @@ mfn_t shadow_alloc(struct domain *d,
  45.142      BUG();
  45.143  
  45.144   found:
  45.145 -    sp = list_entry(d->arch.paging.shadow.freelists[i].next, 
  45.146 -                    struct shadow_page_info, list);
  45.147 -    list_del(&sp->list);
  45.148 -            
  45.149      /* We may have to halve the chunk a number of times. */
  45.150      while ( i != order )
  45.151      {
  45.152          i--;
  45.153 -        sp->order = i;
  45.154 -        list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]);
  45.155 +        sp->v.free.order = i;
  45.156 +        page_list_add_tail(sp, &d->arch.paging.shadow.freelists[i]);
  45.157          sp += 1 << i;
  45.158      }
  45.159      d->arch.paging.shadow.free_pages -= 1 << order;
  45.160 @@ -1553,26 +1557,26 @@ mfn_t shadow_alloc(struct domain *d,
  45.161              flush_tlb_mask(mask);
  45.162          }
  45.163          /* Now safe to clear the page for reuse */
  45.164 -        p = sh_map_domain_page(shadow_page_to_mfn(sp+i));
  45.165 +        p = sh_map_domain_page(page_to_mfn(sp+i));
  45.166          ASSERT(p != NULL);
  45.167          clear_page(p);
  45.168          sh_unmap_domain_page(p);
  45.169 -        INIT_LIST_HEAD(&sp[i].list);
  45.170 -        sp[i].type = shadow_type;
  45.171 -        sp[i].pinned = 0;
  45.172 -        sp[i].count = 0;
  45.173 -        sp[i].backpointer = backpointer;
  45.174 -        sp[i].next_shadow = NULL;
  45.175 +        INIT_PAGE_LIST_ENTRY(&sp[i].list);
  45.176 +        sp[i].u.sh.type = shadow_type;
  45.177 +        sp[i].u.sh.pinned = 0;
  45.178 +        sp[i].u.sh.count = 0;
  45.179 +        sp[i].v.sh.back = backpointer;
  45.180 +        set_next_shadow(&sp[i], NULL);
  45.181          perfc_incr(shadow_alloc_count);
  45.182      }
  45.183 -    return shadow_page_to_mfn(sp);
  45.184 +    return page_to_mfn(sp);
  45.185  }
  45.186  
  45.187  
  45.188  /* Return some shadow pages to the pool. */
  45.189  void shadow_free(struct domain *d, mfn_t smfn)
  45.190  {
  45.191 -    struct shadow_page_info *sp = mfn_to_shadow_page(smfn); 
  45.192 +    struct page_info *sp = mfn_to_page(smfn); 
  45.193      u32 shadow_type;
  45.194      unsigned long order;
  45.195      unsigned long mask;
  45.196 @@ -1581,7 +1585,7 @@ void shadow_free(struct domain *d, mfn_t
  45.197      ASSERT(shadow_locked_by_me(d));
  45.198      perfc_incr(shadow_free);
  45.199  
  45.200 -    shadow_type = sp->type;
  45.201 +    shadow_type = sp->u.sh.type;
  45.202      ASSERT(shadow_type != SH_type_none);
  45.203      ASSERT(shadow_type != SH_type_p2m_table);
  45.204      order = shadow_order(shadow_type);
  45.205 @@ -1605,7 +1609,7 @@ void shadow_free(struct domain *d, mfn_t
  45.206          }
  45.207  #endif
  45.208          /* Strip out the type: this is now a free shadow page */
  45.209 -        sp[i].type = 0;
  45.210 +        sp[i].u.sh.type = 0;
  45.211          /* Remember the TLB timestamp so we will know whether to flush 
  45.212           * TLBs when we reuse the page.  Because the destructors leave the
  45.213           * contents of the pages in place, we can delay TLB flushes until
  45.214 @@ -1618,22 +1622,24 @@ void shadow_free(struct domain *d, mfn_t
  45.215      for ( ; order < shadow_max_order(d); ++order )
  45.216      {
  45.217          mask = 1 << order;
  45.218 -        if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
  45.219 +        if ( (mfn_x(page_to_mfn(sp)) & mask) ) {
  45.220              /* Merge with predecessor block? */
  45.221 -            if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) )
  45.222 +            if ( ((sp-mask)->u.sh.type != PGT_none) ||
  45.223 +                 ((sp-mask)->v.free.order != order) )
  45.224                  break;
  45.225 -            list_del(&(sp-mask)->list);
  45.226              sp -= mask;
  45.227 +            page_list_del(sp, &d->arch.paging.shadow.freelists[order]);
  45.228          } else {
  45.229              /* Merge with successor block? */
  45.230 -            if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) )
  45.231 +            if ( ((sp+mask)->u.sh.type != PGT_none) ||
  45.232 +                 ((sp+mask)->v.free.order != order) )
  45.233                  break;
  45.234 -            list_del(&(sp+mask)->list);
  45.235 +            page_list_del(sp + mask, &d->arch.paging.shadow.freelists[order]);
  45.236          }
  45.237      }
  45.238  
  45.239 -    sp->order = order;
  45.240 -    list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
  45.241 +    sp->v.free.order = order;
  45.242 +    page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
  45.243  }
  45.244  
  45.245  /* Divert some memory from the pool to be used by the p2m mapping.
  45.246 @@ -1672,7 +1678,7 @@ sh_alloc_p2m_pages(struct domain *d)
  45.247           */
  45.248          page_set_owner(&pg[i], d);
  45.249          pg[i].count_info = 1;
  45.250 -        list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist);
  45.251 +        page_list_add_tail(&pg[i], &d->arch.paging.shadow.p2m_freelist);
  45.252      }
  45.253      return 1;
  45.254  }
  45.255 @@ -1681,25 +1687,22 @@ sh_alloc_p2m_pages(struct domain *d)
  45.256  static struct page_info *
  45.257  shadow_alloc_p2m_page(struct domain *d)
  45.258  {
  45.259 -    struct list_head *entry;
  45.260      struct page_info *pg;
  45.261      mfn_t mfn;
  45.262      void *p;
  45.263      
  45.264      shadow_lock(d);
  45.265  
  45.266 -    if ( list_empty(&d->arch.paging.shadow.p2m_freelist) &&
  45.267 +    if ( page_list_empty(&d->arch.paging.shadow.p2m_freelist) &&
  45.268           !sh_alloc_p2m_pages(d) )
  45.269      {
  45.270          shadow_unlock(d);
  45.271          return NULL;
  45.272      }
  45.273 -    entry = d->arch.paging.shadow.p2m_freelist.next;
  45.274 -    list_del(entry);
  45.275 +    pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist);
  45.276  
  45.277      shadow_unlock(d);
  45.278  
  45.279 -    pg = list_entry(entry, struct page_info, list);
  45.280      mfn = page_to_mfn(pg);
  45.281      p = sh_map_domain_page(mfn);
  45.282      clear_page(p);
  45.283 @@ -1780,7 +1783,7 @@ static unsigned int sh_set_allocation(st
  45.284                                        unsigned int pages,
  45.285                                        int *preempted)
  45.286  {
  45.287 -    struct shadow_page_info *sp;
  45.288 +    struct page_info *sp;
  45.289      unsigned int lower_bound;
  45.290      unsigned int j, order = shadow_max_order(d);
  45.291  
  45.292 @@ -1802,7 +1805,7 @@ static unsigned int sh_set_allocation(st
  45.293          if ( d->arch.paging.shadow.total_pages < pages ) 
  45.294          {
  45.295              /* Need to allocate more memory from domheap */
  45.296 -            sp = (struct shadow_page_info *)
  45.297 +            sp = (struct page_info *)
  45.298                  alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d)));
  45.299              if ( sp == NULL ) 
  45.300              { 
  45.301 @@ -1813,23 +1816,26 @@ static unsigned int sh_set_allocation(st
  45.302              d->arch.paging.shadow.total_pages += 1 << order;
  45.303              for ( j = 0; j < 1U << order; j++ )
  45.304              {
  45.305 -                sp[j].type = 0;  
  45.306 -                sp[j].pinned = 0;
  45.307 -                sp[j].count = 0;
  45.308 -                sp[j].mbz = 0;
  45.309 +                sp[j].u.sh.type = 0;
  45.310 +                sp[j].u.sh.pinned = 0;
  45.311 +                sp[j].u.sh.count = 0;
  45.312                  sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
  45.313              }
  45.314 -            sp->order = order;
  45.315 -            list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
  45.316 +            sp->v.free.order = order;
  45.317 +            page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
  45.318          } 
  45.319          else if ( d->arch.paging.shadow.total_pages > pages ) 
  45.320          {
  45.321              /* Need to return memory to domheap */
  45.322              _shadow_prealloc(d, order, 1);
  45.323 -            ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
  45.324 -            sp = list_entry(d->arch.paging.shadow.freelists[order].next,
  45.325 -                            struct shadow_page_info, list);
  45.326 -            list_del(&sp->list);
  45.327 +            sp = page_list_remove_head(&d->arch.paging.shadow.freelists[order]);
  45.328 +            ASSERT(sp);
  45.329 +            /*
  45.330 +             * The pages were allocated anonymously, but the owner field
  45.331 +             * gets overwritten normally, so need to clear it here.
  45.332 +             */
  45.333 +            for ( j = 0; j < 1U << order; j++ )
  45.334 +                page_set_owner(&((struct page_info *)sp)[j], NULL);
  45.335              d->arch.paging.shadow.free_pages -= 1 << order;
  45.336              d->arch.paging.shadow.total_pages -= 1 << order;
  45.337              free_domheap_pages((struct page_info *)sp, order);
  45.338 @@ -1880,7 +1886,7 @@ static inline key_t sh_hash(unsigned lon
  45.339  static void sh_hash_audit_bucket(struct domain *d, int bucket)
  45.340  /* Audit one bucket of the hash table */
  45.341  {
  45.342 -    struct shadow_page_info *sp, *x;
  45.343 +    struct page_info *sp, *x;
  45.344  
  45.345      if ( !(SHADOW_AUDIT_ENABLE) )
  45.346          return;
  45.347 @@ -1889,38 +1895,39 @@ static void sh_hash_audit_bucket(struct 
  45.348      while ( sp )
  45.349      {
  45.350          /* Not a shadow? */
  45.351 -        BUG_ON( sp->mbz != 0 );
  45.352 +        BUG_ON( sp->count_info != 0 );
  45.353          /* Bogus type? */
  45.354 -        BUG_ON( sp->type == 0 ); 
  45.355 -        BUG_ON( sp->type > SH_type_max_shadow );
  45.356 +        BUG_ON( sp->u.sh.type == 0 );
  45.357 +        BUG_ON( sp->u.sh.type > SH_type_max_shadow );
  45.358          /* Wrong bucket? */
  45.359 -        BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket ); 
  45.360 +        BUG_ON( sh_hash(sp->v.sh.back, sp->u.sh.type) != bucket );
  45.361          /* Duplicate entry? */
  45.362 -        for ( x = sp->next_shadow; x; x = x->next_shadow )
  45.363 -            BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type );
  45.364 +        for ( x = next_shadow(sp); x; x = next_shadow(x) )
  45.365 +            BUG_ON( x->v.sh.back == sp->v.sh.back &&
  45.366 +                    x->u.sh.type == sp->u.sh.type );
  45.367          /* Follow the backpointer to the guest pagetable */
  45.368 -        if ( sp->type != SH_type_fl1_32_shadow
  45.369 -             && sp->type != SH_type_fl1_pae_shadow
  45.370 -             && sp->type != SH_type_fl1_64_shadow )
  45.371 +        if ( sp->u.sh.type != SH_type_fl1_32_shadow
  45.372 +             && sp->u.sh.type != SH_type_fl1_pae_shadow
  45.373 +             && sp->u.sh.type != SH_type_fl1_64_shadow )
  45.374          {
  45.375 -            struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer));
  45.376 +            struct page_info *gpg = mfn_to_page(_mfn(sp->v.sh.back));
  45.377              /* Bad shadow flags on guest page? */
  45.378 -            BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
  45.379 +            BUG_ON( !(gpg->shadow_flags & (1<<sp->u.sh.type)) );
  45.380              /* Bad type count on guest page? */
  45.381  #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
  45.382 -            if ( sp->type == SH_type_l1_32_shadow
  45.383 -                 || sp->type == SH_type_l1_pae_shadow
  45.384 -                 || sp->type == SH_type_l1_64_shadow )
  45.385 +            if ( sp->u.sh.type == SH_type_l1_32_shadow
  45.386 +                 || sp->u.sh.type == SH_type_l1_pae_shadow
  45.387 +                 || sp->u.sh.type == SH_type_l1_64_shadow )
  45.388              {
  45.389                  if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
  45.390                       && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
  45.391                  {
  45.392                      if ( !page_is_out_of_sync(gpg) )
  45.393                      {
  45.394 -                        SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
  45.395 +                        SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
  45.396                                       " and not OOS but has typecount %#lx\n",
  45.397 -                                     sp->backpointer, 
  45.398 -                                     mfn_x(shadow_page_to_mfn(sp)), 
  45.399 +                                     sp->v.sh.back,
  45.400 +                                     mfn_x(page_to_mfn(sp)), 
  45.401                                       gpg->u.inuse.type_info);
  45.402                          BUG();
  45.403                      }
  45.404 @@ -1931,15 +1938,15 @@ static void sh_hash_audit_bucket(struct 
  45.405              if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 
  45.406                   && (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
  45.407              {
  45.408 -                SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
  45.409 +                SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
  45.410                               " but has typecount %#lx\n",
  45.411 -                             sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), 
  45.412 +                             sp->v.sh.back, mfn_x(page_to_mfn(sp)),
  45.413                               gpg->u.inuse.type_info);
  45.414                  BUG();
  45.415              }
  45.416          }
  45.417          /* That entry was OK; on we go */
  45.418 -        sp = sp->next_shadow;
  45.419 +        sp = next_shadow(sp);
  45.420      }
  45.421  }
  45.422  
  45.423 @@ -1972,15 +1979,15 @@ static void sh_hash_audit(struct domain 
  45.424   * Returns 0 for success, 1 for error. */
  45.425  static int shadow_hash_alloc(struct domain *d)
  45.426  {
  45.427 -    struct shadow_page_info **table;
  45.428 +    struct page_info **table;
  45.429  
  45.430      ASSERT(shadow_locked_by_me(d));
  45.431      ASSERT(!d->arch.paging.shadow.hash_table);
  45.432  
  45.433 -    table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS);
  45.434 +    table = xmalloc_array(struct page_info *, SHADOW_HASH_BUCKETS);
  45.435      if ( !table ) return 1;
  45.436      memset(table, 0, 
  45.437 -           SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *));
  45.438 +           SHADOW_HASH_BUCKETS * sizeof (struct page_info *));
  45.439      d->arch.paging.shadow.hash_table = table;
  45.440      return 0;
  45.441  }
  45.442 @@ -2002,7 +2009,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
  45.443   * or INVALID_MFN if it doesn't exist */
  45.444  {
  45.445      struct domain *d = v->domain;
  45.446 -    struct shadow_page_info *sp, *prev;
  45.447 +    struct page_info *sp, *prev;
  45.448      key_t key;
  45.449  
  45.450      ASSERT(shadow_locked_by_me(d));
  45.451 @@ -2019,21 +2026,21 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
  45.452      prev = NULL;
  45.453      while(sp)
  45.454      {
  45.455 -        if ( sp->backpointer == n && sp->type == t )
  45.456 +        if ( sp->v.sh.back == n && sp->u.sh.type == t )
  45.457          {
  45.458              /* Pull-to-front if 'sp' isn't already the head item */
  45.459              if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
  45.460              {
  45.461                  if ( unlikely(d->arch.paging.shadow.hash_walking != 0) )
  45.462                      /* Can't reorder: someone is walking the hash chains */
  45.463 -                    return shadow_page_to_mfn(sp);
  45.464 +                    return page_to_mfn(sp);
  45.465                  else 
  45.466                  {
  45.467                      ASSERT(prev);
  45.468                      /* Delete sp from the list */
  45.469                      prev->next_shadow = sp->next_shadow;                    
  45.470                      /* Re-insert it at the head of the list */
  45.471 -                    sp->next_shadow = d->arch.paging.shadow.hash_table[key];
  45.472 +                    set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
  45.473                      d->arch.paging.shadow.hash_table[key] = sp;
  45.474                  }
  45.475              }
  45.476 @@ -2041,10 +2048,10 @@ mfn_t shadow_hash_lookup(struct vcpu *v,
  45.477              {
  45.478                  perfc_incr(shadow_hash_lookup_head);
  45.479              }
  45.480 -            return shadow_page_to_mfn(sp);
  45.481 +            return page_to_mfn(sp);
  45.482          }
  45.483          prev = sp;
  45.484 -        sp = sp->next_shadow;
  45.485 +        sp = next_shadow(sp);
  45.486      }
  45.487  
  45.488      perfc_incr(shadow_hash_lookup_miss);
  45.489 @@ -2056,7 +2063,7 @@ void shadow_hash_insert(struct vcpu *v, 
  45.490  /* Put a mapping (n,t)->smfn into the hash table */
  45.491  {
  45.492      struct domain *d = v->domain;
  45.493 -    struct shadow_page_info *sp;
  45.494 +    struct page_info *sp;
  45.495      key_t key;
  45.496      
  45.497      ASSERT(shadow_locked_by_me(d));
  45.498 @@ -2070,8 +2077,8 @@ void shadow_hash_insert(struct vcpu *v, 
  45.499      sh_hash_audit_bucket(d, key);
  45.500      
  45.501      /* Insert this shadow at the top of the bucket */
  45.502 -    sp = mfn_to_shadow_page(smfn);
  45.503 -    sp->next_shadow = d->arch.paging.shadow.hash_table[key];
  45.504 +    sp = mfn_to_page(smfn);
  45.505 +    set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
  45.506      d->arch.paging.shadow.hash_table[key] = sp;
  45.507      
  45.508      sh_hash_audit_bucket(d, key);
  45.509 @@ -2082,7 +2089,7 @@ void shadow_hash_delete(struct vcpu *v, 
  45.510  /* Excise the mapping (n,t)->smfn from the hash table */
  45.511  {
  45.512      struct domain *d = v->domain;
  45.513 -    struct shadow_page_info *sp, *x;
  45.514 +    struct page_info *sp, *x;
  45.515      key_t key;
  45.516  
  45.517      ASSERT(shadow_locked_by_me(d));
  45.518 @@ -2095,10 +2102,10 @@ void shadow_hash_delete(struct vcpu *v, 
  45.519      key = sh_hash(n, t);
  45.520      sh_hash_audit_bucket(d, key);
  45.521      
  45.522 -    sp = mfn_to_shadow_page(smfn);
  45.523 +    sp = mfn_to_page(smfn);
  45.524      if ( d->arch.paging.shadow.hash_table[key] == sp ) 
  45.525          /* Easy case: we're deleting the head item. */
  45.526 -        d->arch.paging.shadow.hash_table[key] = sp->next_shadow;
  45.527 +        d->arch.paging.shadow.hash_table[key] = next_shadow(sp);
  45.528      else 
  45.529      {
  45.530          /* Need to search for the one we want */
  45.531 @@ -2107,15 +2114,15 @@ void shadow_hash_delete(struct vcpu *v, 
  45.532          {
  45.533              ASSERT(x); /* We can't have hit the end, since our target is
  45.534                          * still in the chain somewhere... */
  45.535 -            if ( x->next_shadow == sp ) 
  45.536 +            if ( next_shadow(x) == sp )
  45.537              {
  45.538                  x->next_shadow = sp->next_shadow;
  45.539                  break;
  45.540              }
  45.541 -            x = x->next_shadow;
  45.542 +            x = next_shadow(x);
  45.543          }
  45.544      }
  45.545 -    sp->next_shadow = NULL;
  45.546 +    set_next_shadow(sp, NULL);
  45.547  
  45.548      sh_hash_audit_bucket(d, key);
  45.549  }
  45.550 @@ -2137,7 +2144,7 @@ static void hash_foreach(struct vcpu *v,
  45.551  {
  45.552      int i, done = 0;
  45.553      struct domain *d = v->domain;
  45.554 -    struct shadow_page_info *x;
  45.555 +    struct page_info *x;
  45.556  
  45.557      /* Say we're here, to stop hash-lookups reordering the chains */
  45.558      ASSERT(shadow_locked_by_me(d));
  45.559 @@ -2149,14 +2156,14 @@ static void hash_foreach(struct vcpu *v,
  45.560          /* WARNING: This is not safe against changes to the hash table.
  45.561           * The callback *must* return non-zero if it has inserted or
  45.562           * deleted anything from the hash (lookups are OK, though). */
  45.563 -        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow )
  45.564 +        for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
  45.565          {
  45.566 -            if ( callback_mask & (1 << x->type) ) 
  45.567 +            if ( callback_mask & (1 << x->u.sh.type) )
  45.568              {
  45.569 -                ASSERT(x->type <= 15);
  45.570 -                ASSERT(callbacks[x->type] != NULL);
  45.571 -                done = callbacks[x->type](v, shadow_page_to_mfn(x), 
  45.572 -                                          callback_mfn);
  45.573 +                ASSERT(x->u.sh.type <= 15);
  45.574 +                ASSERT(callbacks[x->u.sh.type] != NULL);
  45.575 +                done = callbacks[x->u.sh.type](v, page_to_mfn(x),
  45.576 +                                               callback_mfn);
  45.577                  if ( done ) break;
  45.578              }
  45.579          }
  45.580 @@ -2173,8 +2180,8 @@ static void hash_foreach(struct vcpu *v,
  45.581  
  45.582  void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
  45.583  {
  45.584 -    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
  45.585 -    unsigned int t = sp->type;
  45.586 +    struct page_info *sp = mfn_to_page(smfn);
  45.587 +    unsigned int t = sp->u.sh.type;
  45.588  
  45.589  
  45.590      SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
  45.591 @@ -2186,7 +2193,7 @@ void sh_destroy_shadow(struct vcpu *v, m
  45.592             t == SH_type_fl1_64_shadow  || 
  45.593             t == SH_type_monitor_table  || 
  45.594             (is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) ||
  45.595 -           (page_get_owner(mfn_to_page(_mfn(sp->backpointer))) 
  45.596 +           (page_get_owner(mfn_to_page(_mfn(sp->v.sh.back)))
  45.597              == v->domain)); 
  45.598  
  45.599      /* The down-shifts here are so that the switch statement is on nice
  45.600 @@ -2438,7 +2445,7 @@ int sh_remove_write_access(struct vcpu *
  45.601      {
  45.602          unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
  45.603          mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn);
  45.604 -        int shtype = mfn_to_shadow_page(last_smfn)->type;
  45.605 +        int shtype = mfn_to_page(last_smfn)->u.sh.type;
  45.606  
  45.607          if ( callbacks[shtype] ) 
  45.608              callbacks[shtype](v, last_smfn, gmfn);
  45.609 @@ -2481,25 +2488,25 @@ int sh_remove_write_access(struct vcpu *
  45.610  int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
  45.611                                       mfn_t smfn, unsigned long off)
  45.612  {
  45.613 -    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
  45.614 +    struct page_info *sp = mfn_to_page(smfn);
  45.615      
  45.616      ASSERT(mfn_valid(smfn));
  45.617      ASSERT(mfn_valid(gmfn));
  45.618      
  45.619 -    if ( sp->type == SH_type_l1_32_shadow
  45.620 -         || sp->type == SH_type_fl1_32_shadow )
  45.621 +    if ( sp->u.sh.type == SH_type_l1_32_shadow
  45.622 +         || sp->u.sh.type == SH_type_fl1_32_shadow )
  45.623      {
  45.624          return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2)
  45.625              (v, gmfn, smfn, off);
  45.626      }
  45.627  #if CONFIG_PAGING_LEVELS >= 3
  45.628 -    else if ( sp->type == SH_type_l1_pae_shadow
  45.629 -              || sp->type == SH_type_fl1_pae_shadow )
  45.630 +    else if ( sp->u.sh.type == SH_type_l1_pae_shadow
  45.631 +              || sp->u.sh.type == SH_type_fl1_pae_shadow )
  45.632          return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3)
  45.633              (v, gmfn, smfn, off);
  45.634  #if CONFIG_PAGING_LEVELS >= 4
  45.635 -    else if ( sp->type == SH_type_l1_64_shadow
  45.636 -              || sp->type == SH_type_fl1_64_shadow )
  45.637 +    else if ( sp->u.sh.type == SH_type_l1_64_shadow
  45.638 +              || sp->u.sh.type == SH_type_fl1_64_shadow )
  45.639          return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4)
  45.640              (v, gmfn, smfn, off);
  45.641  #endif
  45.642 @@ -2601,17 +2608,17 @@ static int sh_remove_shadow_via_pointer(
  45.643  /* Follow this shadow's up-pointer, if it has one, and remove the reference
  45.644   * found there.  Returns 1 if that was the only reference to this shadow */
  45.645  {
  45.646 -    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
  45.647 +    struct page_info *sp = mfn_to_page(smfn);
  45.648      mfn_t pmfn;
  45.649      void *vaddr;
  45.650      int rc;
  45.651  
  45.652 -    ASSERT(sp->type > 0);
  45.653 -    ASSERT(sp->type < SH_type_max_shadow);
  45.654 -    ASSERT(sp->type != SH_type_l2_32_shadow);
  45.655 -    ASSERT(sp->type != SH_type_l2_pae_shadow);
  45.656 -    ASSERT(sp->type != SH_type_l2h_pae_shadow);
  45.657 -    ASSERT(sp->type != SH_type_l4_64_shadow);
  45.658 +    ASSERT(sp->u.sh.type > 0);
  45.659 +    ASSERT(sp->u.sh.type < SH_type_max_shadow);
  45.660 +    ASSERT(sp->u.sh.type != SH_type_l2_32_shadow);
  45.661 +    ASSERT(sp->u.sh.type != SH_type_l2_pae_shadow);
  45.662 +    ASSERT(sp->u.sh.type != SH_type_l2h_pae_shadow);
  45.663 +    ASSERT(sp->u.sh.type != SH_type_l4_64_shadow);
  45.664      
  45.665      if (sp->up == 0) return 0;
  45.666      pmfn = _mfn(sp->up >> PAGE_SHIFT);
  45.667 @@ -2622,10 +2629,10 @@ static int sh_remove_shadow_via_pointer(
  45.668      ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
  45.669      
  45.670      /* Is this the only reference to this shadow? */
  45.671 -    rc = (sp->count == 1) ? 1 : 0;
  45.672 +    rc = (sp->u.sh.count == 1) ? 1 : 0;
  45.673  
  45.674      /* Blank the offending entry */
  45.675 -    switch (sp->type) 
  45.676 +    switch (sp->u.sh.type)
  45.677      {
  45.678      case SH_type_l1_32_shadow:
  45.679      case SH_type_l2_32_shadow:
  45.680 @@ -3156,7 +3163,6 @@ void shadow_teardown(struct domain *d)
  45.681  {
  45.682      struct vcpu *v;
  45.683      mfn_t mfn;
  45.684 -    struct list_head *entry, *n;
  45.685      struct page_info *pg;
  45.686  
  45.687      ASSERT(d->is_dying);
  45.688 @@ -3208,12 +3214,8 @@ void shadow_teardown(struct domain *d)
  45.689      }
  45.690  #endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */
  45.691  
  45.692 -    list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
  45.693 -    {
  45.694 -        list_del(entry);
  45.695 -        pg = list_entry(entry, struct page_info, list);
  45.696 +    while ( (pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist)) )
  45.697          shadow_free_p2m_page(d, pg);
  45.698 -    }
  45.699  
  45.700      if ( d->arch.paging.shadow.total_pages != 0 )
  45.701      {
  45.702 @@ -3657,7 +3659,6 @@ int shadow_track_dirty_vram(struct domai
  45.703          for ( i = 0; i < nr; i++ ) {
  45.704              mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
  45.705              struct page_info *page;
  45.706 -            u32 count_info;
  45.707              int dirty = 0;
  45.708              paddr_t sl1ma = d->dirty_vram->sl1ma[i];
  45.709  
  45.710 @@ -3668,8 +3669,7 @@ int shadow_track_dirty_vram(struct domai
  45.711              else
  45.712              {
  45.713                  page = mfn_to_page(mfn);
  45.714 -                count_info = page->u.inuse.type_info & PGT_count_mask;
  45.715 -                switch (count_info)
  45.716 +                switch (page->u.inuse.type_info & PGT_count_mask)
  45.717                  {
  45.718                  case 0:
  45.719                      /* No guest reference, nothing to track. */
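
The hunks above (shadow_alloc(), shadow_free(), the hash-table walkers) fold the old struct shadow_page_info into struct page_info and, via the new next_shadow()/set_next_shadow() helpers, store each hash-chain link as a machine frame number rather than a raw pointer, with 0 doubling as the end-of-chain marker. The stand-alone sketch below models only that encoding idiom; struct toy_page, frame_table, NR_FRAMES and main() are invented for illustration and are not Xen code.

    /* Toy model of the next_shadow()/set_next_shadow() link encoding.
     * Links are stored as frame-table indices so they fit in the space
     * available in the page metadata; index 0 means "end of chain",
     * mirroring the patch's use of 0 for a NULL link. */
    #include <stddef.h>
    #include <stdio.h>

    #define NR_FRAMES 16

    struct toy_page {
        unsigned long next_shadow;  /* frame index of the next page, 0 == none */
    };

    static struct toy_page frame_table[NR_FRAMES];

    static unsigned long page_to_idx(const struct toy_page *pg)
    {
        return (unsigned long)(pg - frame_table);
    }

    static struct toy_page *idx_to_page(unsigned long idx)
    {
        return &frame_table[idx];
    }

    /* Analogous to next_shadow(): decode the stored index, or return NULL. */
    static struct toy_page *next_shadow(const struct toy_page *sp)
    {
        return sp->next_shadow ? idx_to_page(sp->next_shadow) : NULL;
    }

    /* Analogous to set_next_shadow(): encode a pointer as an index. */
    static void set_next_shadow(struct toy_page *sp, struct toy_page *next)
    {
        sp->next_shadow = next ? page_to_idx(next) : 0;
    }

    int main(void)
    {
        /* Build a three-entry chain 3 -> 7 -> 12 and walk it. */
        set_next_shadow(idx_to_page(3), idx_to_page(7));
        set_next_shadow(idx_to_page(7), idx_to_page(12));
        set_next_shadow(idx_to_page(12), NULL);

        for ( struct toy_page *sp = idx_to_page(3); sp; sp = next_shadow(sp) )
            printf("visiting frame %lu\n", page_to_idx(sp));
        return 0;
    }

The same decode step appears wherever the patch replaces raw sp->next_shadow pointer walks with next_shadow(sp), e.g. in sh_hash_audit_bucket() and hash_foreach().
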
    46.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Feb 13 10:56:01 2009 +0900
    46.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Feb 13 11:22:28 2009 +0900
    46.3 @@ -973,13 +973,13 @@ static int shadow_set_l2e(struct vcpu *v
    46.4          }
    46.5  #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
    46.6          {
    46.7 -            struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn);
    46.8 -            mfn_t gl1mfn = _mfn(sp->backpointer);
    46.9 +            struct page_info *sp = mfn_to_page(sl1mfn);
   46.10 +            mfn_t gl1mfn = _mfn(sp->v.sh.back);
   46.11  
   46.12              /* If the shadow is a fl1 then the backpointer contains
   46.13                 the GFN instead of the GMFN, and it's definitely not
   46.14                 OOS. */
   46.15 -            if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
   46.16 +            if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
   46.17                   && mfn_is_out_of_sync(gl1mfn) )
   46.18                  sh_resync(v, gl1mfn);
   46.19          }
   46.20 @@ -1036,9 +1036,8 @@ static inline void shadow_vram_get_l1e(s
   46.21      if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
   46.22          unsigned long i = gfn - d->dirty_vram->begin_pfn;
   46.23          struct page_info *page = mfn_to_page(mfn);
   46.24 -        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
   46.25          
   46.26 -        if ( count_info == 1 )
   46.27 +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
   46.28              /* Initial guest reference, record it */
   46.29              d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
   46.30                  | ((unsigned long)sl1e & ~PAGE_MASK);
   46.31 @@ -1064,12 +1063,11 @@ static inline void shadow_vram_put_l1e(s
   46.32      if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
   46.33          unsigned long i = gfn - d->dirty_vram->begin_pfn;
   46.34          struct page_info *page = mfn_to_page(mfn);
   46.35 -        u32 count_info = page->u.inuse.type_info & PGT_count_mask;
   46.36          int dirty = 0;
   46.37          paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
   46.38              | ((unsigned long)sl1e & ~PAGE_MASK);
   46.39  
   46.40 -        if ( count_info == 1 ) {
   46.41 +        if ( (page->u.inuse.type_info & PGT_count_mask) == 1 ) {
   46.42              /* Last reference */
   46.43              if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
   46.44                  /* We didn't know it was that one, let's say it is dirty */
   46.45 @@ -1194,8 +1192,8 @@ static inline void increment_ptr_to_gues
   46.46  do {                                                                    \
   46.47      int _i;                                                             \
   46.48      shadow_l1e_t *_sp = sh_map_domain_page((_sl1mfn));                  \
   46.49 -    ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow       \
   46.50 -           || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \
   46.51 +    ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow  \
   46.52 +           || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\
   46.53      for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ )              \
   46.54      {                                                                   \
   46.55          (_sl1e) = _sp + _i;                                             \
   46.56 @@ -1232,7 +1230,7 @@ do {                                    
   46.57  do {                                                                      \
   46.58      int _i, _j, __done = 0;                                               \
   46.59      int _xen = !shadow_mode_external(_dom);                               \
   46.60 -    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow);    \
   46.61 +    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow);\
   46.62      for ( _j = 0; _j < 4 && !__done; _j++ )                               \
   46.63      {                                                                     \
   46.64          shadow_l2e_t *_sp = sh_map_domain_page(_sl2mfn);                  \
   46.65 @@ -1260,11 +1258,11 @@ do {                                    
   46.66      int _i;                                                                \
   46.67      int _xen = !shadow_mode_external(_dom);                                \
   46.68      shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn));                     \
   46.69 -    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow      \
   46.70 -           || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\
   46.71 +    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow \
   46.72 +           || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow);\
   46.73      for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                 \
   46.74          if ( (!(_xen))                                                     \
   46.75 -             || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\
   46.76 +             || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_pae_shadow\
   46.77               || ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES))                  \
   46.78                   < (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
   46.79          {                                                                  \
   46.80 @@ -1285,13 +1283,13 @@ do {                                    
   46.81      int _i;                                                                 \
   46.82      int _xen = !shadow_mode_external(_dom);                                 \
   46.83      shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn));                      \
   46.84 -    ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow ||     \
   46.85 -           mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow);     \
   46.86 +    ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\
   46.87 +           mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\
   46.88      for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ )                  \
   46.89      {                                                                       \
   46.90          if ( (!(_xen))                                                      \
   46.91               || !is_pv_32on64_domain(_dom)                                  \
   46.92 -             || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow  \
   46.93 +             || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow\
   46.94               || (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) )           \
   46.95          {                                                                   \
   46.96              (_sl2e) = _sp + _i;                                             \
   46.97 @@ -1313,7 +1311,7 @@ do {                                    
   46.98  do {                                                                    \
   46.99      int _i;                                                             \
  46.100      shadow_l3e_t *_sp = sh_map_domain_page((_sl3mfn));                  \
  46.101 -    ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow);  \
  46.102 +    ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\
  46.103      for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ )              \
  46.104      {                                                                   \
  46.105          (_sl3e) = _sp + _i;                                             \
  46.106 @@ -1331,7 +1329,7 @@ do {                                    
  46.107      shadow_l4e_t *_sp = sh_map_domain_page((_sl4mfn));                  \
  46.108      int _xen = !shadow_mode_external(_dom);                             \
  46.109      int _i;                                                             \
  46.110 -    ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow);  \
  46.111 +    ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\
  46.112      for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ )              \
  46.113      {                                                                   \
  46.114          if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) )                  \
  46.115 @@ -1506,7 +1504,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
  46.116           && shadow_type != SH_type_l2h_pae_shadow 
  46.117           && shadow_type != SH_type_l4_64_shadow )
  46.118          /* Lower-level shadow, not yet linked from a higher level */
  46.119 -        mfn_to_shadow_page(smfn)->up = 0;
  46.120 +        mfn_to_page(smfn)->up = 0;
  46.121  
  46.122  #if GUEST_PAGING_LEVELS == 4
  46.123  #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 
  46.124 @@ -1519,14 +1517,12 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
  46.125           * of them, decide that this isn't an old linux guest, and stop
  46.126           * pinning l3es.  This is not very quick but it doesn't happen
  46.127           * very often. */
  46.128 -        struct list_head *l, *t;
  46.129 -        struct shadow_page_info *sp;
  46.130 +        struct page_info *sp, *t;
  46.131          struct vcpu *v2;
  46.132          int l4count = 0, vcpus = 0;
  46.133 -        list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows)
  46.134 +        page_list_for_each(sp, &v->domain->arch.paging.shadow.pinned_shadows)
  46.135          {
  46.136 -            sp = list_entry(l, struct shadow_page_info, list);
  46.137 -            if ( sp->type == SH_type_l4_64_shadow )
  46.138 +            if ( sp->u.sh.type == SH_type_l4_64_shadow )
  46.139                  l4count++;
  46.140          }
  46.141          for_each_vcpu ( v->domain, v2 ) 
  46.142 @@ -1534,11 +1530,10 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf
  46.143          if ( l4count > 2 * vcpus ) 
  46.144          {
  46.145              /* Unpin all the pinned l3 tables, and don't pin any more. */
  46.146 -            list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows)
  46.147 +            page_list_for_each_safe(sp, t, &v->domain->arch.paging.shadow.pinned_shadows)
  46.148              {
  46.149 -                sp = list_entry(l, struct shadow_page_info, list);
  46.150 -                if ( sp->type == SH_type_l3_64_shadow )
  46.151 -                    sh_unpin(v, shadow_page_to_mfn(sp));
  46.152 +                if ( sp->u.sh.type == SH_type_l3_64_shadow )
  46.153 +                    sh_unpin(v, page_to_mfn(sp));
  46.154              }
  46.155              v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
  46.156          }
  46.157 @@ -1921,7 +1916,7 @@ static shadow_l1e_t * shadow_get_and_cre
  46.158  void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
  46.159  {
  46.160      shadow_l4e_t *sl4e;
  46.161 -    u32 t = mfn_to_shadow_page(smfn)->type;
  46.162 +    u32 t = mfn_to_page(smfn)->u.sh.type;
  46.163      mfn_t gmfn, sl4mfn;
  46.164  
  46.165      SHADOW_DEBUG(DESTROY_SHADOW,
  46.166 @@ -1929,7 +1924,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
  46.167      ASSERT(t == SH_type_l4_shadow);
  46.168  
  46.169      /* Record that the guest page isn't shadowed any more (in this type) */
  46.170 -    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
  46.171 +    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
  46.172      delete_shadow_status(v, gmfn, t, smfn);
  46.173      shadow_demote(v, gmfn, t);
  46.174      /* Decrement refcounts of all the old entries */
  46.175 @@ -1950,7 +1945,7 @@ void sh_destroy_l4_shadow(struct vcpu *v
  46.176  void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
  46.177  {
  46.178      shadow_l3e_t *sl3e;
  46.179 -    u32 t = mfn_to_shadow_page(smfn)->type;
  46.180 +    u32 t = mfn_to_page(smfn)->u.sh.type;
  46.181      mfn_t gmfn, sl3mfn;
  46.182  
  46.183      SHADOW_DEBUG(DESTROY_SHADOW,
  46.184 @@ -1958,7 +1953,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
  46.185      ASSERT(t == SH_type_l3_shadow);
  46.186  
  46.187      /* Record that the guest page isn't shadowed any more (in this type) */
  46.188 -    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
  46.189 +    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
  46.190      delete_shadow_status(v, gmfn, t, smfn);
  46.191      shadow_demote(v, gmfn, t);
  46.192  
  46.193 @@ -1980,7 +1975,7 @@ void sh_destroy_l3_shadow(struct vcpu *v
  46.194  void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
  46.195  {
  46.196      shadow_l2e_t *sl2e;
  46.197 -    u32 t = mfn_to_shadow_page(smfn)->type;
  46.198 +    u32 t = mfn_to_page(smfn)->u.sh.type;
  46.199      mfn_t gmfn, sl2mfn;
  46.200  
  46.201      SHADOW_DEBUG(DESTROY_SHADOW,
  46.202 @@ -1993,7 +1988,7 @@ void sh_destroy_l2_shadow(struct vcpu *v
  46.203  #endif
  46.204  
  46.205      /* Record that the guest page isn't shadowed any more (in this type) */
  46.206 -    gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
  46.207 +    gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
  46.208      delete_shadow_status(v, gmfn, t, smfn);
  46.209      shadow_demote(v, gmfn, t);
  46.210  
  46.211 @@ -2014,7 +2009,7 @@ void sh_destroy_l1_shadow(struct vcpu *v
  46.212  {
  46.213      struct domain *d = v->domain;
  46.214      shadow_l1e_t *sl1e;
  46.215 -    u32 t = mfn_to_shadow_page(smfn)->type;
  46.216 +    u32 t = mfn_to_page(smfn)->u.sh.type;
  46.217  
  46.218      SHADOW_DEBUG(DESTROY_SHADOW,
  46.219                    "%s(%05lx)\n", __func__, mfn_x(smfn));
  46.220 @@ -2023,12 +2018,12 @@ void sh_destroy_l1_shadow(struct vcpu *v
  46.221      /* Record that the guest page isn't shadowed any more (in this type) */
  46.222      if ( t == SH_type_fl1_shadow )
  46.223      {
  46.224 -        gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer);
  46.225 +        gfn_t gfn = _gfn(mfn_to_page(smfn)->v.sh.back);
  46.226          delete_fl1_shadow_status(v, gfn, smfn);
  46.227      }
  46.228      else 
  46.229      {
  46.230 -        mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
  46.231 +        mfn_t gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
  46.232          delete_shadow_status(v, gmfn, t, smfn);
  46.233          shadow_demote(v, gmfn, t);
  46.234      }
  46.235 @@ -2054,7 +2049,7 @@ void sh_destroy_l1_shadow(struct vcpu *v
  46.236  void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
  46.237  {
  46.238      struct domain *d = v->domain;
  46.239 -    ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table);
  46.240 +    ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table);
  46.241  
  46.242  #if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
  46.243      {
  46.244 @@ -2298,7 +2293,7 @@ static int validate_gl2e(struct vcpu *v,
  46.245  
  46.246  #if SHADOW_PAGING_LEVELS == 3
  46.247          reserved_xen_slot = 
  46.248 -            ((mfn_to_shadow_page(sl2mfn)->type == SH_type_l2h_pae_shadow) &&
  46.249 +            ((mfn_to_page(sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow) &&
  46.250               (shadow_index 
  46.251                >= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
  46.252  #else /* SHADOW_PAGING_LEVELS == 2 */
  46.253 @@ -2352,7 +2347,7 @@ static int validate_gl1e(struct vcpu *v,
  46.254      result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
  46.255  
  46.256  #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
  46.257 -    gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
  46.258 +    gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
  46.259      if ( mfn_valid(gl1mfn) 
  46.260           && mfn_is_out_of_sync(gl1mfn) )
  46.261      {
  46.262 @@ -2429,30 +2424,30 @@ void sh_resync_l1(struct vcpu *v, mfn_t 
  46.263   *      called in the *mode* of the vcpu that unsynced it.  Clear?  Good. */
  46.264  int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn)
  46.265  {
  46.266 -    struct shadow_page_info *sp;
  46.267 +    struct page_info *sp;
  46.268      mfn_t smfn;
  46.269  
  46.270      smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
  46.271      ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
  46.272      
  46.273      /* Up to l2 */
  46.274 -    sp = mfn_to_shadow_page(smfn);
  46.275 -    if ( sp->count != 1 || !sp->up )
  46.276 +    sp = mfn_to_page(smfn);
  46.277 +    if ( sp->u.sh.count != 1 || !sp->up )
  46.278          return 0;
  46.279      smfn = _mfn(sp->up >> PAGE_SHIFT);
  46.280      ASSERT(mfn_valid(smfn));
  46.281  
  46.282  #if (SHADOW_PAGING_LEVELS == 4) 
  46.283      /* up to l3 */
  46.284 -    sp = mfn_to_shadow_page(smfn);
  46.285 -    if ( sp->count != 1 || !sp->up )
  46.286 +    sp = mfn_to_page(smfn);
  46.287 +    if ( sp->u.sh.count != 1 || !sp->up )
  46.288          return 0;
  46.289      smfn = _mfn(sp->up >> PAGE_SHIFT);
  46.290      ASSERT(mfn_valid(smfn));
  46.291  
  46.292      /* up to l4 */
  46.293 -    sp = mfn_to_shadow_page(smfn);
  46.294 -    if ( sp->count != 1 
  46.295 +    sp = mfn_to_page(smfn);
  46.296 +    if ( sp->u.sh.count != 1
  46.297           || sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up )
  46.298          return 0;
  46.299      smfn = _mfn(sp->up >> PAGE_SHIFT);
  46.300 @@ -2970,8 +2965,8 @@ static int sh_page_fault(struct vcpu *v,
  46.301                                          + shadow_l2_linear_offset(va)),
  46.302                                         sizeof(sl2e)) != 0)
  46.303                       || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
  46.304 -                     || !mfn_valid(gl1mfn = _mfn(mfn_to_shadow_page(
  46.305 -                                      shadow_l2e_get_mfn(sl2e))->backpointer))
  46.306 +                     || !mfn_valid(gl1mfn = _mfn(mfn_to_page(
  46.307 +                                      shadow_l2e_get_mfn(sl2e))->v.sh.back))
  46.308                       || unlikely(mfn_is_out_of_sync(gl1mfn)) )
  46.309                 {
  46.310                     /* Hit the slow path as if there had been no 
  46.311 @@ -3523,7 +3518,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
  46.312      // easier than invalidating all of the individual 4K pages).
  46.313      //
  46.314      sl1mfn = shadow_l2e_get_mfn(sl2e);
  46.315 -    if ( mfn_to_shadow_page(sl1mfn)->type
  46.316 +    if ( mfn_to_page(sl1mfn)->u.sh.type
  46.317           == SH_type_fl1_shadow )
  46.318      {
  46.319          flush_tlb_local();
  46.320 @@ -3533,7 +3528,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
  46.321  #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
  46.322      /* Check to see if the SL1 is out of sync. */
  46.323      {
  46.324 -        mfn_t gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
  46.325 +        mfn_t gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
  46.326          struct page_info *pg = mfn_to_page(gl1mfn);
  46.327          if ( mfn_valid(gl1mfn) 
  46.328               && page_is_out_of_sync(pg) )
  46.329 @@ -3563,7 +3558,7 @@ sh_invlpg(struct vcpu *v, unsigned long 
  46.330              }
  46.331  
  46.332              sl1mfn = shadow_l2e_get_mfn(sl2e);
  46.333 -            gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
  46.334 +            gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
  46.335              pg = mfn_to_page(gl1mfn);
  46.336              
  46.337              if ( likely(sh_mfn_is_a_page_table(gl1mfn)
  46.338 @@ -3968,7 +3963,7 @@ sh_set_toplevel_shadow(struct vcpu *v,
  46.339          /* Need to repin the old toplevel shadow if it's been unpinned
  46.340           * by shadow_prealloc(): in PV mode we're still running on this
  46.341           * shadow and it's not safe to free it yet. */
  46.342 -        if ( !mfn_to_shadow_page(old_smfn)->pinned && !sh_pin(v, old_smfn) )
  46.343 +        if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(v, old_smfn) )
  46.344          {
  46.345              SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn));
  46.346              domain_crash(v->domain);
  46.347 @@ -4262,16 +4257,16 @@ int sh_rm_write_access_from_sl1p(struct 
  46.348  {
  46.349      int r;
  46.350      shadow_l1e_t *sl1p, sl1e;
  46.351 -    struct shadow_page_info *sp;
  46.352 +    struct page_info *sp;
  46.353  
  46.354      ASSERT(mfn_valid(gmfn));
  46.355      ASSERT(mfn_valid(smfn));
  46.356  
  46.357 -    sp = mfn_to_shadow_page(smfn);
  46.358 -
  46.359 -    if ( sp->mbz != 0
  46.360 -         || (sp->type != SH_type_l1_shadow
  46.361 -             && sp->type != SH_type_fl1_shadow) )
  46.362 +    sp = mfn_to_page(smfn);
  46.363 +
  46.364 +    if ( sp->count_info != 0
  46.365 +         || (sp->u.sh.type != SH_type_l1_shadow
  46.366 +             && sp->u.sh.type != SH_type_fl1_shadow) )
  46.367          goto fail;
  46.368  
  46.369      sl1p = sh_map_domain_page(smfn);
  46.370 @@ -4410,7 +4405,7 @@ int sh_rm_mappings_from_l1(struct vcpu *
  46.371  void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
  46.372  /* Blank out a single shadow entry */
  46.373  {
  46.374 -    switch ( mfn_to_shadow_page(smfn)->type )
  46.375 +    switch ( mfn_to_page(smfn)->u.sh.type )
  46.376      {
  46.377      case SH_type_l1_shadow:
  46.378          (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
  46.379 @@ -4443,7 +4438,7 @@ int sh_remove_l1_shadow(struct vcpu *v, 
  46.380               && (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
  46.381          {
  46.382              (void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
  46.383 -            if ( mfn_to_shadow_page(sl1mfn)->type == 0 )
  46.384 +            if ( mfn_to_page(sl1mfn)->u.sh.type == 0 )
  46.385                  /* This breaks us cleanly out of the FOREACH macro */
  46.386                  done = 1;
  46.387          }
  46.388 @@ -4466,7 +4461,7 @@ int sh_remove_l2_shadow(struct vcpu *v, 
  46.389               && (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
  46.390          {
  46.391              (void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
  46.392 -            if ( mfn_to_shadow_page(sl2mfn)->type == 0 )
  46.393 +            if ( mfn_to_page(sl2mfn)->u.sh.type == 0 )
  46.394                  /* This breaks us cleanly out of the FOREACH macro */
  46.395                  done = 1;
  46.396          }
  46.397 @@ -4488,7 +4483,7 @@ int sh_remove_l3_shadow(struct vcpu *v, 
  46.398               && (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
  46.399          {
  46.400              (void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
  46.401 -            if ( mfn_to_shadow_page(sl3mfn)->type == 0 )
  46.402 +            if ( mfn_to_page(sl3mfn)->u.sh.type == 0 )
  46.403                  /* This breaks us cleanly out of the FOREACH macro */
  46.404                  done = 1;
  46.405          }
  46.406 @@ -4890,7 +4885,7 @@ int sh_audit_l1_table(struct vcpu *v, mf
  46.407      int done = 0;
  46.408      
  46.409      /* Follow the backpointer */
  46.410 -    gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
  46.411 +    gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
  46.412  
  46.413  #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
  46.414      /* Out-of-sync l1 shadows can contain anything: just check the OOS hash */
  46.415 @@ -4980,7 +4975,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
  46.416      int done = 0;
  46.417  
  46.418      /* Follow the backpointer */
  46.419 -    gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer);
  46.420 +    gl2mfn = _mfn(mfn_to_page(sl2mfn)->v.sh.back);
  46.421  
  46.422  #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
  46.423      /* Only L1's may be out of sync. */
  46.424 @@ -5029,7 +5024,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
  46.425      int done = 0;
  46.426  
  46.427      /* Follow the backpointer */
  46.428 -    gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer);
  46.429 +    gl3mfn = _mfn(mfn_to_page(sl3mfn)->v.sh.back);
  46.430  
  46.431  #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
  46.432      /* Only L1's may be out of sync. */
  46.433 @@ -5076,7 +5071,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
  46.434      int done = 0;
  46.435  
  46.436      /* Follow the backpointer */
  46.437 -    gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer);
  46.438 +    gl4mfn = _mfn(mfn_to_page(sl4mfn)->v.sh.back);
  46.439  
  46.440  #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) 
  46.441      /* Only L1's may be out of sync. */
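
Several of the paths above (sh_remove_shadow_via_pointer(), sh_safe_not_to_sync()) follow sp->up, the physical address of the single shadow entry that references this shadow, and recover the referencing shadow's frame with sp->up >> PAGE_SHIFT. The short sketch below illustrates only that address split; the names and sample values (TOY_PAGE_SHIFT, pack_up, 0x1a2b) are invented for illustration and are not Xen code.

    /* Toy model of the sp->up encoding: a physical address packs the
     * referencing page's frame number in the high bits and the byte offset
     * of the entry within that page in the low bits. */
    #include <stdint.h>
    #include <stdio.h>

    #define TOY_PAGE_SHIFT 12
    #define TOY_PAGE_SIZE  (1ULL << TOY_PAGE_SHIFT)

    static uint64_t pack_up(uint64_t frame, uint64_t offset)
    {
        return (frame << TOY_PAGE_SHIFT) | (offset & (TOY_PAGE_SIZE - 1));
    }

    int main(void)
    {
        uint64_t up = pack_up(0x1a2b, 0x7f8);   /* sample frame and entry offset */

        printf("referencing frame = %#llx\n",
               (unsigned long long)(up >> TOY_PAGE_SHIFT));
        printf("entry offset      = %#llx\n",
               (unsigned long long)(up & (TOY_PAGE_SIZE - 1)));
        return 0;
    }
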
    47.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Feb 13 10:56:01 2009 +0900
    47.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Feb 13 11:22:28 2009 +0900
    47.3 @@ -220,60 +220,6 @@ extern void shadow_audit_tables(struct v
    47.4  #undef GUEST_LEVELS
    47.5  #endif /* CONFIG_PAGING_LEVELS == 4 */
    47.6  
    47.7 -/******************************************************************************
    47.8 - * Page metadata for shadow pages.
    47.9 - */
   47.10 -
   47.11 -struct shadow_page_info
   47.12 -{
   47.13 -    union {
   47.14 -        /* Ensures that shadow_page_info is same size as page_info. */
   47.15 -        struct page_info page_info;
   47.16 -
   47.17 -        struct {
   47.18 -            union {
   47.19 -                /* When in use, guest page we're a shadow of */
   47.20 -                unsigned long backpointer;
   47.21 -                /* When free, order of the freelist we're on */
   47.22 -                unsigned int order;
   47.23 -            };
   47.24 -            union {
   47.25 -                /* When in use, next shadow in this hash chain */
   47.26 -                struct shadow_page_info *next_shadow;
   47.27 -                /* When free, TLB flush time when freed */
   47.28 -                u32 tlbflush_timestamp;
   47.29 -            };
   47.30 -            struct {
   47.31 -                unsigned long mbz;     /* Must be zero: count_info is here. */
   47.32 -                unsigned long type:5;   /* What kind of shadow is this? */
   47.33 -                unsigned long pinned:1; /* Is the shadow pinned? */
   47.34 -                unsigned long count:26; /* Reference count */
   47.35 -            } __attribute__((packed));
   47.36 -            union {
   47.37 -                /* For unused shadow pages, a list of pages of this order; for 
   47.38 -                 * pinnable shadows, if pinned, a list of other pinned shadows
   47.39 -                 * (see sh_type_is_pinnable() below for the definition of 
   47.40 -                 * "pinnable" shadow types). */
   47.41 -                struct list_head list;
   47.42 -                /* For non-pinnable shadows, a higher entry that points
   47.43 -                 * at us. */
   47.44 -                paddr_t up;
   47.45 -            };
   47.46 -        };
   47.47 -    };
   47.48 -};
   47.49 -
   47.50 -/* The structure above *must* be no larger than a struct page_info
   47.51 - * from mm.h, since we'll be using the same space in the frametable. 
   47.52 - * Also, the mbz field must line up with the count_info field of normal 
   47.53 - * pages, so they cannot be successfully get_page()d. */
   47.54 -static inline void shadow_check_page_struct_offsets(void) {
   47.55 -    BUILD_BUG_ON(sizeof (struct shadow_page_info) !=
   47.56 -                 sizeof (struct page_info));
   47.57 -    BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
   47.58 -                 offsetof(struct page_info, count_info));
   47.59 -};
   47.60 -
   47.61  /* Shadow type codes */
   47.62  #define SH_type_none           (0U) /* on the shadow free list */
   47.63  #define SH_type_min_shadow     (1U)
   47.64 @@ -528,22 +474,13 @@ mfn_t oos_snapshot_lookup(struct vcpu *v
   47.65   * MFN/page-info handling 
   47.66   */
   47.67  
   47.68 -// Override mfn_to_page from asm/page.h, which was #include'd above,
   47.69 -// in order to make it work with our mfn type.
   47.70 +/* Override macros from asm/page.h to make them work with mfn_t */
   47.71  #undef mfn_to_page
   47.72 -#define mfn_to_page(_m) (frame_table + mfn_x(_m))
   47.73 -#define mfn_to_shadow_page(_m) ((struct shadow_page_info *)mfn_to_page(_m))
   47.74 -
   47.75 -// Override page_to_mfn from asm/page.h, which was #include'd above,
   47.76 -// in order to make it work with our mfn type.
   47.77 +#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
   47.78 +#undef mfn_valid
   47.79 +#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
   47.80  #undef page_to_mfn
   47.81 -#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
   47.82 -#define shadow_page_to_mfn(_spg) (page_to_mfn((struct page_info *)_spg))
   47.83 -
   47.84 -// Override mfn_valid from asm/page.h, which was #include'd above,
   47.85 -// in order to make it work with our mfn type.
   47.86 -#undef mfn_valid
   47.87 -#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
   47.88 +#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
   47.89  
   47.90  /* Override pagetable_t <-> struct page_info conversions to work with mfn_t */
   47.91  #undef pagetable_get_page
   47.92 @@ -675,26 +612,26 @@ void sh_destroy_shadow(struct vcpu *v, m
   47.93  static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
   47.94  {
   47.95      u32 x, nx;
   47.96 -    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
   47.97 +    struct page_info *sp = mfn_to_page(smfn);
   47.98  
   47.99      ASSERT(mfn_valid(smfn));
  47.100  
  47.101 -    x = sp->count;
  47.102 +    x = sp->u.sh.count;
  47.103      nx = x + 1;
  47.104  
  47.105      if ( unlikely(nx >= 1U<<26) )
  47.106      {
  47.107 -        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
  47.108 -                       sp->backpointer, mfn_x(smfn));
  47.109 +        SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRpgmfn " smfn=%lx\n",
  47.110 +                       sp->v.sh.back, mfn_x(smfn));
  47.111          return 0;
  47.112      }
  47.113      
  47.114      /* Guarded by the shadow lock, so no need for atomic update */
  47.115 -    sp->count = nx;
  47.116 +    sp->u.sh.count = nx;
  47.117  
  47.118      /* We remember the first shadow entry that points to each shadow. */
  47.119      if ( entry_pa != 0 
  47.120 -         && !sh_type_is_pinnable(v, sp->type) 
  47.121 +         && !sh_type_is_pinnable(v, sp->u.sh.type)
  47.122           && sp->up == 0 ) 
  47.123          sp->up = entry_pa;
  47.124      
  47.125 @@ -707,29 +644,29 @@ static inline int sh_get_ref(struct vcpu
  47.126  static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
  47.127  {
  47.128      u32 x, nx;
  47.129 -    struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
  47.130 +    struct page_info *sp = mfn_to_page(smfn);
  47.131  
  47.132      ASSERT(mfn_valid(smfn));
  47.133 -    ASSERT(sp->mbz == 0);
  47.134 +    ASSERT(sp->count_info == 0);
  47.135  
  47.136      /* If this is the entry in the up-pointer, remove it */
  47.137      if ( entry_pa != 0 
  47.138 -         && !sh_type_is_pinnable(v, sp->type) 
  47.139 +         && !sh_type_is_pinnable(v, sp->u.sh.type)
  47.140           && sp->up == entry_pa ) 
  47.141          sp->up = 0;
  47.142  
  47.143 -    x = sp->count;
  47.144 +    x = sp->u.sh.count;
  47.145      nx = x - 1;
  47.146  
  47.147      if ( unlikely(x == 0) ) 
  47.148      {
  47.149          SHADOW_ERROR("shadow ref underflow, smfn=%lx oc=%08x t=%#x\n",
  47.150 -                     mfn_x(smfn), sp->count, sp->type);
  47.151 +                     mfn_x(smfn), sp->u.sh.count, sp->u.sh.type);
  47.152          BUG();
  47.153      }
  47.154  
  47.155      /* Guarded by the shadow lock, so no need for atomic update */
  47.156 -    sp->count = nx;
  47.157 +    sp->u.sh.count = nx;
  47.158  
  47.159      if ( unlikely(nx == 0) ) 
  47.160          sh_destroy_shadow(v, smfn);
  47.161 @@ -741,26 +678,26 @@ static inline void sh_put_ref(struct vcp
  47.162   * Returns 0 for failure, 1 for success. */
  47.163  static inline int sh_pin(struct vcpu *v, mfn_t smfn)
  47.164  {
  47.165 -    struct shadow_page_info *sp;
  47.166 +    struct page_info *sp;
  47.167      
  47.168      ASSERT(mfn_valid(smfn));
  47.169 -    sp = mfn_to_shadow_page(smfn);
  47.170 -    ASSERT(sh_type_is_pinnable(v, sp->type));
  47.171 -    if ( sp->pinned ) 
  47.172 +    sp = mfn_to_page(smfn);
  47.173 +    ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
  47.174 +    if ( sp->u.sh.pinned )
  47.175      {
  47.176          /* Already pinned: take it out of the pinned-list so it can go 
  47.177           * at the front */
  47.178 -        list_del(&sp->list);
  47.179 +        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
  47.180      }
  47.181      else
  47.182      {
  47.183          /* Not pinned: pin it! */
  47.184          if ( !sh_get_ref(v, smfn, 0) )
  47.185              return 0;
  47.186 -        sp->pinned = 1;
  47.187 +        sp->u.sh.pinned = 1;
  47.188      }
  47.189      /* Put it at the head of the list of pinned shadows */
  47.190 -    list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows);
  47.191 +    page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows);
  47.192      return 1;
  47.193  }
  47.194  
  47.195 @@ -768,15 +705,15 @@ static inline int sh_pin(struct vcpu *v,
  47.196   * of pinned shadows, and release the extra ref. */
  47.197  static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
  47.198  {
  47.199 -    struct shadow_page_info *sp;
  47.200 +    struct page_info *sp;
  47.201      
  47.202      ASSERT(mfn_valid(smfn));
  47.203 -    sp = mfn_to_shadow_page(smfn);
  47.204 -    ASSERT(sh_type_is_pinnable(v, sp->type));
  47.205 -    if ( sp->pinned )
  47.206 +    sp = mfn_to_page(smfn);
  47.207 +    ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
  47.208 +    if ( sp->u.sh.pinned )
  47.209      {
  47.210 -        sp->pinned = 0;
  47.211 -        list_del(&sp->list);
  47.212 +        sp->u.sh.pinned = 0;
  47.213 +        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
  47.214          sp->up = 0; /* in case this stops being a pinnable type in future */
  47.215          sh_put_ref(v, smfn, 0);
  47.216      }
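
Throughout private.h the shadow metadata moves into the u.sh/v.sh unions of struct page_info, and the pinned-shadow list switches from list_head to the page_list helpers. A condensed sketch of the pin bookkeeping after the conversion (names as in the hunks above; locking and asserts elided):

    struct page_info *sp = mfn_to_page(smfn);

    if ( sp->u.sh.pinned )
    {
        /* Already pinned: unlink so it can be re-inserted at the head. */
        page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
    }
    else
    {
        if ( !sh_get_ref(v, smfn, 0) )  /* the pin holds one extra reference */
            return 0;
        sp->u.sh.pinned = 1;
    }
    /* Most recently pinned shadows sit at the head of the pinned list. */
    page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows);
    return 1;
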
    48.1 --- a/xen/arch/x86/numa.c	Fri Feb 13 10:56:01 2009 +0900
    48.2 +++ b/xen/arch/x86/numa.c	Fri Feb 13 11:22:28 2009 +0900
    48.3 @@ -312,7 +312,7 @@ static void dump_numa(unsigned char key)
    48.4  		for_each_online_node(i)
    48.5  			page_num_node[i] = 0;
    48.6  
    48.7 -		list_for_each_entry(page, &d->page_list, list)
    48.8 +		page_list_for_each(page, &d->page_list)
    48.9  		{
   48.10  			i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT);
   48.11  			page_num_node[i]++;
    49.1 --- a/xen/arch/x86/physdev.c	Fri Feb 13 10:56:01 2009 +0900
    49.2 +++ b/xen/arch/x86/physdev.c	Fri Feb 13 11:22:28 2009 +0900
    49.3 @@ -62,7 +62,7 @@ static int physdev_map_pirq(struct physd
    49.4                  ret = -EINVAL;
    49.5                  goto free_domain;
    49.6              }
    49.7 -            vector = IO_APIC_VECTOR(map->index);
    49.8 +            vector = domain_irq_to_vector(current->domain, map->index);
    49.9              if ( !vector )
   49.10              {
   49.11                  dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n",
   49.12 @@ -75,7 +75,7 @@ static int physdev_map_pirq(struct physd
   49.13          case MAP_PIRQ_TYPE_MSI:
   49.14              vector = map->index;
   49.15              if ( vector == -1 )
   49.16 -                vector = assign_irq_vector(AUTO_ASSIGN);
   49.17 +                vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
   49.18  
   49.19              if ( vector < 0 || vector >= NR_VECTORS )
   49.20              {
    50.1 --- a/xen/arch/x86/setup.c	Fri Feb 13 10:56:01 2009 +0900
    50.2 +++ b/xen/arch/x86/setup.c	Fri Feb 13 11:22:28 2009 +0900
    50.3 @@ -417,7 +417,7 @@ void __init __start_xen(unsigned long mb
    50.4      unsigned int initrdidx = 1;
    50.5      multiboot_info_t *mbi = __va(mbi_p);
    50.6      module_t *mod = (module_t *)__va(mbi->mods_addr);
    50.7 -    unsigned long nr_pages, modules_length, modules_headroom = -1;
    50.8 +    unsigned long nr_pages, modules_length, modules_headroom;
    50.9      unsigned long allocator_bitmap_end;
   50.10      int i, e820_warn = 0, bytes = 0;
   50.11      struct ns16550_defaults ns16550 = {
   50.12 @@ -618,6 +618,12 @@ void __init __start_xen(unsigned long mb
   50.13       */
   50.14      modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
   50.15  
   50.16 +    /* ensure mod[0] is mapped before parsing */
   50.17 +    bootstrap_map(mod[0].mod_start, mod[0].mod_end);
   50.18 +    modules_headroom = bzimage_headroom(
   50.19 +                      (char *)(unsigned long)mod[0].mod_start,
   50.20 +                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
   50.21 +
   50.22      for ( i = boot_e820.nr_map-1; i >= 0; i-- )
   50.23      {
   50.24          uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
   50.25 @@ -636,7 +642,8 @@ void __init __start_xen(unsigned long mb
   50.26              s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
   50.27  
   50.28  #if defined(CONFIG_X86_64)
   50.29 -#define reloc_size ((__pa(&_end) + mask) & ~mask)
   50.30 +/* Relocate Xen image, allocation bitmap, and one page of padding. */
   50.31 +#define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask)
   50.32          /* Is the region suitable for relocating Xen? */
   50.33          if ( !xen_phys_start && ((e-s) >= reloc_size) )
   50.34          {
   50.35 @@ -721,11 +728,6 @@ void __init __start_xen(unsigned long mb
   50.36          }
   50.37  #endif
   50.38  
   50.39 -        if ( modules_headroom == -1 )
   50.40 -            modules_headroom = bzimage_headroom(
   50.41 -                      (char *)(unsigned long)mod[0].mod_start,
   50.42 -                      (unsigned long)(mod[0].mod_end - mod[0].mod_start));
   50.43 -
   50.44          /* Is the region suitable for relocating the multiboot modules? */
   50.45          if ( !initial_images_start && (s < e) &&
   50.46               ((e-s) >= (modules_length+modules_headroom)) )
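
Two related adjustments above: bzimage_headroom is now computed once, before the E820 scan, so its value is valid when the module-relocation test runs; and on x86-64 the relocation budget grows to cover the Xen image, the boot allocator's page bitmap, and a page of padding. A rough sizing sketch (illustrative numbers, not from the patch):

    /* Example: 4 GiB of RAM
     *   max_page   = 4 GiB / 4 KiB    = 1,048,576 pages
     *   bitmap     = max_page / 8     = 128 KiB
     *   reloc_size = roundup(__pa(&_end) + 128 KiB + PAGE_SIZE, 2 MiB)
     * where the 2 MiB granularity comes from
     * mask = (1UL << L2_PAGETABLE_SHIFT) - 1. */
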
    51.1 --- a/xen/arch/x86/smpboot.c	Fri Feb 13 10:56:01 2009 +0900
    51.2 +++ b/xen/arch/x86/smpboot.c	Fri Feb 13 11:22:28 2009 +0900
    51.3 @@ -1256,8 +1256,6 @@ int __cpu_disable(void)
    51.4  	mdelay(1);
    51.5  	local_irq_disable();
    51.6  
    51.7 -	cpufreq_del_cpu(cpu);
    51.8 -
    51.9  	time_suspend();
   51.10  
   51.11  	cpu_mcheck_disable();
   51.12 @@ -1321,6 +1319,8 @@ int cpu_down(unsigned int cpu)
   51.13  
   51.14  	printk("Prepare to bring CPU%d down...\n", cpu);
   51.15  
   51.16 +	cpufreq_del_cpu(cpu);
   51.17 +
   51.18  	err = stop_machine_run(take_cpu_down, NULL, cpu);
   51.19  	if (err < 0)
   51.20  		goto out;
    52.1 --- a/xen/arch/x86/x86_32/xen.lds.S	Fri Feb 13 10:56:01 2009 +0900
    52.2 +++ b/xen/arch/x86/x86_32/xen.lds.S	Fri Feb 13 11:22:28 2009 +0900
    52.3 @@ -91,6 +91,7 @@ SECTIONS
    52.4  	*(.exit.text)
    52.5  	*(.exit.data)
    52.6  	*(.exitcall.exit)
    52.7 +	*(.eh_frame)
    52.8  	}
    52.9  
   52.10    /* Stabs debugging sections.  */
    53.1 --- a/xen/arch/x86/x86_64/entry.S	Fri Feb 13 10:56:01 2009 +0900
    53.2 +++ b/xen/arch/x86/x86_64/entry.S	Fri Feb 13 11:22:28 2009 +0900
    53.3 @@ -739,7 +739,6 @@ ENTRY(hypercall_args_table)
    53.4          .byte 1 /* do_sysctl            */  /* 35 */
    53.5          .byte 1 /* do_domctl            */
    53.6          .byte 2 /* do_kexec             */
    53.7 -        .byte 1 /* do_xsm_op            */
    53.8          .rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
    53.9          .byte 0 /* do_ni_hypercall      */
   53.10          .endr
    54.1 --- a/xen/arch/x86/x86_64/xen.lds.S	Fri Feb 13 10:56:01 2009 +0900
    54.2 +++ b/xen/arch/x86/x86_64/xen.lds.S	Fri Feb 13 11:22:28 2009 +0900
    54.3 @@ -89,6 +89,7 @@ SECTIONS
    54.4  	*(.exit.text)
    54.5  	*(.exit.data)
    54.6  	*(.exitcall.exit)
    54.7 +	*(.eh_frame)
    54.8  	}
    54.9  
   54.10    /* Stabs debugging sections.  */
    55.1 --- a/xen/common/domain.c	Fri Feb 13 10:56:01 2009 +0900
    55.2 +++ b/xen/common/domain.c	Fri Feb 13 11:22:28 2009 +0900
    55.3 @@ -41,7 +41,6 @@ boolean_param("dom0_vcpus_pin", opt_dom0
    55.4  
    55.5  /* set xen as default cpufreq */
    55.6  enum cpufreq_controller cpufreq_controller = FREQCTL_xen;
    55.7 -struct cpufreq_governor *cpufreq_opt_governor;
    55.8  
    55.9  static void __init setup_cpufreq_option(char *str)
   55.10  {
   55.11 @@ -71,19 +70,6 @@ static void __init setup_cpufreq_option(
   55.12  }
   55.13  custom_param("cpufreq", setup_cpufreq_option);
   55.14  
   55.15 -static void __init setup_cpufreq_gov_option(char *str)
   55.16 -{
   55.17 -    if ( !strcmp(str, "userspace") )
   55.18 -        cpufreq_opt_governor = &cpufreq_gov_userspace;
   55.19 -    else if ( !strcmp(str, "performance") )
   55.20 -        cpufreq_opt_governor = &cpufreq_gov_performance;
   55.21 -    else if ( !strcmp(str, "powersave") )
   55.22 -        cpufreq_opt_governor = &cpufreq_gov_powersave;
   55.23 -    else if ( !strcmp(str, "ondemand") )
   55.24 -        cpufreq_opt_governor = &cpufreq_gov_dbs;
   55.25 -}
   55.26 -custom_param("cpufreq_governor", setup_cpufreq_gov_option);
   55.27 -
   55.28  /* Protect updates/reads (resp.) of domain_list and domain_hash. */
   55.29  DEFINE_SPINLOCK(domlist_update_lock);
   55.30  DEFINE_RCU_READ_LOCK(domlist_read_lock);
   55.31 @@ -233,8 +219,8 @@ struct domain *domain_create(
   55.32      spin_lock_init(&d->page_alloc_lock);
   55.33      spin_lock_init(&d->shutdown_lock);
   55.34      spin_lock_init(&d->hypercall_deadlock_mutex);
   55.35 -    INIT_LIST_HEAD(&d->page_list);
   55.36 -    INIT_LIST_HEAD(&d->xenpage_list);
   55.37 +    INIT_PAGE_LIST_HEAD(&d->page_list);
   55.38 +    INIT_PAGE_LIST_HEAD(&d->xenpage_list);
   55.39  
   55.40      if ( domcr_flags & DOMCRF_hvm )
   55.41          d->is_hvm = 1;
    56.1 --- a/xen/common/grant_table.c	Fri Feb 13 10:56:01 2009 +0900
    56.2 +++ b/xen/common/grant_table.c	Fri Feb 13 11:22:28 2009 +0900
    56.3 @@ -1192,7 +1192,7 @@ gnttab_transfer(
    56.4          /* Okay, add the page to 'e'. */
    56.5          if ( unlikely(e->tot_pages++ == 0) )
    56.6              get_knownalive_domain(e);
    56.7 -        list_add_tail(&page->list, &e->page_list);
    56.8 +        page_list_add_tail(page, &e->page_list);
    56.9          page_set_owner(page, e);
   56.10  
   56.11          spin_unlock(&e->page_alloc_lock);
    57.1 --- a/xen/common/hvm/save.c	Fri Feb 13 10:56:01 2009 +0900
    57.2 +++ b/xen/common/hvm/save.c	Fri Feb 13 11:22:28 2009 +0900
    57.3 @@ -26,6 +26,7 @@
    57.4  #include <xen/version.h>
    57.5  #include <public/version.h>
    57.6  #include <xen/sched.h>
    57.7 +#include <xen/guest_access.h>
    57.8  
    57.9  #include <asm/hvm/support.h>
   57.10  
   57.11 @@ -75,6 +76,53 @@ size_t hvm_save_size(struct domain *d)
   57.12      return sz;
   57.13  }
   57.14  
   57.15 +/* Extract a single instance of a save record, by marshalling all
   57.16 + * records of that type and copying out the one we need. */
   57.17 +int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance, 
   57.18 +                 XEN_GUEST_HANDLE_64(uint8) handle)
   57.19 +{
   57.20 +    int rv = 0;
   57.21 +    size_t sz = 0;
   57.22 +    struct vcpu *v;
   57.23 +    hvm_domain_context_t ctxt = { 0, };
   57.24 +
   57.25 +    if ( d->is_dying 
   57.26 +         || typecode > HVM_SAVE_CODE_MAX 
   57.27 +         || hvm_sr_handlers[typecode].size < sizeof(struct hvm_save_descriptor)
   57.28 +         || hvm_sr_handlers[typecode].save == NULL )
   57.29 +        return -EINVAL;
   57.30 +
   57.31 +    if ( hvm_sr_handlers[typecode].kind == HVMSR_PER_VCPU )
   57.32 +        for_each_vcpu(d, v)
   57.33 +            sz += hvm_sr_handlers[typecode].size;
   57.34 +    else 
   57.35 +        sz = hvm_sr_handlers[typecode].size;
   57.36 +    
   57.37 +    if ( (instance + 1) * hvm_sr_handlers[typecode].size > sz )
   57.38 +        return -EINVAL;
   57.39 +
   57.40 +    ctxt.size = sz;
   57.41 +    ctxt.data = xmalloc_bytes(sz);
   57.42 +    if ( !ctxt.data )
   57.43 +        return -ENOMEM;
   57.44 +
   57.45 +    if ( hvm_sr_handlers[typecode].save(d, &ctxt) != 0 )
   57.46 +    {
   57.47 +        gdprintk(XENLOG_ERR, 
   57.48 +                 "HVM save: failed to save type %"PRIu16"\n", typecode);
   57.49 +        rv = -EFAULT;
   57.50 +    }
   57.51 +    else if ( copy_to_guest(handle,
   57.52 +                            ctxt.data 
   57.53 +                            + (instance * hvm_sr_handlers[typecode].size) 
   57.54 +                            + sizeof (struct hvm_save_descriptor), 
   57.55 +                            hvm_sr_handlers[typecode].size
   57.56 +                            - sizeof (struct hvm_save_descriptor)) )
   57.57 +        rv = -EFAULT;
   57.58 +
   57.59 +    xfree(ctxt.data);
   57.60 +    return rv;
   57.61 +}
   57.62  
   57.63  int hvm_save(struct domain *d, hvm_domain_context_t *h)
   57.64  {
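
hvm_save_one() marshals every instance of the requested record type into a scratch buffer and then copies out just the payload of the wanted instance. The offset arithmetic, condensed from the function above (offset and length are illustrative locals, not in the patch):

    /* Per-VCPU record types lay instances out back to back:
     *   [descriptor 0][payload 0][descriptor 1][payload 1]...
     * Only the payload of the requested instance reaches the guest. */
    offset = instance * hvm_sr_handlers[typecode].size
             + sizeof(struct hvm_save_descriptor);
    length = hvm_sr_handlers[typecode].size
             - sizeof(struct hvm_save_descriptor);
    copy_to_guest(handle, ctxt.data + offset, length);
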
    58.1 --- a/xen/common/memory.c	Fri Feb 13 10:56:01 2009 +0900
    58.2 +++ b/xen/common/memory.c	Fri Feb 13 11:22:28 2009 +0900
    58.3 @@ -218,8 +218,8 @@ static void decrease_reservation(struct 
    58.4  static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
    58.5  {
    58.6      struct xen_memory_exchange exch;
    58.7 -    LIST_HEAD(in_chunk_list);
    58.8 -    LIST_HEAD(out_chunk_list);
    58.9 +    PAGE_LIST_HEAD(in_chunk_list);
   58.10 +    PAGE_LIST_HEAD(out_chunk_list);
   58.11      unsigned long in_chunk_order, out_chunk_order;
   58.12      xen_pfn_t     gpfn, gmfn, mfn;
   58.13      unsigned long i, j, k;
   58.14 @@ -325,7 +325,7 @@ static long memory_exchange(XEN_GUEST_HA
   58.15                      goto fail;
   58.16                  }
   58.17  
   58.18 -                list_add(&page->list, &in_chunk_list);
   58.19 +                page_list_add(page, &in_chunk_list);
   58.20              }
   58.21          }
   58.22  
   58.23 @@ -339,7 +339,7 @@ static long memory_exchange(XEN_GUEST_HA
   58.24                  goto fail;
   58.25              }
   58.26  
   58.27 -            list_add(&page->list, &out_chunk_list);
   58.28 +            page_list_add(page, &out_chunk_list);
   58.29          }
   58.30  
   58.31          /*
   58.32 @@ -347,10 +347,8 @@ static long memory_exchange(XEN_GUEST_HA
   58.33           */
   58.34  
   58.35          /* Destroy final reference to each input page. */
   58.36 -        while ( !list_empty(&in_chunk_list) )
   58.37 +        while ( (page = page_list_remove_head(&in_chunk_list)) )
   58.38          {
   58.39 -            page = list_entry(in_chunk_list.next, struct page_info, list);
   58.40 -            list_del(&page->list);
   58.41              if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
   58.42                  BUG();
   58.43              mfn = page_to_mfn(page);
   58.44 @@ -360,10 +358,8 @@ static long memory_exchange(XEN_GUEST_HA
   58.45  
   58.46          /* Assign each output page to the domain. */
   58.47          j = 0;
   58.48 -        while ( !list_empty(&out_chunk_list) )
   58.49 +        while ( (page = page_list_remove_head(&out_chunk_list)) )
   58.50          {
   58.51 -            page = list_entry(out_chunk_list.next, struct page_info, list);
   58.52 -            list_del(&page->list);
   58.53              if ( assign_pages(d, page, exch.out.extent_order,
   58.54                                MEMF_no_refcount) )
   58.55                  BUG();
   58.56 @@ -399,21 +395,13 @@ static long memory_exchange(XEN_GUEST_HA
   58.57       */
   58.58   fail:
   58.59      /* Reassign any input pages we managed to steal. */
   58.60 -    while ( !list_empty(&in_chunk_list) )
   58.61 -    {
   58.62 -        page = list_entry(in_chunk_list.next, struct page_info, list);
   58.63 -        list_del(&page->list);
   58.64 +    while ( (page = page_list_remove_head(&in_chunk_list)) )
   58.65          if ( assign_pages(d, page, 0, MEMF_no_refcount) )
   58.66              BUG();
   58.67 -    }
   58.68  
   58.69      /* Free any output pages we managed to allocate. */
   58.70 -    while ( !list_empty(&out_chunk_list) )
   58.71 -    {
   58.72 -        page = list_entry(out_chunk_list.next, struct page_info, list);
   58.73 -        list_del(&page->list);
   58.74 +    while ( (page = page_list_remove_head(&out_chunk_list)) )
   58.75          free_domheap_pages(page, exch.out.extent_order);
   58.76 -    }
   58.77  
   58.78      exch.nr_exchanged = i << in_chunk_order;
   58.79  
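
memory_exchange() now drains its chunk lists with page_list_remove_head(), which pops and returns the head page, or NULL once the list is empty, so each three-line list_entry()/list_del() sequence collapses into the loop condition. The failure-path loop above, shown on its own as the general pattern:

    while ( (page = page_list_remove_head(&in_chunk_list)) )
        if ( assign_pages(d, page, 0, MEMF_no_refcount) )
            BUG();
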
    59.1 --- a/xen/common/page_alloc.c	Fri Feb 13 10:56:01 2009 +0900
    59.2 +++ b/xen/common/page_alloc.c	Fri Feb 13 11:22:28 2009 +0900
    59.3 @@ -71,7 +71,7 @@ integer_param("dma_bits", dma_bitsize);
    59.4  #endif
    59.5  
    59.6  static DEFINE_SPINLOCK(page_scrub_lock);
    59.7 -LIST_HEAD(page_scrub_list);
    59.8 +PAGE_LIST_HEAD(page_scrub_list);
    59.9  static unsigned long scrub_pages;
   59.10  
   59.11  /*********************
   59.12 @@ -264,7 +264,7 @@ unsigned long __init alloc_boot_pages(
   59.13  #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN :  \
   59.14                            (fls(page_to_mfn(pg)) - 1))
   59.15  
   59.16 -typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
   59.17 +typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
   59.18  static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
   59.19  #define heap(node, zone, order) ((*_heap[node])[zone][order])
   59.20  
   59.21 @@ -272,13 +272,16 @@ static unsigned long *avail[MAX_NUMNODES
   59.22  
   59.23  static DEFINE_SPINLOCK(heap_lock);
   59.24  
   59.25 -static void init_node_heap(int node)
   59.26 +static unsigned long init_node_heap(int node, unsigned long mfn,
   59.27 +                                    unsigned long nr)
   59.28  {
   59.29      /* First node to be discovered has its heap metadata statically alloced. */
   59.30      static heap_by_zone_and_order_t _heap_static;
   59.31      static unsigned long avail_static[NR_ZONES];
   59.32      static int first_node_initialised;
   59.33 -
   59.34 +    unsigned long needed = (sizeof(**_heap) +
   59.35 +                            sizeof(**avail) * NR_ZONES +
   59.36 +                            PAGE_SIZE - 1) >> PAGE_SHIFT;
   59.37      int i, j;
   59.38  
   59.39      if ( !first_node_initialised )
   59.40 @@ -286,19 +289,40 @@ static void init_node_heap(int node)
   59.41          _heap[node] = &_heap_static;
   59.42          avail[node] = avail_static;
   59.43          first_node_initialised = 1;
   59.44 +        needed = 0;
   59.45 +    }
   59.46 +#ifdef DIRECTMAP_VIRT_END
   59.47 +    else if ( nr >= needed &&
   59.48 +              mfn + needed <= virt_to_mfn(DIRECTMAP_VIRT_END) )
   59.49 +    {
   59.50 +        _heap[node] = mfn_to_virt(mfn);
   59.51 +        avail[node] = mfn_to_virt(mfn + needed) - sizeof(**avail) * NR_ZONES;
   59.52 +    }
   59.53 +#endif
   59.54 +    else if ( get_order_from_bytes(sizeof(**_heap)) ==
   59.55 +              get_order_from_pages(needed) )
   59.56 +    {
   59.57 +        _heap[node] = alloc_xenheap_pages(get_order_from_pages(needed), 0);
   59.58 +        BUG_ON(!_heap[node]);
   59.59 +        avail[node] = (void *)_heap[node] + (needed << PAGE_SHIFT) -
   59.60 +                      sizeof(**avail) * NR_ZONES;
   59.61 +        needed = 0;
   59.62      }
   59.63      else
   59.64      {
   59.65          _heap[node] = xmalloc(heap_by_zone_and_order_t);
   59.66          avail[node] = xmalloc_array(unsigned long, NR_ZONES);
   59.67          BUG_ON(!_heap[node] || !avail[node]);
   59.68 +        needed = 0;
   59.69      }
   59.70  
   59.71      memset(avail[node], 0, NR_ZONES * sizeof(long));
   59.72  
   59.73      for ( i = 0; i < NR_ZONES; i++ )
   59.74          for ( j = 0; j <= MAX_ORDER; j++ )
   59.75 -            INIT_LIST_HEAD(&(*_heap[node])[i][j]);
   59.76 +            INIT_PAGE_LIST_HEAD(&(*_heap[node])[i][j]);
   59.77 +
   59.78 +    return needed;
   59.79  }
   59.80  
   59.81  /* Allocate 2^@order contiguous pages. */
   59.82 @@ -340,7 +364,7 @@ static struct page_info *alloc_heap_page
   59.83  
   59.84              /* Find smallest order which can satisfy the request. */
   59.85              for ( j = order; j <= MAX_ORDER; j++ )
   59.86 -                if ( !list_empty(&heap(node, zone, j)) )
   59.87 +                if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
   59.88                      goto found;
   59.89          } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
   59.90  
   59.91 @@ -354,14 +378,11 @@ static struct page_info *alloc_heap_page
   59.92      return NULL;
   59.93  
   59.94   found: 
   59.95 -    pg = list_entry(heap(node, zone, j).next, struct page_info, list);
   59.96 -    list_del(&pg->list);
   59.97 -
   59.98      /* We may have to halve the chunk a number of times. */
   59.99      while ( j != order )
  59.100      {
  59.101          PFN_ORDER(pg) = --j;
  59.102 -        list_add_tail(&pg->list, &heap(node, zone, j));
  59.103 +        page_list_add_tail(pg, &heap(node, zone, j));
  59.104          pg += 1 << j;
  59.105      }
  59.106      
  59.107 @@ -378,10 +399,13 @@ static struct page_info *alloc_heap_page
  59.108          /* Reference count must continuously be zero for free pages. */
  59.109          BUG_ON(pg[i].count_info != 0);
  59.110  
  59.111 -        /* Add in any extra CPUs that need flushing because of this page. */
  59.112 -        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
  59.113 -        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
  59.114 -        cpus_or(mask, mask, extra_cpus_mask);
  59.115 +        if ( pg[i].u.free.need_tlbflush )
  59.116 +        {
  59.117 +            /* Add in extra CPUs that need flushing because of this page. */
  59.118 +            cpus_andnot(extra_cpus_mask, cpu_online_map, mask);
  59.119 +            tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
  59.120 +            cpus_or(mask, mask, extra_cpus_mask);
  59.121 +        }
  59.122  
  59.123          /* Initialise fields which have other uses for free pages. */
  59.124          pg[i].u.inuse.type_info = 0;
  59.125 @@ -404,7 +428,6 @@ static void free_heap_pages(
  59.126      unsigned long mask;
  59.127      unsigned int i, node = phys_to_nid(page_to_maddr(pg));
  59.128      unsigned int zone = page_to_zone(pg);
  59.129 -    struct domain *d;
  59.130  
  59.131      ASSERT(order <= MAX_ORDER);
  59.132      ASSERT(node >= 0);
  59.133 @@ -425,15 +448,10 @@ static void free_heap_pages(
  59.134           */
  59.135          pg[i].count_info = 0;
  59.136  
  59.137 -        if ( (d = page_get_owner(&pg[i])) != NULL )
  59.138 -        {
  59.139 +        /* If a page has no owner it will need no safety TLB flush. */
  59.140 +        pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
  59.141 +        if ( pg[i].u.free.need_tlbflush )
  59.142              pg[i].tlbflush_timestamp = tlbflush_current_time();
  59.143 -            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
  59.144 -        }
  59.145 -        else
  59.146 -        {
  59.147 -            cpus_clear(pg[i].u.free.cpumask);
  59.148 -        }
  59.149      }
  59.150  
  59.151      spin_lock(&heap_lock);
  59.152 @@ -452,8 +470,8 @@ static void free_heap_pages(
  59.153              if ( allocated_in_map(page_to_mfn(pg)-mask) ||
  59.154                   (PFN_ORDER(pg-mask) != order) )
  59.155                  break;
  59.156 -            list_del(&(pg-mask)->list);
  59.157              pg -= mask;
  59.158 +            page_list_del(pg, &heap(node, zone, order));
  59.159          }
  59.160          else
  59.161          {
  59.162 @@ -461,7 +479,7 @@ static void free_heap_pages(
  59.163              if ( allocated_in_map(page_to_mfn(pg)+mask) ||
  59.164                   (PFN_ORDER(pg+mask) != order) )
  59.165                  break;
  59.166 -            list_del(&(pg+mask)->list);
  59.167 +            page_list_del(pg + mask, &heap(node, zone, order));
  59.168          }
  59.169          
  59.170          order++;
  59.171 @@ -471,7 +489,7 @@ static void free_heap_pages(
  59.172      }
  59.173  
  59.174      PFN_ORDER(pg) = order;
  59.175 -    list_add_tail(&pg->list, &heap(node, zone, order));
  59.176 +    page_list_add_tail(pg, &heap(node, zone, order));
  59.177  
  59.178      spin_unlock(&heap_lock);
  59.179  }
  59.180 @@ -482,7 +500,6 @@ static void free_heap_pages(
  59.181   * latter is not on a MAX_ORDER boundary, then we reserve the page by
  59.182   * not freeing it to the buddy allocator.
  59.183   */
  59.184 -#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
  59.185  static void init_heap_pages(
  59.186      struct page_info *pg, unsigned long nr_pages)
  59.187  {
  59.188 @@ -491,25 +508,33 @@ static void init_heap_pages(
  59.189  
  59.190      nid_prev = phys_to_nid(page_to_maddr(pg-1));
  59.191  
  59.192 -    for ( i = 0; i < nr_pages; i++ )
  59.193 +    for ( i = 0; i < nr_pages; nid_prev = nid_curr, i++ )
  59.194      {
  59.195          nid_curr = phys_to_nid(page_to_maddr(pg+i));
  59.196  
  59.197          if ( unlikely(!avail[nid_curr]) )
  59.198 -            init_node_heap(nid_curr);
  59.199 +        {
  59.200 +            unsigned long n;
  59.201 +
  59.202 +            n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i);
  59.203 +            if ( n )
  59.204 +            {
  59.205 +                BUG_ON(i + n > nr_pages);
  59.206 +                i += n - 1;
  59.207 +                continue;
  59.208 +            }
  59.209 +        }
  59.210  
  59.211          /*
  59.212 -         * free pages of the same node, or if they differ, but are on a
  59.213 -         * MAX_ORDER alignement boundary (which already get reserved)
  59.214 +         * Free pages of the same node, or if they differ, but are on a
  59.215 +         * MAX_ORDER alignment boundary (which already get reserved).
  59.216           */
  59.217 -         if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
  59.218 -                                         MAX_ORDER_ALIGNED) )
  59.219 -             free_heap_pages(pg+i, 0);
  59.220 -         else
  59.221 -             printk("Reserving non-aligned node boundary @ mfn %lu\n",
  59.222 -                    page_to_mfn(pg+i));
  59.223 -
  59.224 -        nid_prev = nid_curr;
  59.225 +        if ( (nid_curr == nid_prev) ||
  59.226 +             !(page_to_mfn(pg+i) & ((1UL << MAX_ORDER) - 1)) )
  59.227 +            free_heap_pages(pg+i, 0);
  59.228 +        else
  59.229 +            printk("Reserving non-aligned node boundary @ mfn %#lx\n",
  59.230 +                   page_to_mfn(pg+i));
  59.231      }
  59.232  }
  59.233  
  59.234 @@ -537,7 +562,7 @@ static unsigned long avail_heap_pages(
  59.235  #define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn))
  59.236  void __init end_boot_allocator(void)
  59.237  {
  59.238 -    unsigned long i;
  59.239 +    unsigned long i, nr = 0;
  59.240      int curr_free, next_free;
  59.241  
  59.242      /* Pages that are free now go to the domain sub-allocator. */
  59.243 @@ -550,8 +575,15 @@ void __init end_boot_allocator(void)
  59.244          if ( next_free )
  59.245              map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
  59.246          if ( curr_free )
  59.247 -            init_heap_pages(mfn_to_page(i), 1);
  59.248 +            ++nr;
  59.249 +        else if ( nr )
  59.250 +        {
  59.251 +            init_heap_pages(mfn_to_page(i - nr), nr);
  59.252 +            nr = 0;
  59.253 +        }
  59.254      }
  59.255 +    if ( nr )
  59.256 +        init_heap_pages(mfn_to_page(i - nr), nr);
  59.257  
  59.258      if ( !dma_bitsize && (num_online_nodes() > 1) )
  59.259      {
  59.260 @@ -786,7 +818,7 @@ int assign_pages(
  59.261          page_set_owner(&pg[i], d);
  59.262          wmb(); /* Domain pointer must be visible before updating refcnt. */
  59.263          pg[i].count_info = PGC_allocated | 1;
  59.264 -        list_add_tail(&pg[i].list, &d->page_list);
  59.265 +        page_list_add_tail(&pg[i], &d->page_list);
  59.266      }
  59.267  
  59.268      spin_unlock(&d->page_alloc_lock);
  59.269 @@ -844,7 +876,7 @@ void free_domheap_pages(struct page_info
  59.270          spin_lock_recursive(&d->page_alloc_lock);
  59.271  
  59.272          for ( i = 0; i < (1 << order); i++ )
  59.273 -            list_del(&pg[i].list);
  59.274 +            page_list_del2(&pg[i], &d->xenpage_list, &d->arch.relmem_list);
  59.275  
  59.276          d->xenheap_pages -= 1 << order;
  59.277          drop_dom_ref = (d->xenheap_pages == 0);
  59.278 @@ -859,7 +891,7 @@ void free_domheap_pages(struct page_info
  59.279          for ( i = 0; i < (1 << order); i++ )
  59.280          {
  59.281              BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
  59.282 -            list_del(&pg[i].list);
  59.283 +            page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list);
  59.284          }
  59.285  
  59.286          d->tot_pages -= 1 << order;
  59.287 @@ -882,7 +914,7 @@ void free_domheap_pages(struct page_info
  59.288              {
  59.289                  page_set_owner(&pg[i], NULL);
  59.290                  spin_lock(&page_scrub_lock);
  59.291 -                list_add(&pg[i].list, &page_scrub_list);
  59.292 +                page_list_add(&pg[i], &page_scrub_list);
  59.293                  scrub_pages++;
  59.294                  spin_unlock(&page_scrub_lock);
  59.295              }
  59.296 @@ -965,7 +997,7 @@ static DEFINE_PER_CPU(struct timer, page
  59.297  
  59.298  static void page_scrub_softirq(void)
  59.299  {
  59.300 -    struct list_head *ent;
  59.301 +    PAGE_LIST_HEAD(list);
  59.302      struct page_info  *pg;
  59.303      void             *p;
  59.304      int               i;
  59.305 @@ -983,32 +1015,26 @@ static void page_scrub_softirq(void)
  59.306      do {
  59.307          spin_lock(&page_scrub_lock);
  59.308  
  59.309 -        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
  59.310 +        /* Peel up to 16 pages from the list. */
  59.311 +        for ( i = 0; i < 16; i++ )
  59.312 +        {
  59.313 +            if ( !(pg = page_list_remove_head(&page_scrub_list)) )
  59.314 +                break;
  59.315 +            page_list_add_tail(pg, &list);
  59.316 +        }
  59.317 +        
  59.318 +        if ( unlikely(i == 0) )
  59.319          {
  59.320              spin_unlock(&page_scrub_lock);
  59.321              goto out;
  59.322          }
  59.323 -        
  59.324 -        /* Peel up to 16 pages from the list. */
  59.325 -        for ( i = 0; i < 16; i++ )
  59.326 -        {
  59.327 -            if ( ent->next == &page_scrub_list )
  59.328 -                break;
  59.329 -            ent = ent->next;
  59.330 -        }
  59.331 -        
  59.332 -        /* Remove peeled pages from the list. */
  59.333 -        ent->next->prev = &page_scrub_list;
  59.334 -        page_scrub_list.next = ent->next;
  59.335 -        scrub_pages -= (i+1);
  59.336 +
  59.337 +        scrub_pages -= i;
  59.338  
  59.339          spin_unlock(&page_scrub_lock);
  59.340  
  59.341 -        /* Working backwards, scrub each page in turn. */
  59.342 -        while ( ent != &page_scrub_list )
  59.343 -        {
  59.344 -            pg = list_entry(ent, struct page_info, list);
  59.345 -            ent = ent->prev;
  59.346 +        /* Scrub each page in turn. */
  59.347 +        while ( (pg = page_list_remove_head(&list)) ) {
  59.348              p = map_domain_page(page_to_mfn(pg));
  59.349              scrub_page(p);
  59.350              unmap_domain_page(p);
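
Free pages in this file now carry only a need_tlbflush flag instead of a per-page dirty-CPU mask, so the allocator filters the whole online map by the page's flush timestamp. The allocation-side check, condensed from the hunk above:

    if ( pg[i].u.free.need_tlbflush )
    {
        /* Start from every online CPU not already in the mask and drop
         * those whose TLBs were flushed after this page was freed. */
        cpus_andnot(extra_cpus_mask, cpu_online_map, mask);
        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
        cpus_or(mask, mask, extra_cpus_mask);
    }
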
    60.1 --- a/xen/drivers/char/serial.c	Fri Feb 13 10:56:01 2009 +0900
    60.2 +++ b/xen/drivers/char/serial.c	Fri Feb 13 11:22:28 2009 +0900
    60.3 @@ -471,7 +471,7 @@ void serial_suspend(void)
    60.4      int i, irq;
    60.5      for ( i = 0; i < ARRAY_SIZE(com); i++ )
    60.6          if ( (irq = serial_irq(i)) >= 0 )
    60.7 -            free_irq(irq);
    60.8 +            release_irq(irq);
    60.9  }
   60.10  
   60.11  void serial_resume(void)
    61.1 --- a/xen/drivers/cpufreq/cpufreq.c	Fri Feb 13 10:56:01 2009 +0900
    61.2 +++ b/xen/drivers/cpufreq/cpufreq.c	Fri Feb 13 11:22:28 2009 +0900
    61.3 @@ -46,6 +46,9 @@
    61.4  #include <acpi/acpi.h>
    61.5  #include <acpi/cpufreq/cpufreq.h>
    61.6  
    61.7 +static unsigned int usr_max_freq, usr_min_freq;
    61.8 +static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy);
    61.9 +
   61.10  struct cpufreq_dom {
   61.11      unsigned int	dom;
   61.12      cpumask_t		map;
   61.13 @@ -53,6 +56,7 @@ struct cpufreq_dom {
   61.14  };
   61.15  static LIST_HEAD(cpufreq_dom_list_head);
   61.16  
   61.17 +struct cpufreq_governor *cpufreq_opt_governor;
   61.18  LIST_HEAD(cpufreq_governor_list);
   61.19  
   61.20  struct cpufreq_governor *__find_governor(const char *governor)
   61.21 @@ -213,6 +217,9 @@ int cpufreq_add_cpu(unsigned int cpu)
   61.22          perf->domain_info.num_processors) {
   61.23          memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
   61.24          policy->governor = NULL;
   61.25 +
   61.26 +        cpufreq_cmdline_common_para(&new_policy);
   61.27 +
   61.28          ret = __cpufreq_set_policy(policy, &new_policy);
   61.29          if (ret) {
   61.30              if (new_policy.governor == CPUFREQ_DEFAULT_GOVERNOR)
   61.31 @@ -467,3 +474,69 @@ out:
   61.32      return ret;
   61.33  }
   61.34  
   61.35 +static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy)
   61.36 +{
   61.37 +    if (usr_max_freq)
   61.38 +        new_policy->max = usr_max_freq;
   61.39 +    if (usr_min_freq)
   61.40 +        new_policy->min = usr_min_freq;
   61.41 +}
   61.42 +
   61.43 +static int __init cpufreq_handle_common_option(const char *name, const char *val)
   61.44 +{
   61.45 +    if (!strcmp(name, "maxfreq") && val) {
   61.46 +        usr_max_freq = simple_strtoul(val, NULL, 0);
   61.47 +        return 1;
   61.48 +    }
   61.49 +
   61.50 +    if (!strcmp(name, "minfreq") && val) {
   61.51 +        usr_min_freq = simple_strtoul(val, NULL, 0);
   61.52 +        return 1;
   61.53 +    }
   61.54 +
   61.55 +    return 0;
   61.56 +}
   61.57 +
   61.58 +void __init cpufreq_cmdline_parse(char *str)
   61.59 +{
   61.60 +    static struct cpufreq_governor *__initdata cpufreq_governors[] =
   61.61 +    {
   61.62 +        &cpufreq_gov_userspace,
   61.63 +        &cpufreq_gov_dbs,
   61.64 +        &cpufreq_gov_performance,
   61.65 +        &cpufreq_gov_powersave
   61.66 +    };
   61.67 +    unsigned int gov_index = 0;
   61.68 +
   61.69 +    do {
   61.70 +        char *val, *end = strchr(str, ',');
   61.71 +        unsigned int i;
   61.72 +
   61.73 +        if (end)
   61.74 +            *end++ = '\0';
   61.75 +        val = strchr(str, '=');
   61.76 +        if (val)
   61.77 +            *val++ = '\0';
   61.78 +
   61.79 +        if (!cpufreq_opt_governor) {
   61.80 +            if (!val) {
   61.81 +                for (i = 0; i < ARRAY_SIZE(cpufreq_governors); ++i) {
   61.82 +                    if (!strcmp(str, cpufreq_governors[i]->name)) {
   61.83 +                        cpufreq_opt_governor = cpufreq_governors[i];
   61.84 +                        gov_index = i;
   61.85 +                        str = NULL;
   61.86 +                        break;
   61.87 +                    }
   61.88 +                }
   61.89 +            } else {
   61.90 +                cpufreq_opt_governor = CPUFREQ_DEFAULT_GOVERNOR;
   61.91 +            }
   61.92 +        }
   61.93 +
   61.94 +        if (str && !cpufreq_handle_common_option(str, val) &&
   61.95 +            cpufreq_governors[gov_index]->handle_option)
   61.96 +            cpufreq_governors[gov_index]->handle_option(str, val);
   61.97 +
   61.98 +        str = end;
   61.99 +    } while (str);
  61.100 +}
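
cpufreq_cmdline_parse() is now governor-agnostic: the first bare token may name a governor, and later name=value tokens go first to the common handler (maxfreq/minfreq) and then to that governor's handle_option hook. An illustrative option string the parser would accept (hypothetical values; the surrounding cpufreq= syntax is handled by setup_cpufreq_option() elsewhere):

    ondemand,rate=50000,up_threshold=80,maxfreq=1800000
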
    62.1 --- a/xen/drivers/cpufreq/cpufreq_misc_governors.c	Fri Feb 13 10:56:01 2009 +0900
    62.2 +++ b/xen/drivers/cpufreq/cpufreq_misc_governors.c	Fri Feb 13 11:22:28 2009 +0900
    62.3 @@ -18,6 +18,7 @@
    62.4  #include <xen/sched.h>
    62.5  #include <acpi/cpufreq/cpufreq.h>
    62.6  
    62.7 +static unsigned int usr_speed;
    62.8  
    62.9  /*
   62.10   * cpufreq userspace governor
   62.11 @@ -26,6 +27,7 @@ static int cpufreq_governor_userspace(st
   62.12                                        unsigned int event)
   62.13  {
   62.14      int ret = 0;
   62.15 +    unsigned int freq;
   62.16  
   62.17      if (!policy)
   62.18          return -EINVAL;
   62.19 @@ -35,12 +37,17 @@ static int cpufreq_governor_userspace(st
   62.20      case CPUFREQ_GOV_STOP:
   62.21          break;
   62.22      case CPUFREQ_GOV_LIMITS:
   62.23 -        if (policy->max < policy->cur)
   62.24 +        freq = usr_speed ? : policy->cur;
   62.25 +        if (policy->max < freq)
   62.26              ret = __cpufreq_driver_target(policy, policy->max,
   62.27                          CPUFREQ_RELATION_H);
   62.28 -        else if (policy->min > policy->cur)
   62.29 +        else if (policy->min > freq)
   62.30              ret = __cpufreq_driver_target(policy, policy->min,
   62.31                          CPUFREQ_RELATION_L);
   62.32 +        else if (usr_speed)
   62.33 +            ret = __cpufreq_driver_target(policy, freq,
   62.34 +                        CPUFREQ_RELATION_L);
   62.35 +
   62.36          break;
   62.37      default:
   62.38          ret = -EINVAL;
   62.39 @@ -50,9 +57,17 @@ static int cpufreq_governor_userspace(st
   62.40      return ret;
   62.41  }
   62.42  
   62.43 +static void __init 
   62.44 +cpufreq_userspace_handle_option(const char *name, const char *val)
   62.45 +{
   62.46 +    if (!strcmp(name, "speed") && val)
   62.47 +        usr_speed = simple_strtoul(val, NULL, 0);
   62.48 +}
   62.49 +
   62.50  struct cpufreq_governor cpufreq_gov_userspace = {
   62.51      .name = "userspace",
   62.52      .governor = cpufreq_governor_userspace,
   62.53 +    .handle_option = cpufreq_userspace_handle_option
   62.54  };
   62.55  
   62.56  static int __init cpufreq_gov_userspace_init(void)
   62.57 @@ -61,7 +76,7 @@ static int __init cpufreq_gov_userspace_
   62.58  }
   62.59  __initcall(cpufreq_gov_userspace_init);
   62.60  
   62.61 -static void cpufreq_gov_userspace_exit(void)
   62.62 +static void __exit cpufreq_gov_userspace_exit(void)
   62.63  {
   62.64      cpufreq_unregister_governor(&cpufreq_gov_userspace);
   62.65  }
   62.66 @@ -106,7 +121,7 @@ static int __init cpufreq_gov_performanc
   62.67  }
   62.68  __initcall(cpufreq_gov_performance_init);
   62.69  
   62.70 -static void cpufreq_gov_performance_exit(void)
   62.71 +static void __exit cpufreq_gov_performance_exit(void)
   62.72  {
   62.73      cpufreq_unregister_governor(&cpufreq_gov_performance);
   62.74  }
   62.75 @@ -151,7 +166,7 @@ static int __init cpufreq_gov_powersave_
   62.76  }
   62.77  __initcall(cpufreq_gov_powersave_init);
   62.78  
   62.79 -static void cpufreq_gov_powersave_exit(void)
   62.80 +static void __exit cpufreq_gov_powersave_exit(void)
   62.81  {
   62.82      cpufreq_unregister_governor(&cpufreq_gov_powersave);
   62.83  }
    63.1 --- a/xen/drivers/cpufreq/cpufreq_ondemand.c	Fri Feb 13 10:56:01 2009 +0900
    63.2 +++ b/xen/drivers/cpufreq/cpufreq_ondemand.c	Fri Feb 13 11:22:28 2009 +0900
    63.3 @@ -281,9 +281,50 @@ int cpufreq_governor_dbs(struct cpufreq_
    63.4      return 0;
    63.5  }
    63.6  
    63.7 +static void __init cpufreq_dbs_handle_option(const char *name, const char *val)
    63.8 +{
    63.9 +    if ( !strcmp(name, "rate") && val )
   63.10 +    {
   63.11 +        usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
   63.12 +    }
   63.13 +    else if ( !strcmp(name, "up_threshold") && val )
   63.14 +    {
   63.15 +        unsigned long tmp = simple_strtoul(val, NULL, 0);
   63.16 +
   63.17 +        if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
   63.18 +        {
   63.19 +            printk(XENLOG_WARNING "cpufreq/ondemand: "
   63.20 +                   "specified threshold too low, using %d\n",
   63.21 +                   MIN_FREQUENCY_UP_THRESHOLD);
   63.22 +            tmp = MIN_FREQUENCY_UP_THRESHOLD;
   63.23 +        }
   63.24 +        else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
   63.25 +        {
   63.26 +            printk(XENLOG_WARNING "cpufreq/ondemand: "
   63.27 +                   "specified threshold too high, using %d\n",
   63.28 +                   MAX_FREQUENCY_UP_THRESHOLD);
   63.29 +            tmp = MAX_FREQUENCY_UP_THRESHOLD;
   63.30 +        }
   63.31 +        dbs_tuners_ins.up_threshold = tmp;
   63.32 +    }
   63.33 +    else if ( !strcmp(name, "bias") && val )
   63.34 +    {
   63.35 +        unsigned long tmp = simple_strtoul(val, NULL, 0);
   63.36 +
   63.37 +        if ( tmp > 1000 )
   63.38 +        {
   63.39 +            printk(XENLOG_WARNING "cpufreq/ondemand: "
   63.40 +                   "specified bias too high, using 1000\n");
   63.41 +            tmp = 1000;
   63.42 +        }
   63.43 +        dbs_tuners_ins.powersave_bias = tmp;
   63.44 +    }
   63.45 +}
   63.46 +
   63.47  struct cpufreq_governor cpufreq_gov_dbs = {
   63.48      .name = "ondemand",
   63.49      .governor = cpufreq_governor_dbs,
   63.50 +    .handle_option = cpufreq_dbs_handle_option
   63.51  };
   63.52  
   63.53  static int __init cpufreq_gov_dbs_init(void)
   63.54 @@ -292,60 +333,8 @@ static int __init cpufreq_gov_dbs_init(v
   63.55  }
   63.56  __initcall(cpufreq_gov_dbs_init);
   63.57  
   63.58 -static void cpufreq_gov_dbs_exit(void)
   63.59 +static void __exit cpufreq_gov_dbs_exit(void)
   63.60  {
   63.61      cpufreq_unregister_governor(&cpufreq_gov_dbs);
   63.62  }
   63.63  __exitcall(cpufreq_gov_dbs_exit);
   63.64 -
   63.65 -void __init cpufreq_cmdline_parse(char *str)
   63.66 -{
   63.67 -    do {
   63.68 -        char *val, *end = strchr(str, ',');
   63.69 -
   63.70 -        if ( end )
   63.71 -            *end++ = '\0';
   63.72 -        val = strchr(str, '=');
   63.73 -        if ( val )
   63.74 -            *val++ = '\0';
   63.75 -
   63.76 -        if ( !strcmp(str, "rate") && val )
   63.77 -        {
   63.78 -            usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
   63.79 -        }
   63.80 -        else if ( !strcmp(str, "threshold") && val )
   63.81 -        {
   63.82 -            unsigned long tmp = simple_strtoul(val, NULL, 0);
   63.83 -
   63.84 -            if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
   63.85 -            {
   63.86 -                printk(XENLOG_WARNING "cpufreq/ondemand: "
   63.87 -                       "specified threshold too low, using %d\n",
   63.88 -                       MIN_FREQUENCY_UP_THRESHOLD);
   63.89 -                tmp = MIN_FREQUENCY_UP_THRESHOLD;
   63.90 -            }
   63.91 -            else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
   63.92 -            {
   63.93 -                printk(XENLOG_WARNING "cpufreq/ondemand: "
   63.94 -                       "specified threshold too high, using %d\n",
   63.95 -                       MAX_FREQUENCY_UP_THRESHOLD);
   63.96 -                tmp = MAX_FREQUENCY_UP_THRESHOLD;
   63.97 -            }
   63.98 -            dbs_tuners_ins.up_threshold = tmp;
   63.99 -        }
  63.100 -        else if ( !strcmp(str, "bias") && val )
  63.101 -        {
  63.102 -            unsigned long tmp = simple_strtoul(val, NULL, 0);
  63.103 -
  63.104 -            if ( tmp > 1000 )
  63.105 -            {
  63.106 -                printk(XENLOG_WARNING "cpufreq/ondemand: "
  63.107 -                       "specified bias too high, using 1000\n");
  63.108 -                tmp = 1000;
  63.109 -            }
  63.110 -            dbs_tuners_ins.powersave_bias = tmp;
  63.111 -        }
  63.112 -
  63.113 -        str = end;
  63.114 -    } while ( str );
  63.115 -}
    64.1 --- a/xen/drivers/passthrough/amd/iommu_init.c	Fri Feb 13 10:56:01 2009 +0900
    64.2 +++ b/xen/drivers/passthrough/amd/iommu_init.c	Fri Feb 13 11:22:28 2009 +0900
    64.3 @@ -479,26 +479,27 @@ static int set_iommu_interrupt_handler(s
    64.4  {
    64.5      int vector, ret;
    64.6  
    64.7 -    vector = assign_irq_vector(AUTO_ASSIGN);
    64.8 -    vector_to_iommu[vector] = iommu;
    64.9 -
   64.10 -    /* make irq == vector */
   64.11 -    irq_vector[vector] = vector;
   64.12 -    vector_irq[vector] = vector;
   64.13 -
   64.14 -    if ( !vector )
   64.15 +    vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
   64.16 +    if ( vector <= 0 )
   64.17      {
   64.18 -        amd_iov_error("no vectors\n");
   64.19 +        gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
   64.20          return 0;
   64.21      }
   64.22  
   64.23      irq_desc[vector].handler = &iommu_msi_type;
   64.24 -    ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu);
   64.25 +    ret = request_irq_vector(vector, amd_iommu_page_fault, 0,
   64.26 +                             "amd_iommu", iommu);
   64.27      if ( ret )
   64.28      {
   64.29 +        irq_desc[vector].handler = &no_irq_type;
   64.30 +        free_irq_vector(vector);
   64.31          amd_iov_error("can't request irq\n");
   64.32          return 0;
   64.33      }
   64.34 +
   64.35 +    /* Make sure that vector is never re-used. */
   64.36 +    vector_irq[vector] = NEVER_ASSIGN_IRQ;
   64.37 +    vector_to_iommu[vector] = iommu;
   64.38      iommu->vector = vector;
   64.39      return vector;
   64.40  }
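
set_iommu_interrupt_handler() now wires the vector in a safe order: reserve it, install the handler, and only publish the IOMMU binding once request_irq_vector() has succeeded, undoing both steps on failure. Condensed from the hunk above:

    vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
    if ( vector <= 0 )
        return 0;

    irq_desc[vector].handler = &iommu_msi_type;
    if ( request_irq_vector(vector, amd_iommu_page_fault, 0,
                            "amd_iommu", iommu) )
    {
        irq_desc[vector].handler = &no_irq_type;   /* roll back on failure */
        free_irq_vector(vector);
        return 0;
    }

    vector_irq[vector] = NEVER_ASSIGN_IRQ;  /* never re-assigned as a plain IRQ */
    vector_to_iommu[vector] = iommu;
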
    65.1 --- a/xen/drivers/passthrough/amd/iommu_map.c	Fri Feb 13 10:56:01 2009 +0900
    65.2 +++ b/xen/drivers/passthrough/amd/iommu_map.c	Fri Feb 13 11:22:28 2009 +0900
    65.3 @@ -461,8 +461,8 @@ int amd_iommu_map_page(struct domain *d,
    65.4      iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
    65.5      if ( iommu_l2e == 0 )
    65.6      {
    65.7 +        spin_unlock_irqrestore(&hd->mapping_lock, flags);
    65.8          amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
    65.9 -        spin_unlock_irqrestore(&hd->mapping_lock, flags);
   65.10          return -EFAULT;
   65.11      }
   65.12      set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
   65.13 @@ -493,8 +493,8 @@ int amd_iommu_unmap_page(struct domain *
   65.14  
   65.15      if ( iommu_l2e == 0 )
   65.16      {
   65.17 +        spin_unlock_irqrestore(&hd->mapping_lock, flags);
   65.18          amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
   65.19 -        spin_unlock_irqrestore(&hd->mapping_lock, flags);
   65.20          return -EFAULT;
   65.21      }
   65.22  
   65.23 @@ -533,9 +533,9 @@ int amd_iommu_reserve_domain_unity_map(
   65.24  
   65.25          if ( iommu_l2e == 0 )
   65.26          {
   65.27 -            amd_iov_error(
   65.28 -            "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
   65.29              spin_unlock_irqrestore(&hd->mapping_lock, flags);
   65.30 +            amd_iov_error("Invalid IO pagetable entry phys_addr = %lx\n",
   65.31 +                          phys_addr);
   65.32              return -EFAULT;
   65.33          }
   65.34  
   65.35 @@ -552,7 +552,6 @@ int amd_iommu_sync_p2m(struct domain *d)
   65.36  {
   65.37      unsigned long mfn, gfn, flags;
   65.38      u64 iommu_l2e;
   65.39 -    struct list_head *entry;
   65.40      struct page_info *page;
   65.41      struct hvm_iommu *hd;
   65.42      int iw = IOMMU_IO_WRITE_ENABLED;
   65.43 @@ -568,10 +567,10 @@ int amd_iommu_sync_p2m(struct domain *d)
   65.44      if ( hd->p2m_synchronized )
   65.45          goto out;
   65.46  
   65.47 -    for ( entry = d->page_list.next; entry != &d->page_list;
   65.48 -            entry = entry->next )
   65.49 +    spin_lock(&d->page_alloc_lock);
   65.50 +
   65.51 +    page_list_for_each ( page, &d->page_list )
   65.52      {
   65.53 -        page = list_entry(entry, struct page_info, list);
   65.54          mfn = page_to_mfn(page);
   65.55          gfn = get_gpfn_from_mfn(mfn);
   65.56  
   65.57 @@ -582,14 +581,17 @@ int amd_iommu_sync_p2m(struct domain *d)
   65.58  
   65.59          if ( iommu_l2e == 0 )
   65.60          {
   65.61 +            spin_unlock(&d->page_alloc_lock);
   65.62 +            spin_unlock_irqrestore(&hd->mapping_lock, flags);
   65.63              amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
   65.64 -            spin_unlock_irqrestore(&hd->mapping_lock, flags);
   65.65              return -EFAULT;
   65.66          }
   65.67  
   65.68          set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
   65.69      }
   65.70  
   65.71 +    spin_unlock(&d->page_alloc_lock);
   65.72 +
   65.73      hd->p2m_synchronized = 1;
   65.74  
   65.75  out:
    66.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Feb 13 10:56:01 2009 +0900
    66.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Feb 13 11:22:28 2009 +0900
    66.3 @@ -23,7 +23,6 @@
    66.4  #include <xen/pci_regs.h>
    66.5  #include <asm/amd-iommu.h>
    66.6  #include <asm/hvm/svm/amd-iommu-proto.h>
    66.7 -#include <asm/mm.h>
    66.8  
    66.9  extern unsigned short ivrs_bdf_entries;
   66.10  extern struct ivrs_mappings *ivrs_mappings;
    67.1 --- a/xen/drivers/passthrough/io.c	Fri Feb 13 10:56:01 2009 +0900
    67.2 +++ b/xen/drivers/passthrough/io.c	Fri Feb 13 11:22:28 2009 +0900
    67.3 @@ -87,8 +87,8 @@ int pt_irq_create_bind_vtd(
    67.4  
    67.5          if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
    67.6          {
    67.7 +            spin_unlock(&d->event_lock);
    67.8              xfree(hvm_irq_dpci);
    67.9 -            spin_unlock(&d->event_lock);
   67.10              return -EINVAL;
   67.11          }
   67.12      }
    68.1 --- a/xen/drivers/passthrough/iommu.c	Fri Feb 13 10:56:01 2009 +0900
    68.2 +++ b/xen/drivers/passthrough/iommu.c	Fri Feb 13 11:22:28 2009 +0900
    68.3 @@ -33,6 +33,8 @@ int amd_iov_detect(void);
    68.4   *   no-pv                      Disable IOMMU for PV domains (default)
    68.5   *   force|required             Don't boot unless IOMMU is enabled
    68.6   *   passthrough                Bypass VT-d translation for Dom0
    68.7 + *   snoop                      Utilize the snoop control for IOMMU (default)
    68.8 + *   no-snoop                   Don't utilize the snoop control for IOMMU

    68.9   */
   68.10  custom_param("iommu", parse_iommu_param);
   68.11  int iommu_enabled = 0;
   68.12 @@ -45,6 +47,7 @@ static void __init parse_iommu_param(cha
   68.13  {
   68.14      char *ss;
   68.15      iommu_enabled = 1;
   68.16 +    iommu_snoop = 1;
   68.17  
   68.18      do {
   68.19          ss = strchr(s, ',');
   68.20 @@ -62,6 +65,10 @@ static void __init parse_iommu_param(cha
   68.21              force_iommu = 1;
   68.22          else if ( !strcmp(s, "passthrough") )
   68.23              iommu_passthrough = 1;
   68.24 +        else if ( !strcmp(s, "snoop") )
   68.25 +            iommu_snoop = 1;
   68.26 +        else if ( !strcmp(s, "no-snoop") )
   68.27 +            iommu_snoop = 0;
   68.28  
   68.29          s = ss + 1;
   68.30      } while ( ss );
   68.31 @@ -141,7 +148,7 @@ static int iommu_populate_page_table(str
   68.32  
   68.33      spin_lock(&d->page_alloc_lock);
   68.34  
   68.35 -    list_for_each_entry ( page, &d->page_list, list )
   68.36 +    page_list_for_each ( page, &d->page_list )
   68.37      {
   68.38          if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
   68.39          {
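
For reference, the two new keywords extend the existing iommu= boot parameter handled by parse_iommu_param() above. An illustrative command-line fragment (the keyword names are the ones matched in the code; the surrounding bootloader syntax is not part of this changeset):

    iommu=force,no-snoop      # require an IOMMU, but do not use snoop control
    iommu=passthrough         # Dom0 bypasses VT-d translation; snoop keeps its default (on)
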
    69.1 --- a/xen/drivers/passthrough/vtd/dmar.c	Fri Feb 13 10:56:01 2009 +0900
    69.2 +++ b/xen/drivers/passthrough/vtd/dmar.c	Fri Feb 13 11:22:28 2009 +0900
    69.3 @@ -21,6 +21,7 @@
    69.4  
    69.5  #include <xen/init.h>
    69.6  #include <xen/bitmap.h>
    69.7 +#include <xen/errno.h>
    69.8  #include <xen/kernel.h>
    69.9  #include <xen/acpi.h>
   69.10  #include <xen/mm.h>
   69.11 @@ -518,8 +519,6 @@ static int __init acpi_parse_dmar(struct
   69.12  int acpi_dmar_init(void)
   69.13  {
   69.14      int rc;
   69.15 -    struct acpi_drhd_unit *drhd;
   69.16 -    struct iommu *iommu;
   69.17  
   69.18      rc = -ENODEV;
   69.19      if ( force_iommu )
   69.20 @@ -536,20 +535,7 @@ int acpi_dmar_init(void)
   69.21      if ( list_empty(&acpi_drhd_units) )
   69.22          goto fail;
   69.23  
   69.24 -    /* Giving that all devices within guest use same io page table,
   69.25 -     * enable snoop control only if all VT-d engines support it.
   69.26 -     */
   69.27 -    iommu_snoop = 1;
   69.28 -    for_each_drhd_unit ( drhd )
   69.29 -    {
   69.30 -        iommu = drhd->iommu;
   69.31 -        if ( !ecap_snp_ctl(iommu->ecap) ) {
   69.32 -            iommu_snoop = 0;
   69.33 -            break;
   69.34 -        }
   69.35 -    }
   69.36 -
   69.37 -    printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop);
   69.38 +    printk("Intel VT-d has been enabled\n");
   69.39  
   69.40      return 0;
   69.41  
    70.1 --- a/xen/drivers/passthrough/vtd/ia64/vtd.c	Fri Feb 13 10:56:01 2009 +0900
    70.2 +++ b/xen/drivers/passthrough/vtd/ia64/vtd.c	Fri Feb 13 11:22:28 2009 +0900
    70.3 @@ -29,7 +29,9 @@
    70.4  #include "../vtd.h"
    70.5  
    70.6  
    70.7 -int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
    70.8 +int vector_irq[NR_VECTORS] __read_mostly = {
    70.9 +    [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
   70.10 +};
   70.11  /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
   70.12  u8 irq_vector[NR_IRQS] __read_mostly;
   70.13  
   70.14 @@ -45,18 +47,19 @@ void unmap_vtd_domain_page(void *va)
   70.15  }
   70.16  
   70.17  /* Allocate page table, return its machine address */
   70.18 -u64 alloc_pgtable_maddr(struct domain *d)
   70.19 +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
   70.20  {
   70.21      struct page_info *pg;
   70.22      u64 *vaddr;
   70.23  
   70.24 -    pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0);
   70.25 +    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
   70.26 +                             d ? MEMF_node(domain_to_node(d)) : 0);
   70.27      vaddr = map_domain_page(page_to_mfn(pg));
   70.28      if ( !vaddr )
   70.29          return 0;
   70.30 -    memset(vaddr, 0, PAGE_SIZE);
   70.31 +    memset(vaddr, 0, PAGE_SIZE * npages);
   70.32  
   70.33 -    iommu_flush_cache_page(vaddr);
   70.34 +    iommu_flush_cache_page(vaddr, npages);
   70.35      unmap_domain_page(vaddr);
   70.36  
   70.37      return page_to_maddr(pg);
    71.1 --- a/xen/drivers/passthrough/vtd/intremap.c	Fri Feb 13 10:56:01 2009 +0900
    71.2 +++ b/xen/drivers/passthrough/vtd/intremap.c	Fri Feb 13 11:22:28 2009 +0900
    71.3 @@ -502,7 +502,7 @@ int intremap_setup(struct iommu *iommu)
    71.4      ir_ctrl = iommu_ir_ctrl(iommu);
    71.5      if ( ir_ctrl->iremap_maddr == 0 )
    71.6      {
    71.7 -        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL);
    71.8 +        ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL, 1);
    71.9          if ( ir_ctrl->iremap_maddr == 0 )
   71.10          {
   71.11              dprintk(XENLOG_WARNING VTDPREFIX,
    72.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Fri Feb 13 10:56:01 2009 +0900
    72.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Fri Feb 13 11:22:28 2009 +0900
    72.3 @@ -129,9 +129,9 @@ void iommu_flush_cache_entry(void *addr)
    72.4      __iommu_flush_cache(addr, 8);
    72.5  }
    72.6  
    72.7 -void iommu_flush_cache_page(void *addr)
    72.8 +void iommu_flush_cache_page(void *addr, unsigned long npages)
    72.9  {
   72.10 -    __iommu_flush_cache(addr, PAGE_SIZE_4K);
   72.11 +    __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
   72.12  }
   72.13  
   72.14  int nr_iommus;
   72.15 @@ -146,7 +146,7 @@ static u64 bus_to_context_maddr(struct i
   72.16      root = &root_entries[bus];
   72.17      if ( !root_present(*root) )
   72.18      {
   72.19 -        maddr = alloc_pgtable_maddr(NULL);
   72.20 +        maddr = alloc_pgtable_maddr(NULL, 1);
   72.21          if ( maddr == 0 )
   72.22          {
   72.23              unmap_vtd_domain_page(root_entries);
   72.24 @@ -174,7 +174,7 @@ static u64 addr_to_dma_page_maddr(struct
   72.25      addr &= (((u64)1) << addr_width) - 1;
   72.26      ASSERT(spin_is_locked(&hd->mapping_lock));
   72.27      if ( hd->pgd_maddr == 0 )
   72.28 -        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) )
   72.29 +        if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain, 1)) == 0) )
   72.30              goto out;
   72.31  
   72.32      parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
   72.33 @@ -187,7 +187,7 @@ static u64 addr_to_dma_page_maddr(struct
   72.34          {
   72.35              if ( !alloc )
   72.36                  break;
   72.37 -            maddr = alloc_pgtable_maddr(domain);
   72.38 +            maddr = alloc_pgtable_maddr(domain, 1);
   72.39              if ( !maddr )
   72.40                  break;
   72.41              dma_set_pte_addr(*pte, maddr);
   72.42 @@ -577,7 +577,7 @@ static int iommu_set_root_entry(struct i
   72.43      spin_lock(&iommu->lock);
   72.44  
   72.45      if ( iommu->root_maddr == 0 )
   72.46 -        iommu->root_maddr = alloc_pgtable_maddr(NULL);
   72.47 +        iommu->root_maddr = alloc_pgtable_maddr(NULL, 1);
   72.48      if ( iommu->root_maddr == 0 )
   72.49      {
   72.50          spin_unlock(&iommu->lock);
   72.51 @@ -874,23 +874,27 @@ int iommu_set_interrupt(struct iommu *io
   72.52  {
   72.53      int vector, ret;
   72.54  
   72.55 -    vector = assign_irq_vector(AUTO_ASSIGN);
   72.56 -    vector_to_iommu[vector] = iommu;
   72.57 -
   72.58 -    /* VT-d fault is a MSI, make irq == vector */
   72.59 -    irq_vector[vector] = vector;
   72.60 -    vector_irq[vector] = vector;
   72.61 -
   72.62 -    if ( !vector )
   72.63 +    vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
   72.64 +    if ( vector <= 0 )
   72.65      {
   72.66          gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
   72.67          return -EINVAL;
   72.68      }
   72.69  
   72.70      irq_desc[vector].handler = &dma_msi_type;
   72.71 -    ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
   72.72 +    ret = request_irq_vector(vector, iommu_page_fault, 0, "dmar", iommu);
   72.73      if ( ret )
   72.74 +    {
   72.75 +        irq_desc[vector].handler = &no_irq_type;
   72.76 +        free_irq_vector(vector);
   72.77          gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
   72.78 +        return ret;
   72.79 +    }
   72.80 +
   72.81 +    /* Make sure that vector is never re-used. */
   72.82 +    vector_irq[vector] = NEVER_ASSIGN_IRQ;
   72.83 +    vector_to_iommu[vector] = iommu;
   72.84 +
   72.85      return vector;
   72.86  }
   72.87  
   72.88 @@ -966,7 +970,7 @@ static void iommu_free(struct acpi_drhd_
   72.89          iounmap(iommu->reg);
   72.90  
   72.91      free_intel_iommu(iommu->intel);
   72.92 -    free_irq(iommu->vector);
   72.93 +    release_irq_vector(iommu->vector);
   72.94      xfree(iommu);
   72.95  
   72.96      drhd->iommu = NULL;
   72.97 @@ -1677,6 +1681,11 @@ static int init_vtd_hw(void)
   72.98          }
   72.99  
  72.100          vector = iommu_set_interrupt(iommu);
  72.101 +        if ( vector < 0 )
  72.102 +        {
  72.103 +            gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
  72.104 +            return vector;
  72.105 +        }
  72.106          dma_msi_data_init(iommu, vector);
  72.107          dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
  72.108          iommu->vector = vector;
  72.109 @@ -1756,6 +1765,23 @@ int intel_vtd_setup(void)
  72.110      if ( init_vtd_hw() )
  72.111          goto error;
  72.112  
   72.113 +    /* Given that all devices within a guest use the same IO page table,
  72.114 +     * enable snoop control only if all VT-d engines support it.
  72.115 +     */
  72.116 +
  72.117 +    if ( iommu_snoop )
  72.118 +    {
  72.119 +        for_each_drhd_unit ( drhd )
  72.120 +        {
  72.121 +            iommu = drhd->iommu;
  72.122 +            if ( !ecap_snp_ctl(iommu->ecap) ) {
  72.123 +                iommu_snoop = 0;
  72.124 +                break;
  72.125 +            }
  72.126 +        }
  72.127 +    }
  72.128 +    
  72.129 +    printk("Intel VT-d snoop control %sabled\n", iommu_snoop ? "en" : "dis");
  72.130      register_keyhandler('V', dump_iommu_info, "dump iommu info");
  72.131  
  72.132      return 0;
  72.133 @@ -1764,6 +1790,7 @@ int intel_vtd_setup(void)
  72.134      for_each_drhd_unit ( drhd )
  72.135          iommu_free(drhd);
  72.136      vtd_enabled = 0;
  72.137 +    iommu_snoop = 0;
  72.138      return -ENOMEM;
  72.139  }
  72.140  
    73.1 --- a/xen/drivers/passthrough/vtd/iommu.h	Fri Feb 13 10:56:01 2009 +0900
    73.2 +++ b/xen/drivers/passthrough/vtd/iommu.h	Fri Feb 13 11:22:28 2009 +0900
    73.3 @@ -397,7 +397,9 @@ struct poll_info {
    73.4      u32 udata;
    73.5  };
    73.6  
    73.7 -#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry))
    73.8 +#define MAX_QINVAL_PAGES 8
    73.9 +#define NUM_QINVAL_PAGES 1
   73.10 +#define QINVAL_ENTRY_NR (PAGE_SIZE_4K*NUM_QINVAL_PAGES/sizeof(struct qinval_entry))
   73.11  #define qinval_present(v) ((v).lo & 1)
   73.12  #define qinval_fault_disable(v) (((v).lo >> 1) & 1)
   73.13  
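
As a quick sanity check of the new sizing macros (assuming the 128-bit, i.e. 16-byte, queued-invalidation descriptor layout of struct qinval_entry), the entry count works out as before for a single page and scales linearly up to the maximum:

    /* Illustrative arithmetic, not part of the patch:
     * QINVAL_ENTRY_NR = PAGE_SIZE_4K * NUM_QINVAL_PAGES / sizeof(struct qinval_entry)
     *                 = 4096 * 1 / 16
     *                 = 256 entries with NUM_QINVAL_PAGES == 1,
     * rising to 2048 entries if NUM_QINVAL_PAGES were raised to
     * MAX_QINVAL_PAGES (8).
     */
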
    74.1 --- a/xen/drivers/passthrough/vtd/qinval.c	Fri Feb 13 10:56:01 2009 +0900
    74.2 +++ b/xen/drivers/passthrough/vtd/qinval.c	Fri Feb 13 11:22:28 2009 +0900
    74.3 @@ -427,7 +427,7 @@ int qinval_setup(struct iommu *iommu)
    74.4  
    74.5      if ( qi_ctrl->qinval_maddr == 0 )
    74.6      {
    74.7 -        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL);
    74.8 +        qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL, NUM_QINVAL_PAGES);
    74.9          if ( qi_ctrl->qinval_maddr == 0 )
   74.10          {
   74.11              dprintk(XENLOG_WARNING VTDPREFIX,
   74.12 @@ -445,6 +445,8 @@ int qinval_setup(struct iommu *iommu)
   74.13       * registers are automatically reset to 0 with write
   74.14       * to IQA register.
   74.15       */
   74.16 +    if ( NUM_QINVAL_PAGES <= MAX_QINVAL_PAGES )
   74.17 +        qi_ctrl->qinval_maddr |= NUM_QINVAL_PAGES - 1;
   74.18      dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
   74.19  
   74.20      /* enable queued invalidation hardware */
    75.1 --- a/xen/drivers/passthrough/vtd/vtd.h	Fri Feb 13 10:56:01 2009 +0900
    75.2 +++ b/xen/drivers/passthrough/vtd/vtd.h	Fri Feb 13 11:22:28 2009 +0900
    75.3 @@ -101,12 +101,12 @@ unsigned int get_cache_line_size(void);
    75.4  void cacheline_flush(char *);
    75.5  void flush_all_cache(void);
    75.6  void *map_to_nocache_virt(int nr_iommus, u64 maddr);
    75.7 -u64 alloc_pgtable_maddr(struct domain *d);
    75.8 +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages);
    75.9  void free_pgtable_maddr(u64 maddr);
   75.10  void *map_vtd_domain_page(u64 maddr);
   75.11  void unmap_vtd_domain_page(void *va);
   75.12  
   75.13  void iommu_flush_cache_entry(void *addr);
   75.14 -void iommu_flush_cache_page(void *addr);
   75.15 +void iommu_flush_cache_page(void *addr, unsigned long npages);
   75.16  
   75.17  #endif // _VTD_H_
    76.1 --- a/xen/drivers/passthrough/vtd/x86/vtd.c	Fri Feb 13 10:56:01 2009 +0900
    76.2 +++ b/xen/drivers/passthrough/vtd/x86/vtd.c	Fri Feb 13 11:22:28 2009 +0900
    76.3 @@ -38,20 +38,21 @@ void unmap_vtd_domain_page(void *va)
    76.4  }
    76.5  
    76.6  /* Allocate page table, return its machine address */
    76.7 -u64 alloc_pgtable_maddr(struct domain *d)
    76.8 +u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
    76.9  {
   76.10      struct page_info *pg;
   76.11      u64 *vaddr;
   76.12      unsigned long mfn;
   76.13  
   76.14 -    pg = alloc_domheap_page(NULL, d ? MEMF_node(domain_to_node(d)) : 0);
   76.15 +    pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
   76.16 +                             d ? MEMF_node(domain_to_node(d)) : 0);
   76.17      if ( !pg )
   76.18          return 0;
   76.19      mfn = page_to_mfn(pg);
   76.20      vaddr = map_domain_page(mfn);
   76.21 -    memset(vaddr, 0, PAGE_SIZE);
   76.22 +    memset(vaddr, 0, PAGE_SIZE * npages);
   76.23  
   76.24 -    iommu_flush_cache_page(vaddr);
   76.25 +    iommu_flush_cache_page(vaddr, npages);
   76.26      unmap_domain_page(vaddr);
   76.27  
   76.28      return (u64)mfn << PAGE_SHIFT_4K;
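
Since alloc_domheap_pages() allocates in power-of-two orders, the new npages argument is rounded up through get_order_from_pages(). A minimal sketch of the rounding behaviour these callers assume (the helper name below is illustrative, not the real function):

    /* Sketch: smallest order such that (1UL << order) covers nr_pages. */
    static unsigned int order_from_pages(unsigned long nr_pages)
    {
        unsigned int order = 0;

        while ( (1UL << order) < nr_pages )
            order++;
        return order;
    }
    /* order_from_pages(1) == 0, order_from_pages(3) == 2, order_from_pages(8) == 3;
     * the allocation may therefore exceed npages when npages is not a power of
     * two, while only the first npages are zeroed and flushed here. */
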
    77.1 --- a/xen/include/acpi/cpufreq/cpufreq.h	Fri Feb 13 10:56:01 2009 +0900
    77.2 +++ b/xen/include/acpi/cpufreq/cpufreq.h	Fri Feb 13 11:22:28 2009 +0900
    77.3 @@ -87,6 +87,7 @@ struct cpufreq_governor {
    77.4      char    name[CPUFREQ_NAME_LEN];
    77.5      int     (*governor)(struct cpufreq_policy *policy,
    77.6                          unsigned int event);
    77.7 +    void    (*handle_option)(const char *name, const char *value);
    77.8      struct list_head governor_list;
    77.9  };
   77.10  
    78.1 --- a/xen/include/asm-ia64/hardirq.h	Fri Feb 13 10:56:01 2009 +0900
    78.2 +++ b/xen/include/asm-ia64/hardirq.h	Fri Feb 13 11:22:28 2009 +0900
    78.3 @@ -4,6 +4,7 @@
    78.4  #define __ARCH_IRQ_STAT	1
    78.5  #define HARDIRQ_BITS	14
    78.6  #include <linux/hardirq.h>
    78.7 +#include <xen/sched.h>
    78.8  
    78.9  #define local_softirq_pending()		(local_cpu_data->softirq_pending)
   78.10  
    79.1 --- a/xen/include/asm-ia64/hvm/iommu.h	Fri Feb 13 10:56:01 2009 +0900
    79.2 +++ b/xen/include/asm-ia64/hvm/iommu.h	Fri Feb 13 11:22:28 2009 +0900
    79.3 @@ -28,7 +28,6 @@ static inline void pci_cleanup_msi(struc
    79.4      /* TODO */
    79.5  }
    79.6  
    79.7 -#define AUTO_ASSIGN         -1
    79.8  
    79.9  extern int assign_irq_vector (int irq);
   79.10  
    80.1 --- a/xen/include/asm-ia64/hvm/irq.h	Fri Feb 13 10:56:01 2009 +0900
    80.2 +++ b/xen/include/asm-ia64/hvm/irq.h	Fri Feb 13 11:22:28 2009 +0900
    80.3 @@ -90,14 +90,18 @@ struct hvm_irq {
    80.4  #define hvm_pci_intx_link(dev, intx) \
    80.5      (((dev) + (intx)) & 3)
    80.6  
    80.7 -/* Extract the IA-64 vector that corresponds to IRQ.  */
    80.8 -static inline int
    80.9 -irq_to_vector (int irq)
   80.10 +#define IA64_INVALID_VECTOR	((unsigned int)((int)-1))
   80.11 +static inline unsigned int irq_to_vector(int irq)
   80.12  {
   80.13 -    return irq;
   80.14 +    int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
   80.15 +    unsigned int vector;
   80.16 +
   80.17 +    if ( acpi_gsi_to_irq(irq, &vector) < 0)
   80.18 +        return 0;
   80.19 +
   80.20 +    return vector;
   80.21  }
   80.22  
   80.23 -
   80.24  extern u8 irq_vector[NR_IRQS];
   80.25  extern int vector_irq[NR_VECTORS];
   80.26  
    81.1 --- a/xen/include/asm-ia64/linux-xen/asm/smp.h	Fri Feb 13 10:56:01 2009 +0900
    81.2 +++ b/xen/include/asm-ia64/linux-xen/asm/smp.h	Fri Feb 13 11:22:28 2009 +0900
    81.3 @@ -47,7 +47,6 @@ ia64_get_lid (void)
    81.4  #define SMP_IPI_REDIRECTION	(1 << 1)
    81.5  
    81.6  #ifdef XEN
    81.7 -#include <xen/sched.h>
    81.8  #define raw_smp_processor_id() (current->processor)
    81.9  #else
   81.10  #define raw_smp_processor_id() (current_thread_info()->cpu)
    82.1 --- a/xen/include/asm-ia64/linux-xen/linux/interrupt.h	Fri Feb 13 10:56:01 2009 +0900
    82.2 +++ b/xen/include/asm-ia64/linux-xen/linux/interrupt.h	Fri Feb 13 11:22:28 2009 +0900
    82.3 @@ -52,10 +52,10 @@ struct irqaction {
    82.4  };
    82.5  
    82.6  extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
    82.7 -extern int request_irq(unsigned int,
    82.8 +extern int request_irq_vector(unsigned int,
    82.9  		       irqreturn_t (*handler)(int, void *, struct pt_regs *),
   82.10  		       unsigned long, const char *, void *);
   82.11 -extern void free_irq(unsigned int, void *);
   82.12 +extern void release_irq_vector(unsigned int, void *);
   82.13  #endif
   82.14  
   82.15  
    83.1 --- a/xen/include/asm-ia64/linux/asm/hw_irq.h	Fri Feb 13 10:56:01 2009 +0900
    83.2 +++ b/xen/include/asm-ia64/linux/asm/hw_irq.h	Fri Feb 13 11:22:28 2009 +0900
    83.3 @@ -34,7 +34,7 @@ typedef u8 ia64_vector;
    83.4  #define IA64_MAX_VECTORED_IRQ		255
    83.5  #define IA64_NUM_VECTORS		256
    83.6  
    83.7 -#define AUTO_ASSIGN			-1
    83.8 +#define AUTO_ASSIGN_IRQ			(-1)
    83.9  
   83.10  #define IA64_SPURIOUS_INT_VECTOR	0x0f
   83.11  
    84.1 --- a/xen/include/asm-ia64/mm.h	Fri Feb 13 10:56:01 2009 +0900
    84.2 +++ b/xen/include/asm-ia64/mm.h	Fri Feb 13 11:22:28 2009 +0900
    84.3 @@ -13,7 +13,6 @@
    84.4  #include <xen/list.h>
    84.5  #include <xen/spinlock.h>
    84.6  #include <xen/perfc.h>
    84.7 -#include <xen/sched.h>
    84.8  
    84.9  #include <asm/processor.h>
   84.10  #include <asm/atomic.h>
   84.11 @@ -63,21 +62,14 @@ struct page_info
   84.12          struct {
   84.13              /* Order-size of the free chunk this page is the head of. */
   84.14              u32 order;
   84.15 -            /* Mask of possibly-tainted TLBs. */
   84.16 -            cpumask_t cpumask;
   84.17 +            /* Do TLBs need flushing for safety before next page use? */
   84.18 +            bool_t need_tlbflush;
   84.19          } free;
   84.20  
   84.21      } u;
   84.22  
   84.23      /* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
   84.24      u32 tlbflush_timestamp;
   84.25 -
   84.26 -#if 0
   84.27 -// following added for Linux compiling
   84.28 -    page_flags_t flags;
   84.29 -    atomic_t _count;
   84.30 -    struct list_head lru;	// is this the same as above "list"?
   84.31 -#endif
   84.32  };
   84.33  
   84.34  #define set_page_count(p,v) 	atomic_set(&(p)->_count, v - 1)
    85.1 --- a/xen/include/asm-ia64/tlbflush.h	Fri Feb 13 10:56:01 2009 +0900
    85.2 +++ b/xen/include/asm-ia64/tlbflush.h	Fri Feb 13 11:22:28 2009 +0900
    85.3 @@ -1,7 +1,8 @@
    85.4  #ifndef __FLUSHTLB_H__
    85.5  #define __FLUSHTLB_H__
    85.6  
    85.7 -#include <xen/sched.h>
    85.8 +struct vcpu;
    85.9 +struct domain;
   85.10  
   85.11  /* TLB flushes can be either local (current vcpu only) or domain wide (on
   85.12     all vcpus).
    86.1 --- a/xen/include/asm-x86/domain.h	Fri Feb 13 10:56:01 2009 +0900
    86.2 +++ b/xen/include/asm-x86/domain.h	Fri Feb 13 11:22:28 2009 +0900
    86.3 @@ -79,11 +79,11 @@ struct shadow_domain {
    86.4      int               locker; /* processor which holds the lock */
    86.5      const char       *locker_function; /* Func that took it */
    86.6      unsigned int      opt_flags;    /* runtime tunable optimizations on/off */
    86.7 -    struct list_head  pinned_shadows;
    86.8 +    struct page_list_head pinned_shadows;
    86.9  
   86.10      /* Memory allocation */
   86.11 -    struct list_head  freelists[SHADOW_MAX_ORDER + 1];
   86.12 -    struct list_head  p2m_freelist;
   86.13 +    struct page_list_head freelists[SHADOW_MAX_ORDER + 1];
   86.14 +    struct page_list_head p2m_freelist;
   86.15      unsigned int      total_pages;  /* number of pages allocated */
   86.16      unsigned int      free_pages;   /* number of pages on freelists */
   86.17      unsigned int      p2m_pages;    /* number of pages allocates to p2m */
   86.18 @@ -92,7 +92,7 @@ struct shadow_domain {
   86.19      pagetable_t unpaged_pagetable;
   86.20  
   86.21      /* Shadow hashtable */
   86.22 -    struct shadow_page_info **hash_table;
   86.23 +    struct page_info **hash_table;
   86.24      int hash_walking;  /* Some function is walking the hash table */
   86.25  
   86.26      /* Fast MMIO path heuristic */
   86.27 @@ -143,7 +143,7 @@ struct hap_domain {
   86.28      int               locker;
   86.29      const char       *locker_function;
   86.30  
   86.31 -    struct list_head  freelist;
   86.32 +    struct page_list_head freelist;
   86.33      unsigned int      total_pages;  /* number of pages allocated */
   86.34      unsigned int      free_pages;   /* number of pages on freelists */
   86.35      unsigned int      p2m_pages;    /* number of pages allocates to p2m */
   86.36 @@ -265,7 +265,7 @@ struct arch_domain
   86.37          RELMEM_l2,
   86.38          RELMEM_done,
   86.39      } relmem;
   86.40 -    struct list_head relmem_list;
   86.41 +    struct page_list_head relmem_list;
   86.42  
   86.43      cpuid_input_t cpuids[MAX_CPUID_INPUT];
   86.44  } __cacheline_aligned;
   86.45 @@ -352,6 +352,7 @@ struct arch_vcpu
   86.46  
   86.47      /* Current LDT details. */
   86.48      unsigned long shadow_ldt_mapcnt;
   86.49 +    spinlock_t shadow_ldt_lock;
   86.50  
   86.51      struct paging_vcpu paging;
   86.52  
    87.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Fri Feb 13 10:56:01 2009 +0900
    87.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Fri Feb 13 11:22:28 2009 +0900
    87.3 @@ -48,7 +48,11 @@ typedef union {
    87.4  #define EPTE_SUPER_PAGE_MASK    0x80
    87.5  #define EPTE_MFN_MASK           0x1fffffffffff000
    87.6  #define EPTE_AVAIL1_MASK        0xF00
    87.7 -#define EPTE_EMT_MASK           0x78
    87.8 +#define EPTE_EMT_MASK           0x38
    87.9 +#define EPTE_IGMT_MASK          0x40
   87.10 +#define EPTE_AVAIL1_SHIFT       8
   87.11 +#define EPTE_EMT_SHIFT          3
   87.12 +#define EPTE_IGMT_SHIFT         6
   87.13  
   87.14  void vmx_asm_vmexit_handler(struct cpu_user_regs);
   87.15  void vmx_asm_do_vmentry(void);
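
The old 0x78 mask is split into separate EMT and IGMT fields, and the new shift constants pair with the masks in the obvious way. Minimal illustrative accessors (these helpers are not part of the patch):

    /* Illustrative field extraction using the masks/shifts above. */
    static inline unsigned int epte_get_emt(u64 epte)
    {
        return (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT;       /* bits 5:3  */
    }
    static inline unsigned int epte_get_igmt(u64 epte)
    {
        return (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT;     /* bit 6     */
    }
    static inline unsigned int epte_get_avail1(u64 epte)
    {
        return (epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT; /* bits 11:8 */
    }
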
    88.1 --- a/xen/include/asm-x86/iocap.h	Fri Feb 13 10:56:01 2009 +0900
    88.2 +++ b/xen/include/asm-x86/iocap.h	Fri Feb 13 11:22:28 2009 +0900
    88.3 @@ -14,7 +14,8 @@
    88.4  #define ioports_access_permitted(d, s, e)               \
    88.5      rangeset_contains_range((d)->arch.ioport_caps, s, e)
    88.6  
    88.7 -#define cache_flush_permitted(d)                       \
    88.8 -    (!rangeset_is_empty((d)->iomem_caps))
    88.9 +#define cache_flush_permitted(d)                        \
   88.10 +    (!rangeset_is_empty((d)->iomem_caps) ||             \
   88.11 +     !rangeset_is_empty((d)->arch.ioport_caps))
   88.12  
   88.13  #endif /* __X86_IOCAP_H__ */
    89.1 --- a/xen/include/asm-x86/irq.h	Fri Feb 13 10:56:01 2009 +0900
    89.2 +++ b/xen/include/asm-x86/irq.h	Fri Feb 13 11:22:28 2009 +0900
    89.3 @@ -19,9 +19,6 @@
    89.4  
    89.5  extern int vector_irq[NR_VECTORS];
    89.6  extern u8 irq_vector[NR_IRQS];
    89.7 -#define AUTO_ASSIGN    -1
    89.8 -#define NEVER_ASSIGN   -2
    89.9 -#define FREE_TO_ASSIGN -3
   89.10  
   89.11  #define platform_legacy_irq(irq)	((irq) < 16)
   89.12  
    90.1 --- a/xen/include/asm-x86/mm.h	Fri Feb 13 10:56:01 2009 +0900
    90.2 +++ b/xen/include/asm-x86/mm.h	Fri Feb 13 11:22:28 2009 +0900
    90.3 @@ -12,15 +12,40 @@
    90.4   * Per-page-frame information.
    90.5   * 
    90.6   * Every architecture must ensure the following:
    90.7 - *  1. 'struct page_info' contains a 'struct list_head list'.
    90.8 + *  1. 'struct page_info' contains a 'struct page_list_entry list'.
    90.9   *  2. Provide a PFN_ORDER() macro for accessing the order of a free page.
   90.10   */
   90.11 -#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
   90.12 +#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
   90.13 +
   90.14 +/*
    90.15 + * This definition is solely for use in struct page_info (and
   90.16 + * struct page_list_head), intended to allow easy adjustment once x86-64
   90.17 + * wants to support more than 16TB.
   90.18 + * 'unsigned long' should be used for MFNs everywhere else.
   90.19 + */
   90.20 +#define __mfn_t unsigned int
   90.21 +#define PRpgmfn "08x"
   90.22 +
   90.23 +#undef page_list_entry
   90.24 +struct page_list_entry
   90.25 +{
   90.26 +    __mfn_t next, prev;
   90.27 +};
   90.28  
   90.29  struct page_info
   90.30  {
   90.31 -    /* Each frame can be threaded onto a doubly-linked list. */
   90.32 -    struct list_head list;
   90.33 +    union {
   90.34 +        /* Each frame can be threaded onto a doubly-linked list.
   90.35 +         *
   90.36 +         * For unused shadow pages, a list of pages of this order; for
   90.37 +         * pinnable shadows, if pinned, a list of other pinned shadows
   90.38 +         * (see sh_type_is_pinnable() below for the definition of
   90.39 +         * "pinnable" shadow types).
   90.40 +         */
   90.41 +        struct page_list_entry list;
   90.42 +        /* For non-pinnable shadows, a higher entry that points at us. */
   90.43 +        paddr_t up;
   90.44 +    };
   90.45  
   90.46      /* Reference count and various PGC_xxx flags and fields. */
   90.47      unsigned long count_info;
   90.48 @@ -30,23 +55,48 @@ struct page_info
   90.49  
   90.50          /* Page is in use: ((count_info & PGC_count_mask) != 0). */
   90.51          struct {
   90.52 -            /* Owner of this page (NULL if page is anonymous). */
   90.53 -            u32 _domain; /* pickled format */
   90.54              /* Type reference count and various PGT_xxx flags and fields. */
   90.55              unsigned long type_info;
   90.56          } inuse;
   90.57  
   90.58 +        /* Page is in use as a shadow: count_info == 0. */
   90.59 +        struct {
   90.60 +            unsigned long type:5;   /* What kind of shadow is this? */
   90.61 +            unsigned long pinned:1; /* Is the shadow pinned? */
   90.62 +            unsigned long count:26; /* Reference count */
   90.63 +        } sh;
   90.64 +
   90.65          /* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
   90.66          struct {
   90.67 -            /* Order-size of the free chunk this page is the head of. */
   90.68 -            u32 order;
   90.69 -            /* Mask of possibly-tainted TLBs. */
   90.70 -            cpumask_t cpumask;
   90.71 +            /* Do TLBs need flushing for safety before next page use? */
   90.72 +            bool_t need_tlbflush;
   90.73          } free;
   90.74  
   90.75      } u;
   90.76  
   90.77      union {
   90.78 +
   90.79 +        /* Page is in use, but not as a shadow. */
   90.80 +        struct {
   90.81 +            /* Owner of this page (NULL if page is anonymous). */
   90.82 +            u32 _domain; /* pickled format */
   90.83 +        } inuse;
   90.84 +
   90.85 +        /* Page is in use as a shadow. */
   90.86 +        struct {
   90.87 +            /* GMFN of guest page we're a shadow of. */
   90.88 +            __mfn_t back;
   90.89 +        } sh;
   90.90 +
   90.91 +        /* Page is on a free list (including shadow code free lists). */
   90.92 +        struct {
   90.93 +            /* Order-size of the free chunk this page is the head of. */
   90.94 +            unsigned int order;
   90.95 +        } free;
   90.96 +
   90.97 +    } v;
   90.98 +
   90.99 +    union {
  90.100          /*
  90.101           * Timestamp from 'TLB clock', used to avoid extra safety flushes.
  90.102           * Only valid for: a) free pages, and b) pages with zero type count
  90.103 @@ -95,9 +145,14 @@ struct page_info
  90.104           * tracked for TLB-flush avoidance when a guest runs in shadow mode.
  90.105           */
  90.106          u32 shadow_flags;
  90.107 +
  90.108 +        /* When in use as a shadow, next shadow in this hash chain. */
  90.109 +        __mfn_t next_shadow;
  90.110      };
  90.111  };
  90.112  
  90.113 +#undef __mfn_t
  90.114 +
  90.115  #define PG_shift(idx)   (BITS_PER_LONG - (idx))
  90.116  #define PG_mask(x, idx) (x ## UL << PG_shift(idx))
  90.117  
  90.118 @@ -155,7 +210,8 @@ struct page_info
  90.119  })
  90.120  #else
  90.121  #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
  90.122 -#define is_xen_heap_mfn(mfn) is_xen_heap_page(&frame_table[mfn])
  90.123 +#define is_xen_heap_mfn(mfn) \
  90.124 +    (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
  90.125  #endif
  90.126  
  90.127  #if defined(__i386__)
  90.128 @@ -174,10 +230,10 @@ struct page_info
  90.129  #define SHADOW_OOS_FIXUPS 2
  90.130  
  90.131  #define page_get_owner(_p)                                              \
  90.132 -    ((struct domain *)((_p)->u.inuse._domain ?                          \
  90.133 -                       mfn_to_virt((_p)->u.inuse._domain) : NULL))
  90.134 +    ((struct domain *)((_p)->v.inuse._domain ?                          \
  90.135 +                       mfn_to_virt((_p)->v.inuse._domain) : NULL))
  90.136  #define page_set_owner(_p,_d)                                           \
  90.137 -    ((_p)->u.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
  90.138 +    ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
  90.139  
  90.140  #define maddr_get_owner(ma)   (page_get_owner(maddr_to_page((ma))))
  90.141  #define vaddr_get_owner(va)   (page_get_owner(virt_to_page((va))))
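
Which members of the two unions are valid depends on the page state, as the comments above spell out: a guest-owned page uses u.inuse.type_info and v.inuse._domain, a shadow page uses u.sh plus v.sh.back, and a free page uses u.free.need_tlbflush plus v.free.order (reached via PFN_ORDER()). A tiny illustrative helper for the free case:

    /* Illustrative only: pages covered by the free chunk headed by pg. */
    static unsigned long free_chunk_pages(const struct page_info *pg)
    {
        return 1UL << PFN_ORDER(pg);   /* PFN_ORDER() now reads v.free.order */
    }
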
    91.1 --- a/xen/include/asm-x86/p2m.h	Fri Feb 13 10:56:01 2009 +0900
    91.2 +++ b/xen/include/asm-x86/p2m.h	Fri Feb 13 11:22:28 2009 +0900
    91.3 @@ -110,7 +110,7 @@ struct p2m_domain {
    91.4      const char        *locker_function; /* Func that took it */
    91.5  
    91.6      /* Pages used to construct the p2m */
    91.7 -    struct list_head   pages;
    91.8 +    struct page_list_head pages;
    91.9  
   91.10      /* Functions to call to get or free pages for the p2m */
   91.11      struct page_info * (*alloc_page  )(struct domain *d);
   91.12 @@ -148,7 +148,7 @@ struct p2m_domain {
   91.13       *   protect moving stuff from the PoD cache to the domain page list.
   91.14       */
   91.15      struct {
   91.16 -        struct list_head super,        /* List of superpages                */
   91.17 +        struct page_list_head super,   /* List of superpages                */
   91.18                           single;       /* Non-super lists                   */
   91.19          int              count,        /* # of pages in cache lists         */
   91.20                           entry_count;  /* # of pages in p2m marked pod      */
    92.1 --- a/xen/include/asm-x86/page.h	Fri Feb 13 10:56:01 2009 +0900
    92.2 +++ b/xen/include/asm-x86/page.h	Fri Feb 13 11:22:28 2009 +0900
    92.3 @@ -220,31 +220,47 @@ void copy_page_sse2(void *, const void *
    92.4                               copy_page_sse2(_t, _f) :                   \
    92.5                               (void)memcpy(_t, _f, PAGE_SIZE))
    92.6  
    92.7 -#define mfn_valid(mfn)      ((mfn) < max_page)
    92.8 +#define __mfn_valid(mfn)    ((mfn) < max_page)
    92.9  
   92.10  /* Convert between Xen-heap virtual addresses and machine addresses. */
   92.11  #define __pa(x)             (virt_to_maddr(x))
   92.12  #define __va(x)             (maddr_to_virt(x))
   92.13  
   92.14  /* Convert between Xen-heap virtual addresses and machine frame numbers. */
   92.15 -#define virt_to_mfn(va)     (virt_to_maddr(va) >> PAGE_SHIFT)
   92.16 -#define mfn_to_virt(mfn)    (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
   92.17 +#define __virt_to_mfn(va)   (virt_to_maddr(va) >> PAGE_SHIFT)
   92.18 +#define __mfn_to_virt(mfn)  (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
   92.19  
   92.20  /* Convert between machine frame numbers and page-info structures. */
   92.21 -#define mfn_to_page(mfn)    (frame_table + (mfn))
   92.22 -#define page_to_mfn(pg)     ((unsigned long)((pg) - frame_table))
   92.23 +#define __mfn_to_page(mfn)  (frame_table + (mfn))
   92.24 +#define __page_to_mfn(pg)   ((unsigned long)((pg) - frame_table))
   92.25  
   92.26  /* Convert between machine addresses and page-info structures. */
   92.27 -#define maddr_to_page(ma)   (frame_table + ((ma) >> PAGE_SHIFT))
   92.28 -#define page_to_maddr(pg)   ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
   92.29 +#define __maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT))
   92.30 +#define __page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
   92.31  
   92.32  /* Convert between Xen-heap virtual addresses and page-info structures. */
   92.33 -#define virt_to_page(va)    (frame_table + (__pa(va) >> PAGE_SHIFT))
   92.34 -#define page_to_virt(pg)    (maddr_to_virt(page_to_maddr(pg)))
   92.35 +#define __virt_to_page(va)  (frame_table + (__pa(va) >> PAGE_SHIFT))
   92.36 +#define __page_to_virt(pg)  (maddr_to_virt(page_to_maddr(pg)))
   92.37  
   92.38  /* Convert between frame number and address formats.  */
   92.39 -#define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
   92.40 -#define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
   92.41 +#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
   92.42 +#define __paddr_to_pfn(pa)  ((unsigned long)((pa) >> PAGE_SHIFT))
   92.43 +
   92.44 +/*
    92.45 + * We define non-underscored wrappers for the above conversion functions. These
    92.46 + * are overridden in various source files, while the underscored versions remain intact.
   92.47 + */
   92.48 +#define mfn_valid(mfn)      __mfn_valid(mfn)
   92.49 +#define virt_to_mfn(va)     __virt_to_mfn(va)
   92.50 +#define mfn_to_virt(mfn)    __mfn_to_virt(mfn)
   92.51 +#define mfn_to_page(mfn)    __mfn_to_page(mfn)
   92.52 +#define page_to_mfn(pg)     __page_to_mfn(pg)
   92.53 +#define maddr_to_page(ma)   __maddr_to_page(ma)
   92.54 +#define page_to_maddr(pg)   __page_to_maddr(pg)
   92.55 +#define virt_to_page(va)    __virt_to_page(va)
   92.56 +#define page_to_virt(pg)    __page_to_virt(pg)
   92.57 +#define pfn_to_paddr(pfn)   __pfn_to_paddr(pfn)
   92.58 +#define paddr_to_pfn(pa)    __paddr_to_pfn(pa)
   92.59  
   92.60  #endif /* !defined(__ASSEMBLY__) */
   92.61  
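The point of the double definitions is that an individual translation unit can redefine the short names for its own purposes while still reaching the canonical __-prefixed versions. A hypothetical per-file override (the added assertion is purely illustrative):

    /* Hypothetical override in one source file: add an extra validity check
     * without disturbing the canonical __mfn_to_page() used everywhere else. */
    #undef mfn_to_page
    #define mfn_to_page(mfn) ({                          \
        ASSERT(__mfn_valid(mfn));                        \
        __mfn_to_page(mfn);                              \
    })
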
    93.1 --- a/xen/include/asm-x86/perfc.h	Fri Feb 13 10:56:01 2009 +0900
    93.2 +++ b/xen/include/asm-x86/perfc.h	Fri Feb 13 11:22:28 2009 +0900
    93.3 @@ -1,6 +1,5 @@
    93.4  #ifndef __ASM_PERFC_H__
    93.5  #define __ASM_PERFC_H__
    93.6 -#include <asm/mm.h>
    93.7  
    93.8  static inline void arch_perfc_printall(void)
    93.9  {
    94.1 --- a/xen/include/asm-x86/processor.h	Fri Feb 13 10:56:01 2009 +0900
    94.2 +++ b/xen/include/asm-x86/processor.h	Fri Feb 13 11:22:28 2009 +0900
    94.3 @@ -188,6 +188,7 @@ extern struct cpuinfo_x86 cpu_data[];
    94.4  #define current_cpu_data boot_cpu_data
    94.5  #endif
    94.6  
    94.7 +extern u64 host_pat;
    94.8  extern int phys_proc_id[NR_CPUS];
    94.9  extern int cpu_core_id[NR_CPUS];
   94.10  
    95.1 --- a/xen/include/public/arch-ia64/hvm/save.h	Fri Feb 13 10:56:01 2009 +0900
    95.2 +++ b/xen/include/public/arch-ia64/hvm/save.h	Fri Feb 13 11:22:28 2009 +0900
    95.3 @@ -23,8 +23,8 @@
    95.4  #ifndef __XEN_PUBLIC_HVM_SAVE_IA64_H__
    95.5  #define __XEN_PUBLIC_HVM_SAVE_IA64_H__
    95.6  
    95.7 -#include <public/hvm/save.h>
    95.8 -#include <public/arch-ia64.h>
    95.9 +#include "../../hvm/save.h"
   95.10 +#include "../../arch-ia64.h"
   95.11  
   95.12  /* 
   95.13   * Save/restore header: general info about the save file. 
    96.1 --- a/xen/include/public/arch-x86/hvm/save.h	Fri Feb 13 10:56:01 2009 +0900
    96.2 +++ b/xen/include/public/arch-x86/hvm/save.h	Fri Feb 13 11:22:28 2009 +0900
    96.3 @@ -287,7 +287,7 @@ struct hvm_hw_pci_irqs {
    96.4       * Indexed by: device*4 + INTx#.
    96.5       */
    96.6      union {
    96.7 -        DECLARE_BITMAP(i, 32*4);
    96.8 +        unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */
    96.9          uint64_t pad[2];
   96.10      };
   96.11  };
   96.12 @@ -300,7 +300,7 @@ struct hvm_hw_isa_irqs {
   96.13       * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
   96.14       */
   96.15      union {
   96.16 -        DECLARE_BITMAP(i, 16);
   96.17 +        unsigned long i[1];  /* DECLARE_BITMAP(i, 16); */
   96.18          uint64_t pad[1];
   96.19      };
   96.20  };
    97.1 --- a/xen/include/public/arch-x86/xen-mca.h	Fri Feb 13 10:56:01 2009 +0900
    97.2 +++ b/xen/include/public/arch-x86/xen-mca.h	Fri Feb 13 11:22:28 2009 +0900
    97.3 @@ -56,7 +56,7 @@
    97.4  /* Hypercall */
    97.5  #define __HYPERVISOR_mca __HYPERVISOR_arch_0
    97.6  
    97.7 -#define XEN_MCA_INTERFACE_VERSION 0x03000001
    97.8 +#define XEN_MCA_INTERFACE_VERSION 0x03000002
    97.9  
   97.10  /* IN: Dom0 calls hypercall from MC event handler. */
   97.11  #define XEN_MC_CORRECTABLE  0x0
   97.12 @@ -118,7 +118,7 @@ struct mcinfo_global {
   97.13      uint16_t mc_domid;
   97.14      uint32_t mc_socketid; /* physical socket of the physical core */
   97.15      uint16_t mc_coreid; /* physical impacted core */
   97.16 -    uint8_t  mc_apicid;
   97.17 +    uint32_t mc_apicid;
   97.18      uint16_t mc_core_threadid; /* core thread of physical core */
   97.19      uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
   97.20      uint64_t mc_gstatus; /* global status */
   97.21 @@ -175,6 +175,41 @@ struct mc_info {
   97.22  };
   97.23  typedef struct mc_info mc_info_t;
   97.24  
   97.25 +#define __MC_MSR_ARRAYSIZE 8
   97.26 +#define __MC_NMSRS 1
   97.27 +#define MC_NCAPS	7	/* 7 CPU feature flag words */
   97.28 +#define MC_CAPS_STD_EDX	0	/* cpuid level 0x00000001 (%edx) */
   97.29 +#define MC_CAPS_AMD_EDX	1	/* cpuid level 0x80000001 (%edx) */
   97.30 +#define MC_CAPS_TM	2	/* cpuid level 0x80860001 (TransMeta) */
   97.31 +#define MC_CAPS_LINUX	3	/* Linux-defined */
   97.32 +#define MC_CAPS_STD_ECX	4	/* cpuid level 0x00000001 (%ecx) */
   97.33 +#define MC_CAPS_VIA	5	/* cpuid level 0xc0000001 */
   97.34 +#define MC_CAPS_AMD_ECX	6	/* cpuid level 0x80000001 (%ecx) */
   97.35 +
   97.36 +typedef struct mcinfo_logical_cpu {
   97.37 +    uint32_t mc_cpunr;          
   97.38 +    uint32_t mc_chipid; 
   97.39 +    uint16_t mc_coreid;
   97.40 +    uint16_t mc_threadid;
   97.41 +    uint32_t mc_apicid;
   97.42 +    uint32_t mc_clusterid;
   97.43 +    uint32_t mc_ncores;
   97.44 +    uint32_t mc_ncores_active;
   97.45 +    uint32_t mc_nthreads;
   97.46 +    int32_t mc_cpuid_level;
   97.47 +    uint32_t mc_family;
   97.48 +    uint32_t mc_vendor;
   97.49 +    uint32_t mc_model;
   97.50 +    uint32_t mc_step;
   97.51 +    char mc_vendorid[16];
   97.52 +    char mc_brandid[64];
   97.53 +    uint32_t mc_cpu_caps[MC_NCAPS];
   97.54 +    uint32_t mc_cache_size;
   97.55 +    uint32_t mc_cache_alignment;
   97.56 +    int32_t mc_nmsrvals;
   97.57 +    struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
   97.58 +} xen_mc_logical_cpu_t;
   97.59 +DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
   97.60  
   97.61  
   97.62  /* 
   97.63 @@ -272,6 +307,14 @@ struct xen_mc_notifydomain {
   97.64  typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
   97.65  DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
   97.66  
   97.67 +#define XEN_MC_physcpuinfo 3
   97.68 +struct xen_mc_physcpuinfo {
   97.69 +	/* IN/OUT */
   97.70 +	uint32_t ncpus;
   97.71 +	uint32_t pad0;
   97.72 +	/* OUT */
   97.73 +	XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
   97.74 +};
   97.75  
   97.76  struct xen_mc {
   97.77      uint32_t cmd;
   97.78 @@ -279,6 +322,7 @@ struct xen_mc {
   97.79      union {
   97.80          struct xen_mc_fetch        mc_fetch;
   97.81          struct xen_mc_notifydomain mc_notifydomain;
   97.82 +        struct xen_mc_physcpuinfo  mc_physcpuinfo;
   97.83          uint8_t pad[MCINFO_HYPERCALLSIZE];
   97.84      } u;
   97.85  };
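
A minimal sketch of how a privileged tool might fill in the new physical-CPU query; how the buffer is sized and how the hypercall is actually issued are caller-specific and omitted here:

    /* Sketch: prepare a XEN_MC_physcpuinfo request (issuing __HYPERVISOR_mca
     * is dom0-kernel/toolstack specific and not shown). */
    static void prepare_physcpuinfo(struct xen_mc *mc, uint32_t ncpus,
                                    XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) buf)
    {
        memset(mc, 0, sizeof(*mc));
        mc->cmd = XEN_MC_physcpuinfo;
        mc->interface_version = XEN_MCA_INTERFACE_VERSION;
        mc->u.mc_physcpuinfo.ncpus = ncpus; /* IN: capacity; OUT: CPUs reported */
        mc->u.mc_physcpuinfo.info  = buf;   /* OUT: one xen_mc_logical_cpu_t each */
    }
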
    98.1 --- a/xen/include/public/domctl.h	Fri Feb 13 10:56:01 2009 +0900
    98.2 +++ b/xen/include/public/domctl.h	Fri Feb 13 11:22:28 2009 +0900
    98.3 @@ -630,6 +630,17 @@ struct xen_domctl_debug_op {
    98.4  typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
    98.5  DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
    98.6  
    98.7 +/*
    98.8 + * Request a particular record from the HVM context
    98.9 + */
   98.10 +#define XEN_DOMCTL_gethvmcontext_partial   55
   98.11 +typedef struct xen_domctl_hvmcontext_partial {
   98.12 +    uint32_t type;                      /* IN: Type of record required */
   98.13 +    uint32_t instance;                  /* IN: Instance of that type */
   98.14 +    XEN_GUEST_HANDLE_64(uint8) buffer;  /* OUT: buffer to write record into */
   98.15 +} xen_domctl_hvmcontext_partial_t;
   98.16 +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
   98.17 +
   98.18  
   98.19  struct xen_domctl {
   98.20      uint32_t cmd;
   98.21 @@ -658,6 +669,7 @@ struct xen_domctl {
   98.22          struct xen_domctl_settimeoffset     settimeoffset;
   98.23          struct xen_domctl_real_mode_area    real_mode_area;
   98.24          struct xen_domctl_hvmcontext        hvmcontext;
   98.25 +        struct xen_domctl_hvmcontext_partial hvmcontext_partial;
   98.26          struct xen_domctl_address_size      address_size;
   98.27          struct xen_domctl_sendtrigger       sendtrigger;
   98.28          struct xen_domctl_get_device_group  get_device_group;
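
For comparison with the whole-image XEN_DOMCTL_gethvmcontext path, a minimal sketch of filling the new partial request; the record type would normally be one of the HVM save typecodes and the buffer must be sized by the caller (both are assumptions here):

    /* Sketch: request a single HVM save record via the new domctl. */
    static void prepare_gethvmcontext_partial(struct xen_domctl *domctl,
                                              domid_t domid, uint32_t type,
                                              uint32_t instance,
                                              XEN_GUEST_HANDLE_64(uint8) buf)
    {
        memset(domctl, 0, sizeof(*domctl));
        domctl->cmd               = XEN_DOMCTL_gethvmcontext_partial;
        domctl->interface_version = XEN_DOMCTL_INTERFACE_VERSION;
        domctl->domain            = domid;
        domctl->u.hvmcontext_partial.type     = type;     /* IN: record type     */
        domctl->u.hvmcontext_partial.instance = instance; /* IN: which instance  */
        domctl->u.hvmcontext_partial.buffer   = buf;      /* OUT: record data    */
    }
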
    99.1 --- a/xen/include/public/io/pciif.h	Fri Feb 13 10:56:01 2009 +0900
    99.2 +++ b/xen/include/public/io/pciif.h	Fri Feb 13 11:22:28 2009 +0900
    99.3 @@ -29,7 +29,7 @@
    99.4  
    99.5  /* xen_pci_sharedinfo flags */
    99.6  #define _XEN_PCIF_active     (0)
    99.7 -#define XEN_PCIF_active      (1<<_XEN_PCI_active)
    99.8 +#define XEN_PCIF_active      (1<<_XEN_PCIF_active)
    99.9  #define _XEN_PCIB_AERHANDLER (1)
   99.10  #define XEN_PCIB_AERHANDLER  (1<<_XEN_PCIB_AERHANDLER)
   99.11  #define _XEN_PCIB_active     (2)
   100.1 --- a/xen/include/xen/hvm/save.h	Fri Feb 13 10:56:01 2009 +0900
   100.2 +++ b/xen/include/xen/hvm/save.h	Fri Feb 13 11:22:28 2009 +0900
   100.3 @@ -152,6 +152,8 @@ static int __hvm_register_##_x##_save_an
   100.4  /* Entry points for saving and restoring HVM domain state */
   100.5  size_t hvm_save_size(struct domain *d);
   100.6  int hvm_save(struct domain *d, hvm_domain_context_t *h);
   100.7 +int hvm_save_one(struct domain *d,  uint16_t typecode, uint16_t instance, 
   100.8 +                 XEN_GUEST_HANDLE_64(uint8) handle);
   100.9  int hvm_load(struct domain *d, hvm_domain_context_t *h);
  100.10  
  100.11  /* Arch-specific definitions. */
   101.1 --- a/xen/include/xen/iocap.h	Fri Feb 13 10:56:01 2009 +0900
   101.2 +++ b/xen/include/xen/iocap.h	Fri Feb 13 11:22:28 2009 +0900
   101.3 @@ -29,6 +29,7 @@
   101.4      rangeset_contains_singleton((d)->irq_caps, i)
   101.5  
   101.6  #define multipage_allocation_permitted(d)               \
   101.7 -    (!rangeset_is_empty((d)->iomem_caps))
   101.8 +    (!rangeset_is_empty((d)->iomem_caps) ||             \
   101.9 +     !rangeset_is_empty((d)->arch.ioport_caps))
  101.10  
  101.11  #endif /* __XEN_IOCAP_H__ */
   102.1 --- a/xen/include/xen/irq.h	Fri Feb 13 10:56:01 2009 +0900
   102.2 +++ b/xen/include/xen/irq.h	Fri Feb 13 11:22:28 2009 +0900
   102.3 @@ -25,6 +25,11 @@ struct irqaction
   102.4  #define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
   102.5  #define IRQ_PER_CPU     256     /* IRQ is per CPU */
   102.6  
   102.7 +/* Special IRQ numbers. */
   102.8 +#define AUTO_ASSIGN_IRQ         (-1)
   102.9 +#define NEVER_ASSIGN_IRQ        (-2)
  102.10 +#define FREE_TO_ASSIGN_IRQ      (-3)
  102.11 +
  102.12  /*
  102.13   * Interrupt controller descriptor. This is all we need
  102.14   * to describe about the low-level hardware. 
  102.15 @@ -64,12 +69,21 @@ typedef struct {
  102.16  
  102.17  extern irq_desc_t irq_desc[NR_VECTORS];
  102.18  
  102.19 -extern int setup_irq(unsigned int, struct irqaction *);
  102.20 -extern void free_irq(unsigned int);
  102.21 -extern int request_irq(unsigned int irq,
  102.22 +extern int setup_irq_vector(unsigned int, struct irqaction *);
  102.23 +extern void release_irq_vector(unsigned int);
  102.24 +extern int request_irq_vector(unsigned int vector,
  102.25                 void (*handler)(int, void *, struct cpu_user_regs *),
  102.26                 unsigned long irqflags, const char * devname, void *dev_id);
  102.27  
  102.28 +#define setup_irq(irq, action) \
  102.29 +    setup_irq_vector(irq_to_vector(irq), action)
  102.30 +
  102.31 +#define release_irq(irq) \
  102.32 +    release_irq_vector(irq_to_vector(irq))
  102.33 +
  102.34 +#define request_irq(irq, handler, irqflags, devname, devid) \
   102.35 +    request_irq_vector(irq_to_vector(irq), handler, irqflags, devname, devid)
  102.36 +
  102.37  extern hw_irq_controller no_irq_type;
  102.38  extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs);
  102.39  
   103.1 --- a/xen/include/xen/mm.h	Fri Feb 13 10:56:01 2009 +0900
   103.2 +++ b/xen/include/xen/mm.h	Fri Feb 13 11:22:28 2009 +0900
   103.3 @@ -85,22 +85,192 @@ int assign_pages(
   103.4  #define MAX_ORDER 20 /* 2^20 contiguous pages */
   103.5  #endif
   103.6  
   103.7 +#define page_list_entry list_head
   103.8 +
   103.9 +#include <asm/mm.h>
  103.10 +
  103.11 +#ifndef page_list_entry
  103.12 +struct page_list_head
  103.13 +{
  103.14 +    struct page_info *next, *tail;
  103.15 +};
  103.16 +/* These must only have instances in struct page_info. */
  103.17 +# define page_list_entry
  103.18 +
  103.19 +# define PAGE_LIST_HEAD_INIT(name) { NULL, NULL }
  103.20 +# define PAGE_LIST_HEAD(name) \
  103.21 +    struct page_list_head name = PAGE_LIST_HEAD_INIT(name)
  103.22 +# define INIT_PAGE_LIST_HEAD(head) ((head)->tail = (head)->next = NULL)
  103.23 +# define INIT_PAGE_LIST_ENTRY(ent) ((ent)->prev = (ent)->next = ~0)
  103.24 +
  103.25 +static inline int
  103.26 +page_list_empty(const struct page_list_head *head)
  103.27 +{
  103.28 +    return !head->next;
  103.29 +}
  103.30 +static inline struct page_info *
  103.31 +page_list_first(const struct page_list_head *head)
  103.32 +{
  103.33 +    return head->next;
  103.34 +}
  103.35 +static inline struct page_info *
  103.36 +page_list_next(const struct page_info *page,
  103.37 +               const struct page_list_head *head)
  103.38 +{
  103.39 +    return page != head->tail ? mfn_to_page(page->list.next) : NULL;
  103.40 +}
  103.41 +static inline struct page_info *
  103.42 +page_list_prev(const struct page_info *page,
  103.43 +               const struct page_list_head *head)
  103.44 +{
  103.45 +    return page != head->next ? mfn_to_page(page->list.prev) : NULL;
  103.46 +}
  103.47 +static inline void
  103.48 +page_list_add(struct page_info *page, struct page_list_head *head)
  103.49 +{
  103.50 +    if ( head->next )
  103.51 +    {
  103.52 +        page->list.next = page_to_mfn(head->next);
  103.53 +        head->next->list.prev = page_to_mfn(page);
  103.54 +    }
  103.55 +    else
  103.56 +    {
  103.57 +        head->tail = page;
  103.58 +        page->list.next = ~0;
  103.59 +    }
  103.60 +    page->list.prev = ~0;
  103.61 +    head->next = page;
  103.62 +}
  103.63 +static inline void
  103.64 +page_list_add_tail(struct page_info *page, struct page_list_head *head)
  103.65 +{
  103.66 +    page->list.next = ~0;
  103.67 +    if ( head->next )
  103.68 +    {
  103.69 +        page->list.prev = page_to_mfn(head->tail);
  103.70 +        head->tail->list.next = page_to_mfn(page);
  103.71 +    }
  103.72 +    else
  103.73 +    {
  103.74 +        page->list.prev = ~0;
  103.75 +        head->next = page;
  103.76 +    }
  103.77 +    head->tail = page;
  103.78 +}
  103.79 +static inline bool_t
  103.80 +__page_list_del_head(struct page_info *page, struct page_list_head *head,
  103.81 +                     struct page_info *next, struct page_info *prev)
  103.82 +{
  103.83 +    if ( head->next == page )
  103.84 +    {
  103.85 +        if ( head->tail != page )
  103.86 +        {
  103.87 +            next->list.prev = ~0;
  103.88 +            head->next = next;
  103.89 +        }
  103.90 +        else
  103.91 +            head->tail = head->next = NULL;
  103.92 +        return 1;
  103.93 +    }
  103.94 +
  103.95 +    if ( head->tail == page )
  103.96 +    {
  103.97 +        prev->list.next = ~0;
  103.98 +        head->tail = prev;
  103.99 +        return 1;
 103.100 +    }
 103.101 +
 103.102 +    return 0;
 103.103 +}
 103.104 +static inline void
 103.105 +page_list_del(struct page_info *page, struct page_list_head *head)
 103.106 +{
 103.107 +    struct page_info *next = mfn_to_page(page->list.next);
 103.108 +    struct page_info *prev = mfn_to_page(page->list.prev);
 103.109 +
 103.110 +    if ( !__page_list_del_head(page, head, next, prev) )
 103.111 +    {
 103.112 +        next->list.prev = page->list.prev;
 103.113 +        prev->list.next = page->list.next;
 103.114 +    }
 103.115 +}
 103.116 +static inline void
 103.117 +page_list_del2(struct page_info *page, struct page_list_head *head1,
 103.118 +               struct page_list_head *head2)
 103.119 +{
 103.120 +    struct page_info *next = mfn_to_page(page->list.next);
 103.121 +    struct page_info *prev = mfn_to_page(page->list.prev);
 103.122 +
 103.123 +    if ( !__page_list_del_head(page, head1, next, prev) &&
 103.124 +         !__page_list_del_head(page, head2, next, prev) )
 103.125 +    {
 103.126 +        next->list.prev = page->list.prev;
 103.127 +        prev->list.next = page->list.next;
 103.128 +    }
 103.129 +}
 103.130 +static inline struct page_info *
 103.131 +page_list_remove_head(struct page_list_head *head)
 103.132 +{
 103.133 +    struct page_info *page = head->next;
 103.134 +
 103.135 +    if ( page )
 103.136 +        page_list_del(page, head);
 103.137 +
 103.138 +    return page;
 103.139 +}
 103.140 +
 103.141 +#define page_list_for_each(pos, head) \
 103.142 +    for ( pos = (head)->next; pos; pos = page_list_next(pos, head) )
 103.143 +#define page_list_for_each_safe(pos, tmp, head) \
 103.144 +    for ( pos = (head)->next; \
 103.145 +          pos ? (tmp = page_list_next(pos, head), 1) : 0; \
 103.146 +          pos = tmp )
 103.147 +#define page_list_for_each_safe_reverse(pos, tmp, head) \
 103.148 +    for ( pos = (head)->tail; \
 103.149 +          pos ? (tmp = page_list_prev(pos, head), 1) : 0; \
 103.150 +          pos = tmp )
 103.151 +#else
 103.152 +# define page_list_head                  list_head
 103.153 +# define PAGE_LIST_HEAD_INIT             LIST_HEAD_INIT
 103.154 +# define PAGE_LIST_HEAD                  LIST_HEAD
 103.155 +# define INIT_PAGE_LIST_HEAD             INIT_LIST_HEAD
 103.156 +# define INIT_PAGE_LIST_ENTRY            INIT_LIST_HEAD
 103.157 +# define page_list_empty                 list_empty
 103.158 +# define page_list_first(hd)             list_entry((hd)->next, \
 103.159 +                                                    struct page_info, list)
 103.160 +# define page_list_next(pg, hd)          list_entry((pg)->list.next, \
 103.161 +                                                    struct page_info, list)
 103.162 +# define page_list_add(pg, hd)           list_add(&(pg)->list, hd)
 103.163 +# define page_list_add_tail(pg, hd)      list_add_tail(&(pg)->list, hd)
 103.164 +# define page_list_del(pg, hd)           list_del(&(pg)->list)
 103.165 +# define page_list_del2(pg, hd1, hd2)    list_del(&(pg)->list)
 103.166 +# define page_list_remove_head(hd)       (!page_list_empty(hd) ? \
 103.167 +    ({ \
 103.168 +        struct page_info *__pg = page_list_first(hd); \
 103.169 +        list_del(&__pg->list); \
 103.170 +        __pg; \
 103.171 +    }) : NULL)
 103.172 +# define page_list_for_each(pos, head)   list_for_each_entry(pos, head, list)
 103.173 +# define page_list_for_each_safe(pos, tmp, head) \
 103.174 +    list_for_each_entry_safe(pos, tmp, head, list)
 103.175 +# define page_list_for_each_safe_reverse(pos, tmp, head) \
 103.176 +    list_for_each_entry_safe_reverse(pos, tmp, head, list)
 103.177 +#endif
 103.178 +
 103.179  /* Automatic page scrubbing for dead domains. */
 103.180 -extern struct list_head page_scrub_list;
 103.181 -#define page_scrub_schedule_work()              \
 103.182 -    do {                                        \
 103.183 -        if ( !list_empty(&page_scrub_list) )    \
 103.184 -            raise_softirq(PAGE_SCRUB_SOFTIRQ);  \
 103.185 +extern struct page_list_head page_scrub_list;
 103.186 +#define page_scrub_schedule_work()                 \
 103.187 +    do {                                           \
 103.188 +        if ( !page_list_empty(&page_scrub_list) )  \
 103.189 +            raise_softirq(PAGE_SCRUB_SOFTIRQ);     \
 103.190      } while ( 0 )
 103.191  #define page_scrub_kick()                                               \
 103.192      do {                                                                \
 103.193 -        if ( !list_empty(&page_scrub_list) )                            \
 103.194 +        if ( !page_list_empty(&page_scrub_list) )                       \
 103.195              cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ);  \
 103.196      } while ( 0 )
 103.197  unsigned long avail_scrub_pages(void);
 103.198  
 103.199 -#include <asm/mm.h>
 103.200 -
 103.201  int guest_remove_page(struct domain *d, unsigned long gmfn);
 103.202  
 103.203  /* Returns TRUE if the whole page at @mfn is ordinary RAM. */
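
Putting the new API together: the conversion pattern used throughout this changeset (see the amd_iommu_sync_p2m() and iommu_populate_page_table() hunks above) walks a domain's page list under page_alloc_lock. A condensed sketch:

    /* Condensed sketch of the iteration pattern adopted in this changeset. */
    static void walk_domain_pages(struct domain *d)
    {
        struct page_info *page;
        unsigned long mfn;

        spin_lock(&d->page_alloc_lock);
        page_list_for_each ( page, &d->page_list )
        {
            mfn = page_to_mfn(page);
            (void)mfn;  /* per-page work goes here, e.g. map mfn into the IOMMU */
        }
        spin_unlock(&d->page_alloc_lock);
    }
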
   104.1 --- a/xen/include/xen/sched.h	Fri Feb 13 10:56:01 2009 +0900
   104.2 +++ b/xen/include/xen/sched.h	Fri Feb 13 11:22:28 2009 +0900
   104.3 @@ -19,6 +19,7 @@
   104.4  #include <xen/xenoprof.h>
   104.5  #include <xen/rcupdate.h>
   104.6  #include <xen/irq.h>
   104.7 +#include <xen/mm.h>
   104.8  
   104.9  #ifdef CONFIG_COMPAT
  104.10  #include <compat/vcpu.h>
  104.11 @@ -171,8 +172,8 @@ struct domain
  104.12      spinlock_t       domain_lock;
  104.13  
  104.14      spinlock_t       page_alloc_lock; /* protects all the following fields  */
  104.15 -    struct list_head page_list;       /* linked list, of size tot_pages     */
  104.16 -    struct list_head xenpage_list;    /* linked list, of size xenheap_pages */
  104.17 +    struct page_list_head page_list;  /* linked list, of size tot_pages     */
  104.18 +    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
  104.19      unsigned int     tot_pages;       /* number of pages currently possesed */
  104.20      unsigned int     max_pages;       /* maximum value for tot_pages        */
  104.21      unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */
   105.1 --- a/xen/xsm/flask/hooks.c	Fri Feb 13 10:56:01 2009 +0900
   105.2 +++ b/xen/xsm/flask/hooks.c	Fri Feb 13 11:22:28 2009 +0900
   105.3 @@ -820,6 +820,7 @@ static int flask_hvmcontext(struct domai
   105.4              perm = HVM__SETHVMC;
   105.5          break;
   105.6          case XEN_DOMCTL_gethvmcontext:
   105.7 +        case XEN_DOMCTL_gethvmcontext_partial:
   105.8              perm = HVM__GETHVMC;
   105.9          break;
  105.10          default: