ia64/xen-unstable

changeset 17670:6d0cc186bf41

merge with xen-unstable.hg
author Isaku Yamahata <yamahata@valinux.co.jp>
date Fri May 16 21:59:38 2008 +0900 (2008-05-16)
parents c96507e0c83d d0817f08599a
children e78f5dbedbe0
files xen/arch/ia64/xen/mm.c
line diff
     1.1 --- a/tools/ioemu/block.c	Thu May 15 16:23:56 2008 +0900
     1.2 +++ b/tools/ioemu/block.c	Fri May 16 21:59:38 2008 +0900
     1.3 @@ -240,8 +240,28 @@ static int is_windows_drive(const char *
     1.4  }
     1.5  #endif
     1.6  
     1.7 +static int bdrv_invalid_protocol_open(BlockDriverState *bs,
     1.8 +				      const char *filename, int flags) {
     1.9 +    return -ENOENT;
    1.10 +}
    1.11 +
    1.12 +static BlockDriver bdrv_invalid_protocol = {
    1.13 +    "invalid_protocol",
    1.14 +    .bdrv_open = bdrv_invalid_protocol_open,
    1.15 +};
    1.16 +
    1.17  static BlockDriver *find_protocol(const char *filename)
    1.18  {
    1.19 +    /* Return values:
    1.20 +     *   &bdrv_xxx
    1.21 +     *      filename specifies protocol xxx
    1.22 +     *      caller should use that
    1.23 +     *   NULL                    filename does not specify any protocol
    1.24 +     *       caller may apply their own default
     1.25 +     *   &bdrv_invalid_protocol  filename specifies an unknown protocol
    1.26 +     *       caller should return -ENOENT; or may just try to open with
    1.27 +     *       that bdrv, which always fails that way.
    1.28 +     */
    1.29      BlockDriver *drv1;
    1.30      char protocol[128];
    1.31      int len;
    1.32 @@ -254,7 +274,7 @@ static BlockDriver *find_protocol(const 
    1.33  #endif
    1.34      p = strchr(filename, ':');
    1.35      if (!p)
    1.36 -        return NULL; /* do not ever guess raw, it is a security problem! */
    1.37 +        return NULL;
    1.38      len = p - filename;
    1.39      if (len > sizeof(protocol) - 1)
    1.40          len = sizeof(protocol) - 1;
    1.41 @@ -265,7 +285,7 @@ static BlockDriver *find_protocol(const 
    1.42              !strcmp(drv1->protocol_name, protocol))
    1.43              return drv1;
    1.44      }
    1.45 -    return NULL;
    1.46 +    return &bdrv_invalid_protocol;
    1.47  }
    1.48  
    1.49  /* XXX: force raw format if block or character device ? It would
    1.50 @@ -295,8 +315,8 @@ static BlockDriver *find_image_format(co
    1.51  #endif
    1.52      
    1.53      drv = find_protocol(filename);
    1.54 -    /* no need to test disk image formats for vvfat */
    1.55 -    if (drv == &bdrv_vvfat)
    1.56 +    /* no need to test disk image format if the filename told us */
    1.57 +    if (drv != NULL)
    1.58          return drv;
    1.59  
    1.60      ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
    1.61 @@ -390,7 +410,7 @@ int bdrv_open2(BlockDriverState *bs, con
    1.62      if (flags & BDRV_O_FILE) {
    1.63          drv = find_protocol(filename);
    1.64          if (!drv)
    1.65 -            return -ENOENT;
    1.66 +	    drv = &bdrv_raw;
    1.67      } else {
    1.68          if (!drv) {
    1.69              drv = find_image_format(filename);
    1.70 @@ -438,7 +458,7 @@ int bdrv_open2(BlockDriverState *bs, con
    1.71          }
    1.72          path_combine(backing_filename, sizeof(backing_filename),
    1.73                       filename, bs->backing_file);
    1.74 -        if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
    1.75 +        if (bdrv_open2(bs->backing_hd, backing_filename, 0, &bdrv_raw) < 0)
    1.76              goto fail;
    1.77      }
    1.78  
     2.1 --- a/tools/ioemu/hw/serial.c	Thu May 15 16:23:56 2008 +0900
     2.2 +++ b/tools/ioemu/hw/serial.c	Fri May 16 21:59:38 2008 +0900
     2.3 @@ -705,12 +705,13 @@ static void serial_save(QEMUFile *f, voi
     2.4      qemu_put_8s(f,&s->lsr);
     2.5      qemu_put_8s(f,&s->msr);
     2.6      qemu_put_8s(f,&s->scr);
     2.7 -    qemu_get_8s(f,&s->fcr);
     2.8 +    qemu_put_8s(f,&s->fcr);
     2.9  }
    2.10  
    2.11  static int serial_load(QEMUFile *f, void *opaque, int version_id)
    2.12  {
    2.13      SerialState *s = opaque;
    2.14 +    uint8_t fcr = 0;
    2.15  
    2.16      if(version_id > 2)
    2.17          return -EINVAL;
    2.18 @@ -729,6 +730,11 @@ static int serial_load(QEMUFile *f, void
    2.19      qemu_get_8s(f,&s->scr);
    2.20      qemu_get_8s(f,&s->fcr);
    2.21  
    2.22 +    if (version_id >= 2)
    2.23 +        qemu_get_8s(f,&fcr);
    2.24 +
    2.25 +    /* Initialize fcr via setter to perform essential side-effects */
    2.26 +    serial_ioport_write(s, 0x02, fcr);
    2.27      return 0;
    2.28  }
    2.29  
     3.1 --- a/tools/ioemu/hw/xenfb.c	Thu May 15 16:23:56 2008 +0900
     3.2 +++ b/tools/ioemu/hw/xenfb.c	Fri May 16 21:59:38 2008 +0900
     3.3 @@ -502,6 +502,7 @@ static int xenfb_configure_fb(struct xen
     3.4  		fprintf(stderr,
     3.5  			"FB: frontend fb size %zu limited to %zu\n",
     3.6  			fb_len, fb_len_lim);
     3.7 +		fb_len = fb_len_lim;
     3.8  	}
     3.9  	if (depth != 8 && depth != 16 && depth != 24 && depth != 32) {
    3.10  		fprintf(stderr,
     4.1 --- a/tools/ioemu/xenstore.c	Thu May 15 16:23:56 2008 +0900
     4.2 +++ b/tools/ioemu/xenstore.c	Fri May 16 21:59:38 2008 +0900
     4.3 @@ -260,6 +260,8 @@ void xenstore_parse_domain_config(int hv
     4.4  		    /* autoguess qcow vs qcow2 */
     4.5  		} else if (!strcmp(drv,"file") || !strcmp(drv,"phy")) {
     4.6  		    format = &bdrv_raw;
     4.7 +		} else if (!strcmp(drv,"phy")) {
     4.8 +		    format = &bdrv_raw;
     4.9  		} else {
    4.10  		    format = bdrv_find_format(drv);
    4.11  		    if (!format) {
    4.12 @@ -269,7 +271,7 @@ void xenstore_parse_domain_config(int hv
    4.13  		}
    4.14  	    }
    4.15              if (bdrv_open2(bs, params, 0 /* snapshot */, format) < 0)
    4.16 -                fprintf(stderr, "qemu: could not open vbd '%s' or hard disk image '%s' (drv '%s')\n", buf, params, drv ? drv : "?");
    4.17 +                fprintf(stderr, "qemu: could not open vbd '%s' or hard disk image '%s' (drv '%s' format '%s')\n", buf, params, drv ? drv : "?", format ? format->format_name : "0");
    4.18          }
    4.19      }
    4.20  
     5.1 --- a/tools/libxc/Makefile	Thu May 15 16:23:56 2008 +0900
     5.2 +++ b/tools/libxc/Makefile	Fri May 16 21:59:38 2008 +0900
     5.3 @@ -20,6 +20,7 @@ CTRL_SRCS-y       += xc_private.c
     5.4  CTRL_SRCS-y       += xc_sedf.c
     5.5  CTRL_SRCS-y       += xc_csched.c
     5.6  CTRL_SRCS-y       += xc_tbuf.c
     5.7 +CTRL_SRCS-y       += xc_pm.c
     5.8  ifneq ($(stubdom),y)
     5.9  CTRL_SRCS-y       += xc_resume.c
    5.10  endif
     6.1 --- a/tools/libxc/xc_hvm_build.c	Thu May 15 16:23:56 2008 +0900
     6.2 +++ b/tools/libxc/xc_hvm_build.c	Fri May 16 21:59:38 2008 +0900
     6.3 @@ -19,6 +19,9 @@
     6.4  
     6.5  #include <xen/libelf.h>
     6.6  
     6.7 +#define SUPERPAGE_PFN_SHIFT  9
     6.8 +#define SUPERPAGE_NR_PFNS    (1UL << SUPERPAGE_PFN_SHIFT)
     6.9 +
    6.10  #define SCRATCH_PFN 0xFFFFF
    6.11  
    6.12  #define SPECIALPAGE_GUARD    0
    6.13 @@ -211,7 +214,7 @@ static int setup_guest(int xc_handle,
    6.14  
    6.15      /*
    6.16       * Allocate memory for HVM guest, skipping VGA hole 0xA0000-0xC0000.
    6.17 -     * We allocate pages in batches of no more than 2048 to ensure that
    6.18 +     * We allocate pages in batches of no more than 8MB to ensure that
    6.19       * we can be preempted and hence dom0 remains responsive.
    6.20       */
    6.21      rc = xc_domain_memory_populate_physmap(
    6.22 @@ -219,13 +222,50 @@ static int setup_guest(int xc_handle,
    6.23      cur_pages = 0xc0;
    6.24      while ( (rc == 0) && (nr_pages > cur_pages) )
    6.25      {
    6.26 +        /* Clip count to maximum 8MB extent. */
    6.27          unsigned long count = nr_pages - cur_pages;
    6.28          if ( count > 2048 )
    6.29              count = 2048;
    6.30 -        rc = xc_domain_memory_populate_physmap(
    6.31 -            xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
    6.32 -        cur_pages += count;
    6.33 +
    6.34 +        /* Clip partial superpage extents to superpage boundaries. */
    6.35 +        if ( ((cur_pages & (SUPERPAGE_NR_PFNS-1)) != 0) &&
    6.36 +             (count > (-cur_pages & (SUPERPAGE_NR_PFNS-1))) )
    6.37 +            count = -cur_pages & (SUPERPAGE_NR_PFNS-1); /* clip s.p. tail */
    6.38 +        else if ( ((count & (SUPERPAGE_NR_PFNS-1)) != 0) &&
    6.39 +                  (count > SUPERPAGE_NR_PFNS) )
    6.40 +            count &= ~(SUPERPAGE_NR_PFNS - 1); /* clip non-s.p. tail */
    6.41 +
    6.42 +        /* Attempt to allocate superpage extents. */
    6.43 +        if ( ((count | cur_pages) & (SUPERPAGE_NR_PFNS - 1)) == 0 )
    6.44 +        {
    6.45 +            long done;
    6.46 +            xen_pfn_t sp_extents[2048 >> SUPERPAGE_PFN_SHIFT];
    6.47 +            struct xen_memory_reservation sp_req = {
    6.48 +                .nr_extents   = count >> SUPERPAGE_PFN_SHIFT,
    6.49 +                .extent_order = SUPERPAGE_PFN_SHIFT,
    6.50 +                .domid        = dom
    6.51 +            };
    6.52 +            set_xen_guest_handle(sp_req.extent_start, sp_extents);
    6.53 +            for ( i = 0; i < sp_req.nr_extents; i++ )
    6.54 +                sp_extents[i] = page_array[cur_pages+(i<<SUPERPAGE_PFN_SHIFT)];
    6.55 +            done = xc_memory_op(xc_handle, XENMEM_populate_physmap, &sp_req);
    6.56 +            if ( done > 0 )
    6.57 +            {
    6.58 +                done <<= SUPERPAGE_PFN_SHIFT;
    6.59 +                cur_pages += done;
    6.60 +                count -= done;
    6.61 +            }
    6.62 +        }
    6.63 +
    6.64 +        /* Fall back to 4kB extents. */
    6.65 +        if ( count != 0 )
    6.66 +        {
    6.67 +            rc = xc_domain_memory_populate_physmap(
    6.68 +                xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
    6.69 +            cur_pages += count;
    6.70 +        }
    6.71      }
    6.72 +
    6.73      if ( rc != 0 )
    6.74      {
    6.75          PERROR("Could not allocate memory for HVM guest.\n");
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/libxc/xc_pm.c	Fri May 16 21:59:38 2008 +0900
     7.3 @@ -0,0 +1,101 @@
     7.4 +/******************************************************************************
     7.5 + * xc_pm.c - Libxc API for Xen Power Management (Px/Cx/Tx, etc.) statistic
     7.6 + *
     7.7 + * Copyright (c) 2008, Liu Jinsong <jinsong.liu@intel.com>
     7.8 + *
     7.9 + * Permission is hereby granted, free of charge, to any person obtaining a copy
    7.10 + * of this software and associated documentation files (the "Software"), to
    7.11 + * deal in the Software without restriction, including without limitation the
    7.12 + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    7.13 + * sell copies of the Software, and to permit persons to whom the Software is
    7.14 + * furnished to do so, subject to the following conditions:
    7.15 + *
    7.16 + * The above copyright notice and this permission notice shall be included in
    7.17 + * all copies or substantial portions of the Software.
    7.18 + *
    7.19 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    7.20 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    7.21 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    7.22 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    7.23 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    7.24 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    7.25 + * DEALINGS IN THE SOFTWARE.
    7.26 + *
    7.27 + */
    7.28 +
    7.29 +#include "xc_private.h"
    7.30 +
    7.31 +int xc_pm_get_max_px(int xc_handle, int cpuid, int *max_px)
    7.32 +{
    7.33 +    DECLARE_SYSCTL;
    7.34 +    int ret;
    7.35 +
    7.36 +    sysctl.cmd = XEN_SYSCTL_get_pmstat;
    7.37 +    sysctl.u.get_pmstat.type = PMSTAT_get_max_px;
    7.38 +    sysctl.u.get_pmstat.cpuid = cpuid;
    7.39 +    ret = xc_sysctl(xc_handle, &sysctl);
    7.40 +    if ( ret )
    7.41 +        return ret;
    7.42 +
    7.43 +    *max_px = sysctl.u.get_pmstat.u.getpx.total;
    7.44 +    return ret;
    7.45 +}
    7.46 +
    7.47 +int xc_pm_get_pxstat(int xc_handle, int cpuid, struct xc_px_stat *pxpt)
    7.48 +{
    7.49 +    DECLARE_SYSCTL;
    7.50 +    int max_px, ret;
    7.51 +
    7.52 +    if ( !pxpt || !(pxpt->trans_pt) || !(pxpt->pt) )
    7.53 +        return -EINVAL;
    7.54 +
    7.55 +    if ( (ret = xc_pm_get_max_px(xc_handle, cpuid, &max_px)) != 0)
    7.56 +        return ret;
    7.57 +
    7.58 +    if ( (ret = lock_pages(pxpt->trans_pt, 
    7.59 +        max_px * max_px * sizeof(uint64_t))) != 0 )
    7.60 +        return ret;
    7.61 +
    7.62 +    if ( (ret = lock_pages(pxpt->pt, 
    7.63 +        max_px * sizeof(struct xc_px_val))) != 0 )
    7.64 +    {
    7.65 +        unlock_pages(pxpt->trans_pt, max_px * max_px * sizeof(uint64_t));
    7.66 +        return ret;
    7.67 +    }
    7.68 +
    7.69 +    sysctl.cmd = XEN_SYSCTL_get_pmstat;
    7.70 +    sysctl.u.get_pmstat.type = PMSTAT_get_pxstat;
    7.71 +    sysctl.u.get_pmstat.cpuid = cpuid;
    7.72 +    set_xen_guest_handle(sysctl.u.get_pmstat.u.getpx.trans_pt, pxpt->trans_pt);
    7.73 +    set_xen_guest_handle(sysctl.u.get_pmstat.u.getpx.pt, 
    7.74 +                        (pm_px_val_t *)pxpt->pt);
    7.75 +
    7.76 +    ret = xc_sysctl(xc_handle, &sysctl);
    7.77 +    if ( ret )
    7.78 +    {
    7.79 +        unlock_pages(pxpt->trans_pt, max_px * max_px * sizeof(uint64_t));
    7.80 +        unlock_pages(pxpt->pt, max_px * sizeof(struct xc_px_val));
    7.81 +        return ret;
    7.82 +    }
    7.83 +
    7.84 +    pxpt->total = sysctl.u.get_pmstat.u.getpx.total;
    7.85 +    pxpt->usable = sysctl.u.get_pmstat.u.getpx.usable;
    7.86 +    pxpt->last = sysctl.u.get_pmstat.u.getpx.last;
    7.87 +    pxpt->cur = sysctl.u.get_pmstat.u.getpx.cur;
    7.88 +
    7.89 +    unlock_pages(pxpt->trans_pt, max_px * max_px * sizeof(uint64_t));
    7.90 +    unlock_pages(pxpt->pt, max_px * sizeof(struct xc_px_val));
    7.91 +
    7.92 +    return ret;
    7.93 +}
    7.94 +
    7.95 +int xc_pm_reset_pxstat(int xc_handle, int cpuid)
    7.96 +{
    7.97 +    DECLARE_SYSCTL;
    7.98 +
    7.99 +    sysctl.cmd = XEN_SYSCTL_get_pmstat;
   7.100 +    sysctl.u.get_pmstat.type = PMSTAT_reset_pxstat;
   7.101 +    sysctl.u.get_pmstat.cpuid = cpuid;
   7.102 +
   7.103 +    return xc_sysctl(xc_handle, &sysctl);
   7.104 +}
     8.1 --- a/tools/libxc/xenctrl.h	Thu May 15 16:23:56 2008 +0900
     8.2 +++ b/tools/libxc/xenctrl.h	Fri May 16 21:59:38 2008 +0900
     8.3 @@ -1034,4 +1034,23 @@ void xc_cpuid_to_str(const unsigned int 
     8.4                       char **strs);
     8.5  #endif
     8.6  
     8.7 +struct xc_px_val {
     8.8 +    uint64_t freq;        /* Px core frequency */
     8.9 +    uint64_t residency;   /* Px residency time */
    8.10 +    uint64_t count;       /* Px transition count */
    8.11 +};
    8.12 +
    8.13 +struct xc_px_stat {
    8.14 +    uint8_t total;        /* total Px states */
    8.15 +    uint8_t usable;       /* usable Px states */
    8.16 +    uint8_t last;         /* last Px state */
    8.17 +    uint8_t cur;          /* current Px state */
    8.18 +    uint64_t *trans_pt;   /* Px transition table */
    8.19 +    struct xc_px_val *pt;
    8.20 +};
    8.21 +
    8.22 +int xc_pm_get_max_px(int xc_handle, int cpuid, int *max_px);
    8.23 +int xc_pm_get_pxstat(int xc_handle, int cpuid, struct xc_px_stat *pxpt);
    8.24 +int xc_pm_reset_pxstat(int xc_handle, int cpuid);
    8.25 +
    8.26  #endif /* XENCTRL_H */
     9.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Thu May 15 16:23:56 2008 +0900
     9.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri May 16 21:59:38 2008 +0900
     9.3 @@ -3013,7 +3013,8 @@ class XendDomainInfo:
     9.4          # shortcut if the domain isn't started because
     9.5          # the devcontrollers will have no better information
     9.6          # than XendConfig.
     9.7 -        if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED,):
     9.8 +        if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED,
     9.9 +                                XEN_API_VM_POWER_STATE_SUSPENDED):
    9.10              if dev_config:
    9.11                  return copy.deepcopy(dev_config)
    9.12              return None
    10.1 --- a/xen/arch/ia64/xen/mm.c	Thu May 15 16:23:56 2008 +0900
    10.2 +++ b/xen/arch/ia64/xen/mm.c	Fri May 16 21:59:38 2008 +0900
    10.3 @@ -2424,16 +2424,20 @@ steal_page(struct domain *d, struct page
    10.4  
    10.5  int
    10.6  guest_physmap_add_page(struct domain *d, unsigned long gpfn,
    10.7 -                       unsigned long mfn)
    10.8 +                       unsigned long mfn, unsigned int page_order)
    10.9  {
   10.10 -    BUG_ON(!mfn_valid(mfn));
   10.11 -    BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
   10.12 -    set_gpfn_from_mfn(mfn, gpfn);
   10.13 -    smp_mb();
   10.14 -    assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn,
   10.15 -                               ASSIGN_writable | ASSIGN_pgc_allocated);
   10.16 -
   10.17 -    //BUG_ON(mfn != ((lookup_domain_mpa(d, gpfn << PAGE_SHIFT) & _PFN_MASK) >> PAGE_SHIFT));
   10.18 +    unsigned long i;
   10.19 +
   10.20 +    for (i = 0; i < (1UL << page_order); i++) {
   10.21 +        BUG_ON(!mfn_valid(mfn));
   10.22 +        BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
   10.23 +        set_gpfn_from_mfn(mfn, gpfn);
   10.24 +        smp_mb();
   10.25 +        assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn,
   10.26 +                                   ASSIGN_writable | ASSIGN_pgc_allocated);
   10.27 +        mfn++;
   10.28 +        gpfn++;
   10.29 +    }
   10.30  
   10.31      perfc_incr(guest_physmap_add_page);
   10.32      return 0;
   10.33 @@ -2441,10 +2445,15 @@ guest_physmap_add_page(struct domain *d,
   10.34  
   10.35  void
   10.36  guest_physmap_remove_page(struct domain *d, unsigned long gpfn,
   10.37 -                          unsigned long mfn)
   10.38 +                          unsigned long mfn, unsigned int page_order)
   10.39  {
   10.40 +    unsigned long i;
   10.41 +
   10.42      BUG_ON(mfn == 0);//XXX
   10.43 -    zap_domain_page_one(d, gpfn << PAGE_SHIFT, 0, mfn);
   10.44 +
   10.45 +    for (i = 0; i < (1UL << page_order); i++)
   10.46 +        zap_domain_page_one(d, (gpfn+i) << PAGE_SHIFT, 0, mfn+i);
   10.47 +
   10.48      perfc_incr(guest_physmap_remove_page);
   10.49  }
   10.50  
   10.51 @@ -2847,7 +2856,7 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
   10.52          if (prev_mfn && mfn_valid(prev_mfn)) {
   10.53              if (is_xen_heap_mfn(prev_mfn))
   10.54                  /* Xen heap frames are simply unhooked from this phys slot. */
   10.55 -                guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
   10.56 +                guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
   10.57              else
   10.58                  /* Normal domain memory is freed, to avoid leaking memory. */
   10.59                  guest_remove_page(d, xatp.gpfn);
   10.60 @@ -2856,10 +2865,10 @@ arch_memory_op(int op, XEN_GUEST_HANDLE(
   10.61          /* Unmap from old location, if any. */
   10.62          gpfn = get_gpfn_from_mfn(mfn);
   10.63          if (gpfn != INVALID_M2P_ENTRY)
   10.64 -            guest_physmap_remove_page(d, gpfn, mfn);
   10.65 +            guest_physmap_remove_page(d, gpfn, mfn, 0);
   10.66  
   10.67          /* Map at new location. */
   10.68 -        guest_physmap_add_page(d, xatp.gpfn, mfn);
   10.69 +        guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
   10.70  
   10.71      out:
   10.72          domain_unlock(d);
    11.1 --- a/xen/arch/x86/acpi/Makefile	Thu May 15 16:23:56 2008 +0900
    11.2 +++ b/xen/arch/x86/acpi/Makefile	Fri May 16 21:59:38 2008 +0900
    11.3 @@ -2,3 +2,4 @@ subdir-y += cpufreq
    11.4  
    11.5  obj-y += boot.o
    11.6  obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o
    11.7 +obj-y += pmstat.o
    12.1 --- a/xen/arch/x86/acpi/boot.c	Thu May 15 16:23:56 2008 +0900
    12.2 +++ b/xen/arch/x86/acpi/boot.c	Fri May 16 21:59:38 2008 +0900
    12.3 @@ -441,17 +441,9 @@ acpi_fadt_parse_sleep_info(struct acpi_t
    12.4  			"FACS is shorter than ACPI spec allow: 0x%x",
    12.5  			facs->length);
    12.6  
    12.7 -	if ((rsdp->revision < 2) || (facs->length < 32)) {
    12.8 -		acpi_sinfo.wakeup_vector = facs_pa + 
    12.9 -			offsetof(struct acpi_table_facs,
   12.10 -				 firmware_waking_vector);
   12.11 -		acpi_sinfo.vector_width = 32;
   12.12 -	} else {
   12.13 -		acpi_sinfo.wakeup_vector = facs_pa +
   12.14 -			offsetof(struct acpi_table_facs,
   12.15 -				 xfirmware_waking_vector);
   12.16 -		acpi_sinfo.vector_width = 64;
   12.17 -	}
   12.18 +	acpi_sinfo.wakeup_vector = facs_pa + 
   12.19 +		offsetof(struct acpi_table_facs, firmware_waking_vector);
   12.20 +	acpi_sinfo.vector_width = 32;
   12.21  
   12.22  	printk(KERN_INFO PREFIX
   12.23  	       "                 wakeup_vec[%"PRIx64"], vec_size[%x]\n",
    13.1 --- a/xen/arch/x86/acpi/cpufreq/cpufreq.c	Thu May 15 16:23:56 2008 +0900
    13.2 +++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c	Fri May 16 21:59:38 2008 +0900
    13.3 @@ -369,6 +369,8 @@ static int acpi_cpufreq_target(struct cp
    13.4      if (!check_freqs(cmd.mask, freqs.new, data))
    13.5          return -EAGAIN;
    13.6  
    13.7 +    px_statistic_update(cmd.mask, perf->state, next_perf_state);
    13.8 +
    13.9      perf->state = next_perf_state;
   13.10      policy->cur = freqs.new;
   13.11  
   13.12 @@ -581,9 +583,13 @@ int acpi_cpufreq_init(void)
   13.13      for_each_online_cpu(i) {
   13.14          xen_px_policy[i].cpu = i;
   13.15  
   13.16 +        ret = px_statistic_init(i);
   13.17 +        if (ret)
   13.18 +            goto out;
   13.19 +
   13.20          ret = acpi_cpufreq_cpu_init(&xen_px_policy[i]);
   13.21          if (ret)
   13.22 -            goto cpufreq_init_out;
   13.23 +            goto out;
   13.24      }
   13.25  
   13.26      /* setup ondemand cpufreq */
   13.27 @@ -593,10 +599,10 @@ int acpi_cpufreq_init(void)
   13.28          i = first_cpu(pt[dom]);
   13.29          ret = cpufreq_governor_dbs(&xen_px_policy[i], CPUFREQ_GOV_START);
   13.30          if (ret)
   13.31 -            goto cpufreq_init_out;
   13.32 +            goto out;
   13.33      }
   13.34  
   13.35 -cpufreq_init_out:
   13.36 +out:
   13.37      xfree(pt);
   13.38     
   13.39      return ret;
    14.1 --- a/xen/arch/x86/acpi/cpufreq/utility.c	Thu May 15 16:23:56 2008 +0900
    14.2 +++ b/xen/arch/x86/acpi/cpufreq/utility.c	Fri May 16 21:59:38 2008 +0900
    14.3 @@ -34,6 +34,83 @@
    14.4  struct cpufreq_driver *cpufreq_driver;
    14.5  
    14.6  /*********************************************************************
    14.7 + *                    Px STATISTIC INFO                              *
    14.8 + *********************************************************************/
    14.9 +
   14.10 +void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
   14.11 +{
   14.12 +    uint32_t i;
   14.13 +    uint64_t now;
   14.14 +
   14.15 +    now = NOW();
   14.16 +
   14.17 +    for_each_cpu_mask(i, cpumask) {
   14.18 +        struct pm_px *pxpt = &px_statistic_data[i];
   14.19 +        uint32_t statnum = processor_pminfo[i].perf.state_count;
   14.20 +
   14.21 +        pxpt->u.last = from;
   14.22 +        pxpt->u.cur = to;
   14.23 +        pxpt->u.pt[to].count++;
   14.24 +        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
   14.25 +
   14.26 +        (*(pxpt->u.trans_pt + from*statnum + to))++;
   14.27 +
   14.28 +        pxpt->prev_state_wall = now;
   14.29 +    }
   14.30 +}
   14.31 +
   14.32 +int px_statistic_init(int cpuid)
   14.33 +{
   14.34 +    uint32_t i, count;
   14.35 +    struct pm_px *pxpt = &px_statistic_data[cpuid];
   14.36 +    struct processor_pminfo *pmpt = &processor_pminfo[cpuid];
   14.37 +
   14.38 +    count = pmpt->perf.state_count;
   14.39 +
   14.40 +    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
   14.41 +    if (!pxpt->u.trans_pt)
   14.42 +        return -ENOMEM;
   14.43 +
   14.44 +    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
   14.45 +    if (!pxpt->u.pt) {
   14.46 +        xfree(pxpt->u.trans_pt);
   14.47 +        return -ENOMEM;
   14.48 +    }
   14.49 +
   14.50 +    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
   14.51 +    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
   14.52 +
   14.53 +    pxpt->u.total = pmpt->perf.state_count;
   14.54 +    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc;
   14.55 +
   14.56 +    for (i=0; i < pmpt->perf.state_count; i++)
   14.57 +        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
   14.58 +
   14.59 +    pxpt->prev_state_wall = NOW();
   14.60 +
   14.61 +    return 0;
   14.62 +}
   14.63 +
   14.64 +void px_statistic_reset(int cpuid)
   14.65 +{
   14.66 +    uint32_t i, j, count;
   14.67 +    struct pm_px *pxpt = &px_statistic_data[cpuid];
   14.68 +
   14.69 +    count = processor_pminfo[cpuid].perf.state_count;
   14.70 +
   14.71 +    for (i=0; i < count; i++) {
   14.72 +        pxpt->u.pt[i].residency = 0;
   14.73 +        pxpt->u.pt[i].count = 0;
   14.74 +
   14.75 +        for (j=0; j < count; j++)
   14.76 +            *(pxpt->u.trans_pt + i*count + j) = 0;
   14.77 +    }
   14.78 +
   14.79 +    pxpt->prev_state_wall = NOW();
   14.80 +}
   14.81 +
   14.82 +
   14.83 +/*********************************************************************
   14.84   *                   FREQUENCY TABLE HELPERS                         *
   14.85   *********************************************************************/
   14.86  
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/xen/arch/x86/acpi/pmstat.c	Fri May 16 21:59:38 2008 +0900
    15.3 @@ -0,0 +1,110 @@
    15.4 +/*****************************************************************************
    15.5 +#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
    15.6 +#
    15.7 +#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@intel.com>
    15.8 +#
    15.9 +# This program is free software; you can redistribute it and/or modify it 
   15.10 +# under the terms of the GNU General Public License as published by the Free 
   15.11 +# Software Foundation; either version 2 of the License, or (at your option) 
   15.12 +# any later version.
   15.13 +#
   15.14 +# This program is distributed in the hope that it will be useful, but WITHOUT 
   15.15 +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
   15.16 +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
   15.17 +# more details.
   15.18 +#
   15.19 +# You should have received a copy of the GNU General Public License along with
   15.20 +# this program; if not, write to the Free Software Foundation, Inc., 59 
   15.21 +# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
   15.22 +#
   15.23 +# The full GNU General Public License is included in this distribution in the
   15.24 +# file called LICENSE.
   15.25 +#
   15.26 +*****************************************************************************/
   15.27 +
   15.28 +#include <xen/config.h>
   15.29 +#include <xen/lib.h>
   15.30 +#include <xen/errno.h>
   15.31 +#include <xen/sched.h>
   15.32 +#include <xen/event.h>
   15.33 +#include <xen/irq.h>
   15.34 +#include <xen/iocap.h>
   15.35 +#include <xen/compat.h>
   15.36 +#include <xen/guest_access.h>
   15.37 +#include <asm/current.h>
   15.38 +#include <public/xen.h>
   15.39 +#include <xen/cpumask.h>
   15.40 +#include <asm/processor.h>
   15.41 +#include <xen/percpu.h>
   15.42 +
   15.43 +#include <public/sysctl.h>
   15.44 +#include <acpi/cpufreq/cpufreq.h>
   15.45 +
   15.46 +struct pm_px px_statistic_data[NR_CPUS];
   15.47 +
   15.48 +int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
   15.49 +{
   15.50 +    int ret = 0;
   15.51 +    struct pm_px *pxpt = &px_statistic_data[op->cpuid];
   15.52 +    struct processor_pminfo *pmpt = &processor_pminfo[op->cpuid];
   15.53 +
   15.54 +    /* to protect the case when Px was controlled by dom0-kernel */
   15.55 +    /* or when CPU_FREQ not set in which case ACPI Px objects not parsed */
   15.56 +    if ( !pmpt->perf.init )
   15.57 +        return -EINVAL;
   15.58 +
   15.59 +    if ( !cpu_online(op->cpuid) )
   15.60 +        return -EINVAL;
   15.61 +
   15.62 +    switch( op->type )
   15.63 +    {
   15.64 +    case PMSTAT_get_max_px:
   15.65 +    {
   15.66 +        op->u.getpx.total = pmpt->perf.state_count;
   15.67 +        break;
   15.68 +    }
   15.69 +
   15.70 +    case PMSTAT_get_pxstat:
   15.71 +    {
   15.72 +        uint64_t now, ct;
   15.73 +
   15.74 +        now = NOW();
   15.75 +        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.ppc;
   15.76 +        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
   15.77 +        pxpt->prev_state_wall = now;
   15.78 +
   15.79 +        ct = pmpt->perf.state_count;
   15.80 +        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
   15.81 +        {
   15.82 +            ret = -EFAULT;
   15.83 +            break;
   15.84 +        }
   15.85 +
   15.86 +        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
   15.87 +        {
   15.88 +            ret = -EFAULT;
   15.89 +            break;
   15.90 +        }
   15.91 +
   15.92 +        op->u.getpx.total = pxpt->u.total;
   15.93 +        op->u.getpx.usable = pxpt->u.usable;
   15.94 +        op->u.getpx.last = pxpt->u.last;
   15.95 +        op->u.getpx.cur = pxpt->u.cur;
   15.96 +
   15.97 +        break;
   15.98 +    }
   15.99 +
  15.100 +    case PMSTAT_reset_pxstat:
  15.101 +    {
  15.102 +        px_statistic_reset(op->cpuid);
  15.103 +        break;
  15.104 +    }
  15.105 +
  15.106 +    default:
  15.107 +        printk("not defined sub-hypercall @ do_get_pm_info\n");
  15.108 +        ret = -ENOSYS;
  15.109 +        break;
  15.110 +    }
  15.111 +
  15.112 +    return ret;
  15.113 +}
    16.1 --- a/xen/arch/x86/mm.c	Thu May 15 16:23:56 2008 +0900
    16.2 +++ b/xen/arch/x86/mm.c	Fri May 16 21:59:38 2008 +0900
    16.3 @@ -3297,7 +3297,7 @@ long arch_memory_op(int op, XEN_GUEST_HA
    16.4          {
    16.5              if ( is_xen_heap_mfn(prev_mfn) )
    16.6                  /* Xen heap frames are simply unhooked from this phys slot. */
    16.7 -                guest_physmap_remove_page(d, xatp.gpfn, prev_mfn);
    16.8 +                guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
    16.9              else
   16.10                  /* Normal domain memory is freed, to avoid leaking memory. */
   16.11                  guest_remove_page(d, xatp.gpfn);
   16.12 @@ -3306,10 +3306,10 @@ long arch_memory_op(int op, XEN_GUEST_HA
   16.13          /* Unmap from old location, if any. */
   16.14          gpfn = get_gpfn_from_mfn(mfn);
   16.15          if ( gpfn != INVALID_M2P_ENTRY )
   16.16 -            guest_physmap_remove_page(d, gpfn, mfn);
   16.17 +            guest_physmap_remove_page(d, gpfn, mfn, 0);
   16.18  
   16.19          /* Map at new location. */
   16.20 -        guest_physmap_add_page(d, xatp.gpfn, mfn);
   16.21 +        guest_physmap_add_page(d, xatp.gpfn, mfn, 0);
   16.22  
   16.23          domain_unlock(d);
   16.24  
    17.1 --- a/xen/arch/x86/mm/hap/p2m-ept.c	Thu May 15 16:23:56 2008 +0900
    17.2 +++ b/xen/arch/x86/mm/hap/p2m-ept.c	Fri May 16 21:59:38 2008 +0900
    17.3 @@ -20,6 +20,7 @@
    17.4  #include <xen/domain_page.h>
    17.5  #include <xen/sched.h>
    17.6  #include <asm/current.h>
    17.7 +#include <asm/paging.h>
    17.8  #include <asm/types.h>
    17.9  #include <asm/domain.h>
   17.10  #include <asm/p2m.h>
   17.11 @@ -46,6 +47,9 @@ static void ept_p2m_type_to_flags(ept_en
   17.12      }
   17.13  }
   17.14  
   17.15 +#define GUEST_TABLE_NORMAL_PAGE 1
   17.16 +#define GUEST_TABLE_SUPER_PAGE  2
   17.17 +
   17.18  static int ept_next_level(struct domain *d, bool_t read_only,
   17.19                            ept_entry_t **table, unsigned long *gfn_remainder,
   17.20                            u32 shift)
   17.21 @@ -54,7 +58,6 @@ static int ept_next_level(struct domain 
   17.22      u32 index;
   17.23  
   17.24      index = *gfn_remainder >> shift;
   17.25 -    *gfn_remainder &= (1UL << shift) - 1;
   17.26  
   17.27      ept_entry = (*table) + index;
   17.28  
   17.29 @@ -83,31 +86,53 @@ static int ept_next_level(struct domain 
   17.30          ept_entry->r = ept_entry->w = ept_entry->x = 1;
   17.31      }
   17.32  
   17.33 -    next = map_domain_page(ept_entry->mfn);
   17.34 -    unmap_domain_page(*table);
   17.35 -    *table = next;
   17.36 -
   17.37 -    return 1;
   17.38 +    if ( !ept_entry->sp_avail )
   17.39 +    {
   17.40 +        *gfn_remainder &= (1UL << shift) - 1;
   17.41 +        next = map_domain_page(ept_entry->mfn);
   17.42 +        unmap_domain_page(*table);
   17.43 +        *table = next;
   17.44 +        return GUEST_TABLE_NORMAL_PAGE;
   17.45 +    }
   17.46 +    else
   17.47 +        return GUEST_TABLE_SUPER_PAGE;
   17.48  }
   17.49  
   17.50  static int
   17.51 -ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
   17.52 +ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
   17.53 +              unsigned int order, p2m_type_t p2mt)
   17.54  {
   17.55 -    ept_entry_t *table =
   17.56 -        map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   17.57 -    unsigned long gfn_remainder = gfn;
   17.58 +    ept_entry_t *table = NULL;
   17.59 +    unsigned long gfn_remainder = gfn, offset = 0;
   17.60      ept_entry_t *ept_entry = NULL;
   17.61      u32 index;
   17.62 -    int i, rv = 0;
   17.63 +    int i, rv = 0, ret = 0;
   17.64 +    int walk_level = order / EPT_TABLE_ORDER;
   17.65  
   17.66      /* Should check if gfn obeys GAW here */
   17.67  
   17.68 -    for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
   17.69 -        if ( !ept_next_level(d, 0, &table, &gfn_remainder,
   17.70 -                             i * EPT_TABLE_ORDER) )
   17.71 +    if (  order != 0 )
   17.72 +        if ( (gfn & ((1UL << order) - 1)) )
   17.73 +            return 1;
   17.74 +
   17.75 +    table = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
   17.76 +
   17.77 +    ASSERT(table != NULL);
   17.78 +
   17.79 +    for ( i = EPT_DEFAULT_GAW; i > walk_level; i-- )
   17.80 +    {
   17.81 +        ret = ept_next_level(d, 0, &table, &gfn_remainder,
   17.82 +          i * EPT_TABLE_ORDER);
   17.83 +        if ( !ret )
   17.84              goto out;
   17.85 +        else if ( ret == GUEST_TABLE_SUPER_PAGE )
   17.86 +            break;
   17.87 +    }
   17.88  
   17.89 -    index = gfn_remainder;
   17.90 +    index = gfn_remainder >> ( i ?  (i * EPT_TABLE_ORDER): order);
   17.91 +    walk_level = ( i ? ( i * EPT_TABLE_ORDER) : order) / EPT_TABLE_ORDER;
   17.92 +    offset = (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1)));
   17.93 +
   17.94      ept_entry = table + index;
   17.95  
   17.96      if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
   17.97 @@ -117,9 +142,20 @@ ept_set_entry(struct domain *d, unsigned
   17.98              d->arch.p2m->max_mapped_pfn = gfn;
   17.99  
  17.100          ept_entry->emt = EPT_DEFAULT_MT;
  17.101 -        ept_entry->sp_avail = 0;
  17.102 +        ept_entry->sp_avail = walk_level ? 1 : 0;
  17.103 +
  17.104 +        if ( ret == GUEST_TABLE_SUPER_PAGE )
  17.105 +        {
  17.106 +            ept_entry->mfn = mfn_x(mfn) - offset;
  17.107 +            if ( ept_entry->avail1 == p2m_ram_logdirty &&
  17.108 +              p2mt == p2m_ram_rw )
  17.109 +                for ( i = 0; i < 512; i++ )
  17.110 +                    paging_mark_dirty(d, mfn_x(mfn)-offset+i);
  17.111 +        }
  17.112 +        else
  17.113 +            ept_entry->mfn = mfn_x(mfn);
  17.114 +
  17.115          ept_entry->avail1 = p2mt;
  17.116 -        ept_entry->mfn = mfn_x(mfn);
  17.117          ept_entry->rsvd = 0;
  17.118          ept_entry->avail2 = 0;
  17.119          /* last step */
  17.120 @@ -132,14 +168,42 @@ ept_set_entry(struct domain *d, unsigned
  17.121      /* Success */
  17.122      rv = 1;
  17.123  
  17.124 - out:
  17.125 +out:
  17.126      unmap_domain_page(table);
  17.127  
  17.128      ept_sync_domain(d);
  17.129  
  17.130 +    /* Now the p2m table is not shared with vt-d page table */
  17.131 +
  17.132 +    if ( iommu_enabled && is_hvm_domain(d) )
  17.133 +    {
  17.134 +        if ( p2mt == p2m_ram_rw )
  17.135 +        {
  17.136 +            if ( ret == GUEST_TABLE_SUPER_PAGE )
  17.137 +            {
  17.138 +                for ( i = 0; i < 512; i++ )
  17.139 +                    iommu_map_page(d, gfn-offset+i, mfn_x(mfn)-offset+i);
  17.140 +            }
  17.141 +            else if ( ret )
  17.142 +                iommu_map_page(d, gfn, mfn_x(mfn));
  17.143 +        }
  17.144 +        else
  17.145 +        {
  17.146 +            if ( ret == GUEST_TABLE_SUPER_PAGE )
  17.147 +            {
  17.148 +                for ( i = 0; i < 512; i++ )
  17.149 +                    iommu_unmap_page(d, gfn-offset+i);
  17.150 +            }
  17.151 +            else if ( ret )
  17.152 +                iommu_unmap_page(d, gfn);
  17.153 +        }
  17.154 +    }
  17.155 +
  17.156 +#ifdef P2M_SHARE_WITH_VTD_PAGE_TABLE
  17.157      /* If p2m table is shared with vtd page-table. */
  17.158      if ( iommu_enabled && is_hvm_domain(d) && (p2mt == p2m_mmio_direct) )
  17.159          iommu_flush(d, gfn, (u64*)ept_entry);
  17.160 +#endif
  17.161  
  17.162      return rv;
  17.163  }
  17.164 @@ -152,7 +216,7 @@ static mfn_t ept_get_entry(struct domain
  17.165      unsigned long gfn_remainder = gfn;
  17.166      ept_entry_t *ept_entry;
  17.167      u32 index;
  17.168 -    int i;
  17.169 +    int i, ret=0;
  17.170      mfn_t mfn = _mfn(INVALID_MFN);
  17.171  
  17.172      *t = p2m_mmio_dm;
  17.173 @@ -164,17 +228,31 @@ static mfn_t ept_get_entry(struct domain
  17.174      /* Should check if gfn obeys GAW here. */
  17.175  
  17.176      for ( i = EPT_DEFAULT_GAW; i > 0; i-- )
  17.177 -        if ( !ept_next_level(d, 1, &table, &gfn_remainder,
  17.178 -                             i * EPT_TABLE_ORDER) )
  17.179 +    {
  17.180 +        ret = ept_next_level(d, 1, &table, &gfn_remainder,
  17.181 +                             i * EPT_TABLE_ORDER);
  17.182 +        if ( !ret )
  17.183              goto out;
  17.184 +        else if ( ret == GUEST_TABLE_SUPER_PAGE )
  17.185 +            break;
  17.186 +    }
  17.187  
  17.188 -    index = gfn_remainder;
  17.189 +    index = gfn_remainder >> ( i * EPT_TABLE_ORDER);
  17.190      ept_entry = table + index;
  17.191  
  17.192      if ( ept_entry->avail1 != p2m_invalid )
  17.193      {
  17.194          *t = ept_entry->avail1;
  17.195          mfn = _mfn(ept_entry->mfn);
  17.196 +        if ( i )
  17.197 +        {
  17.198 +            /* we may meet super pages, and to split into 4k pages
  17.199 +             * to emulate p2m table
  17.200 +             */
  17.201 +            unsigned long split_mfn = 
  17.202 +              mfn_x(mfn) + (gfn_remainder & ( ((1 << (i*EPT_TABLE_ORDER)) - 1 )));
  17.203 +            mfn = _mfn(split_mfn);
  17.204 +        }
  17.205      }
  17.206  
  17.207   out:
  17.208 @@ -205,33 +283,63 @@ static void ept_change_entry_type_global
  17.209      l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
  17.210      for (i4 = 0; i4 < EPT_PAGETABLE_ENTRIES; i4++ )
  17.211      {
  17.212 -        if ( !l4e[i4].epte || l4e[i4].sp_avail )
  17.213 +        if ( !l4e[i4].epte )
  17.214              continue;
  17.215 -        l3e = map_domain_page(l4e[i4].mfn);
  17.216 -        for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
  17.217 +        if ( !l4e[i4].sp_avail )
  17.218          {
  17.219 -            if ( !l3e[i3].epte || l3e[i3].sp_avail )
  17.220 -                continue;
  17.221 -            l2e = map_domain_page(l3e[i3].mfn);
  17.222 -            for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
  17.223 +            l3e = map_domain_page(l4e[i4].mfn);
  17.224 +            for ( i3 = 0; i3 < EPT_PAGETABLE_ENTRIES; i3++ )
  17.225              {
  17.226 -                if ( !l2e[i2].epte || l2e[i2].sp_avail )
  17.227 +                if ( !l3e[i3].epte )
  17.228                      continue;
  17.229 -                l1e = map_domain_page(l2e[i2].mfn);
  17.230 -                for ( i1  = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
  17.231 +                if ( !l3e[i3].sp_avail )
  17.232                  {
  17.233 -                    if ( !l1e[i1].epte )
  17.234 -                        continue;
  17.235 -                    if ( l1e[i1].avail1 != ot )
  17.236 +                    l2e = map_domain_page(l3e[i3].mfn);
  17.237 +                    for ( i2 = 0; i2 < EPT_PAGETABLE_ENTRIES; i2++ )
  17.238 +                    {
  17.239 +                        if ( !l2e[i2].epte )
  17.240 +                            continue;
  17.241 +                        if ( !l2e[i2].sp_avail )
  17.242 +                        {
  17.243 +                            l1e = map_domain_page(l2e[i2].mfn);
  17.244 +                            for ( i1  = 0; i1 < EPT_PAGETABLE_ENTRIES; i1++ )
  17.245 +                            {
  17.246 +                                if ( !l1e[i1].epte )
  17.247 +                                    continue;
  17.248 +                                if ( l1e[i1].avail1 != ot )
  17.249 +                                    continue;
  17.250 +                                l1e[i1].avail1 = nt;
  17.251 +                                ept_p2m_type_to_flags(l1e+i1, nt);
  17.252 +                            }
  17.253 +                            unmap_domain_page(l1e);
  17.254 +                        }
  17.255 +                        else
  17.256 +                        {
  17.257 +                            if ( l2e[i2].avail1 != ot )
  17.258 +                                continue;
  17.259 +                            l2e[i2].avail1 = nt;
  17.260 +                            ept_p2m_type_to_flags(l2e+i2, nt);
  17.261 +                        }
  17.262 +                    }
  17.263 +                    unmap_domain_page(l2e);
  17.264 +                }
  17.265 +                else
  17.266 +                {
  17.267 +                    if ( l3e[i3].avail1 != ot )
  17.268                          continue;
  17.269 -                    l1e[i1].avail1 = nt;
  17.270 -                    ept_p2m_type_to_flags(l1e+i1, nt);
  17.271 +                    l3e[i3].avail1 = nt;
  17.272 +                    ept_p2m_type_to_flags(l3e+i3, nt);
  17.273                  }
  17.274 -                unmap_domain_page(l1e);
  17.275              }
  17.276 -            unmap_domain_page(l2e);
  17.277 +            unmap_domain_page(l3e);
  17.278          }
  17.279 -        unmap_domain_page(l3e);
  17.280 +        else
  17.281 +        {
  17.282 +            if ( l4e[i4].avail1 != ot )
  17.283 +                continue;
  17.284 +            l4e[i4].avail1 = nt;
  17.285 +            ept_p2m_type_to_flags(l4e+i4, nt);
  17.286 +        }
  17.287      }
  17.288      unmap_domain_page(l4e);
  17.289  
    18.1 --- a/xen/arch/x86/mm/p2m.c	Thu May 15 16:23:56 2008 +0900
    18.2 +++ b/xen/arch/x86/mm/p2m.c	Fri May 16 21:59:38 2008 +0900
    18.3 @@ -151,9 +151,11 @@ p2m_next_level(struct domain *d, mfn_t *
    18.4                 unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
    18.5                 u32 max, unsigned long type)
    18.6  {
    18.7 +    l1_pgentry_t *l1_entry;
    18.8      l1_pgentry_t *p2m_entry;
    18.9      l1_pgentry_t new_entry;
   18.10      void *next;
   18.11 +    int i;
   18.12      ASSERT(d->arch.p2m->alloc_page);
   18.13  
   18.14      if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
   18.15 @@ -194,6 +196,44 @@ p2m_next_level(struct domain *d, mfn_t *
   18.16              break;
   18.17          }
   18.18      }
   18.19 +
   18.20 +    ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
   18.21 +
   18.22 +    /* split single large page into 4KB page in P2M table */
   18.23 +    if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
   18.24 +    {
   18.25 +        unsigned long flags, pfn;
   18.26 +        struct page_info *pg = d->arch.p2m->alloc_page(d);
   18.27 +        if ( pg == NULL )
   18.28 +            return 0;
   18.29 +        list_add_tail(&pg->list, &d->arch.p2m->pages);
   18.30 +        pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
   18.31 +        pg->count_info = 1;
   18.32 +        
   18.33 +        /* New splintered mappings inherit the flags of the old superpage, 
   18.34 +         * with a little reorganisation for the _PAGE_PSE_PAT bit. */
   18.35 +        flags = l1e_get_flags(*p2m_entry);
   18.36 +        pfn = l1e_get_pfn(*p2m_entry);
   18.37 +        if ( pfn & 1 )           /* ==> _PAGE_PSE_PAT was set */
   18.38 +            pfn -= 1;            /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
   18.39 +        else
   18.40 +            flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
   18.41 +        
   18.42 +        l1_entry = map_domain_page(mfn_x(page_to_mfn(pg)));
   18.43 +        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
   18.44 +        {
   18.45 +            new_entry = l1e_from_pfn(pfn + i, flags);
   18.46 +            paging_write_p2m_entry(d, gfn,
   18.47 +                                   l1_entry+i, *table_mfn, new_entry, 1);
   18.48 +        }
   18.49 +        unmap_domain_page(l1_entry);
   18.50 +        
   18.51 +        new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
   18.52 +                                 __PAGE_HYPERVISOR|_PAGE_USER);
   18.53 +        paging_write_p2m_entry(d, gfn,
   18.54 +                               p2m_entry, *table_mfn, new_entry, 2);
   18.55 +    }
   18.56 +
   18.57      *table_mfn = _mfn(l1e_get_pfn(*p2m_entry));
   18.58      next = map_domain_page(mfn_x(*table_mfn));
   18.59      unmap_domain_page(*table);
   18.60 @@ -204,7 +244,8 @@ p2m_next_level(struct domain *d, mfn_t *
   18.61  
   18.62  // Returns 0 on error (out of memory)
   18.63  static int
   18.64 -p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
   18.65 +p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
   18.66 +              unsigned int page_order, p2m_type_t p2mt)
   18.67  {
   18.68      // XXX -- this might be able to be faster iff current->domain == d
   18.69      mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
   18.70 @@ -212,6 +253,7 @@ p2m_set_entry(struct domain *d, unsigned
   18.71      unsigned long gfn_remainder = gfn;
   18.72      l1_pgentry_t *p2m_entry;
   18.73      l1_pgentry_t entry_content;
   18.74 +    l2_pgentry_t l2e_content;
   18.75      int rv=0;
   18.76  
   18.77  #if CONFIG_PAGING_LEVELS >= 4
   18.78 @@ -235,27 +277,54 @@ p2m_set_entry(struct domain *d, unsigned
   18.79                           PGT_l2_page_table) )
   18.80          goto out;
   18.81  
   18.82 -    if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
   18.83 -                         L2_PAGETABLE_SHIFT - PAGE_SHIFT,
   18.84 -                         L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
   18.85 -        goto out;
   18.86 +    if ( page_order == 0 )
   18.87 +    {
   18.88 +        if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn,
   18.89 +                             L2_PAGETABLE_SHIFT - PAGE_SHIFT,
   18.90 +                             L2_PAGETABLE_ENTRIES, PGT_l1_page_table) )
   18.91 +            goto out;
   18.92  
   18.93 -    p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
   18.94 -                               0, L1_PAGETABLE_ENTRIES);
   18.95 -    ASSERT(p2m_entry);
   18.96 +        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
   18.97 +                                   0, L1_PAGETABLE_ENTRIES);
   18.98 +        ASSERT(p2m_entry);
   18.99 +        
  18.100 +        if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
  18.101 +            entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
  18.102 +        else
  18.103 +            entry_content = l1e_empty();
  18.104 +        
  18.105 +        /* level 1 entry */
  18.106 +        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
  18.107 +    }
  18.108 +    else 
  18.109 +    {
  18.110 +        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
  18.111 +                                   L2_PAGETABLE_SHIFT - PAGE_SHIFT,
  18.112 +                                   L2_PAGETABLE_ENTRIES);
  18.113 +        ASSERT(p2m_entry);
  18.114 +        
  18.115 +        if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
  18.116 +             !(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
  18.117 +        {
  18.118 +            P2M_ERROR("configure P2M table 4KB L2 entry with large page\n");
  18.119 +            domain_crash(d);
  18.120 +            goto out;
  18.121 +        }
  18.122 +        
  18.123 +        if ( mfn_valid(mfn) )
  18.124 +            l2e_content = l2e_from_pfn(mfn_x(mfn),
  18.125 +                                       p2m_type_to_flags(p2mt) | _PAGE_PSE);
  18.126 +        else
  18.127 +            l2e_content = l2e_empty();
  18.128 +        
  18.129 +        entry_content.l1 = l2e_content.l2;
  18.130 +        paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 2);
  18.131 +    }
  18.132  
  18.133      /* Track the highest gfn for which we have ever had a valid mapping */
  18.134      if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
  18.135          d->arch.p2m->max_mapped_pfn = gfn;
  18.136  
  18.137 -    if ( mfn_valid(mfn) || (p2mt == p2m_mmio_direct) )
  18.138 -        entry_content = l1e_from_pfn(mfn_x(mfn), p2m_type_to_flags(p2mt));
  18.139 -    else
  18.140 -        entry_content = l1e_empty();
  18.141 -
  18.142 -    /* level 1 entry */
  18.143 -    paging_write_p2m_entry(d, gfn, p2m_entry, table_mfn, entry_content, 1);
  18.144 -
  18.145      if ( iommu_enabled && is_hvm_domain(d) )
  18.146      {
  18.147          if ( p2mt == p2m_ram_rw )
  18.148 @@ -335,6 +404,16 @@ p2m_gfn_to_mfn(struct domain *d, unsigne
  18.149          unmap_domain_page(l2e);
  18.150          return _mfn(INVALID_MFN);
  18.151      }
  18.152 +    else if ( (l2e_get_flags(*l2e) & _PAGE_PSE) )
  18.153 +    {
  18.154 +        mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
  18.155 +        *t = p2m_flags_to_type(l2e_get_flags(*l2e));
  18.156 +        unmap_domain_page(l2e);
  18.157 +        
  18.158 +        ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
  18.159 +        return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
  18.160 +    }
  18.161 +
  18.162      mfn = _mfn(l2e_get_pfn(*l2e));
  18.163      unmap_domain_page(l2e);
  18.164  
  18.165 @@ -358,6 +437,7 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
  18.166  {
  18.167      mfn_t mfn = _mfn(INVALID_MFN);
  18.168      p2m_type_t p2mt = p2m_mmio_dm;
  18.169 +    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
  18.170      /* XXX This is for compatibility with the old model, where anything not 
  18.171       * XXX marked as RAM was considered to be emulated MMIO space.
  18.172       * XXX Once we start explicitly registering MMIO regions in the p2m 
  18.173 @@ -366,26 +446,45 @@ static mfn_t p2m_gfn_to_mfn_current(unsi
  18.174      if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
  18.175      {
  18.176          l1_pgentry_t l1e = l1e_empty();
  18.177 +        l2_pgentry_t l2e = l2e_empty();
  18.178          int ret;
  18.179  
  18.180          ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
  18.181                 / sizeof(l1_pgentry_t));
  18.182  
  18.183 -        /* Need to __copy_from_user because the p2m is sparse and this
  18.184 -         * part might not exist */
  18.185 -        ret = __copy_from_user(&l1e,
  18.186 -                               &phys_to_machine_mapping[gfn],
  18.187 -                               sizeof(l1e));
  18.188 -
  18.189 -        if ( ret == 0 ) {
  18.190 -            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
  18.191 -            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
  18.192 +        ret = __copy_from_user(&l2e,
  18.193 +                               &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
  18.194 +                               sizeof(l2e));
  18.195 +        
  18.196 +        if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) && 
  18.197 +             (l2e_get_flags(l2e) & _PAGE_PSE) ) 
  18.198 +        {
  18.199 +            p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
  18.200 +            ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
  18.201              if ( p2m_is_valid(p2mt) )
  18.202 -                mfn = _mfn(l1e_get_pfn(l1e));
  18.203 -            else 
  18.204 -                /* XXX see above */
  18.205 +                mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
  18.206 +            else
  18.207                  p2mt = p2m_mmio_dm;
  18.208          }
  18.209 +        else
  18.210 +        {
  18.211 +        
  18.212 +            /* Need to __copy_from_user because the p2m is sparse and this
  18.213 +             * part might not exist */
  18.214 +            ret = __copy_from_user(&l1e,
  18.215 +                                   &phys_to_machine_mapping[gfn],
  18.216 +                                   sizeof(l1e));
  18.217 +            
  18.218 +            if ( ret == 0 ) {
  18.219 +                p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
  18.220 +                ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
  18.221 +                if ( p2m_is_valid(p2mt) )
  18.222 +                    mfn = _mfn(l1e_get_pfn(l1e));
  18.223 +                else 
  18.224 +                    /* XXX see above */
  18.225 +                    p2mt = p2m_mmio_dm;
  18.226 +            }
  18.227 +        }
  18.228      }
  18.229  
  18.230      *t = p2mt;
  18.231 @@ -430,9 +529,10 @@ void p2m_change_entry_type_global(struct
  18.232  }
  18.233  
  18.234  static inline
  18.235 -int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, p2m_type_t p2mt)
  18.236 +int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, 
  18.237 +                  unsigned int page_order, p2m_type_t p2mt)
  18.238  {
  18.239 -    return d->arch.p2m->set_entry(d, gfn, mfn, p2mt);
  18.240 +    return d->arch.p2m->set_entry(d, gfn, mfn, page_order, p2mt);
  18.241  }
  18.242  
  18.243  // Allocate a new p2m table for a domain.
  18.244 @@ -493,7 +593,8 @@ int p2m_alloc_table(struct domain *d,
  18.245      P2M_PRINTK("populating p2m table\n");
  18.246  
  18.247      /* Initialise physmap tables for slot zero. Other code assumes this. */
  18.248 -    if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), p2m_invalid) )
  18.249 +    if ( !set_p2m_entry(d, 0, _mfn(INVALID_MFN), 0,
  18.250 +                        p2m_invalid) )
  18.251          goto error;
  18.252  
  18.253      /* Copy all existing mappings from the page list and m2p */
  18.254 @@ -512,7 +613,7 @@ int p2m_alloc_table(struct domain *d,
  18.255              (gfn != 0x55555555L)
  18.256  #endif
  18.257               && gfn != INVALID_M2P_ENTRY
  18.258 -            && !set_p2m_entry(d, gfn, mfn, p2m_ram_rw) )
  18.259 +            && !set_p2m_entry(d, gfn, mfn, 0, p2m_ram_rw) )
  18.260              goto error;
  18.261      }
  18.262  
  18.263 @@ -688,6 +789,28 @@ static void audit_p2m(struct domain *d)
  18.264                          gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
  18.265                          continue;
  18.266                      }
  18.267 +                    
  18.268 +                    /* check for super page */
  18.269 +                    if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
  18.270 +                    {
  18.271 +                        mfn = l2e_get_pfn(l2e[i2]);
  18.272 +                        ASSERT(mfn_valid(_mfn(mfn)));
  18.273 +                        for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
  18.274 +                        {
  18.275 +                            m2pfn = get_gpfn_from_mfn(mfn+i1);
  18.276 +                            if ( m2pfn != (gfn + i) )
  18.277 +                            {
  18.278 +                                pmbad++;
  18.279 +                                P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
  18.280 +                                           " -> gfn %#lx\n", gfn+i, mfn+i,
  18.281 +                                           m2pfn);
  18.282 +                                BUG();
  18.283 +                            }
  18.284 +                        }
  18.285 +                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
  18.286 +                        continue;
  18.287 +                    }
  18.288 +
  18.289                      l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
  18.290  
  18.291                      for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
  18.292 @@ -737,32 +860,38 @@ static void audit_p2m(struct domain *d)
  18.293  
  18.294  
  18.295  static void
  18.296 -p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn)
  18.297 +p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn,
  18.298 +                unsigned int page_order)
  18.299  {
  18.300 +    unsigned long i;
  18.301 +
  18.302      if ( !paging_mode_translate(d) )
  18.303          return;
  18.304 +
  18.305      P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
  18.306  
  18.307 -    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid);
  18.308 -    set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
  18.309 +    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
  18.310 +    for ( i = 0; i < (1UL << page_order); i++ )
  18.311 +        set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
  18.312  }
  18.313  
  18.314  void
  18.315  guest_physmap_remove_page(struct domain *d, unsigned long gfn,
  18.316 -                          unsigned long mfn)
  18.317 +                          unsigned long mfn, unsigned int page_order)
  18.318  {
  18.319      p2m_lock(d->arch.p2m);
  18.320      audit_p2m(d);
  18.321 -    p2m_remove_page(d, gfn, mfn);
  18.322 +    p2m_remove_page(d, gfn, mfn, page_order);
  18.323      audit_p2m(d);
  18.324      p2m_unlock(d->arch.p2m);
  18.325  }
  18.326  
  18.327  int
  18.328  guest_physmap_add_entry(struct domain *d, unsigned long gfn,
  18.329 -                        unsigned long mfn, p2m_type_t t)
  18.330 +                        unsigned long mfn, unsigned int page_order, 
  18.331 +                        p2m_type_t t)
  18.332  {
  18.333 -    unsigned long ogfn;
  18.334 +    unsigned long i, ogfn;
  18.335      p2m_type_t ot;
  18.336      mfn_t omfn;
  18.337      int rc = 0;
  18.338 @@ -795,7 +924,8 @@ guest_physmap_add_entry(struct domain *d
  18.339      if ( p2m_is_ram(ot) )
  18.340      {
  18.341          ASSERT(mfn_valid(omfn));
  18.342 -        set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
  18.343 +        for ( i = 0; i < (1UL << page_order); i++ )
  18.344 +            set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
  18.345      }
  18.346  
  18.347      ogfn = mfn_to_gfn(d, _mfn(mfn));
  18.348 @@ -818,21 +948,23 @@ guest_physmap_add_entry(struct domain *d
  18.349              P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
  18.350                        ogfn , mfn_x(omfn));
  18.351              if ( mfn_x(omfn) == mfn )
  18.352 -                p2m_remove_page(d, ogfn, mfn);
  18.353 +                p2m_remove_page(d, ogfn, mfn, page_order);
  18.354          }
  18.355      }
  18.356  
  18.357      if ( mfn_valid(_mfn(mfn)) ) 
  18.358      {
  18.359 -        if ( !set_p2m_entry(d, gfn, _mfn(mfn), t) )
  18.360 +        if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
  18.361              rc = -EINVAL;
  18.362 -        set_gpfn_from_mfn(mfn, gfn);
  18.363 +        for ( i = 0; i < (1UL << page_order); i++ )
  18.364 +            set_gpfn_from_mfn(mfn+i, gfn+i);
  18.365      }
  18.366      else
  18.367      {
  18.368          gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
  18.369                   gfn, mfn);
  18.370 -        if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), p2m_invalid) )
  18.371 +        if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, 
  18.372 +                            p2m_invalid) )
  18.373              rc = -EINVAL;
  18.374      }
  18.375  
  18.376 @@ -851,7 +983,7 @@ void p2m_change_type_global(struct domai
  18.377      l1_pgentry_t l1e_content;
  18.378      l1_pgentry_t *l1e;
  18.379      l2_pgentry_t *l2e;
  18.380 -    mfn_t l1mfn;
  18.381 +    mfn_t l1mfn, l2mfn;
  18.382      int i1, i2;
  18.383      l3_pgentry_t *l3e;
  18.384      int i3;
  18.385 @@ -891,6 +1023,7 @@ void p2m_change_type_global(struct domai
  18.386              {
  18.387                  continue;
  18.388              }
  18.389 +            l2mfn = _mfn(l3e_get_pfn(l3e[i3]));
  18.390              l2e = map_domain_page(l3e_get_pfn(l3e[i3]));
  18.391              for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
  18.392              {
  18.393 @@ -899,6 +1032,20 @@ void p2m_change_type_global(struct domai
  18.394                      continue;
  18.395                  }
  18.396  
  18.397 +                if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE) )
  18.398 +                {
  18.399 +                    flags = l2e_get_flags(l2e[i2]);
  18.400 +                    if ( p2m_flags_to_type(flags) != ot )
  18.401 +                        continue;
  18.402 +                    mfn = l2e_get_pfn(l2e[i2]);
  18.403 +                    gfn = get_gpfn_from_mfn(mfn);
  18.404 +                    flags = p2m_flags_to_type(nt);
  18.405 +                    l1e_content = l1e_from_pfn(mfn, flags | _PAGE_PSE);
  18.406 +                    paging_write_p2m_entry(d, gfn, (l1_pgentry_t *)&l2e[i2],
  18.407 +                                           l2mfn, l1e_content, 2);
  18.408 +                    continue;
  18.409 +                }
  18.410 +
  18.411                  l1mfn = _mfn(l2e_get_pfn(l2e[i2]));
  18.412                  l1e = map_domain_page(mfn_x(l1mfn));
  18.413  
  18.414 @@ -944,7 +1091,7 @@ p2m_type_t p2m_change_type(struct domain
  18.415  
  18.416      mfn = gfn_to_mfn(d, gfn, &pt);
  18.417      if ( pt == ot )
  18.418 -        set_p2m_entry(d, gfn, mfn, nt);
  18.419 +        set_p2m_entry(d, gfn, mfn, 0, nt);
  18.420  
  18.421      p2m_unlock(d->arch.p2m);
  18.422  
  18.423 @@ -968,7 +1115,7 @@ set_mmio_p2m_entry(struct domain *d, uns
  18.424          set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
  18.425      }
  18.426  
  18.427 -    rc = set_p2m_entry(d, gfn, mfn, p2m_mmio_direct);
  18.428 +    rc = set_p2m_entry(d, gfn, mfn, 0, p2m_mmio_direct);
  18.429      if ( 0 == rc )
  18.430          gdprintk(XENLOG_ERR,
  18.431              "set_mmio_p2m_entry: set_p2m_entry failed! mfn=%08lx\n",
  18.432 @@ -992,7 +1139,7 @@ clear_mmio_p2m_entry(struct domain *d, u
  18.433              "clear_mmio_p2m_entry: gfn_to_mfn failed! gfn=%08lx\n", gfn);
  18.434          return 0;
  18.435      }
  18.436 -    rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
  18.437 +    rc = set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0, 0);
  18.438  
  18.439      return rc;
  18.440  }
    19.1 --- a/xen/arch/x86/platform_hypercall.c	Thu May 15 16:23:56 2008 +0900
    19.2 +++ b/xen/arch/x86/platform_hypercall.c	Fri May 16 21:59:38 2008 +0900
    19.3 @@ -403,7 +403,10 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
    19.4  
    19.5              if ( xenpxpt->flags == ( XEN_PX_PCT | XEN_PX_PSS | 
    19.6                  XEN_PX_PSD | XEN_PX_PPC ) )
    19.7 +            {
    19.8 +                pxpt->init =1;
    19.9                  cpu_count++;
   19.10 +            }
   19.11              if ( cpu_count == num_online_cpus() )
   19.12                  ret = acpi_cpufreq_init();
   19.13              break;
    20.1 --- a/xen/common/grant_table.c	Thu May 15 16:23:56 2008 +0900
    20.2 +++ b/xen/common/grant_table.c	Fri May 16 21:59:38 2008 +0900
    20.3 @@ -1159,7 +1159,7 @@ gnttab_transfer(
    20.4          spin_lock(&e->grant_table->lock);
    20.5  
    20.6          sha = &shared_entry(e->grant_table, gop.ref);
    20.7 -        guest_physmap_add_page(e, sha->frame, mfn);
    20.8 +        guest_physmap_add_page(e, sha->frame, mfn, 0);
    20.9          sha->frame = mfn;
   20.10          wmb();
   20.11          sha->flags |= GTF_transfer_completed;
    21.1 --- a/xen/common/memory.c	Thu May 15 16:23:56 2008 +0900
    21.2 +++ b/xen/common/memory.c	Fri May 16 21:59:38 2008 +0900
    21.3 @@ -127,9 +127,7 @@ static void populate_physmap(struct memo
    21.4  
    21.5          if ( unlikely(paging_mode_translate(d)) )
    21.6          {
    21.7 -            for ( j = 0; j < (1 << a->extent_order); j++ )
    21.8 -                if ( guest_physmap_add_page(d, gpfn + j, mfn + j) )
    21.9 -                    goto out;
   21.10 +            guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
   21.11          }
   21.12          else
   21.13          {
   21.14 @@ -172,7 +170,7 @@ int guest_remove_page(struct domain *d, 
   21.15      if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
   21.16          put_page(page);
   21.17  
   21.18 -    guest_physmap_remove_page(d, gmfn, mfn);
   21.19 +    guest_physmap_remove_page(d, gmfn, mfn, 0);
   21.20  
   21.21      put_page(page);
   21.22  
   21.23 @@ -419,7 +417,7 @@ static long memory_exchange(XEN_GUEST_HA
   21.24              if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
   21.25                  BUG();
   21.26              mfn = page_to_mfn(page);
   21.27 -            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn);
   21.28 +            guest_physmap_remove_page(d, mfn_to_gmfn(d, mfn), mfn, 0);
   21.29              put_page(page);
   21.30          }
   21.31  
   21.32 @@ -440,9 +438,7 @@ static long memory_exchange(XEN_GUEST_HA
   21.33              mfn = page_to_mfn(page);
   21.34              if ( unlikely(paging_mode_translate(d)) )
   21.35              {
   21.36 -                /* Ignore failure here. There's nothing we can do. */
   21.37 -                for ( k = 0; k < (1UL << exch.out.extent_order); k++ )
   21.38 -                    (void)guest_physmap_add_page(d, gpfn + k, mfn + k);
   21.39 +                guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
   21.40              }
   21.41              else
   21.42              {
    22.1 --- a/xen/common/sysctl.c	Thu May 15 16:23:56 2008 +0900
    22.2 +++ b/xen/common/sysctl.c	Fri May 16 21:59:38 2008 +0900
    22.3 @@ -25,6 +25,8 @@
    22.4  #include <xen/nodemask.h>
    22.5  #include <xsm/xsm.h>
    22.6  
    22.7 +extern int do_get_pm_info(struct xen_sysctl_get_pmstat *op);
    22.8 +
    22.9  extern long arch_do_sysctl(
   22.10      struct xen_sysctl *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl);
   22.11  
   22.12 @@ -196,6 +198,20 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
   22.13      }
   22.14      break;
   22.15  
   22.16 +    case XEN_SYSCTL_get_pmstat:
   22.17 +    {
   22.18 +        ret = do_get_pm_info(&op->u.get_pmstat);
   22.19 +        if ( ret )
   22.20 +            break;
   22.21 +
   22.22 +        if ( copy_to_guest(u_sysctl, op, 1) )
   22.23 +        {
   22.24 +            ret = -EFAULT;
   22.25 +            break;
   22.26 +        }
   22.27 +    }
   22.28 +    break;
   22.29 +
   22.30      default:
   22.31          ret = arch_do_sysctl(op, u_sysctl);
   22.32          break;
    23.1 --- a/xen/include/acpi/cpufreq/processor_perf.h	Thu May 15 16:23:56 2008 +0900
    23.2 +++ b/xen/include/acpi/cpufreq/processor_perf.h	Fri May 16 21:59:38 2008 +0900
    23.3 @@ -2,9 +2,13 @@
    23.4  #define __XEN_PROCESSOR_PM_H__
    23.5  
    23.6  #include <public/platform.h>
    23.7 +#include <public/sysctl.h>
    23.8  
    23.9  int get_cpu_id(u8);
   23.10  int acpi_cpufreq_init(void);
   23.11 +void px_statistic_update(cpumask_t, uint8_t, uint8_t);
   23.12 +int  px_statistic_init(int);
   23.13 +void px_statistic_reset(int);
   23.14  
   23.15  struct processor_performance {
   23.16      uint32_t state;
   23.17 @@ -16,15 +20,32 @@ struct processor_performance {
   23.18      struct xen_psd_package domain_info;
   23.19      cpumask_t shared_cpu_map;
   23.20      uint32_t shared_type;
   23.21 +
   23.22 +    uint32_t init;
   23.23  };
   23.24  
   23.25  struct processor_pminfo {
   23.26      uint32_t acpi_id;
   23.27      uint32_t id;
   23.28 -    uint32_t flag;
   23.29      struct processor_performance    perf;
   23.30  };
   23.31  
   23.32  extern struct processor_pminfo processor_pminfo[NR_CPUS];
   23.33  
   23.34 +struct px_stat {
   23.35 +    uint8_t total;        /* total Px states */
   23.36 +    uint8_t usable;       /* usable Px states */
   23.37 +    uint8_t last;         /* last Px state */
   23.38 +    uint8_t cur;          /* current Px state */
   23.39 +    uint64_t *trans_pt;   /* Px transition table */
   23.40 +    pm_px_val_t *pt;
   23.41 +};
   23.42 +
   23.43 +struct pm_px {
   23.44 +    struct px_stat u;
   23.45 +    uint64_t prev_state_wall;
   23.46 +};
   23.47 +
   23.48 +extern struct pm_px px_statistic_data[NR_CPUS];
   23.49 +
   23.50  #endif /* __XEN_PROCESSOR_PM_H__ */
    24.1 --- a/xen/include/asm-ia64/grant_table.h	Thu May 15 16:23:56 2008 +0900
    24.2 +++ b/xen/include/asm-ia64/grant_table.h	Fri May 16 21:59:38 2008 +0900
    24.3 @@ -13,7 +13,7 @@ int create_grant_host_mapping(unsigned l
    24.4  int replace_grant_host_mapping(unsigned long gpaddr, unsigned long mfn, unsigned long new_gpaddr, unsigned int flags);
    24.5  
    24.6  // for grant transfer
    24.7 -int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
    24.8 +int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn, unsigned int page_order);
    24.9  
   24.10  /* XXX
   24.11   * somewhere appropriate
    25.1 --- a/xen/include/asm-ia64/shadow.h	Thu May 15 16:23:56 2008 +0900
    25.2 +++ b/xen/include/asm-ia64/shadow.h	Fri May 16 21:59:38 2008 +0900
    25.3 @@ -40,8 +40,10 @@
    25.4   * Utilities to change relationship of gpfn->mfn for designated domain,
    25.5   * which is required by gnttab transfer, balloon, device model and etc.
    25.6   */
    25.7 -int guest_physmap_add_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
    25.8 -void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, unsigned long mfn);
    25.9 +int guest_physmap_add_page(struct domain *d, unsigned long gpfn, 
   25.10 +                           unsigned long mfn, unsigned int page_order);
   25.11 +void guest_physmap_remove_page(struct domain *d, unsigned long gpfn, 
   25.12 +                               unsigned long mfn, unsigned int page_order);
   25.13  
   25.14  static inline int
   25.15  shadow_mode_enabled(struct domain *d)
    26.1 --- a/xen/include/asm-x86/p2m.h	Thu May 15 16:23:56 2008 +0900
    26.2 +++ b/xen/include/asm-x86/p2m.h	Fri May 16 21:59:38 2008 +0900
    26.3 @@ -102,7 +102,8 @@ struct p2m_domain {
    26.4      void               (*free_page   )(struct domain *d,
    26.5                                         struct page_info *pg);
    26.6      int                (*set_entry   )(struct domain *d, unsigned long gfn,
    26.7 -                                       mfn_t mfn, p2m_type_t p2mt);
    26.8 +                                       mfn_t mfn, unsigned int page_order,
    26.9 +                                       p2m_type_t p2mt);
   26.10      mfn_t              (*get_entry   )(struct domain *d, unsigned long gfn,
   26.11                                         p2m_type_t *p2mt);
   26.12      mfn_t              (*get_entry_current)(unsigned long gfn,
   26.13 @@ -203,21 +204,23 @@ void p2m_final_teardown(struct domain *d
   26.14  
   26.15  /* Add a page to a domain's p2m table */
   26.16  int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
   26.17 -                             unsigned long mfn, p2m_type_t t);
   26.18 +                            unsigned long mfn, unsigned int page_order, 
   26.19 +                            p2m_type_t t);
   26.20  
   26.21  /* Untyped version for RAM only, for compatibility 
   26.22   *
   26.23   * Return 0 for success
   26.24   */
   26.25  static inline int guest_physmap_add_page(struct domain *d, unsigned long gfn,
   26.26 -                                         unsigned long mfn)
   26.27 +                                         unsigned long mfn,
   26.28 +                                         unsigned int page_order)
   26.29  {
   26.30 -    return guest_physmap_add_entry(d, gfn, mfn, p2m_ram_rw);
   26.31 +    return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
   26.32  }
   26.33  
   26.34  /* Remove a page from a domain's p2m table */
   26.35  void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
   26.36 -                               unsigned long mfn);
   26.37 +                               unsigned long mfn, unsigned int page_order);
   26.38  
   26.39  /* Change types across all p2m entries in a domain */
   26.40  void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
    27.1 --- a/xen/include/public/sysctl.h	Thu May 15 16:23:56 2008 +0900
    27.2 +++ b/xen/include/public/sysctl.h	Fri May 16 21:59:38 2008 +0900
    27.3 @@ -212,7 +212,41 @@ struct xen_sysctl_availheap {
    27.4  };
    27.5  typedef struct xen_sysctl_availheap xen_sysctl_availheap_t;
    27.6  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t);
    27.7 - 
    27.8 +
    27.9 +#define XEN_SYSCTL_get_pmstat        10
   27.10 +struct pm_px_val {
   27.11 +    uint64_aligned_t freq;        /* Px core frequency */
   27.12 +    uint64_aligned_t residency;   /* Px residency time */
   27.13 +    uint64_aligned_t count;       /* Px transition count */
   27.14 +};
   27.15 +typedef struct pm_px_val pm_px_val_t;
   27.16 +DEFINE_XEN_GUEST_HANDLE(pm_px_val_t);
   27.17 +
   27.18 +struct pm_px_stat {
   27.19 +    uint8_t total;        /* total Px states */
   27.20 +    uint8_t usable;       /* usable Px states */
   27.21 +    uint8_t last;         /* last Px state */
   27.22 +    uint8_t cur;          /* current Px state */
   27.23 +    XEN_GUEST_HANDLE_64(uint64) trans_pt;   /* Px transition table */
   27.24 +    XEN_GUEST_HANDLE_64(pm_px_val_t) pt;
   27.25 +};
   27.26 +typedef struct pm_px_stat pm_px_stat_t;
   27.27 +DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t);
   27.28 +
   27.29 +struct xen_sysctl_get_pmstat {
   27.30 +#define PMSTAT_get_max_px   0x11
   27.31 +#define PMSTAT_get_pxstat   0x12
   27.32 +#define PMSTAT_reset_pxstat 0x13
   27.33 +    uint32_t type;
   27.34 +    uint32_t cpuid;
   27.35 +    union {
   27.36 +        struct pm_px_stat getpx;
   27.37 +        /* other struct for cx, tx, etc */
   27.38 +    } u;
   27.39 +};
   27.40 +typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t;
   27.41 +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t);
   27.42 +
   27.43  struct xen_sysctl {
   27.44      uint32_t cmd;
   27.45      uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
   27.46 @@ -226,6 +260,7 @@ struct xen_sysctl {
   27.47          struct xen_sysctl_debug_keys        debug_keys;
   27.48          struct xen_sysctl_getcpuinfo        getcpuinfo;
   27.49          struct xen_sysctl_availheap         availheap;
   27.50 +        struct xen_sysctl_get_pmstat        get_pmstat;
   27.51          uint8_t                             pad[128];
   27.52      } u;
   27.53  };
    28.1 --- a/xen/include/xen/paging.h	Thu May 15 16:23:56 2008 +0900
    28.2 +++ b/xen/include/xen/paging.h	Fri May 16 21:59:38 2008 +0900
    28.3 @@ -18,8 +18,8 @@
    28.4  #else
    28.5  
    28.6  #define paging_mode_translate(d)              (0)
    28.7 -#define guest_physmap_add_page(d, p, m)       (0)
    28.8 -#define guest_physmap_remove_page(d, p, m)    ((void)0)
    28.9 +#define guest_physmap_add_page(d, p, m, o)    (0)
   28.10 +#define guest_physmap_remove_page(d, p, m, o) ((void)0)
   28.11  
   28.12  #endif
   28.13