ia64/xen-unstable

changeset 19289:dd489125a2e7

Page offline support on the Xen side

This patch adds support for offlining a page. The basic idea is: if a
page is currently assigned, it is marked offline-pending and moved out
of the buddy allocator when it is eventually freed; if a page is already
free, it is moved out of the buddy allocator directly.

One thing to note after this change: page->count_info is no longer
always 0 (in particular for shadow pages), since the PGC_offlining bit
may be set.

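To make the described transition concrete, here is a minimal, standalone
sketch (plain C; the SPGC_* flags and free_transition() are simplified
stand-ins invented for illustration, not the real PGC_* definitions in
the diff below) of what free_heap_pages() does to count_info when an
offline-pending page is finally freed:

    #include <stdio.h>

    /* Simplified stand-ins for the real PGC_* flags in asm-x86/mm.h. */
    #define SPGC_offlining (1UL << 0) /* offline requested, page assigned */
    #define SPGC_offlined  (1UL << 1) /* page pulled out of the buddy     */
    #define SPGC_broken    (1UL << 2) /* page has an uncorrectable error  */

    /*
     * Mirrors the transition in free_heap_pages(): every bit except the
     * offline/broken state is cleared, and a pending request (offlining)
     * becomes a completed one (offlined). The tainted flag tells the
     * caller to pull the page out of the buddy afterwards, which the
     * real code does via reserve_offlined_page().
     */
    static unsigned long free_transition(unsigned long count_info,
                                         int *tainted)
    {
        count_info &= SPGC_offlining | SPGC_broken;
        if ( count_info & SPGC_offlining )
        {
            count_info &= ~SPGC_offlining;
            count_info |= SPGC_offlined;
            *tainted = 1;
        }
        return count_info;
    }

    int main(void)
    {
        int tainted = 0;
        /* An assigned page with an offline request and a broken marker. */
        unsigned long ci = SPGC_offlining | SPGC_broken;

        ci = free_transition(ci, &tainted);
        printf("count_info=%#lx tainted=%d\n", ci, tainted);
        /* Prints count_info=0x6 (offlined | broken), tainted=1. */
        return 0;
    }
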
Signed-off-by: Wang, Shane <shane.wang@intel.com>
Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Mar 06 19:18:39 2009 +0000 (2009-03-06)
parents f1080b20cd15
children 5a981686bbf8
files xen/common/page_alloc.c xen/common/sysctl.c xen/include/asm-x86/mm.h xen/include/public/sysctl.h xen/include/public/xen.h xen/include/xen/mm.h
line diff
     1.1 --- a/xen/common/page_alloc.c	Fri Mar 06 19:14:50 2009 +0000
     1.2 +++ b/xen/common/page_alloc.c	Fri Mar 06 19:18:39 2009 +0000
     1.3 @@ -35,6 +35,7 @@
     1.4  #include <xen/perfc.h>
     1.5  #include <xen/numa.h>
     1.6  #include <xen/nodemask.h>
     1.7 +#include <public/sysctl.h>
     1.8  #include <asm/page.h>
     1.9  #include <asm/numa.h>
    1.10  #include <asm/flushtlb.h>
    1.11 @@ -74,6 +75,11 @@ static DEFINE_SPINLOCK(page_scrub_lock);
    1.12  PAGE_LIST_HEAD(page_scrub_list);
    1.13  static unsigned long scrub_pages;
    1.14  
    1.15 +/* Offlined page list, protected by heap_lock */
    1.16 +PAGE_LIST_HEAD(page_offlined_list);
    1.17 +
    1.18 +/* Broken page list, protected by heap_lock */
    1.19 +PAGE_LIST_HEAD(page_broken_list);
    1.20  /*********************
    1.21   * ALLOCATION BITMAP
    1.22   *  One bit per page of memory. Bit set => page is allocated.
    1.23 @@ -421,12 +427,92 @@ static struct page_info *alloc_heap_page
    1.24      return pg;
    1.25  }
    1.26  
    1.27 +/*
    1.28 + * Remove any offlined pages from the buddy pointed to by head
    1.29 + */
    1.30 +static int reserve_offlined_page(struct page_info *head)
    1.31 +{
    1.32 +    unsigned int node = phys_to_nid(page_to_maddr(head));
    1.33 +    int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
    1.34 +    struct page_info *cur_head;
    1.35 +    int cur_order;
    1.36 +
    1.37 +    ASSERT(spin_is_locked(&heap_lock));
    1.38 +
    1.39 +    cur_head = head;
    1.40 +
    1.41 +    page_list_del(head, &heap(node, zone, head_order));
    1.42 +
    1.43 +    while ( cur_head < (head + (1 << head_order)) )
    1.44 +    {
    1.45 +        struct page_info *pg;
    1.46 +        int next_order;
    1.47 +
    1.48 +        if (test_bit(_PGC_offlined, &cur_head->count_info))
    1.49 +        {
    1.50 +            cur_head++;
    1.51 +            continue;
    1.52 +        }
    1.53 +
    1.54 +        next_order = cur_order = 0;
    1.55 +
    1.56 +        while (cur_order < head_order)
    1.57 +        {
    1.58 +            next_order = cur_order + 1;
    1.59 +
    1.60 +            if ( (cur_head + (1 << next_order)) >= (head + ( 1 << head_order)))
    1.61 +                goto merge;
    1.62 +
    1.63 +            for (i = (1 << cur_order), pg = cur_head + (1 << cur_order);
    1.64 +              i < (1 << next_order);
    1.65 +              i++, pg++)
    1.66 +                if (test_bit(_PGC_offlined, &pg->count_info))
    1.67 +                    break;
    1.68 +            if (i == ( 1 << next_order))
    1.69 +            {
    1.70 +                cur_order = next_order;
    1.71 +                continue;
    1.72 +            }
    1.73 +            else
    1.74 +            {
    1.75 +                /*
    1.76 +                 * We don't need to consider merging outside of head_order
    1.77 +                 */
    1.78 +merge:
    1.79 +                page_list_add_tail(cur_head, &heap(node, zone, cur_order));
    1.80 +                PFN_ORDER(cur_head) = cur_order;
    1.81 +                cur_head += (1 << cur_order);
    1.82 +                break;
    1.83 +            }
    1.84 +        }
    1.85 +    }
    1.86 +
    1.87 +    for (cur_head = head; cur_head < head + ( 1UL << head_order); cur_head++)
    1.88 +    {
    1.89 +        if (!test_bit(_PGC_offlined, &cur_head->count_info))
    1.90 +            continue;
    1.91 +
    1.92 +        avail[node][zone]--;
    1.93 +
    1.94 +        map_alloc(page_to_mfn(cur_head), 1);
    1.95 +
    1.96 +        if (test_bit(_PGC_broken, &cur_head->count_info))
    1.97 +            page_list_add_tail(cur_head, &page_broken_list);
    1.98 +        else
    1.99 +            page_list_add_tail(cur_head, &page_offlined_list);
   1.100 +
   1.101 +        count++;
   1.102 +    }
   1.103 +
   1.104 +    return count;
   1.105 +}
   1.106 +
   1.107  /* Free 2^@order set of pages. */
   1.108  static void free_heap_pages(
   1.109      struct page_info *pg, unsigned int order)
   1.110  {
   1.111      unsigned long mask;
   1.112 -    unsigned int i, node = phys_to_nid(page_to_maddr(pg));
   1.113 +    unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
   1.114      unsigned int zone = page_to_zone(pg);
   1.115  
   1.116      ASSERT(order <= MAX_ORDER);
   1.117 @@ -446,7 +532,14 @@ static void free_heap_pages(
   1.118           *     in its pseudophysical address space).
   1.119           * In all the above cases there can be no guest mappings of this page.
   1.120           */
   1.121 -        pg[i].count_info = 0;
   1.122 +        ASSERT(!(pg[i].count_info & PGC_offlined));
   1.123 +        pg[i].count_info &= PGC_offlining | PGC_broken;
   1.124 +        if (pg[i].count_info & PGC_offlining)
   1.125 +        {
   1.126 +            pg[i].count_info &= ~PGC_offlining;
   1.127 +            pg[i].count_info |= PGC_offlined;
   1.128 +            tainted = 1;
   1.129 +        }
   1.130  
   1.131          /* If a page has no owner it will need no safety TLB flush. */
   1.132          pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
   1.133 @@ -481,7 +574,7 @@ static void free_heap_pages(
   1.134                  break;
   1.135              page_list_del(pg + mask, &heap(node, zone, order));
   1.136          }
   1.137 -        
   1.138 +
   1.139          order++;
   1.140  
   1.141          /* After merging, pg should remain in the same node. */
   1.142 @@ -491,9 +584,251 @@ static void free_heap_pages(
   1.143      PFN_ORDER(pg) = order;
   1.144      page_list_add_tail(pg, &heap(node, zone, order));
   1.145  
   1.146 +    if (tainted)
   1.147 +        reserve_offlined_page(pg);
   1.148 +
   1.149      spin_unlock(&heap_lock);
   1.150  }
   1.151  
   1.152 +
   1.153 +/*
   1.154 + * A page may be in one of the following states:
   1.155 + * free and online; free and offlined; free and offlined and broken;
   1.156 + * assigned and online; assigned and offlining; assigned and offlining and broken
   1.157 + *
   1.158 + * The following rules apply to page offlining:
   1.159 + * Once a page is broken, it can never be assigned again
   1.160 + * A page will be marked offlined only when it is free
   1.161 + * Returns the original count_info
   1.162 + *
   1.163 + */
   1.164 +static unsigned long mark_page_offline(struct page_info *pg, int broken)
   1.165 +{
   1.166 +    unsigned long nx, x, y = pg->count_info;
   1.167 +
   1.168 +    ASSERT(page_is_ram_type(page_to_mfn(pg), RAM_TYPE_CONVENTIONAL));
   1.169 +    /*
   1.170 +     * The caller guarantees the page will not be reassigned during this process
   1.171 +     */
   1.172 +    ASSERT(spin_is_locked(&heap_lock));
   1.173 +
   1.174 +    do {
   1.175 +        nx = x = y;
   1.176 +
   1.177 +        if ( ((x & PGC_offlined_broken) == PGC_offlined_broken) )
   1.178 +            return y;
   1.179 +        /* PGC_offlined means the page was free and is already offlined */
   1.180 +        if (x & PGC_offlined)
   1.181 +        {
   1.182 +            if (broken && !(nx & PGC_broken))
   1.183 +                nx |= PGC_broken;
   1.184 +            else
   1.185 +                return y;
   1.186 +        }
   1.187 +        /* The page is allocated but not yet offlined: mark it offline-pending */
   1.188 +        else if ( allocated_in_map(page_to_mfn(pg)) )
   1.189 +            nx |= PGC_offlining;
   1.190 +        else
   1.191 +            nx |= PGC_offlined;
   1.192 +
   1.193 +        if (broken)
   1.194 +            nx |= PGC_broken;
   1.195 +    } while ( (y = cmpxchg(&pg->count_info, x, nx)) != x );
   1.196 +
   1.197 +    return y;
   1.198 +}
   1.199 +
   1.200 +static int reserve_heap_page(struct page_info *pg)
   1.201 +{
   1.202 +    struct page_info *head = NULL;
   1.203 +    unsigned int i, node = phys_to_nid(page_to_maddr(pg));
   1.204 +    unsigned int zone = page_to_zone(pg);
   1.205 +
   1.206 +    /* Find the free buddy head that contains pg */
   1.207 +    for ( i = 0; i <= MAX_ORDER; i++ )
   1.208 +    {
   1.209 +        struct page_info *tmp;
   1.210 +
   1.211 +        if ( page_list_empty(&heap(node, zone, i)) )
   1.212 +            continue;
   1.213 +
   1.214 +        page_list_for_each_safe(head, tmp, &heap(node, zone, i))
   1.215 +        {
   1.216 +            if ( (head <= pg) &&
   1.217 +                 (head + (1UL << i) > pg) )
   1.218 +                return reserve_offlined_page(head);
   1.219 +        }
   1.220 +    }
   1.221 +
   1.222 +    return -EINVAL;
   1.223 +
   1.224 +}
   1.225 +
   1.226 +/*
   1.227 + * offline one page
   1.228 + */
   1.229 +int offline_page(unsigned long mfn, int broken, uint32_t *status)
   1.230 +{
   1.231 +    unsigned long old_info = 0;
   1.232 +    struct domain *owner;
   1.233 +    int ret = 0;
   1.234 +    struct page_info *pg;
   1.235 +
   1.236 +    if ( mfn >= max_page )
   1.237 +    {
   1.238 +        dprintk(XENLOG_WARNING,
   1.239 +                "try to offline page out of range %lx\n", mfn);
   1.240 +        return -EINVAL;
   1.241 +    }
   1.242 +
   1.243 +    *status = 0;
   1.244 +    pg = mfn_to_page(mfn);
   1.245 +
   1.246 +
   1.247 +#if defined(__x86_64__)
   1.248 +    /* Xen's txt mfn on x86_64 is reserved in the e820 map */
   1.249 +    if ( is_xen_fixed_mfn(mfn) )
   1.250 +#elif defined(__i386__)
   1.251 +    if ( is_xen_heap_mfn(mfn) )
   1.252 +#endif
   1.253 +    {
   1.254 +        *status = PG_OFFLINE_XENPAGE | PG_OFFLINE_FAILED |
   1.255 +          (DOMID_XEN << PG_OFFLINE_OWNER_SHIFT);
   1.256 +        return -EPERM;
   1.257 +    }
   1.258 +
   1.259 +    /*
   1.260 +     * N.B. Xen's txt region on x86_64 is marked reserved and already handled.
   1.261 +     * The kexec range is also reserved.
   1.262 +     */
   1.263 +    if ( !page_is_ram_type(mfn, RAM_TYPE_CONVENTIONAL) )
   1.264 +    {
   1.265 +        *status = PG_OFFLINE_FAILED | PG_OFFLINE_NOT_CONV_RAM;
   1.266 +        return -EINVAL;
   1.267 +    }
   1.268 +
   1.269 +    spin_lock(&heap_lock);
   1.270 +
   1.271 +    old_info = mark_page_offline(pg, broken);
   1.272 +
   1.273 +    if ( !allocated_in_map(mfn) )
   1.274 +    {
   1.275 +        /* Free pages are reserved directly */
   1.276 +        reserve_heap_page(pg);
   1.277 +        *status = PG_OFFLINE_OFFLINED;
   1.278 +    }
   1.279 +    else if (test_bit(_PGC_offlined, &pg->count_info))
   1.280 +    {
   1.281 +        *status = PG_OFFLINE_OFFLINED;
   1.282 +    }
   1.283 +    else if ((owner = page_get_owner_and_reference(pg)))
   1.284 +    {
   1.285 +            *status = PG_OFFLINE_OWNED | PG_OFFLINE_PENDING |
   1.286 +              (owner->domain_id << PG_OFFLINE_OWNER_SHIFT);
   1.287 +            /* Release the reference since it will not be allocated anymore */
   1.288 +            put_page(pg);
   1.289 +    }
   1.290 +    else if ( old_info & PGC_xen_heap)
   1.291 +    {
   1.292 +        *status = PG_OFFLINE_XENPAGE | PG_OFFLINE_PENDING |
   1.293 +          (DOMID_XEN << PG_OFFLINE_OWNER_SHIFT);
   1.294 +    }
   1.295 +    else
   1.296 +    {
   1.297 +        /*
   1.298 +         * assign_pages() does not hold heap_lock, so there is a small window
   1.299 +         * in which the owner may be set later. Note that the owner can only
   1.300 +         * change from NULL to non-NULL, not vice versa, since the page is
   1.301 +         * offlining now. There is no window if called from the #MC handler,
   1.302 +         * since all CPUs are in softirq context. If called from user space
   1.303 +         * (e.g. CE handling), tools can wait a while before calling again.
   1.304 +         */
   1.305 +        *status = PG_OFFLINE_ANONYMOUS | PG_OFFLINE_FAILED |
   1.306 +                  (DOMID_INVALID << PG_OFFLINE_OWNER_SHIFT );
   1.307 +    }
   1.308 +
   1.309 +    if (broken)
   1.310 +        *status |= PG_OFFLINE_BROKEN;
   1.311 +
   1.312 +    spin_unlock(&heap_lock);
   1.313 +
   1.314 +    return ret;
   1.315 +}
   1.316 +
   1.317 +/*
   1.318 + * Online the memory.
   1.319 + *   The caller should make sure end_pfn <= max_page;
   1.320 + *   if not, expand_pages() should be called prior to online_page().
   1.321 + */
   1.322 +int online_page(unsigned long mfn, uint32_t *status)
   1.323 +{
   1.324 +    struct page_info *pg;
   1.325 +    int ret = 0, free = 0;
   1.326 +
   1.327 +    if ( mfn >= max_page )
   1.328 +    {
   1.329 +        dprintk(XENLOG_WARNING, "call expand_pages() first\n");
   1.330 +        return -EINVAL;
   1.331 +    }
   1.332 +
   1.333 +    pg = mfn_to_page(mfn);
   1.334 +
   1.335 +    *status = 0;
   1.336 +
   1.337 +    spin_lock(&heap_lock);
   1.338 +
   1.339 +    if ( unlikely(is_page_broken(pg)) )
   1.340 +    {
   1.341 +        ret = -EINVAL;
   1.342 +        *status = PG_ONLINE_FAILED | PG_ONLINE_BROKEN;
   1.343 +    }
   1.344 +    else if (pg->count_info & PGC_offlined)
   1.345 +    {
   1.346 +        clear_bit(_PGC_offlined, &pg->count_info);
   1.347 +        page_list_del(pg, &page_offlined_list);
   1.348 +        *status = PG_ONLINE_ONLINED;
   1.349 +        free = 1;
   1.350 +    }
   1.351 +    else if (pg->count_info & PGC_offlining)
   1.352 +    {
   1.353 +        clear_bit(_PGC_offlining, &pg->count_info);
   1.354 +        *status = PG_ONLINE_ONLINED;
   1.355 +    }
   1.356 +    spin_unlock(&heap_lock);
   1.357 +
   1.358 +    if (free)
   1.359 +        free_heap_pages(pg, 0);
   1.360 +
   1.361 +    return ret;
   1.362 +}
   1.363 +
   1.364 +int query_page_offline(unsigned long mfn, uint32_t *status)
   1.365 +{
   1.366 +    struct page_info *pg;
   1.367 +
   1.368 +    if ( (mfn >= max_page) || !page_is_ram_type(mfn, RAM_TYPE_CONVENTIONAL) )
   1.369 +    {
   1.370 +        dprintk(XENLOG_WARNING, "mfn %lx out of range or not conventional RAM\n", mfn);
   1.371 +        return -EINVAL;
   1.372 +    }
   1.373 +
   1.374 +    *status = 0;
   1.375 +    spin_lock(&heap_lock);
   1.376 +
   1.377 +    pg = mfn_to_page(mfn);
   1.378 +
   1.379 +    if (pg->count_info & PGC_offlining)
   1.380 +        *status |= PG_OFFLINE_STATUS_OFFLINE_PENDING;
   1.381 +    if (pg->count_info & PGC_broken)
   1.382 +        *status |= PG_OFFLINE_STATUS_BROKEN;
   1.383 +    if (pg->count_info & PGC_offlined)
   1.384 +        *status |= PG_OFFLINE_STATUS_OFFLINED;
   1.385 +
   1.386 +    spin_unlock(&heap_lock);
   1.387 +
   1.388 +    return 0;
   1.389 +}
   1.390 +
   1.391  /*
   1.392   * Hand the specified arbitrary page range to the specified heap zone
   1.393   * checking the node_id of the previous page.  If they differ and the
     2.1 --- a/xen/common/sysctl.c	Fri Mar 06 19:14:50 2009 +0000
     2.2 +++ b/xen/common/sysctl.c	Fri Mar 06 19:18:39 2009 +0000
     2.3 @@ -233,6 +233,61 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
     2.4      }
     2.5      break;
     2.6  
     2.7 +    case XEN_SYSCTL_page_offline_op:
     2.8 +    {
     2.9 +        uint32_t *status, *ptr;
    2.10 +        unsigned long pfn;
    2.11 +
    2.12 +        ptr = status = xmalloc_bytes( sizeof(uint32_t) *
    2.13 +                                (op->u.page_offline.end -
    2.14 +                                  op->u.page_offline.start + 1));
    2.15 +        if (!status)
    2.16 +        {
    2.17 +            dprintk(XENLOG_WARNING, "Out of memory for page offline op\n");
    2.18 +            ret = -ENOMEM;
    2.19 +            break;
    2.20 +        }
    2.21 +
    2.22 +        memset(status, PG_OFFLINE_INVALID, sizeof(uint32_t) *
    2.23 +                      (op->u.page_offline.end - op->u.page_offline.start + 1));
    2.24 +
    2.25 +        for ( pfn = op->u.page_offline.start;
    2.26 +              pfn <= op->u.page_offline.end;
    2.27 +              pfn++ )
    2.28 +        {
    2.29 +            switch (op->u.page_offline.cmd)
    2.30 +            {
    2.31 +                /* Should we revert here on failure, or leave that to the caller? */
    2.32 +                case sysctl_page_offline:
    2.33 +                    ret = offline_page(pfn, 0, ptr++);
    2.34 +                    break;
    2.35 +                case sysctl_page_online:
    2.36 +                    ret = online_page(pfn, ptr++);
    2.37 +                    break;
    2.38 +                case sysctl_query_page_offline:
    2.39 +                    ret = query_page_offline(pfn, ptr++);
    2.40 +                    break;
    2.41 +                default:
    2.42 +                    gdprintk(XENLOG_WARNING, "invalid page offline op %x\n",
    2.43 +                            op->u.page_offline.cmd);
    2.44 +                    ret = -EINVAL;
    2.45 +                    break;
    2.46 +            }
    2.47 +
    2.48 +            if (ret)
    2.49 +                break;
    2.50 +        }
    2.51 +
    2.52 +        if (copy_to_guest(op->u.page_offline.status, status,
    2.53 +                          op->u.page_offline.end - op->u.page_offline.start + 1))
    2.54 +        {
    2.55 +            /* Fall through to the xfree() below; do not leak status. */
    2.56 +            ret = -EFAULT;
    2.57 +        }
    2.58 +        xfree(status);
    2.59 +    }
    2.60 +    break;
    2.61 +
    2.62      default:
    2.63          ret = arch_do_sysctl(op, u_sysctl);
    2.64          break;
     3.1 --- a/xen/include/asm-x86/mm.h	Fri Mar 06 19:14:50 2009 +0000
     3.2 +++ b/xen/include/asm-x86/mm.h	Fri Mar 06 19:18:39 2009 +0000
     3.3 @@ -198,8 +198,25 @@ struct page_info
     3.4   /* 3-bit PAT/PCD/PWT cache-attribute hint. */
     3.5  #define PGC_cacheattr_base PG_shift(6)
     3.6  #define PGC_cacheattr_mask PG_mask(7, 6)
     3.7 +
     3.8 + /* Page is broken? */
     3.9 +#define _PGC_broken         PG_shift(7)
    3.10 +#define PGC_broken          PG_mask(1, 7)
    3.11 + /* Page is offline pending? */
    3.12 +#define _PGC_offlining      PG_shift(8)
    3.13 +#define PGC_offlining       PG_mask(1, 8)
    3.14 + /* Page is offlined */
    3.15 +#define _PGC_offlined       PG_shift(9)
    3.16 +#define PGC_offlined        PG_mask(1, 9)
    3.17 +#define PGC_offlined_broken (PGC_offlined | PGC_broken)
    3.18 +
    3.19 +#define is_page_offlining(page) ((page)->count_info & PGC_offlining)
    3.20 +#define is_page_offlined(page)  ((page)->count_info & PGC_offlined)
    3.21 +#define is_page_broken(page)    ((page)->count_info & PGC_broken)
    3.22 +#define is_page_online(page)    (!is_page_offlined(page))
    3.23 +
    3.24   /* Count of references to this frame. */
    3.25 -#define PGC_count_width   PG_shift(6)
    3.26 +#define PGC_count_width   PG_shift(9)
    3.27  #define PGC_count_mask    ((1UL<<PGC_count_width)-1)
    3.28  
    3.29  #if defined(__i386__)
    3.30 @@ -209,9 +226,13 @@ struct page_info
    3.31      (_mfn < paddr_to_pfn(xenheap_phys_end));            \
    3.32  })
    3.33  #else
    3.34 +extern unsigned long allocator_bitmap_end;
    3.35  #define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
    3.36  #define is_xen_heap_mfn(mfn) \
    3.37      (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
    3.38 +#define is_xen_fixed_mfn(mfn) \
    3.39 +    ( (mfn << PAGE_SHIFT) >= __pa(&_start) &&    \
    3.40 +          (mfn << PAGE_SHIFT) <= allocator_bitmap_end )
    3.41  #endif
    3.42  
    3.43  #if defined(__i386__)
     4.1 --- a/xen/include/public/sysctl.h	Fri Mar 06 19:14:50 2009 +0000
     4.2 +++ b/xen/include/public/sysctl.h	Fri Mar 06 19:18:39 2009 +0000
     4.3 @@ -359,6 +359,54 @@ struct xen_sysctl_pm_op {
     4.4      };
     4.5  };
     4.6  
     4.7 +#define XEN_SYSCTL_page_offline_op        14
     4.8 +struct xen_sysctl_page_offline_op {
     4.9 +    /* IN: range of pages to be offlined */
    4.10 +#define sysctl_page_offline     1
    4.11 +#define sysctl_page_online      2
    4.12 +#define sysctl_query_page_offline  3
    4.13 +    uint32_t cmd;
    4.14 +    uint32_t start;
    4.15 +    uint32_t end;
    4.16 +    /* OUT: result of page offline request */
    4.17 +    /*
    4.18 +     * bit 0~15: result flags
    4.19 +     * bit 16~31: owner
    4.20 +     */
    4.21 +    XEN_GUEST_HANDLE(uint32) status;
    4.22 +};
    4.23 +
    4.24 +#define PG_OFFLINE_STATUS_MASK    (0xFFUL)
    4.25 +
    4.26 +/* The result is invalid, i.e. the hypervisor did not handle it */
    4.27 +#define PG_OFFLINE_INVALID   (0x1UL << 0)
    4.28 +
    4.29 +#define PG_OFFLINE_OFFLINED  (0x1UL << 1)
    4.30 +#define PG_OFFLINE_PENDING   (0x1UL << 2)
    4.31 +#define PG_OFFLINE_FAILED    (0x1UL << 3)
    4.32 +
    4.33 +#define PG_ONLINE_FAILED     PG_OFFLINE_FAILED
    4.34 +#define PG_ONLINE_ONLINED    PG_OFFLINE_OFFLINED
    4.35 +
    4.36 +#define PG_OFFLINE_STATUS_OFFLINED              (0x1UL << 1)
    4.37 +#define PG_OFFLINE_STATUS_ONLINE                (0x1UL << 2)
    4.38 +#define PG_OFFLINE_STATUS_OFFLINE_PENDING       (0x1UL << 3)
    4.39 +#define PG_OFFLINE_STATUS_BROKEN                (0x1UL << 4)
    4.40 +
    4.41 +#define PG_OFFLINE_MISC_MASK    (0xFFUL << 4)
    4.42 +
    4.43 +/* only valid when PG_OFFLINE_FAILED */
    4.44 +#define PG_OFFLINE_XENPAGE   (0x1UL << 8)
    4.45 +#define PG_OFFLINE_DOM0PAGE  (0x1UL << 9)
    4.46 +#define PG_OFFLINE_ANONYMOUS (0x1UL << 10)
    4.47 +#define PG_OFFLINE_NOT_CONV_RAM   (0x1UL << 11)
    4.48 +#define PG_OFFLINE_OWNED     (0x1UL << 12)
    4.49 +
    4.50 +#define PG_OFFLINE_BROKEN    (0x1UL << 13)
    4.51 +#define PG_ONLINE_BROKEN     PG_OFFLINE_BROKEN
    4.52 +
    4.53 +#define PG_OFFLINE_OWNER_SHIFT 16
    4.54 +
    4.55  struct xen_sysctl {
    4.56      uint32_t cmd;
    4.57      uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
    4.58 @@ -375,6 +423,7 @@ struct xen_sysctl {
    4.59          struct xen_sysctl_get_pmstat        get_pmstat;
    4.60          struct xen_sysctl_cpu_hotplug       cpu_hotplug;
    4.61          struct xen_sysctl_pm_op             pm_op;
    4.62 +        struct xen_sysctl_page_offline_op   page_offline;
    4.63          uint8_t                             pad[128];
    4.64      } u;
    4.65  };
     5.1 --- a/xen/include/public/xen.h	Fri Mar 06 19:14:50 2009 +0000
     5.2 +++ b/xen/include/public/xen.h	Fri Mar 06 19:18:39 2009 +0000
     5.3 @@ -354,6 +354,9 @@ typedef uint16_t domid_t;
     5.4   */
     5.5  #define DOMID_XEN  (0x7FF2U)
     5.6  
     5.7 +/* DOMID_INVALID is used to identify an invalid domid */
     5.8 +#define DOMID_INVALID (0x7FFFU)
     5.9 +
    5.10  /*
    5.11   * Send an array of these to HYPERVISOR_mmu_update().
    5.12   * NB. The fields are natural pointer/address size for this architecture.
     6.1 --- a/xen/include/xen/mm.h	Fri Mar 06 19:14:50 2009 +0000
     6.2 +++ b/xen/include/xen/mm.h	Fri Mar 06 19:18:39 2009 +0000
     6.3 @@ -60,6 +60,9 @@ unsigned long avail_domheap_pages_region
     6.4  unsigned long avail_domheap_pages(void);
     6.5  #define alloc_domheap_page(d,f) (alloc_domheap_pages(d,0,f))
     6.6  #define free_domheap_page(p)  (free_domheap_pages(p,0))
     6.7 +int online_page(unsigned long mfn, uint32_t *status);
     6.8 +int offline_page(unsigned long mfn, int broken, uint32_t *status);
     6.9 +int query_page_offline(unsigned long mfn, uint32_t *status);
    6.10  
    6.11  void scrub_heap_pages(void);
    6.12
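
Usage sketch: how a dom0 tool might drive the new sysctl to offline a
range of pages and decode the per-page status words. This is a hedged
illustration, not part of the changeset: xc_sysctl(), lock_pages() and
unlock_pages() are assumptions based on the libxc private interface of
this era (tools/libxc/xc_private.h), and offline_range() is a
hypothetical helper. The command value, struct layout, and status bits
are the ones introduced in the sysctl.h hunk above.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include "xc_private.h" /* assumed: xc_sysctl(), lock_pages() */

    /* Hypothetical helper: offline [start, end] and report each page. */
    static int offline_range(int xc_handle, uint32_t start, uint32_t end)
    {
        struct xen_sysctl sysctl;
        uint32_t status[end - start + 1], i;
        int rc;

        memset(&sysctl, 0, sizeof(sysctl));
        memset(status, 0, sizeof(status));
        sysctl.cmd = XEN_SYSCTL_page_offline_op;
        sysctl.interface_version = XEN_SYSCTL_INTERFACE_VERSION;
        sysctl.u.page_offline.cmd = sysctl_page_offline;
        sysctl.u.page_offline.start = start;
        sysctl.u.page_offline.end = end;
        set_xen_guest_handle(sysctl.u.page_offline.status, status);

        /* The status buffer must stay resident across the hypercall. */
        if ( lock_pages(status, sizeof(status)) != 0 )
            return -1;
        rc = xc_sysctl(xc_handle, &sysctl);
        unlock_pages(status, sizeof(status));

        for ( i = 0; i <= end - start; i++ )
        {
            if ( status[i] & PG_OFFLINE_OFFLINED )
                printf("pfn %#x: offlined\n", start + i);
            else if ( status[i] & PG_OFFLINE_PENDING )
                printf("pfn %#x: pending, owner domid %u\n", start + i,
                       status[i] >> PG_OFFLINE_OWNER_SHIFT);
            else if ( status[i] & PG_OFFLINE_FAILED )
                printf("pfn %#x: failed, status %#x\n",
                       start + i, status[i]);
        }
        return rc;
    }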