ia64/xen-unstable

annotate xen/include/xen/mm.h @ 19646:f210a633571c

Transcendent memory ("tmem") for Xen.

Tmem, when called from a tmem-capable (paravirtualized) guest, makes
use of otherwise unutilized ("fallow") memory to create and manage
pools of pages that can be accessed from the guest either as
"ephemeral" pages or as "persistent" pages. In either case, the pages
are not directly addressable by the guest, only copied to and fro via
the tmem interface. Ephemeral pages are a nice place for a guest to
put recently evicted clean pages that it might need again; these pages
can be reclaimed synchronously by Xen for other guests or other uses.
Persistent pages are a nice place for a guest to put "swap" pages to
avoid sending them to disk. These pages retain data as long as the
guest lives, but they count against the guest's memory allocation.
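
As a rough guest-side sketch (the wrappers below are hypothetical
illustrations, not the actual hypercall ABI; real accesses go through
the tmem interface using pool/object/index handles):

    #include <stdint.h>

    /* Hypothetical wrappers -- illustrative only, not the real ABI. */
    int tmem_new_pool(int persistent);  /* returns a pool id, or < 0 */
    int tmem_put_page(int pool, uint64_t obj, uint32_t idx, unsigned long pfn);
    int tmem_get_page(int pool, uint64_t obj, uint32_t idx, unsigned long pfn);

    /* Eviction path: stash a clean page-cache page in an ephemeral pool.
     * Xen may discard it at any time, so this is only a hint. */
    void evict_clean_page(int pool, uint64_t inode, uint32_t idx,
                          unsigned long pfn)
    {
        (void)tmem_put_page(pool, inode, idx, pfn);
    }

    /* Refault path: try tmem first; fall back to disk I/O on a miss. */
    int refault_page(int pool, uint64_t inode, uint32_t idx,
                     unsigned long pfn)
    {
        return tmem_get_page(pool, inode, idx, pfn) == 0;  /* 1 == hit */
    }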

Tmem pages may optionally be compressed and, in certain cases, can be
shared between guests. Tmem also handles concurrent access safely and
provides limited QoS settings to mitigate malicious denial-of-service
attempts. Save/restore and live migration are not yet supported.

Tmem is primarily targeted at a 64-bit x86 hypervisor. On a 32-bit
x86 hypervisor it has limited functionality and testing, due to
limitations of the Xen heap. Nearly all of tmem is
architecture-independent; once three remaining routines are ported to
ia64, it should work on that architecture too. It is also structured
to be portable to non-Xen environments.

Tmem is off by default (for now) and must be enabled with the "tmem"
Xen boot option (it does nothing unless a tmem-capable guest is
running). The "tmem_compress" boot option enables compression, which
costs roughly 10x more CPU but approximately doubles the number of
pages that can be stored.
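
For example, a GRUB entry for the hypervisor might read (paths
illustrative):

    kernel /boot/xen.gz tmem tmem_compress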

Tmem can be controlled via several "xm" commands, and many
interesting tmem statistics can be obtained. A README and internal
specification will follow; in the meantime, lots of useful prose about
tmem, as well as Linux patches, can be found at
http://oss.oracle.com/projects/tmem .
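
For example, statistics can be dumped with something like the
following (subcommand per the accompanying tools patches; treat the
exact name as illustrative here):

    xm tmem-list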

Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue May 26 11:05:04 2009 +0100 (2009-05-26)
parents dd489125a2e7
children 1c01814f9a25
/******************************************************************************
 * include/xen/mm.h
 *
 * Definitions for memory pages, frame numbers, addresses, allocations, etc.
 *
 * Note that Xen must handle several different physical 'address spaces' and
 * there is a consistent terminology for these:
 *
 * 1. gpfn/gpaddr: A guest-specific pseudo-physical frame number or address.
 * 2. gmfn/gmaddr: A machine address from the p.o.v. of a particular guest.
 * 3. mfn/maddr:   A real machine frame number or address.
 * 4. pfn/paddr:   Used in 'polymorphic' functions that work across all
 *                 address spaces, depending on context. See the pagetable
 *                 conversion macros in asm-x86/page.h for examples.
 *                 Also 'paddr_t' is big enough to store any physical address.
 *
 * This scheme provides consistent function and variable names even when
 * different guests are running in different memory-management modes.
 * 1. A guest running in auto-translated mode (e.g., shadow_mode_translate())
 *    will have gpfn == gmfn and gmfn != mfn.
 * 2. A paravirtualised x86 guest will have gpfn != gmfn and gmfn == mfn.
 * 3. A paravirtualised guest with no pseudophysical overlay will have
 *    gpfn == gmfn == mfn.
 *
 * Copyright (c) 2002-2006, K A Fraser <keir@xensource.com>
 */

#ifndef __XEN_MM_H__
#define __XEN_MM_H__

#include <xen/config.h>
#include <xen/types.h>
#include <xen/list.h>
#include <xen/spinlock.h>

struct domain;
struct page_info;

/* Boot-time allocator. Turns into generic allocator after bootstrap. */
paddr_t init_boot_allocator(paddr_t bitmap_start);
void init_boot_pages(paddr_t ps, paddr_t pe);
unsigned long alloc_boot_pages(
    unsigned long nr_pfns, unsigned long pfn_align);
void end_boot_allocator(void);

/* Xen suballocator. These functions are interrupt-safe. */
void init_xenheap_pages(paddr_t ps, paddr_t pe);
void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
void free_xenheap_pages(void *v, unsigned int order);
#define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
#define free_xenheap_page(v) (free_xenheap_pages(v,0))
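
/*
 * Usage sketch (illustrative, not part of this interface): allocate a
 * temporary order-1 (two contiguous pages) xenheap buffer, then free it:
 *
 *     void *v = alloc_xenheap_pages(1, 0);
 *     if ( v != NULL )
 *     {
 *         ... use the two pages mapped at v ...
 *         free_xenheap_pages(v, 1);
 *     }
 */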

/* Domain suballocator. These functions are *not* interrupt-safe. */
void init_domheap_pages(paddr_t ps, paddr_t pe);
struct page_info *alloc_domheap_pages(
    struct domain *d, unsigned int order, unsigned int memflags);
void free_domheap_pages(struct page_info *pg, unsigned int order);
unsigned long avail_domheap_pages_region(
    unsigned int node, unsigned int min_width, unsigned int max_width);
unsigned long avail_domheap_pages(void);
#define alloc_domheap_page(d,f) (alloc_domheap_pages(d,0,f))
#define free_domheap_page(p) (free_domheap_pages(p,0))
unsigned int online_page(unsigned long mfn, uint32_t *status);
int offline_page(unsigned long mfn, int broken, uint32_t *status);
int query_page_offline(unsigned long mfn, uint32_t *status);

void scrub_heap_pages(void);

int assign_pages(
    struct domain *d,
    struct page_info *pg,
    unsigned int order,
    unsigned int memflags);

/* memflags: */
#define _MEMF_no_refcount 0
#define MEMF_no_refcount  (1U<<_MEMF_no_refcount)
#define _MEMF_populate_on_demand 1
#define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand)
#define _MEMF_tmem 2
#define MEMF_tmem (1U<<_MEMF_tmem)
#define _MEMF_node 8
#define MEMF_node(n) ((((n)+1)&0xff)<<_MEMF_node)
#define _MEMF_bits 24
#define MEMF_bits(n) ((n)<<_MEMF_bits)
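
/*
 * Usage sketch (illustrative, not part of this interface): allocate one
 * page for domain d, restricted to machine addresses representable in
 * 32 bits and preferring NUMA node 0:
 *
 *     pg = alloc_domheap_page(d, MEMF_bits(32) | MEMF_node(0));
 */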

#ifdef CONFIG_PAGEALLOC_MAX_ORDER
#define MAX_ORDER CONFIG_PAGEALLOC_MAX_ORDER
#else
#define MAX_ORDER 20 /* 2^20 contiguous pages */
#endif

#define page_list_entry list_head

#include <asm/mm.h>

#ifndef page_list_entry
/*
 * Compact page lists: entries are linked by machine frame number rather
 * than by pointer, with ~0 as the end-of-list sentinel, so that each link
 * can be narrower than a full pointer in struct page_info.
 */
struct page_list_head
{
    struct page_info *next, *tail;
};
/* These must only have instances in struct page_info. */
# define page_list_entry

# define PAGE_LIST_HEAD_INIT(name) { NULL, NULL }
# define PAGE_LIST_HEAD(name) \
    struct page_list_head name = PAGE_LIST_HEAD_INIT(name)
# define INIT_PAGE_LIST_HEAD(head) ((head)->tail = (head)->next = NULL)
# define INIT_PAGE_LIST_ENTRY(ent) ((ent)->prev = (ent)->next = ~0)

static inline int
page_list_empty(const struct page_list_head *head)
{
    return !head->next;
}
static inline struct page_info *
page_list_first(const struct page_list_head *head)
{
    return head->next;
}
static inline struct page_info *
page_list_next(const struct page_info *page,
               const struct page_list_head *head)
{
    return page != head->tail ? mfn_to_page(page->list.next) : NULL;
}
static inline struct page_info *
page_list_prev(const struct page_info *page,
               const struct page_list_head *head)
{
    return page != head->next ? mfn_to_page(page->list.prev) : NULL;
}
static inline void
page_list_add(struct page_info *page, struct page_list_head *head)
{
    if ( head->next )
    {
        page->list.next = page_to_mfn(head->next);
        head->next->list.prev = page_to_mfn(page);
    }
    else
    {
        head->tail = page;
        page->list.next = ~0;
    }
    page->list.prev = ~0;
    head->next = page;
}
static inline void
page_list_add_tail(struct page_info *page, struct page_list_head *head)
{
    page->list.next = ~0;
    if ( head->next )
    {
        page->list.prev = page_to_mfn(head->tail);
        head->tail->list.next = page_to_mfn(page);
    }
    else
    {
        page->list.prev = ~0;
        head->next = page;
    }
    head->tail = page;
}
static inline bool_t
__page_list_del_head(struct page_info *page, struct page_list_head *head,
                     struct page_info *next, struct page_info *prev)
{
    if ( head->next == page )
    {
        if ( head->tail != page )
        {
            next->list.prev = ~0;
            head->next = next;
        }
        else
            head->tail = head->next = NULL;
        return 1;
    }

    if ( head->tail == page )
    {
        prev->list.next = ~0;
        head->tail = prev;
        return 1;
    }

    return 0;
}
static inline void
page_list_del(struct page_info *page, struct page_list_head *head)
{
    struct page_info *next = mfn_to_page(page->list.next);
    struct page_info *prev = mfn_to_page(page->list.prev);

    if ( !__page_list_del_head(page, head, next, prev) )
    {
        next->list.prev = page->list.prev;
        prev->list.next = page->list.next;
    }
}
static inline void
page_list_del2(struct page_info *page, struct page_list_head *head1,
               struct page_list_head *head2)
{
    struct page_info *next = mfn_to_page(page->list.next);
    struct page_info *prev = mfn_to_page(page->list.prev);

    if ( !__page_list_del_head(page, head1, next, prev) &&
         !__page_list_del_head(page, head2, next, prev) )
    {
        next->list.prev = page->list.prev;
        prev->list.next = page->list.next;
    }
}
static inline struct page_info *
page_list_remove_head(struct page_list_head *head)
{
    struct page_info *page = head->next;

    if ( page )
        page_list_del(page, head);

    return page;
}

/* Transfer all entries of @list to the front of @head. The @list head
 * itself is not re-initialised and must not be reused without doing so. */
static inline void
page_list_splice(struct page_list_head *list, struct page_list_head *head)
{
    struct page_info *first, *last, *at;

    if ( page_list_empty(list) )
        return;

    if ( page_list_empty(head) )
    {
        head->next = list->next;
        head->tail = list->tail;
        return;
    }

    first = list->next;
    last = list->tail;
    at = head->next;

    first->list.prev = page_to_mfn(head->next);
    head->next = first;

    last->list.next = page_to_mfn(at);
    at->list.prev = page_to_mfn(last);
}

#define page_list_for_each(pos, head) \
    for ( pos = (head)->next; pos; pos = page_list_next(pos, head) )
#define page_list_for_each_safe(pos, tmp, head) \
    for ( pos = (head)->next; \
          pos ? (tmp = page_list_next(pos, head), 1) : 0; \
          pos = tmp )
#define page_list_for_each_safe_reverse(pos, tmp, head) \
    for ( pos = (head)->tail; \
          pos ? (tmp = page_list_prev(pos, head), 1) : 0; \
          pos = tmp )
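
/*
 * Usage sketch (illustrative): the _safe variants allow the loop body to
 * unlink the current entry, e.g. draining a list one page at a time:
 *
 *     struct page_info *pg, *tmp;
 *     page_list_for_each_safe ( pg, tmp, &list )
 *     {
 *         page_list_del(pg, &list);
 *         ...
 *     }
 */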
#else
# define page_list_head               list_head
# define PAGE_LIST_HEAD_INIT          LIST_HEAD_INIT
# define PAGE_LIST_HEAD               LIST_HEAD
# define INIT_PAGE_LIST_HEAD          INIT_LIST_HEAD
# define INIT_PAGE_LIST_ENTRY         INIT_LIST_HEAD
# define page_list_empty              list_empty
# define page_list_first(hd)          list_entry((hd)->next, \
                                                 struct page_info, list)
# define page_list_next(pg, hd)       list_entry((pg)->list.next, \
                                                 struct page_info, list)
# define page_list_add(pg, hd)        list_add(&(pg)->list, hd)
# define page_list_add_tail(pg, hd)   list_add_tail(&(pg)->list, hd)
# define page_list_del(pg, hd)        list_del(&(pg)->list)
# define page_list_del2(pg, hd1, hd2) list_del(&(pg)->list)
# define page_list_remove_head(hd)    (!page_list_empty(hd) ? \
    ({ \
        struct page_info *__pg = page_list_first(hd); \
        list_del(&__pg->list); \
        __pg; \
    }) : NULL)
# define page_list_for_each(pos, head) list_for_each_entry(pos, head, list)
# define page_list_for_each_safe(pos, tmp, head) \
    list_for_each_entry_safe(pos, tmp, head, list)
# define page_list_for_each_safe_reverse(pos, tmp, head) \
    list_for_each_entry_safe_reverse(pos, tmp, head, list)
# define page_list_splice(list, hd)   list_splice(list, hd)
#endif

/* Automatic page scrubbing for dead domains. */
extern struct page_list_head page_scrub_list;
#define page_scrub_schedule_work()                  \
    do {                                            \
        if ( !page_list_empty(&page_scrub_list) )   \
            raise_softirq(PAGE_SCRUB_SOFTIRQ);      \
    } while ( 0 )
#define page_scrub_kick()                                               \
    do {                                                                \
        if ( !page_list_empty(&page_scrub_list) )                       \
            cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ);  \
    } while ( 0 )
void scrub_list_splice(struct page_list_head *);
void scrub_list_add(struct page_info *);
void scrub_one_page(struct page_info *);
unsigned long avail_scrub_pages(void);

int guest_remove_page(struct domain *d, unsigned long gmfn);

#define RAM_TYPE_CONVENTIONAL 0x00000001
#define RAM_TYPE_RESERVED     0x00000002
#define RAM_TYPE_UNUSABLE     0x00000004
#define RAM_TYPE_ACPI         0x00000008
/* Returns TRUE if the whole page at @mfn is of the requested RAM type(s). */
int page_is_ram_type(unsigned long mfn, unsigned long mem_type);
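
/*
 * Usage sketch (illustrative): the type argument is a mask, so a page can
 * be accepted if it is either conventional or reserved RAM:
 *
 *     if ( page_is_ram_type(mfn, RAM_TYPE_CONVENTIONAL | RAM_TYPE_RESERVED) )
 *         ...
 */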

extern unsigned long *alloc_bitmap; /* for vmcoreinfo */

#endif /* __XEN_MM_H__ */