ia64/xen-unstable
changeset 957:603ae09a1314
bitkeeper revision 1.612 (3fbba6e271_EVTI6k2Ndd7VThsLqTA)
Many files:
new file
Makefile:
New library (libxi) which makes it easier to access the control interfaces.
Many files:
new file
Makefile:
New library (libxi) which makes it easier to access the control interfaces.
author | kaf24@scramble.cl.cam.ac.uk |
---|---|
date | Wed Nov 19 17:22:42 2003 +0000 (2003-11-19) |
parents | 45296ed1d50d |
children | b86d78981ae9 |
files | .rootkeys tools/Makefile tools/libxi/Makefile tools/libxi/libxi_bvtsched.c tools/libxi/libxi_domain.c tools/libxi/libxi_linux_build.c tools/libxi/libxi_linux_restore.c tools/libxi/libxi_linux_save.c tools/libxi/libxi_misc.c tools/libxi/libxi_private.c tools/libxi/libxi_private.h tools/libxi/libxi_vbd.c tools/libxi/libxi_vif.c tools/libxi/rpm.spec tools/libxi/xi.h |
line diff
1.1 --- a/.rootkeys Wed Nov 19 10:08:11 2003 +0000 1.2 +++ b/.rootkeys Wed Nov 19 17:22:42 2003 +0000 1.3 @@ -184,6 +184,19 @@ 3fb01fd54I4P44vZDb1CtDt1BytDtA tools/int 1.4 3fb01fd5B-UeibZkmSCOUZckNyNFYA tools/internal/xi_vbd_list.c 1.5 3f86be322bd0h9jG3krZFOUgCDoxZg tools/internal/xi_vif_params.c 1.6 3eb781fd7211MZsLxJSiuy7W4KnJXg tools/internal/xi_vifinit 1.7 +3fbba6dbDfYvJSsw9500b4SZyUhxjQ tools/libxi/Makefile 1.8 +3fbba6dbEVkVMX0JuDFzap9jeaucGA tools/libxi/libxi_bvtsched.c 1.9 +3fbba6dbasJQV-MVElDC0DGSHMiL5w tools/libxi/libxi_domain.c 1.10 +3fbba6dbNCU7U6nsMYiXzKkp3ztaJg tools/libxi/libxi_linux_build.c 1.11 +3fbba6dbl267zZOAVHYLOdLCdhcZMw tools/libxi/libxi_linux_restore.c 1.12 +3fbba6db7li3FJiABYtCmuGxOJxEGw tools/libxi/libxi_linux_save.c 1.13 +3fbba6db7WnnJr0KFrIFrqNlSKvFYg tools/libxi/libxi_misc.c 1.14 +3fbba6dctWRWlFJkYb6hdix2X4WMuw tools/libxi/libxi_private.c 1.15 +3fbba6dcbVrG2hPzEzwdeV_UC8kydQ tools/libxi/libxi_private.h 1.16 +3fbba6dcoGq9hQlksrBUfC2P5F6sGg tools/libxi/libxi_vbd.c 1.17 +3fbba6dc38q-ioRlwSR_quw4G3qUeQ tools/libxi/libxi_vif.c 1.18 +3fbba6dc1uU7U3IFeF6A-XEOYF2MkQ tools/libxi/rpm.spec 1.19 +3fbba6dcrNxtygEcgJYAJJ1gCQqfsA tools/libxi/xi.h 1.20 3f776bd2Xd-dUcPKlPN2vG89VGtfvQ tools/misc/Makefile 1.21 3f6dc136ZKOjd8PIqLbFBl_v-rnkGg tools/misc/miniterm/Makefile 1.22 3f6dc140C8tAeBfroAF24VrmCS4v_w tools/misc/miniterm/README
2.1 --- a/tools/Makefile Wed Nov 19 10:08:11 2003 +0000 2.2 +++ b/tools/Makefile Wed Nov 19 17:22:42 2003 +0000 2.3 @@ -21,17 +21,20 @@ all: 2.4 $(MAKE) -C balloon 2.5 $(MAKE) -C control 2.6 $(MAKE) -C internal 2.7 + $(MAKE) -C libxi 2.8 $(MAKE) -C misc 2.9 2.10 install: all 2.11 $(MAKE) -C balloon install 2.12 $(MAKE) -C control install 2.13 $(MAKE) -C internal install 2.14 + $(MAKE) -C libxi install 2.15 $(MAKE) -C misc install 2.16 2.17 clean: 2.18 $(MAKE) -C balloon clean 2.19 $(MAKE) -C control clean 2.20 $(MAKE) -C internal clean 2.21 + $(MAKE) -C libxi clean 2.22 $(MAKE) -C misc clean 2.23
# tools/libxi/Makefile
#
# Builds the libxi control-interface library as both a static archive
# (libxi.a) and a shared object (libxi.so), and provides install, clean
# and rpm packaging targets.

CC       = gcc
CFLAGS   = -c -Wall -O3
CFLAGS  += -I../../xen/include -I../../xenolinux-sparse/include

HDRS     = $(wildcard *.h)
OBJS     = $(patsubst %.c,%.o,$(wildcard libxi_*.c))

LIBS     = libxi.a libxi.so

# None of these targets name a file that the rule creates.
.PHONY: all check-for-zlib install clean rpm

all: check-for-zlib $(LIBS)
	ranlib libxi.a

# libxi_linux_*.c include <zlib.h>; fail early with a readable message.
check-for-zlib:
	@if [ ! -e /usr/include/zlib.h ]; then \
	echo "***********************************************************"; \
	echo "ERROR: install zlib header files (http://www.gzip.org/zlib)"; \
	echo "***********************************************************"; \
	false; \
	fi

install: all
	mkdir -p ../../../install/lib
	mkdir -p ../../../install/include
	cp -a $(LIBS) ../../../install/lib
	# '$$i' reaches the shell as '$i'; a single '$' would be expanded (to
	# nothing) by make. The libraries were copied to install/lib, so that
	# is where their permissions must be set.
	for i in $(LIBS) ; do chmod 755 ../../../install/lib/$$i ; done
	cp -a xi.h ../../../install/include
	chmod 644 ../../../install/include/xi.h

clean:
	$(RM) *.a *.so *.o *.rpm $(LIBS)

# NOTE(review): rpm's --define takes 'NAME VALUE' separated by whitespace;
# confirm these macro names against rpm.spec.
rpm: all
	rm -rf staging
	mkdir staging
	mkdir staging/i386
	rpmbuild --define "staging $$PWD/staging" --define '_builddir .' \
	         --define "_rpmdir $$PWD/staging" -bb rpm.spec
	mv staging/i386/*.rpm .
	rm -rf staging

libxi.so: $(OBJS)
	$(LD) -shared -o $@ $^ -lz

libxi.a: libxi.a($(OBJS))

%.o: %.c $(HDRS) Makefile
	$(CC) $(CFLAGS) -o $@ $<
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/tools/libxi/libxi_bvtsched.c Wed Nov 19 17:22:42 2003 +0000 4.3 @@ -0,0 +1,33 @@ 4.4 +/****************************************************************************** 4.5 + * libxi_bvtsched.c 4.6 + * 4.7 + * API for manipulating parameters of the Borrowed Virtual Time scheduler. 4.8 + * 4.9 + * Copyright (c) 2003, K A Fraser. 4.10 + */ 4.11 + 4.12 +#include "libxi_private.h" 4.13 + 4.14 +int xi_bvtsched_global_set(unsigned long ctx_allow) 4.15 +{ 4.16 + dom0_op_t op; 4.17 + op.cmd = DOM0_BVTCTL; 4.18 + op.u.bvtctl.ctx_allow = ctx_allow; 4.19 + return do_dom0_op(&op); 4.20 +} 4.21 + 4.22 +int xi_bvtsched_domain_set(unsigned int domid, 4.23 + unsigned long mcuadv, 4.24 + unsigned long warp, 4.25 + unsigned long warpl, 4.26 + unsigned long warpu) 4.27 +{ 4.28 + dom0_op_t op; 4.29 + op.cmd = DOM0_ADJUSTDOM; 4.30 + op.u.adjustdom.domain = domid; 4.31 + op.u.adjustdom.mcu_adv = mcuadv; 4.32 + op.u.adjustdom.warp = warp; 4.33 + op.u.adjustdom.warpl = warpl; 4.34 + op.u.adjustdom.warpu = warpu; 4.35 + return do_dom0_op(&op); 4.36 +}
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/tools/libxi/libxi_domain.c Wed Nov 19 17:22:42 2003 +0000 5.3 @@ -0,0 +1,80 @@ 5.4 +/****************************************************************************** 5.5 + * libxi_domain.c 5.6 + * 5.7 + * API for manipulating and obtaining information on domains. 5.8 + * 5.9 + * Copyright (c) 2003, K A Fraser. 5.10 + */ 5.11 + 5.12 +#include "libxi_private.h" 5.13 + 5.14 +int xi_domain_create(unsigned int mem_kb, const char *name) 5.15 +{ 5.16 + int err; 5.17 + dom0_op_t op; 5.18 + 5.19 + op.cmd = DOM0_CREATEDOMAIN; 5.20 + op.u.createdomain.memory_kb = mem_kb; 5.21 + strncpy(op.u.createdomain.name, name, MAX_DOMAIN_NAME); 5.22 + op.u.createdomain.name[MAX_DOMAIN_NAME-1] = '\0'; 5.23 + 5.24 + err = do_dom0_op(&op); 5.25 + 5.26 + return (err < 0) ? err : op.u.createdomain.domain; 5.27 +} 5.28 + 5.29 + 5.30 +int xi_domain_start(unsigned int domid) 5.31 +{ 5.32 + dom0_op_t op; 5.33 + op.cmd = DOM0_STARTDOMAIN; 5.34 + op.u.startdomain.domain = domid; 5.35 + return do_dom0_op(&op); 5.36 +} 5.37 + 5.38 + 5.39 +int xi_domain_stop(unsigned int domid) 5.40 +{ 5.41 + dom0_op_t op; 5.42 + op.cmd = DOM0_STOPDOMAIN; 5.43 + op.u.stopdomain.domain = domid; 5.44 + return do_dom0_op(&op); 5.45 +} 5.46 + 5.47 + 5.48 +int xi_domain_destroy(unsigned int domid, int force) 5.49 +{ 5.50 + dom0_op_t op; 5.51 + op.cmd = DOM0_DESTROYDOMAIN; 5.52 + op.u.destroydomain.domain = domid; 5.53 + op.u.destroydomain.force = !!force; 5.54 + return do_dom0_op(&op); 5.55 +} 5.56 + 5.57 +int xi_domain_getinfo(unsigned int first_domid, 5.58 + unsigned int max_doms, 5.59 + xi_dominfo_t *info) 5.60 +{ 5.61 + unsigned int nr_doms, next_domid = first_domid; 5.62 + dom0_op_t op; 5.63 + 5.64 + for ( nr_doms = 0; nr_doms < max_doms; nr_doms++ ) 5.65 + { 5.66 + op.cmd = DOM0_GETDOMAININFO; 5.67 + op.u.getdomaininfo.domain = next_domid; 5.68 + if ( do_dom0_op(&op) < 0 ) 5.69 + break; 5.70 + info->domid = op.u.getdomaininfo.domain; 5.71 + info->cpu = 
op.u.getdomaininfo.processor; 5.72 + info->has_cpu = op.u.getdomaininfo.has_cpu; 5.73 + info->stopped = (op.u.getdomaininfo.state == DOMSTATE_STOPPED); 5.74 + info->nr_pages = op.u.getdomaininfo.tot_pages; 5.75 + info->cpu_time = op.u.getdomaininfo.cpu_time; 5.76 + strncpy(info->name, op.u.getdomaininfo.name, XI_DOMINFO_MAXNAME); 5.77 + info->name[XI_DOMINFO_MAXNAME-1] = '\0'; 5.78 + 5.79 + next_domid = op.u.getdomaininfo.domain + 1; 5.80 + } 5.81 + 5.82 + return nr_doms; 5.83 +}
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/tools/libxi/libxi_linux_build.c Wed Nov 19 17:22:42 2003 +0000 6.3 @@ -0,0 +1,481 @@ 6.4 +/****************************************************************************** 6.5 + * libxi_linux_build.c 6.6 + */ 6.7 + 6.8 +#include "libxi_private.h" 6.9 +#include <zlib.h> 6.10 + 6.11 +/* This string is written to the head of every guest kernel image. */ 6.12 +#define GUEST_SIG "XenoGues" 6.13 +#define SIG_LEN 8 6.14 + 6.15 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED) 6.16 +#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER) 6.17 + 6.18 +static long get_tot_pages(int domid) 6.19 +{ 6.20 + dom0_op_t op; 6.21 + op.cmd = DOM0_GETDOMAININFO; 6.22 + op.u.getdomaininfo.domain = domid; 6.23 + return (do_dom0_op(&op) < 0) ? -1 : op.u.getdomaininfo.tot_pages; 6.24 +} 6.25 + 6.26 +static int get_pfn_list( 6.27 + int domid, unsigned long *pfn_buf, unsigned long max_pfns) 6.28 +{ 6.29 + dom0_op_t op; 6.30 + int ret; 6.31 + op.cmd = DOM0_GETMEMLIST; 6.32 + op.u.getmemlist.domain = domid; 6.33 + op.u.getmemlist.max_pfns = max_pfns; 6.34 + op.u.getmemlist.buffer = pfn_buf; 6.35 + 6.36 + if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 ) 6.37 + return -1; 6.38 + 6.39 + ret = do_dom0_op(&op); 6.40 + 6.41 + (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long)); 6.42 + 6.43 + return (ret < 0) ? 
-1 : op.u.getmemlist.num_pfns; 6.44 +} 6.45 + 6.46 +static int send_pgupdates(mmu_update_t *updates, int nr_updates) 6.47 +{ 6.48 + int ret = -1; 6.49 + privcmd_hypercall_t hypercall; 6.50 + 6.51 + hypercall.op = __HYPERVISOR_mmu_update; 6.52 + hypercall.arg[0] = (unsigned long)updates; 6.53 + hypercall.arg[1] = (unsigned long)nr_updates; 6.54 + 6.55 + if ( mlock(updates, nr_updates * sizeof(*updates)) != 0 ) 6.56 + goto out1; 6.57 + 6.58 + if ( do_xen_hypercall(&hypercall) < 0 ) 6.59 + goto out2; 6.60 + 6.61 + ret = 0; 6.62 + 6.63 + out2: (void)munlock(updates, nr_updates * sizeof(*updates)); 6.64 + out1: return ret; 6.65 +} 6.66 + 6.67 +/* Read the kernel header, extracting the image size and load address. */ 6.68 +static int read_kernel_header(gzFile gfd, long dom_size, 6.69 + unsigned long *load_addr, int verbose) 6.70 +{ 6.71 + char signature[SIG_LEN]; 6.72 + 6.73 + gzread(gfd, signature, SIG_LEN); 6.74 + if ( strncmp(signature, GUEST_SIG, SIG_LEN) ) 6.75 + { 6.76 + if ( verbose ) 6.77 + ERROR("Kernel image does not contain required signature"); 6.78 + return -1; 6.79 + } 6.80 + 6.81 + /* Read the load address which immediately follows the Xeno signature. 
*/ 6.82 + gzread(gfd, load_addr, sizeof(unsigned long)); 6.83 + 6.84 + return 0; 6.85 +} 6.86 + 6.87 +static int copy_to_domain_page(unsigned long dst_pfn, void *src_page) 6.88 +{ 6.89 + void *vaddr = map_pfn(dst_pfn); 6.90 + if ( vaddr == NULL ) 6.91 + return -1; 6.92 + memcpy(vaddr, src_page, PAGE_SIZE); 6.93 + unmap_pfn(vaddr); 6.94 + return 0; 6.95 +} 6.96 + 6.97 +static int setup_guestos( 6.98 + int dom, gzFile kernel_gfd, int initrd_fd, unsigned long tot_pages, 6.99 + unsigned long *virt_startinfo_addr, unsigned long virt_load_addr, 6.100 + dom0_builddomain_t *builddomain, const char *cmdline, 6.101 + unsigned long shared_info_frame, int verbose) 6.102 +{ 6.103 + l1_pgentry_t *vl1tab = NULL, *vl1e = NULL; 6.104 + l2_pgentry_t *vl2tab = NULL, *vl2e = NULL; 6.105 + unsigned long *page_array = NULL; 6.106 + mmu_update_t *pgt_update_arr = NULL, *pgt_updates = NULL; 6.107 + int alloc_index, num_pt_pages; 6.108 + unsigned long l2tab; 6.109 + unsigned long l1tab = 0; 6.110 + unsigned long num_pgt_updates = 0; 6.111 + unsigned long count, pt_start, i, j; 6.112 + unsigned long initrd_addr = 0, initrd_len = 0; 6.113 + start_info_t *start_info; 6.114 + shared_info_t *shared_info; 6.115 + unsigned long ksize; 6.116 + 6.117 + memset(builddomain, 0, sizeof(*builddomain)); 6.118 + 6.119 + if ( init_pfn_mapper() < 0 ) 6.120 + goto error_out; 6.121 + 6.122 + pgt_updates = malloc((tot_pages + 1024) * 3 * sizeof(mmu_update_t)); 6.123 + page_array = malloc(tot_pages * sizeof(unsigned long)); 6.124 + pgt_update_arr = pgt_updates; 6.125 + if ( (pgt_update_arr == NULL) || (page_array == NULL) ) 6.126 + { 6.127 + if ( verbose ) 6.128 + PERROR("Could not allocate memory"); 6.129 + goto error_out; 6.130 + } 6.131 + 6.132 + if ( get_pfn_list(dom, page_array, tot_pages) != tot_pages ) 6.133 + { 6.134 + if ( verbose ) 6.135 + PERROR("Could not get the page frame list"); 6.136 + goto error_out; 6.137 + } 6.138 + 6.139 + /* Load the guest OS image. 
Let it take no more than 1/2 memory.*/ 6.140 + for ( i = 0; i < ((tot_pages/2)*PAGE_SIZE); i += PAGE_SIZE ) 6.141 + { 6.142 + char page[PAGE_SIZE]; 6.143 + int size; 6.144 + if ( (size = gzread(kernel_gfd, page, PAGE_SIZE)) == -1 ) 6.145 + { 6.146 + if ( verbose ) 6.147 + PERROR("Error reading kernel image, could not" 6.148 + " read the whole image."); 6.149 + goto error_out; 6.150 + } 6.151 + if ( size == 0 ) 6.152 + goto kernel_copied; 6.153 + copy_to_domain_page(page_array[i>>PAGE_SHIFT], page); 6.154 + } 6.155 + if ( verbose ) 6.156 + ERROR("Kernel too big to safely fit in domain memory"); 6.157 + goto error_out; 6.158 + 6.159 + kernel_copied: 6.160 + /* ksize is kernel-image size rounded up to a page boundary. */ 6.161 + ksize = i; 6.162 + 6.163 + /* Load the initial ramdisk image. */ 6.164 + if ( initrd_fd >= 0 ) 6.165 + { 6.166 + struct stat stat; 6.167 + unsigned long isize; 6.168 + 6.169 + if ( fstat(initrd_fd, &stat) < 0 ) 6.170 + { 6.171 + if ( verbose ) 6.172 + PERROR("Could not stat the initrd image"); 6.173 + goto error_out; 6.174 + } 6.175 + isize = stat.st_size; 6.176 + if ( (isize + ksize) > ((tot_pages/2) * PAGE_SIZE) ) 6.177 + { 6.178 + if ( verbose ) 6.179 + ERROR("Kernel/initrd too big to safely fit in domain memory"); 6.180 + goto error_out; 6.181 + } 6.182 + 6.183 + initrd_addr = virt_load_addr + ksize; 6.184 + initrd_len = isize; 6.185 + 6.186 + for ( j = 0, i = ksize; j < isize; j += PAGE_SIZE, i += PAGE_SIZE ) 6.187 + { 6.188 + char page[PAGE_SIZE]; 6.189 + int size = ((isize-j) < PAGE_SIZE) ? (isize-j) : PAGE_SIZE; 6.190 + if ( read(initrd_fd, page, size) != size ) 6.191 + { 6.192 + if ( verbose ) 6.193 + PERROR("Error reading initrd image, could not" 6.194 + " read the whole image."); 6.195 + goto error_out; 6.196 + } 6.197 + copy_to_domain_page(page_array[i>>PAGE_SHIFT], page); 6.198 + } 6.199 + } 6.200 + 6.201 + alloc_index = tot_pages - 1; 6.202 + 6.203 + /* Count bottom-level PTs, rounding up. 
*/ 6.204 + num_pt_pages = (l1_table_offset(virt_load_addr) + tot_pages + 1023) / 1024; 6.205 + 6.206 + /* We must also count the page directory. */ 6.207 + num_pt_pages++; 6.208 + 6.209 + /* Index of first PT page. */ 6.210 + pt_start = tot_pages - num_pt_pages; 6.211 + 6.212 + /* 6.213 + * First allocate page for page dir. Allocation goes backwards from the end 6.214 + * of the allocated physical address space. 6.215 + */ 6.216 + l2tab = page_array[alloc_index] << PAGE_SHIFT; 6.217 + alloc_index--; 6.218 + builddomain->ctxt.pt_base = l2tab; 6.219 + 6.220 + /* 6.221 + * Pin down l2tab addr as page dir page - causes hypervisor to provide 6.222 + * correct protection for the page 6.223 + */ 6.224 + pgt_updates->ptr = l2tab | MMU_EXTENDED_COMMAND; 6.225 + pgt_updates->val = MMUEXT_PIN_L2_TABLE; 6.226 + pgt_updates++; 6.227 + num_pgt_updates++; 6.228 + 6.229 + /* Initialise the page tables. */ 6.230 + if ( (vl2tab = map_pfn(l2tab >> PAGE_SHIFT)) == NULL ) 6.231 + goto error_out; 6.232 + memset(vl2tab, 0, PAGE_SIZE); 6.233 + vl2e = vl2tab + l2_table_offset(virt_load_addr); 6.234 + for ( count = 0; count < tot_pages; count++ ) 6.235 + { 6.236 + if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 ) 6.237 + { 6.238 + l1tab = page_array[alloc_index] << PAGE_SHIFT; 6.239 + if ( (vl1tab = map_pfn(l1tab >> PAGE_SHIFT)) == NULL ) 6.240 + goto error_out; 6.241 + memset(vl1tab, 0, PAGE_SIZE); 6.242 + alloc_index--; 6.243 + 6.244 + vl1e = vl1tab + l1_table_offset(virt_load_addr + 6.245 + (count << PAGE_SHIFT)); 6.246 + 6.247 + /* make apropriate entry in the page directory */ 6.248 + pgt_updates->ptr = (unsigned long)vl2e; 6.249 + pgt_updates->val = l1tab | L2_PROT; 6.250 + pgt_updates++; 6.251 + num_pgt_updates++; 6.252 + vl2e++; 6.253 + } 6.254 + 6.255 + if ( count < pt_start ) 6.256 + { 6.257 + pgt_updates->ptr = (unsigned long)vl1e; 6.258 + pgt_updates->val = (page_array[count] << PAGE_SHIFT) | L1_PROT; 6.259 + pgt_updates++; 6.260 + num_pgt_updates++; 6.261 + vl1e++; 6.262 + } 
6.263 + else 6.264 + { 6.265 + pgt_updates->ptr = (unsigned long)vl1e; 6.266 + pgt_updates->val = 6.267 + ((page_array[count] << PAGE_SHIFT) | L1_PROT) & ~_PAGE_RW; 6.268 + pgt_updates++; 6.269 + num_pgt_updates++; 6.270 + vl1e++; 6.271 + } 6.272 + 6.273 + pgt_updates->ptr = 6.274 + (page_array[count] << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; 6.275 + pgt_updates->val = count; 6.276 + pgt_updates++; 6.277 + num_pgt_updates++; 6.278 + } 6.279 + 6.280 + *virt_startinfo_addr = 6.281 + virt_load_addr + ((alloc_index-1) << PAGE_SHIFT); 6.282 + 6.283 + start_info = map_pfn(page_array[alloc_index-1]); 6.284 + memset(start_info, 0, sizeof(*start_info)); 6.285 + start_info->pt_base = virt_load_addr + ((tot_pages-1) << PAGE_SHIFT); 6.286 + start_info->mod_start = initrd_addr; 6.287 + start_info->mod_len = initrd_len; 6.288 + start_info->nr_pages = tot_pages; 6.289 + start_info->shared_info = shared_info_frame << PAGE_SHIFT; 6.290 + start_info->dom_id = dom; 6.291 + start_info->flags = 0; 6.292 + strncpy(start_info->cmd_line, cmdline, MAX_CMD_LEN); 6.293 + start_info->cmd_line[MAX_CMD_LEN-1] = '\0'; 6.294 + 6.295 + unmap_pfn(start_info); 6.296 + 6.297 + /* shared_info page starts its life empty. */ 6.298 + shared_info = map_pfn(shared_info_frame); 6.299 + memset(shared_info, 0, PAGE_SIZE); 6.300 + unmap_pfn(shared_info); 6.301 + 6.302 + /* Send the page update requests down to the hypervisor. 
*/ 6.303 + if ( send_pgupdates(pgt_update_arr, num_pgt_updates) < 0 ) 6.304 + goto error_out; 6.305 + 6.306 + free(page_array); 6.307 + free(pgt_update_arr); 6.308 + return 0; 6.309 + 6.310 + error_out: 6.311 + if ( page_array == NULL ) 6.312 + free(page_array); 6.313 + if ( pgt_update_arr == NULL ) 6.314 + free(pgt_update_arr); 6.315 + return -1; 6.316 +} 6.317 + 6.318 +int xi_domain_build(unsigned int domid, 6.319 + const char *image_name, 6.320 + const char *ramdisk_name, 6.321 + const char *cmdline, 6.322 + int verbose) 6.323 +{ 6.324 + dom0_op_t launch_op, op; 6.325 + unsigned long load_addr; 6.326 + long tot_pages; 6.327 + int kernel_fd, initrd_fd = -1; 6.328 + gzFile kernel_gfd; 6.329 + int rc, i; 6.330 + full_execution_context_t *ctxt; 6.331 + unsigned long virt_startinfo_addr; 6.332 + 6.333 + if ( (tot_pages = get_tot_pages(domid)) < 0 ) 6.334 + { 6.335 + if ( verbose ) 6.336 + PERROR("Could not find total pages for domain"); 6.337 + return 1; 6.338 + } 6.339 + 6.340 + kernel_fd = open(image_name, O_RDONLY); 6.341 + if ( kernel_fd < 0 ) 6.342 + { 6.343 + if ( verbose ) 6.344 + PERROR("Could not open kernel image"); 6.345 + return 1; 6.346 + } 6.347 + 6.348 + if ( (kernel_gfd = gzdopen(kernel_fd, "rb")) == NULL ) 6.349 + { 6.350 + if ( verbose ) 6.351 + PERROR("Could not allocate decompression state for state file"); 6.352 + close(kernel_fd); 6.353 + return 1; 6.354 + } 6.355 + 6.356 + rc = read_kernel_header(kernel_gfd, 6.357 + tot_pages << (PAGE_SHIFT - 10), 6.358 + &load_addr, verbose); 6.359 + if ( rc < 0 ) 6.360 + goto error_out; 6.361 + 6.362 + if ( (load_addr & (PAGE_SIZE-1)) != 0 ) 6.363 + { 6.364 + if ( verbose ) 6.365 + ERROR("We can only deal with page-aligned load addresses"); 6.366 + goto error_out; 6.367 + } 6.368 + 6.369 + if ( (load_addr + (tot_pages << PAGE_SHIFT)) > HYPERVISOR_VIRT_START ) 6.370 + { 6.371 + if ( verbose ) 6.372 + ERROR("Cannot map all domain memory without hitting Xen space"); 6.373 + goto error_out; 6.374 + } 6.375 + 
6.376 + if ( ramdisk_name != NULL ) 6.377 + { 6.378 + initrd_fd = open(ramdisk_name, O_RDONLY); 6.379 + if ( initrd_fd < 0 ) 6.380 + { 6.381 + if ( verbose ) 6.382 + PERROR("Could not open the initial ramdisk image"); 6.383 + goto error_out; 6.384 + } 6.385 + } 6.386 + 6.387 + op.cmd = DOM0_GETDOMAININFO; 6.388 + op.u.getdomaininfo.domain = domid; 6.389 + if ( (do_dom0_op(&op) < 0) || (op.u.getdomaininfo.domain != domid) ) 6.390 + { 6.391 + if ( verbose ) 6.392 + PERROR("Could not get info on domain"); 6.393 + goto error_out; 6.394 + } 6.395 + if ( (op.u.getdomaininfo.state != DOMSTATE_STOPPED) || 6.396 + (op.u.getdomaininfo.ctxt.pt_base != 0) ) 6.397 + { 6.398 + if ( verbose ) 6.399 + ERROR("Domain is already constructed"); 6.400 + goto error_out; 6.401 + } 6.402 + 6.403 + if ( setup_guestos(domid, kernel_gfd, initrd_fd, tot_pages, 6.404 + &virt_startinfo_addr, 6.405 + load_addr, &launch_op.u.builddomain, cmdline, 6.406 + op.u.getdomaininfo.shared_info_frame, verbose) < 0 ) 6.407 + { 6.408 + if ( verbose ) 6.409 + ERROR("Error constructing guest OS"); 6.410 + goto error_out; 6.411 + } 6.412 + 6.413 + if ( initrd_fd >= 0 ) 6.414 + close(initrd_fd); 6.415 + gzclose(kernel_gfd); 6.416 + 6.417 + ctxt = &launch_op.u.builddomain.ctxt; 6.418 + 6.419 + ctxt->flags = 0; 6.420 + 6.421 + /* 6.422 + * Initial register values: 6.423 + * DS,ES,FS,GS = FLAT_RING1_DS 6.424 + * CS:EIP = FLAT_RING1_CS:start_pc 6.425 + * SS:ESP = FLAT_RING1_DS:start_stack 6.426 + * ESI = start_info 6.427 + * [EAX,EBX,ECX,EDX,EDI,EBP are zero] 6.428 + * EFLAGS = IF | 2 (bit 1 is reserved and should always be 1) 6.429 + */ 6.430 + ctxt->i386_ctxt.ds = FLAT_RING1_DS; 6.431 + ctxt->i386_ctxt.es = FLAT_RING1_DS; 6.432 + ctxt->i386_ctxt.fs = FLAT_RING1_DS; 6.433 + ctxt->i386_ctxt.gs = FLAT_RING1_DS; 6.434 + ctxt->i386_ctxt.ss = FLAT_RING1_DS; 6.435 + ctxt->i386_ctxt.cs = FLAT_RING1_CS; 6.436 + ctxt->i386_ctxt.eip = load_addr; 6.437 + ctxt->i386_ctxt.esp = virt_startinfo_addr; 6.438 + ctxt->i386_ctxt.esi = 
virt_startinfo_addr; 6.439 + ctxt->i386_ctxt.eflags = (1<<9) | (1<<2); 6.440 + 6.441 + /* FPU is set up to default initial state. */ 6.442 + memset(ctxt->i387_ctxt, 0, sizeof(ctxt->i387_ctxt)); 6.443 + 6.444 + /* Virtual IDT is empty at start-of-day. */ 6.445 + for ( i = 0; i < 256; i++ ) 6.446 + { 6.447 + ctxt->trap_ctxt[i].vector = i; 6.448 + ctxt->trap_ctxt[i].cs = FLAT_RING1_CS; 6.449 + } 6.450 + ctxt->fast_trap_idx = 0; 6.451 + 6.452 + /* No LDT. */ 6.453 + ctxt->ldt_ents = 0; 6.454 + 6.455 + /* Use the default Xen-provided GDT. */ 6.456 + ctxt->gdt_ents = 0; 6.457 + 6.458 + /* Ring 1 stack is the initial stack. */ 6.459 + ctxt->ring1_ss = FLAT_RING1_DS; 6.460 + ctxt->ring1_esp = virt_startinfo_addr; 6.461 + 6.462 + /* No debugging. */ 6.463 + memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg)); 6.464 + 6.465 + /* No callback handlers. */ 6.466 + ctxt->event_callback_cs = FLAT_RING1_CS; 6.467 + ctxt->event_callback_eip = 0; 6.468 + ctxt->failsafe_callback_cs = FLAT_RING1_CS; 6.469 + ctxt->failsafe_callback_eip = 0; 6.470 + 6.471 + launch_op.u.builddomain.domain = domid; 6.472 + launch_op.u.builddomain.num_vifs = 1; 6.473 + 6.474 + launch_op.cmd = DOM0_BUILDDOMAIN; 6.475 + rc = do_dom0_op(&launch_op); 6.476 + 6.477 + return rc; 6.478 + 6.479 + error_out: 6.480 + if ( initrd_fd >= 0 ) 6.481 + close(initrd_fd); 6.482 + gzclose(kernel_gfd); 6.483 + return -1; 6.484 +}
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/tools/libxi/libxi_linux_restore.c Wed Nov 19 17:22:42 2003 +0000 7.3 @@ -0,0 +1,476 @@ 7.4 +/****************************************************************************** 7.5 + * libxi_linux_restore.c 7.6 + * 7.7 + * Restore the state of a Xenolinux session. 7.8 + * 7.9 + * Copyright (c) 2003, K A Fraser. 7.10 + */ 7.11 + 7.12 +#include "libxi_private.h" 7.13 +#include <asm-xeno/suspend.h> 7.14 +#include <zlib.h> 7.15 + 7.16 +/* This may allow us to create a 'quiet' command-line option, if necessary. */ 7.17 +#define verbose_printf(_f, _a...) \ 7.18 + do { \ 7.19 + if ( !verbose ) break; \ 7.20 + printf( _f , ## _a ); \ 7.21 + fflush(stdout); \ 7.22 + } while ( 0 ) 7.23 + 7.24 +static int get_pfn_list( 7.25 + int domain_id, unsigned long *pfn_buf, unsigned long max_pfns) 7.26 +{ 7.27 + dom0_op_t op; 7.28 + int ret; 7.29 + op.cmd = DOM0_GETMEMLIST; 7.30 + op.u.getmemlist.domain = domain_id; 7.31 + op.u.getmemlist.max_pfns = max_pfns; 7.32 + op.u.getmemlist.buffer = pfn_buf; 7.33 + 7.34 + if ( mlock(pfn_buf, max_pfns * sizeof(unsigned long)) != 0 ) 7.35 + { 7.36 + PERROR("Could not lock pfn list buffer"); 7.37 + return -1; 7.38 + } 7.39 + 7.40 + ret = do_dom0_op(&op); 7.41 + 7.42 + (void)munlock(pfn_buf, max_pfns * sizeof(unsigned long)); 7.43 + 7.44 + return (ret < 0) ? 
-1 : op.u.getmemlist.num_pfns; 7.45 +} 7.46 + 7.47 +#define MAX_MMU_UPDATES 1024 7.48 + 7.49 +static int flush_mmu_updates(mmu_update_t *mmu_updates, 7.50 + int *mmu_update_idx) 7.51 +{ 7.52 + int err = 0; 7.53 + privcmd_hypercall_t hypercall; 7.54 + 7.55 + if ( *mmu_update_idx == 0 ) 7.56 + return 0; 7.57 + 7.58 + hypercall.op = __HYPERVISOR_mmu_update; 7.59 + hypercall.arg[0] = (unsigned long)mmu_updates; 7.60 + hypercall.arg[1] = (unsigned long)*mmu_update_idx; 7.61 + 7.62 + if ( mlock(mmu_updates, sizeof(mmu_updates)) != 0 ) 7.63 + { 7.64 + PERROR("Could not lock pagetable update array"); 7.65 + err = 1; 7.66 + goto out; 7.67 + } 7.68 + 7.69 + if ( do_xen_hypercall(&hypercall) < 0 ) 7.70 + { 7.71 + ERROR("Failure when submitting mmu updates"); 7.72 + err = 1; 7.73 + } 7.74 + 7.75 + *mmu_update_idx = 0; 7.76 + 7.77 + (void)munlock(mmu_updates, sizeof(mmu_updates)); 7.78 + 7.79 + out: 7.80 + return err; 7.81 +} 7.82 + 7.83 +static int add_mmu_update(mmu_update_t *mmu_updates, 7.84 + int *mmu_update_idx, 7.85 + unsigned long ptr, 7.86 + unsigned long val) 7.87 +{ 7.88 + mmu_updates[*mmu_update_idx].ptr = ptr; 7.89 + mmu_updates[*mmu_update_idx].val = val; 7.90 + if ( ++*mmu_update_idx == MAX_MMU_UPDATES ) 7.91 + return flush_mmu_updates(mmu_updates, mmu_update_idx); 7.92 + return 0; 7.93 +} 7.94 + 7.95 +static int checked_read(gzFile fd, void *buf, size_t count) 7.96 +{ 7.97 + int rc; 7.98 + while ( ((rc = gzread(fd, buf, count)) == -1) && (errno == EINTR) ) 7.99 + continue; 7.100 + return rc == count; 7.101 +} 7.102 + 7.103 +int xi_linux_restore(const char *state_file, int verbose) 7.104 +{ 7.105 + dom0_op_t op; 7.106 + int rc = 1, i, j; 7.107 + unsigned long mfn, pfn, dom = 0; 7.108 + unsigned int prev_pc, this_pc; 7.109 + 7.110 + /* Number of page frames in use by this XenoLinux session. */ 7.111 + unsigned long nr_pfns; 7.112 + 7.113 + /* The new domain's shared-info frame number. 
*/ 7.114 + unsigned long shared_info_frame; 7.115 + unsigned char shared_info[PAGE_SIZE]; /* saved contents from file */ 7.116 + 7.117 + /* A copy of the CPU context of the guest. */ 7.118 + full_execution_context_t ctxt; 7.119 + 7.120 + /* First 16 bytes of the state file must contain 'XenoLinuxSuspend'. */ 7.121 + char signature[16]; 7.122 + 7.123 + /* A copy of the domain's name. */ 7.124 + char name[MAX_DOMAIN_NAME]; 7.125 + 7.126 + /* A table containg the type of each PFN (/not/ MFN!). */ 7.127 + unsigned long *pfn_type = NULL; 7.128 + 7.129 + /* A temporary mapping, and a copy, of one frame of guest memory. */ 7.130 + unsigned long *ppage, page[1024]; 7.131 + 7.132 + /* A copy of the pfn-to-mfn table frame list. */ 7.133 + unsigned long pfn_to_mfn_frame_list[1024]; 7.134 + 7.135 + /* A table mapping each PFN to its new MFN. */ 7.136 + unsigned long *pfn_to_mfn_table = NULL; 7.137 + 7.138 + /* A temporary mapping of the guest's suspend record. */ 7.139 + suspend_record_t *p_srec; 7.140 + 7.141 + /* The name and descriptor of the file that we are reading from. */ 7.142 + int fd; 7.143 + gzFile gfd; 7.144 + 7.145 + mmu_update_t mmu_updates[MAX_MMU_UPDATES]; 7.146 + int mmu_update_idx = 0; 7.147 + 7.148 + if ( (fd = open(state_file, O_RDONLY)) == -1 ) 7.149 + { 7.150 + PERROR("Could not open state file for reading"); 7.151 + return 1; 7.152 + } 7.153 + 7.154 + if ( (gfd = gzdopen(fd, "rb")) == NULL ) 7.155 + { 7.156 + ERROR("Could not allocate decompression state for state file"); 7.157 + close(fd); 7.158 + return 1; 7.159 + } 7.160 + 7.161 + /* Start writing out the saved-domain record. 
*/ 7.162 + if ( !checked_read(gfd, signature, 16) || 7.163 + (memcmp(signature, "XenoLinuxSuspend", 16) != 0) ) 7.164 + { 7.165 + ERROR("Unrecognised state format -- no signature found"); 7.166 + goto out; 7.167 + } 7.168 + 7.169 + if ( !checked_read(gfd, name, sizeof(name)) || 7.170 + !checked_read(gfd, &nr_pfns, sizeof(unsigned long)) || 7.171 + !checked_read(gfd, &ctxt, sizeof(ctxt)) || 7.172 + !checked_read(gfd, shared_info, PAGE_SIZE) || 7.173 + !checked_read(gfd, pfn_to_mfn_frame_list, PAGE_SIZE) ) 7.174 + { 7.175 + ERROR("Error when reading from state file"); 7.176 + goto out; 7.177 + } 7.178 + 7.179 + for ( i = 0; i < MAX_DOMAIN_NAME; i++ ) 7.180 + { 7.181 + if ( name[i] == '\0' ) break; 7.182 + if ( name[i] & 0x80 ) 7.183 + { 7.184 + ERROR("Random characters in domain name"); 7.185 + goto out; 7.186 + } 7.187 + } 7.188 + name[MAX_DOMAIN_NAME-1] = '\0'; 7.189 + 7.190 + if ( nr_pfns > 1024*1024 ) 7.191 + { 7.192 + ERROR("Invalid state file -- pfn count out of range"); 7.193 + goto out; 7.194 + } 7.195 + 7.196 + /* We want zeroed memory so use calloc rather than malloc. */ 7.197 + pfn_to_mfn_table = calloc(1, 4 * nr_pfns); 7.198 + pfn_type = calloc(1, 4 * nr_pfns); 7.199 + 7.200 + if ( (pfn_to_mfn_table == NULL) || (pfn_type == NULL) ) 7.201 + { 7.202 + errno = ENOMEM; 7.203 + goto out; 7.204 + } 7.205 + 7.206 + if ( !checked_read(gfd, pfn_type, 4 * nr_pfns) ) 7.207 + { 7.208 + ERROR("Error when reading from state file"); 7.209 + goto out; 7.210 + } 7.211 + 7.212 + /* Create a new domain of the appropriate size, and find it's dom_id. */ 7.213 + op.cmd = DOM0_CREATEDOMAIN; 7.214 + op.u.createdomain.memory_kb = nr_pfns * (PAGE_SIZE / 1024); 7.215 + memcpy(op.u.createdomain.name, name, MAX_DOMAIN_NAME); 7.216 + if ( do_dom0_op(&op) < 0 ) 7.217 + { 7.218 + ERROR("Could not create new domain"); 7.219 + goto out; 7.220 + } 7.221 + dom = op.u.createdomain.domain; 7.222 + 7.223 + /* Get the domain's shared-info frame. 
*/ 7.224 + op.cmd = DOM0_GETDOMAININFO; 7.225 + op.u.getdomaininfo.domain = dom; 7.226 + if ( do_dom0_op(&op) < 0 ) 7.227 + { 7.228 + ERROR("Could not get information on new domain"); 7.229 + goto out; 7.230 + } 7.231 + shared_info_frame = op.u.getdomaininfo.shared_info_frame; 7.232 + 7.233 + if ( init_pfn_mapper() < 0 ) 7.234 + goto out; 7.235 + 7.236 + /* Copy saved contents of shared-info page. No checking needed. */ 7.237 + ppage = map_pfn(shared_info_frame); 7.238 + memcpy(ppage, shared_info, PAGE_SIZE); 7.239 + unmap_pfn(ppage); 7.240 + 7.241 + /* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */ 7.242 + if ( get_pfn_list(dom, pfn_to_mfn_table, nr_pfns) != nr_pfns ) 7.243 + { 7.244 + ERROR("Did not read correct number of frame numbers for new dom"); 7.245 + goto out; 7.246 + } 7.247 + 7.248 + verbose_printf("Reloading memory pages: 0%%"); 7.249 + 7.250 + /* 7.251 + * Now simply read each saved frame into its new machine frame. 7.252 + * We uncanonicalise page tables as we go. 
7.253 + */ 7.254 + prev_pc = 0; 7.255 + for ( i = 0; i < nr_pfns; i++ ) 7.256 + { 7.257 + this_pc = (i * 100) / nr_pfns; 7.258 + if ( (this_pc - prev_pc) >= 5 ) 7.259 + { 7.260 + verbose_printf("\b\b\b\b%3d%%", this_pc); 7.261 + prev_pc = this_pc; 7.262 + } 7.263 + 7.264 + mfn = pfn_to_mfn_table[i]; 7.265 + 7.266 + if ( !checked_read(gfd, page, PAGE_SIZE) ) 7.267 + { 7.268 + ERROR("Error when reading from state file"); 7.269 + goto out; 7.270 + } 7.271 + 7.272 + ppage = map_pfn(mfn); 7.273 + switch ( pfn_type[i] ) 7.274 + { 7.275 + case L1TAB: 7.276 + memset(ppage, 0, PAGE_SIZE); 7.277 + if ( add_mmu_update(mmu_updates, &mmu_update_idx, 7.278 + (mfn<<PAGE_SHIFT) | MMU_EXTENDED_COMMAND, 7.279 + MMUEXT_PIN_L1_TABLE) ) 7.280 + goto out; 7.281 + for ( j = 0; j < 1024; j++ ) 7.282 + { 7.283 + if ( page[j] & _PAGE_PRESENT ) 7.284 + { 7.285 + if ( (pfn = page[j] >> PAGE_SHIFT) >= nr_pfns ) 7.286 + { 7.287 + ERROR("Frame number in page table is out of range"); 7.288 + goto out; 7.289 + } 7.290 + if ( (pfn_type[pfn] != NONE) && (page[j] & _PAGE_RW) ) 7.291 + { 7.292 + ERROR("Write access requested for a restricted frame"); 7.293 + goto out; 7.294 + } 7.295 + page[j] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT); 7.296 + page[j] |= pfn_to_mfn_table[pfn] << PAGE_SHIFT; 7.297 + } 7.298 + if ( add_mmu_update(mmu_updates, &mmu_update_idx, 7.299 + (unsigned long)&ppage[j], page[j]) ) 7.300 + goto out; 7.301 + } 7.302 + break; 7.303 + case L2TAB: 7.304 + memset(ppage, 0, PAGE_SIZE); 7.305 + if ( add_mmu_update(mmu_updates, &mmu_update_idx, 7.306 + (mfn<<PAGE_SHIFT) | MMU_EXTENDED_COMMAND, 7.307 + MMUEXT_PIN_L2_TABLE) ) 7.308 + goto out; 7.309 + for ( j = 0; j < (HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT); j++ ) 7.310 + { 7.311 + if ( page[j] & _PAGE_PRESENT ) 7.312 + { 7.313 + if ( (pfn = page[j] >> PAGE_SHIFT) >= nr_pfns ) 7.314 + { 7.315 + ERROR("Frame number in page table is out of range"); 7.316 + goto out; 7.317 + } 7.318 + if ( pfn_type[pfn] != L1TAB ) 7.319 + { 7.320 + 
ERROR("Page table mistyping"); 7.321 + goto out; 7.322 + } 7.323 + /* Haven't reached the L1 table yet. Ensure it is safe! */ 7.324 + if ( pfn > i ) 7.325 + { 7.326 + unsigned long **l1 = map_pfn(pfn_to_mfn_table[pfn]); 7.327 + memset(l1, 0, PAGE_SIZE); 7.328 + unmap_pfn(l1); 7.329 + } 7.330 + page[j] &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE); 7.331 + page[j] |= pfn_to_mfn_table[pfn] << PAGE_SHIFT; 7.332 + } 7.333 + if ( add_mmu_update(mmu_updates, &mmu_update_idx, 7.334 + (unsigned long)&ppage[j], page[j]) ) 7.335 + goto out; 7.336 + } 7.337 + break; 7.338 + default: 7.339 + memcpy(ppage, page, PAGE_SIZE); 7.340 + break; 7.341 + } 7.342 + /* NB. Must flush before unmapping page, as pass VAs to Xen. */ 7.343 + if ( flush_mmu_updates(mmu_updates, &mmu_update_idx) ) 7.344 + goto out; 7.345 + unmap_pfn(ppage); 7.346 + 7.347 + if ( add_mmu_update(mmu_updates, &mmu_update_idx, 7.348 + (mfn<<PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i) ) 7.349 + goto out; 7.350 + } 7.351 + 7.352 + if ( flush_mmu_updates(mmu_updates, &mmu_update_idx) ) 7.353 + goto out; 7.354 + 7.355 + verbose_printf("\b\b\b\b100%%\nMemory reloaded.\n"); 7.356 + 7.357 + /* Uncanonicalise the suspend-record frame number and poke resume rec. */ 7.358 + pfn = ctxt.i386_ctxt.esi; 7.359 + if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) ) 7.360 + { 7.361 + ERROR("Suspend record frame number is bad"); 7.362 + goto out; 7.363 + } 7.364 + ctxt.i386_ctxt.esi = mfn = pfn_to_mfn_table[pfn]; 7.365 + p_srec = map_pfn(mfn); 7.366 + p_srec->resume_info.nr_pages = nr_pfns; 7.367 + p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT; 7.368 + p_srec->resume_info.dom_id = dom; 7.369 + p_srec->resume_info.flags = 0; 7.370 + unmap_pfn(p_srec); 7.371 + 7.372 + /* Uncanonicalise each GDT frame number. 
*/ 7.373 + if ( ctxt.gdt_ents > 8192 ) 7.374 + { 7.375 + ERROR("GDT entry count out of range"); 7.376 + goto out; 7.377 + } 7.378 + for ( i = 0; i < ctxt.gdt_ents; i += 512 ) 7.379 + { 7.380 + pfn = ctxt.gdt_frames[i]; 7.381 + if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) ) 7.382 + { 7.383 + ERROR("GDT frame number is bad"); 7.384 + goto out; 7.385 + } 7.386 + ctxt.gdt_frames[i] = pfn_to_mfn_table[pfn]; 7.387 + } 7.388 + 7.389 + /* Uncanonicalise the page table base pointer. */ 7.390 + pfn = ctxt.pt_base >> PAGE_SHIFT; 7.391 + if ( (pfn >= nr_pfns) || (pfn_type[pfn] != L2TAB) ) 7.392 + { 7.393 + ERROR("PT base is bad"); 7.394 + goto out; 7.395 + } 7.396 + ctxt.pt_base = pfn_to_mfn_table[pfn] << PAGE_SHIFT; 7.397 + 7.398 + /* Uncanonicalise the pfn-to-mfn table frame-number list. */ 7.399 + for ( i = 0; i < nr_pfns; i += 1024 ) 7.400 + { 7.401 + unsigned long copy_size = (nr_pfns - i) * sizeof(unsigned long); 7.402 + if ( copy_size > PAGE_SIZE ) copy_size = PAGE_SIZE; 7.403 + pfn = pfn_to_mfn_frame_list[i/1024]; 7.404 + if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) ) 7.405 + { 7.406 + ERROR("PFN-to-MFN frame number is bad"); 7.407 + goto out; 7.408 + } 7.409 + ppage = map_pfn(pfn_to_mfn_table[pfn]); 7.410 + memcpy(ppage, &pfn_to_mfn_table[i], copy_size); 7.411 + unmap_pfn(ppage); 7.412 + } 7.413 + 7.414 + /* 7.415 + * Safety checking of saved context: 7.416 + * 1. i386_ctxt is fine, as Xen checks that on context switch. 7.417 + * 2. i387_ctxt is fine, as it can't hurt Xen. 7.418 + * 3. trap_ctxt needs the code selectors checked. 7.419 + * 4. fast_trap_idx is checked by Xen. 7.420 + * 5. ldt base must be page-aligned, no more than 8192 ents, ... 7.421 + * 6. gdt already done, and further checking is done by Xen. 7.422 + * 7. check that ring1_ss is safe. 7.423 + * 8. pt_base is already done. 7.424 + * 9. debugregs are checked by Xen. 7.425 + * 10. callback code selectors need checking. 
7.426 + */ 7.427 + for ( i = 0; i < 256; i++ ) 7.428 + { 7.429 + ctxt.trap_ctxt[i].vector = i; 7.430 + if ( (ctxt.trap_ctxt[i].cs & 3) == 0 ) 7.431 + ctxt.trap_ctxt[i].cs = FLAT_RING1_CS; 7.432 + } 7.433 + if ( (ctxt.ring1_ss & 3) == 0 ) 7.434 + ctxt.ring1_ss = FLAT_RING1_DS; 7.435 + if ( (ctxt.event_callback_cs & 3) == 0 ) 7.436 + ctxt.event_callback_cs = FLAT_RING1_CS; 7.437 + if ( (ctxt.failsafe_callback_cs & 3) == 0 ) 7.438 + ctxt.failsafe_callback_cs = FLAT_RING1_CS; 7.439 + if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) || 7.440 + (ctxt.ldt_ents > 8192) || 7.441 + (ctxt.ldt_base > HYPERVISOR_VIRT_START) || 7.442 + ((ctxt.ldt_base + ctxt.ldt_ents*8) > HYPERVISOR_VIRT_START) ) 7.443 + { 7.444 + ERROR("Bad LDT base or size"); 7.445 + goto out; 7.446 + } 7.447 + 7.448 + op.cmd = DOM0_BUILDDOMAIN; 7.449 + op.u.builddomain.domain = dom; 7.450 + op.u.builddomain.num_vifs = 1; 7.451 + memcpy(&op.u.builddomain.ctxt, &ctxt, sizeof(ctxt)); 7.452 + rc = do_dom0_op(&op); 7.453 + 7.454 + out: 7.455 + if ( rc != 0 ) 7.456 + { 7.457 + if ( dom != 0 ) 7.458 + { 7.459 + op.cmd = DOM0_DESTROYDOMAIN; 7.460 + op.u.destroydomain.domain = dom; 7.461 + op.u.destroydomain.force = 1; 7.462 + (void)do_dom0_op(&op); 7.463 + } 7.464 + } 7.465 + else 7.466 + { 7.467 + /* Success: print the domain id. */ 7.468 + verbose_printf("DOM=%ld\n", dom); 7.469 + } 7.470 + 7.471 + if ( pfn_to_mfn_table != NULL ) 7.472 + free(pfn_to_mfn_table); 7.473 + if ( pfn_type != NULL ) 7.474 + free(pfn_type); 7.475 + 7.476 + gzclose(gfd); 7.477 + 7.478 + return (rc == 0) ? dom : rc; 7.479 +}
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/tools/libxi/libxi_linux_save.c Wed Nov 19 17:22:42 2003 +0000 8.3 @@ -0,0 +1,380 @@ 8.4 +/****************************************************************************** 8.5 + * libxi_linux_save.c 8.6 + * 8.7 + * Save the state of a running Xenolinux session. 8.8 + * 8.9 + * Copyright (c) 2003, K A Fraser. 8.10 + */ 8.11 + 8.12 +#include "libxi_private.h" 8.13 +#include <asm-xeno/suspend.h> 8.14 +#include <zlib.h> 8.15 + 8.16 +/* This may allow us to create a 'quiet' command-line option, if necessary. */ 8.17 +#define verbose_printf(_f, _a...) \ 8.18 + do { \ 8.19 + if ( !verbose ) break; \ 8.20 + printf( _f , ## _a ); \ 8.21 + fflush(stdout); \ 8.22 + } while ( 0 ) 8.23 + 8.24 +/* 8.25 + * Returns TRUE if the given machine frame number has a unique mapping 8.26 + * in the guest's pseudophysical map. 8.27 + */ 8.28 +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ 8.29 + (((_mfn) < (1024*1024)) && \ 8.30 + (pfn_to_mfn_table[mfn_to_pfn_table[_mfn]] == (_mfn))) 8.31 + 8.32 +/* Returns TRUE if MFN is successfully converted to a PFN. 
*/ 8.33 +#define translate_mfn_to_pfn(_pmfn) \ 8.34 +({ \ 8.35 + unsigned long mfn = *(_pmfn); \ 8.36 + int _res = 1; \ 8.37 + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) \ 8.38 + _res = 0; \ 8.39 + else \ 8.40 + *(_pmfn) = mfn_to_pfn_table[mfn]; \ 8.41 + _res; \ 8.42 +}) 8.43 + 8.44 +static int check_pfn_ownership(unsigned long mfn, unsigned int dom) 8.45 +{ 8.46 + dom0_op_t op; 8.47 + op.cmd = DOM0_GETPAGEFRAMEINFO; 8.48 + op.u.getpageframeinfo.pfn = mfn; 8.49 + if ( (do_dom0_op(&op) < 0) || (op.u.getpageframeinfo.domain != dom) ) 8.50 + return 0; 8.51 + return 1; 8.52 +} 8.53 + 8.54 +#define GETPFN_ERR (~0U) 8.55 +static unsigned int get_pfn_type(unsigned long mfn) 8.56 +{ 8.57 + dom0_op_t op; 8.58 + op.cmd = DOM0_GETPAGEFRAMEINFO; 8.59 + op.u.getpageframeinfo.pfn = mfn; 8.60 + if ( do_dom0_op(&op) < 0 ) 8.61 + { 8.62 + PERROR("Unexpected failure when getting page frame info!"); 8.63 + return GETPFN_ERR; 8.64 + } 8.65 + return op.u.getpageframeinfo.type; 8.66 +} 8.67 + 8.68 +static int checked_write(gzFile fd, void *buf, size_t count) 8.69 +{ 8.70 + int rc; 8.71 + while ( ((rc = gzwrite(fd, buf, count)) == -1) && (errno = EINTR) ) 8.72 + continue; 8.73 + return rc == count; 8.74 +} 8.75 + 8.76 +int xi_linux_save(unsigned int domid, const char *state_file, int verbose) 8.77 +{ 8.78 + dom0_op_t op; 8.79 + int rc = 1, i, j; 8.80 + unsigned long mfn; 8.81 + unsigned int prev_pc, this_pc; 8.82 + 8.83 + /* Remember if we stopped the guest, so we can restart it on exit. */ 8.84 + int we_stopped_it = 0; 8.85 + 8.86 + /* The new domain's shared-info frame number. */ 8.87 + unsigned long shared_info_frame; 8.88 + 8.89 + /* A copy of the CPU context of the guest. */ 8.90 + full_execution_context_t ctxt; 8.91 + 8.92 + /* A copy of the domain's name. */ 8.93 + char name[MAX_DOMAIN_NAME]; 8.94 + 8.95 + /* A table containg the type of each PFN (/not/ MFN!). */ 8.96 + unsigned long *pfn_type = NULL; 8.97 + 8.98 + /* A temporary mapping, and a copy, of one frame of guest memory. 
*/ 8.99 + unsigned long *ppage, page[1024]; 8.100 + 8.101 + /* A temporary mapping, and a copy, of the pfn-to-mfn table frame list. */ 8.102 + unsigned long *p_pfn_to_mfn_frame_list, pfn_to_mfn_frame_list[1024]; 8.103 + /* A temporary mapping of one frame in the above list. */ 8.104 + unsigned long *pfn_to_mfn_frame; 8.105 + 8.106 + /* A table mapping each PFN to its current MFN. */ 8.107 + unsigned long *pfn_to_mfn_table = NULL; 8.108 + /* A table mapping each current MFN to its canonical PFN. */ 8.109 + unsigned long *mfn_to_pfn_table = NULL; 8.110 + 8.111 + /* A temporary mapping, and a copy, of the guest's suspend record. */ 8.112 + suspend_record_t *p_srec, srec; 8.113 + 8.114 + /* The name and descriptor of the file that we are writing to. */ 8.115 + int fd; 8.116 + gzFile gfd; 8.117 + 8.118 + if ( (fd = open(state_file, O_CREAT|O_EXCL|O_WRONLY, 0644)) == -1 ) 8.119 + { 8.120 + PERROR("Could not open file for writing"); 8.121 + return 1; 8.122 + } 8.123 + 8.124 + /* 8.125 + * Compression rate 1: we want speed over compression. We're mainly going 8.126 + * for those zero pages, after all. 8.127 + */ 8.128 + if ( (gfd = gzdopen(fd, "wb1")) == NULL ) 8.129 + { 8.130 + ERROR("Could not allocate compression state for state file"); 8.131 + close(fd); 8.132 + return 1; 8.133 + } 8.134 + 8.135 + /* Ensure that the domain exists, and that it is stopped. 
*/ 8.136 + for ( ; ; ) 8.137 + { 8.138 + op.cmd = DOM0_GETDOMAININFO; 8.139 + op.u.getdomaininfo.domain = domid; 8.140 + if ( (do_dom0_op(&op) < 0) || (op.u.getdomaininfo.domain != domid) ) 8.141 + { 8.142 + PERROR("Could not get info on domain"); 8.143 + goto out; 8.144 + } 8.145 + 8.146 + memcpy(&ctxt, &op.u.getdomaininfo.ctxt, sizeof(ctxt)); 8.147 + memcpy(name, op.u.getdomaininfo.name, sizeof(name)); 8.148 + shared_info_frame = op.u.getdomaininfo.shared_info_frame; 8.149 + 8.150 + if ( op.u.getdomaininfo.state == DOMSTATE_STOPPED ) 8.151 + break; 8.152 + 8.153 + we_stopped_it = 1; 8.154 + 8.155 + op.cmd = DOM0_STOPDOMAIN; 8.156 + op.u.stopdomain.domain = domid; 8.157 + (void)do_dom0_op(&op); 8.158 + 8.159 + sleep(1); 8.160 + } 8.161 + 8.162 + /* A cheesy test to see whether the domain contains valid state. */ 8.163 + if ( ctxt.pt_base == 0 ) 8.164 + { 8.165 + ERROR("Domain is not in a valid Xenolinux state"); 8.166 + goto out; 8.167 + } 8.168 + 8.169 + if ( init_pfn_mapper() < 0 ) 8.170 + goto out; 8.171 + 8.172 + /* Is the suspend-record MFN actually valid for this domain? */ 8.173 + if ( !check_pfn_ownership(ctxt.i386_ctxt.esi, domid) ) 8.174 + { 8.175 + ERROR("Invalid state record pointer"); 8.176 + goto out; 8.177 + } 8.178 + 8.179 + /* If the suspend-record MFN is okay then grab a copy of it to @srec. */ 8.180 + p_srec = map_pfn(ctxt.i386_ctxt.esi); 8.181 + memcpy(&srec, p_srec, sizeof(srec)); 8.182 + unmap_pfn(p_srec); 8.183 + 8.184 + if ( srec.nr_pfns > 1024*1024 ) 8.185 + { 8.186 + ERROR("Invalid state record -- pfn count out of range"); 8.187 + goto out; 8.188 + } 8.189 + 8.190 + if ( !check_pfn_ownership(srec.pfn_to_mfn_frame_list, domid) ) 8.191 + { 8.192 + ERROR("Invalid pfn-to-mfn frame list pointer"); 8.193 + goto out; 8.194 + } 8.195 + 8.196 + /* Grab a copy of the pfn-to-mfn table frame list. 
*/ 8.197 + p_pfn_to_mfn_frame_list = map_pfn(srec.pfn_to_mfn_frame_list); 8.198 + memcpy(pfn_to_mfn_frame_list, p_pfn_to_mfn_frame_list, PAGE_SIZE); 8.199 + unmap_pfn(p_pfn_to_mfn_frame_list); 8.200 + 8.201 + /* We want zeroed memory so use calloc rather than malloc. */ 8.202 + mfn_to_pfn_table = calloc(1, 4 * 1024 * 1024); 8.203 + pfn_to_mfn_table = calloc(1, 4 * srec.nr_pfns); 8.204 + pfn_type = calloc(1, 4 * srec.nr_pfns); 8.205 + 8.206 + if ( (mfn_to_pfn_table == NULL) || 8.207 + (pfn_to_mfn_table == NULL) || 8.208 + (pfn_type == NULL) ) 8.209 + { 8.210 + errno = ENOMEM; 8.211 + goto out; 8.212 + } 8.213 + 8.214 + 8.215 + /* 8.216 + * Construct the local pfn-to-mfn and mfn-to-pfn tables. On exit from this 8.217 + * loop we have each MFN mapped at most once. Note that there may be MFNs 8.218 + * that aren't mapped at all: we detect these by MFN_IS_IN_PSEUDOPHYS_MAP. 8.219 + */ 8.220 + pfn_to_mfn_frame = NULL; 8.221 + for ( i = 0; i < srec.nr_pfns; i++ ) 8.222 + { 8.223 + /* Each frameful of table frames must be checked & mapped on demand. */ 8.224 + if ( (i & 1023) == 0 ) 8.225 + { 8.226 + mfn = pfn_to_mfn_frame_list[i/1024]; 8.227 + if ( !check_pfn_ownership(mfn, domid) ) 8.228 + { 8.229 + ERROR("Invalid frame number if pfn-to-mfn frame list"); 8.230 + goto out; 8.231 + } 8.232 + if ( pfn_to_mfn_frame != NULL ) 8.233 + unmap_pfn(pfn_to_mfn_frame); 8.234 + pfn_to_mfn_frame = map_pfn(mfn); 8.235 + } 8.236 + 8.237 + mfn = pfn_to_mfn_frame[i & 1023]; 8.238 + 8.239 + if ( !check_pfn_ownership(mfn, domid) ) 8.240 + { 8.241 + ERROR("Invalid frame specified with pfn-to-mfn table"); 8.242 + goto out; 8.243 + } 8.244 + 8.245 + /* Did we map this MFN already? That would be invalid! */ 8.246 + if ( MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) 8.247 + { 8.248 + ERROR("A machine frame appears twice in pseudophys space"); 8.249 + goto out; 8.250 + } 8.251 + 8.252 + pfn_to_mfn_table[i] = mfn; 8.253 + mfn_to_pfn_table[mfn] = i; 8.254 + 8.255 + /* Query page type by MFN, but store it by PFN. 
*/ 8.256 + if ( (pfn_type[i] = get_pfn_type(mfn)) == GETPFN_ERR ) 8.257 + goto out; 8.258 + } 8.259 + 8.260 + /* Canonicalise the suspend-record frame number. */ 8.261 + if ( !translate_mfn_to_pfn(&ctxt.i386_ctxt.esi) ) 8.262 + { 8.263 + ERROR("State record is not in range of pseudophys map"); 8.264 + goto out; 8.265 + } 8.266 + 8.267 + /* Canonicalise each GDT frame number. */ 8.268 + for ( i = 0; i < ctxt.gdt_ents; i += 512 ) 8.269 + { 8.270 + if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) 8.271 + { 8.272 + ERROR("GDT frame is not in range of pseudophys map"); 8.273 + goto out; 8.274 + } 8.275 + } 8.276 + 8.277 + /* Canonicalise the page table base pointer. */ 8.278 + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) 8.279 + { 8.280 + ERROR("PT base is not in range of pseudophys map"); 8.281 + goto out; 8.282 + } 8.283 + ctxt.pt_base = mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] << PAGE_SHIFT; 8.284 + 8.285 + /* Canonicalise the pfn-to-mfn table frame-number list. */ 8.286 + for ( i = 0; i < srec.nr_pfns; i += 1024 ) 8.287 + { 8.288 + if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ) 8.289 + { 8.290 + ERROR("Frame # in pfn-to-mfn frame list is not in pseudophys"); 8.291 + goto out; 8.292 + } 8.293 + } 8.294 + 8.295 + /* Start writing out the saved-domain record. 
*/ 8.296 + ppage = map_pfn(shared_info_frame); 8.297 + if ( !checked_write(gfd, "XenoLinuxSuspend", 16) || 8.298 + !checked_write(gfd, name, sizeof(name)) || 8.299 + !checked_write(gfd, &srec.nr_pfns, sizeof(unsigned long)) || 8.300 + !checked_write(gfd, &ctxt, sizeof(ctxt)) || 8.301 + !checked_write(gfd, ppage, PAGE_SIZE) || 8.302 + !checked_write(gfd, pfn_to_mfn_frame_list, PAGE_SIZE) || 8.303 + !checked_write(gfd, pfn_type, 4 * srec.nr_pfns) ) 8.304 + { 8.305 + ERROR("Error when writing to state file"); 8.306 + goto out; 8.307 + } 8.308 + unmap_pfn(ppage); 8.309 + 8.310 + verbose_printf("Saving memory pages: 0%%"); 8.311 + 8.312 + /* Now write out each data page, canonicalising page tables as we go... */ 8.313 + prev_pc = 0; 8.314 + for ( i = 0; i < srec.nr_pfns; i++ ) 8.315 + { 8.316 + this_pc = (i * 100) / srec.nr_pfns; 8.317 + if ( (this_pc - prev_pc) >= 5 ) 8.318 + { 8.319 + verbose_printf("\b\b\b\b%3d%%", this_pc); 8.320 + prev_pc = this_pc; 8.321 + } 8.322 + 8.323 + mfn = pfn_to_mfn_table[i]; 8.324 + 8.325 + ppage = map_pfn(mfn); 8.326 + memcpy(page, ppage, PAGE_SIZE); 8.327 + unmap_pfn(ppage); 8.328 + 8.329 + if ( (pfn_type[i] == L1TAB) || (pfn_type[i] == L2TAB) ) 8.330 + { 8.331 + for ( j = 0; 8.332 + j < ((pfn_type[i] == L2TAB) ? 8.333 + (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) : 1024); 8.334 + j++ ) 8.335 + { 8.336 + if ( !(page[j] & _PAGE_PRESENT) ) continue; 8.337 + mfn = page[j] >> PAGE_SHIFT; 8.338 + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) ) 8.339 + { 8.340 + ERROR("Frame number in pagetable page is invalid"); 8.341 + goto out; 8.342 + } 8.343 + page[j] &= PAGE_SIZE - 1; 8.344 + page[j] |= mfn_to_pfn_table[mfn] << PAGE_SHIFT; 8.345 + } 8.346 + } 8.347 + 8.348 + if ( !checked_write(gfd, page, PAGE_SIZE) ) 8.349 + { 8.350 + ERROR("Error when writing to state file"); 8.351 + goto out; 8.352 + } 8.353 + } 8.354 + 8.355 + verbose_printf("\b\b\b\b100%%\nMemory saved.\n"); 8.356 + 8.357 + /* Success! 
*/ 8.358 + rc = 0; 8.359 + 8.360 + out: 8.361 + /* Restart the domain if we had to stop it to save its state. */ 8.362 + if ( we_stopped_it ) 8.363 + { 8.364 + op.cmd = DOM0_STARTDOMAIN; 8.365 + op.u.startdomain.domain = domid; 8.366 + (void)do_dom0_op(&op); 8.367 + } 8.368 + 8.369 + gzclose(gfd); 8.370 + 8.371 + if ( pfn_to_mfn_table != NULL ) 8.372 + free(pfn_to_mfn_table); 8.373 + if ( mfn_to_pfn_table != NULL ) 8.374 + free(mfn_to_pfn_table); 8.375 + if ( pfn_type != NULL ) 8.376 + free(pfn_type); 8.377 + 8.378 + /* On error, make sure the file is deleted. */ 8.379 + if ( rc != 0 ) 8.380 + unlink(state_file); 8.381 + 8.382 + return !!rc; 8.383 +}
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/tools/libxi/libxi_misc.c Wed Nov 19 17:22:42 2003 +0000 9.3 @@ -0,0 +1,50 @@ 9.4 +/****************************************************************************** 9.5 + * libxi_misc.c 9.6 + * 9.7 + * Miscellaneous control interface functions. 9.8 + */ 9.9 + 9.10 +#include "libxi_private.h" 9.11 + 9.12 +int privcmd_fd = -1; 9.13 + 9.14 +int xi_interface_open(void) 9.15 +{ 9.16 + if ( (privcmd_fd == -1) && 9.17 + ((privcmd_fd = open("/proc/xeno/privcmd", O_RDWR)) < 0) ) 9.18 + { 9.19 + privcmd_fd = -1; 9.20 + return -1; 9.21 + } 9.22 + return 0; 9.23 +} 9.24 + 9.25 +int xi_interface_close(void) 9.26 +{ 9.27 + if ( privcmd_fd != -1 ) 9.28 + { 9.29 + close(privcmd_fd); 9.30 + privcmd_fd = -1; 9.31 + } 9.32 + return 0; 9.33 +} 9.34 + 9.35 + 9.36 +#define CONSOLE_RING_CLEAR 1 9.37 + 9.38 +int xi_readconsolering(char *str, unsigned int max_chars, int clear) 9.39 +{ 9.40 + int ret; 9.41 + dom0_op_t op; 9.42 + 9.43 + op.cmd = DOM0_READCONSOLE; 9.44 + op.u.readconsole.str = (unsigned long)str; 9.45 + op.u.readconsole.count = max_chars; 9.46 + op.u.readconsole.cmd = clear ? CONSOLE_RING_CLEAR : 0; 9.47 + 9.48 + if ( (ret = do_dom0_op(&op)) > 0 ) 9.49 + str[ret] = '\0'; 9.50 + 9.51 + return ret; 9.52 +} 9.53 +
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/tools/libxi/libxi_private.c Wed Nov 19 17:22:42 2003 +0000 10.3 @@ -0,0 +1,34 @@ 10.4 +/****************************************************************************** 10.5 + * libxi_private.c 10.6 + * 10.7 + * Helper functions for the rest of the library. 10.8 + */ 10.9 + 10.10 +#include "libxi_private.h" 10.11 + 10.12 +static int devmem_fd = -1; 10.13 + 10.14 +int init_pfn_mapper(void) 10.15 +{ 10.16 + if ( (devmem_fd == -1) && 10.17 + ((devmem_fd = open("/dev/mem", O_RDWR)) < 0) ) 10.18 + { 10.19 + devmem_fd = -1; 10.20 + return -1; 10.21 + } 10.22 + return 0; 10.23 +} 10.24 + 10.25 +void *map_pfn(unsigned long pfn) 10.26 +{ 10.27 + void *vaddr = mmap(NULL, PAGE_SIZE, PROT_READ|PROT_WRITE, 10.28 + MAP_SHARED, devmem_fd, pfn << PAGE_SHIFT); 10.29 + if ( vaddr == MAP_FAILED ) 10.30 + return NULL; 10.31 + return vaddr; 10.32 +} 10.33 + 10.34 +void unmap_pfn(void *vaddr) 10.35 +{ 10.36 + (void)munmap(vaddr, PAGE_SIZE); 10.37 +}
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/tools/libxi/libxi_private.h Wed Nov 19 17:22:42 2003 +0000 11.3 @@ -0,0 +1,155 @@ 11.4 + 11.5 +#ifndef __LIBXI_PRIVATE_H__ 11.6 +#define __LIBXI_PRIVATE_H__ 11.7 + 11.8 +typedef unsigned char u8; 11.9 +typedef unsigned short u16; 11.10 +typedef unsigned long u32; 11.11 +typedef unsigned long long u64; 11.12 +typedef signed char s8; 11.13 +typedef signed short s16; 11.14 +typedef signed long s32; 11.15 +typedef signed long long s64; 11.16 + 11.17 +#include <unistd.h> 11.18 +#include <stdio.h> 11.19 +#include <errno.h> 11.20 +#include <fcntl.h> 11.21 +#include <sys/mman.h> 11.22 +#include <sys/types.h> 11.23 +#include <sys/stat.h> 11.24 +#include <stdlib.h> 11.25 +#include <sys/ioctl.h> 11.26 +#include <errno.h> 11.27 +#include <string.h> 11.28 + 11.29 +#include "xi.h" 11.30 + 11.31 +#include <asm-xeno/proc_cmd.h> 11.32 +#include <hypervisor-ifs/hypervisor-if.h> 11.33 +#include <hypervisor-ifs/dom0_ops.h> 11.34 +#include <hypervisor-ifs/vbd.h> 11.35 + 11.36 +#define _PAGE_PRESENT 0x001 11.37 +#define _PAGE_RW 0x002 11.38 +#define _PAGE_USER 0x004 11.39 +#define _PAGE_PWT 0x008 11.40 +#define _PAGE_PCD 0x010 11.41 +#define _PAGE_ACCESSED 0x020 11.42 +#define _PAGE_DIRTY 0x040 11.43 +#define _PAGE_PAT 0x080 11.44 +#define _PAGE_PSE 0x080 11.45 +#define _PAGE_GLOBAL 0x100 11.46 + 11.47 + 11.48 +#define L1_PAGETABLE_SHIFT 12 11.49 +#define L2_PAGETABLE_SHIFT 22 11.50 + 11.51 +#define ENTRIES_PER_L1_PAGETABLE 1024 11.52 +#define ENTRIES_PER_L2_PAGETABLE 1024 11.53 + 11.54 +#define PAGE_SHIFT L1_PAGETABLE_SHIFT 11.55 +#define PAGE_SIZE (1UL << PAGE_SHIFT) 11.56 +#define PAGE_MASK (~(PAGE_SIZE-1)) 11.57 + 11.58 +typedef struct { unsigned long l1_lo; } l1_pgentry_t; 11.59 +typedef struct { unsigned long l2_lo; } l2_pgentry_t; 11.60 + 11.61 +#define l1_table_offset(_a) \ 11.62 + (((_a) >> L1_PAGETABLE_SHIFT) & (ENTRIES_PER_L1_PAGETABLE - 1)) 11.63 +#define l2_table_offset(_a) \ 11.64 + ((_a) >> L2_PAGETABLE_SHIFT) 
11.65 + 11.66 +#define ERROR(_m) \ 11.67 + fprintf(stderr, "ERROR: %s\n", (_m)) 11.68 + 11.69 +#define PERROR(_m) \ 11.70 + fprintf(stderr, "ERROR: %s (%d = %s)\n", (_m), errno, strerror(errno)) 11.71 + 11.72 +extern int privcmd_fd; 11.73 +static inline int do_privcmd(unsigned int cmd, unsigned long data) 11.74 +{ 11.75 + return ioctl(privcmd_fd, cmd, data); 11.76 +} 11.77 + 11.78 +static inline int do_xen_hypercall(privcmd_hypercall_t *hypercall) 11.79 +{ 11.80 + return do_privcmd(IOCTL_PRIVCMD_HYPERCALL, (unsigned long)hypercall); 11.81 +} 11.82 + 11.83 +static inline int do_dom0_op(dom0_op_t *op) 11.84 +{ 11.85 + int ret = -1; 11.86 + privcmd_hypercall_t hypercall; 11.87 + 11.88 + op->interface_version = DOM0_INTERFACE_VERSION; 11.89 + 11.90 + hypercall.op = __HYPERVISOR_dom0_op; 11.91 + hypercall.arg[0] = (unsigned long)op; 11.92 + 11.93 + if ( mlock(op, sizeof(*op)) != 0 ) 11.94 + goto out1; 11.95 + 11.96 + if ( (ret = do_xen_hypercall(&hypercall)) < 0 ) 11.97 + { 11.98 + if ( errno == EACCES ) 11.99 + fprintf(stderr, "Dom0 operation failed -- need to" 11.100 + " rebuild the user-space tool set?\n"); 11.101 + goto out2; 11.102 + } 11.103 + 11.104 + ret = 0; 11.105 + 11.106 + out2: (void)munlock(op, sizeof(*op)); 11.107 + out1: return ret; 11.108 +} 11.109 + 11.110 +static inline int do_network_op(network_op_t *op) 11.111 +{ 11.112 + int ret = -1; 11.113 + privcmd_hypercall_t hypercall; 11.114 + 11.115 + hypercall.op = __HYPERVISOR_network_op; 11.116 + hypercall.arg[0] = (unsigned long)op; 11.117 + 11.118 + if ( mlock(op, sizeof(*op)) != 0 ) 11.119 + goto out1; 11.120 + 11.121 + if ( (ret = do_xen_hypercall(&hypercall)) < 0 ) 11.122 + goto out2; 11.123 + 11.124 + ret = 0; 11.125 + 11.126 + out2: (void)munlock(op, sizeof(*op)); 11.127 + out1: return ret; 11.128 +} 11.129 + 11.130 + 11.131 +static inline int do_block_io_op(block_io_op_t *op) 11.132 +{ 11.133 + int ret = -1; 11.134 + privcmd_hypercall_t hypercall; 11.135 + 11.136 + hypercall.op = 
__HYPERVISOR_block_io_op; 11.137 + hypercall.arg[0] = (unsigned long)op; 11.138 + 11.139 + if ( mlock(op, sizeof(*op)) != 0 ) 11.140 + goto out1; 11.141 + 11.142 + if ( do_xen_hypercall(&hypercall) < 0 ) 11.143 + goto out2; 11.144 + 11.145 + ret = 0; 11.146 + 11.147 + out2: (void)munlock(op, sizeof(*op)); 11.148 + out1: return ret; 11.149 +} 11.150 + 11.151 +/* 11.152 + * PFN mapping. 11.153 + */ 11.154 +int init_pfn_mapper(void); 11.155 +void *map_pfn(unsigned long pfn); 11.156 +void unmap_pfn(void *vaddr); 11.157 + 11.158 +#endif /* __LIBXI_PRIVATE_H__ */
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 12.2 +++ b/tools/libxi/libxi_vbd.c Wed Nov 19 17:22:42 2003 +0000 12.3 @@ -0,0 +1,116 @@ 12.4 +/****************************************************************************** 12.5 + * libxi_vbd.c 12.6 + * 12.7 + * API for manipulating and accessing per-domain virtual block devices. 12.8 + * 12.9 + * Copyright (c) 2003, K A Fraser. 12.10 + */ 12.11 + 12.12 +#define _GNU_SOURCE 12.13 +#include "libxi_private.h" 12.14 + 12.15 +int xi_vbd_create(unsigned int domid, unsigned short vbdid, int writeable) 12.16 +{ 12.17 + block_io_op_t op; 12.18 + op.cmd = BLOCK_IO_OP_VBD_CREATE; 12.19 + op.u.create_params.domain = domid; 12.20 + op.u.create_params.vdevice = vbdid; 12.21 + op.u.create_params.mode = VBD_MODE_R | (writeable ? VBD_MODE_W : 0); 12.22 + return do_block_io_op(&op); 12.23 +} 12.24 + 12.25 + 12.26 +int xi_vbd_destroy(unsigned int domid, unsigned short vbdid) 12.27 +{ 12.28 + block_io_op_t op; 12.29 + op.cmd = BLOCK_IO_OP_VBD_DELETE; 12.30 + op.u.delete_params.domain = domid; 12.31 + op.u.delete_params.vdevice = vbdid; 12.32 + return do_block_io_op(&op); 12.33 +} 12.34 + 12.35 + 12.36 +int xi_vbd_add_extent(unsigned int domid, 12.37 + unsigned short vbdid, 12.38 + unsigned short real_device, 12.39 + unsigned long start_sector, 12.40 + unsigned long nr_sectors) 12.41 +{ 12.42 + block_io_op_t op; 12.43 + op.cmd = BLOCK_IO_OP_VBD_ADD; 12.44 + op.u.add_params.domain = domid; 12.45 + op.u.add_params.vdevice = vbdid; 12.46 + op.u.add_params.extent.device = real_device; 12.47 + op.u.add_params.extent.start_sector = start_sector; 12.48 + op.u.add_params.extent.nr_sectors = nr_sectors; 12.49 + return do_block_io_op(&op); 12.50 +} 12.51 + 12.52 + 12.53 +int xi_vbd_delete_extent(unsigned int domid, 12.54 + unsigned short vbdid, 12.55 + unsigned short real_device, 12.56 + unsigned long start_sector, 12.57 + unsigned long nr_sectors) 12.58 +{ 12.59 + block_io_op_t op; 12.60 + op.cmd = BLOCK_IO_OP_VBD_REMOVE; 12.61 + 
op.u.add_params.domain = domid; 12.62 + op.u.add_params.vdevice = vbdid; 12.63 + op.u.add_params.extent.device = real_device; 12.64 + op.u.add_params.extent.start_sector = start_sector; 12.65 + op.u.add_params.extent.nr_sectors = nr_sectors; 12.66 + return do_block_io_op(&op); 12.67 +} 12.68 + 12.69 + 12.70 +int xi_vbd_probe(unsigned int domid, 12.71 + unsigned short vbdid, 12.72 + unsigned int max_vbds, 12.73 + xi_vbd_t *vbds) 12.74 +{ 12.75 + block_io_op_t op; 12.76 + xen_disk_info_t *xdi = &op.u.probe_params.xdi; 12.77 + int i, j, ret, allocsz = max_vbds * sizeof(xen_disk_t); 12.78 + 12.79 + op.cmd = BLOCK_IO_OP_VBD_PROBE; 12.80 + op.u.probe_params.domain = domid; 12.81 + 12.82 + xdi->max = max_vbds; 12.83 + xdi->disks = malloc(allocsz); 12.84 + xdi->count = 0; 12.85 + 12.86 + if ( (xdi->disks == NULL) || (mlock(xdi->disks, allocsz) != 0) ) 12.87 + { 12.88 + if ( xdi->disks != NULL ) 12.89 + free(xdi->disks); 12.90 + return -ENOMEM; 12.91 + } 12.92 + 12.93 + ret = do_block_io_op(&op); 12.94 + 12.95 + (void)munlock(xdi->disks, allocsz); 12.96 + 12.97 + if ( ret >= 0 ) 12.98 + { 12.99 + for ( i = 0, j = 0; i < xdi->count; i++ ) 12.100 + { 12.101 + if ( !(xdi->disks[i].info & XD_FLAG_VIRT) ) 12.102 + continue; 12.103 + 12.104 + vbds[j].domid = xdi->disks[i].domain; 12.105 + vbds[j].vbdid = xdi->disks[i].device; 12.106 + vbds[j].flags = (xdi->disks[i].info & XD_FLAG_RO) ? 12.107 + 0 : XI_VBDF_WRITEABLE; 12.108 + vbds[j].nr_sectors = xdi->disks[i].capacity; 12.109 + 12.110 + j++; 12.111 + } 12.112 + 12.113 + ret = j; 12.114 + } 12.115 + 12.116 + free(xdi->disks); 12.117 + 12.118 + return ret; 12.119 +}
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 13.2 +++ b/tools/libxi/libxi_vif.c Wed Nov 19 17:22:42 2003 +0000 13.3 @@ -0,0 +1,66 @@ 13.4 +/****************************************************************************** 13.5 + * libxi_vif.c 13.6 + * 13.7 + * API for manipulating and accessing per-network-interface parameters. 13.8 + * 13.9 + * Copyright (c) 2003, K A Fraser. 13.10 + */ 13.11 + 13.12 +#include "libxi_private.h" 13.13 + 13.14 +int xi_vif_scheduler_set(unsigned int domid, 13.15 + unsigned int vifid, 13.16 + xi_vif_sched_params_t *params) 13.17 +{ 13.18 + network_op_t netop; 13.19 + netop.cmd = NETWORK_OP_VIFSETPARAMS; 13.20 + netop.u.vif_setparams.domain = domid; 13.21 + netop.u.vif_setparams.vif = vifid; 13.22 + netop.u.vif_setparams.credit_bytes = params->credit_bytes; 13.23 + netop.u.vif_setparams.credit_usec = params->credit_usec; 13.24 + return do_network_op(&netop); 13.25 +} 13.26 + 13.27 + 13.28 +int xi_vif_scheduler_get(unsigned int domid, 13.29 + unsigned int vifid, 13.30 + xi_vif_sched_params_t *params) 13.31 +{ 13.32 + network_op_t netop; 13.33 + int rc; 13.34 + 13.35 + netop.cmd = NETWORK_OP_VIFGETINFO; 13.36 + netop.u.vif_getinfo.domain = domid; 13.37 + netop.u.vif_getinfo.vif = vifid; 13.38 + 13.39 + if ( (rc = do_network_op(&netop)) >= 0 ) 13.40 + { 13.41 + params->credit_bytes = netop.u.vif_getinfo.credit_bytes; 13.42 + params->credit_usec = netop.u.vif_getinfo.credit_usec; 13.43 + } 13.44 + 13.45 + return rc; 13.46 +} 13.47 + 13.48 + 13.49 +int xi_vif_stats_get(unsigned int domid, 13.50 + unsigned int vifid, 13.51 + xi_vif_stats_t *stats) 13.52 +{ 13.53 + network_op_t netop; 13.54 + int rc; 13.55 + 13.56 + netop.cmd = NETWORK_OP_VIFGETINFO; 13.57 + netop.u.vif_getinfo.domain = domid; 13.58 + netop.u.vif_getinfo.vif = vifid; 13.59 + 13.60 + if ( (rc = do_network_op(&netop)) >= 0 ) 13.61 + { 13.62 + stats->tx_bytes = netop.u.vif_getinfo.total_bytes_sent; 13.63 + stats->tx_pkts = netop.u.vif_getinfo.total_packets_sent; 13.64 + 
stats->rx_bytes = netop.u.vif_getinfo.total_bytes_received; 13.65 + stats->rx_pkts = netop.u.vif_getinfo.total_packets_received; 13.66 + } 13.67 + 13.68 + return rc; 13.69 +}
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 14.2 +++ b/tools/libxi/rpm.spec Wed Nov 19 17:22:42 2003 +0000 14.3 @@ -0,0 +1,28 @@ 14.4 +Summary: Xen control interface library 14.5 +Name: xen-internal-library 14.6 +Version: 1.2 14.7 +Release: 1 14.8 +License: Xen 14.9 +Group: Xen 14.10 +BuildRoot: %{staging} 14.11 +%description 14.12 +Library to make it easier to access the Xen control interfaces. 14.13 + 14.14 +%pre 14.15 +%preun 14.16 +%install 14.17 +install -m 0755 -d $RPM_BUILD_ROOT/lib 14.18 +install -m 0755 libxi.a $RPM_BUILD_ROOT/lib/libxi.a 14.19 +install -m 0755 libxi.so $RPM_BUILD_ROOT/lib/libxi.so 14.20 +install -m 0755 -d $RPM_BUILD_ROOT/include 14.21 +install -m 0644 xi.h $RPM_BUILD_ROOT/include/xi.h 14.22 +%clean 14.23 +%post 14.24 +%postun 14.25 +%files 14.26 +%defattr(-,root,root) 14.27 +%dir /lib 14.28 +/lib/libxi.a 14.29 +/lib/libxi.so 14.30 +%dir /include 14.31 +/include/xi.h
/******************************************************************************
 * xi.h
 * 
 * A library for low-level access to the Xen control interfaces.
 * 
 * Copyright (c) 2003, K A Fraser.
 */

#ifndef __XI_H__
#define __XI_H__

/* Size of the name buffer in xi_dominfo_t. */
#define XI_DOMINFO_MAXNAME  16

/* Constant for the 'domid' field of xi_vbd_t. */
#define XI_VBDDOM_PROBE_ALL (~0U)

/* Bit in the 'flags' field of xi_vbd_t: the device may be written. */
#define XI_VBDF_WRITEABLE   (1<<0)

/*
 * Open / close the control interface. Opening returns 0 on success and -1 on
 * failure; closing always returns 0.
 */
int xi_interface_open(void);
int xi_interface_close(void);

/* Per-domain information record filled in by xi_domain_getinfo(). */
typedef struct {
    unsigned int       domid;
    unsigned int       cpu;
    int                has_cpu;
    int                stopped;
    unsigned long      nr_pages;
    unsigned long long cpu_time;
    char               name[XI_DOMINFO_MAXNAME];
} xi_dominfo_t;

/* Domain lifecycle and query operations. */
int xi_domain_create(unsigned int mem_kb, const char *name);
int xi_domain_start(unsigned int domid);
int xi_domain_stop(unsigned int domid);
int xi_domain_destroy(unsigned int domid, int force);
int xi_domain_getinfo(unsigned int first_domid, 
                      unsigned int max_doms,
                      xi_dominfo_t *info);

/*
 * Xenolinux save, restore and build. Saving returns 0 on success and 1 on
 * failure. Restoring returns the identifier of the restored domain on
 * success. Progress is printed when 'verbose' is non-zero.
 */
int xi_linux_save(unsigned int domid, const char *state_file, int verbose);
int xi_linux_restore(const char *state_file, int verbose);
int xi_linux_build(unsigned int domid,
                   const char *image_name,
                   const char *ramdisk_name,
                   const char *cmdline,
                   int verbose);

/* BVT scheduler parameters: global and per-domain. */
int xi_bvtsched_global_set(unsigned long ctx_allow);
int xi_bvtsched_domain_set(unsigned int domid,
                           unsigned long mcuadv,
                           unsigned long warp,
                           unsigned long warpl,
                           unsigned long warpu);

/* Credit parameters of a virtual network interface. */
typedef struct {
    unsigned long credit_bytes;
    unsigned long credit_usec;
} xi_vif_sched_params_t;

/* Transmit and receive counters of a virtual network interface. */
typedef struct {
    unsigned long long tx_bytes;
    unsigned long long tx_pkts;
    unsigned long long rx_bytes;
    unsigned long long rx_pkts;
} xi_vif_stats_t;

/*
 * Set or read the credit parameters, or read the counters, of interface
 * 'vifid' of domain 'domid'. A negative return value indicates failure, in
 * which case the output structure is left untouched.
 */
int xi_vif_scheduler_set(unsigned int domid, 
                         unsigned int vifid,
                         xi_vif_sched_params_t *params);
int xi_vif_scheduler_get(unsigned int domid, 
                         unsigned int vifid,
                         xi_vif_sched_params_t *params);
int xi_vif_stats_get(unsigned int domid, 
                     unsigned int vifid,
                     xi_vif_stats_t *stats);

/* Description of one virtual block device, as filled in by xi_vbd_probe(). */
typedef struct {
    unsigned int   domid;
    unsigned short vbdid;
    unsigned long  flags;       /* XI_VBDF_* */
    unsigned long  nr_sectors;
} xi_vbd_t;


/*
 * Virtual block device management. xi_vbd_probe() returns the number of
 * entries written to 'vbds', or a negative value on failure.
 */
int xi_vbd_create(unsigned int domid, unsigned short vbdid, int writeable);
int xi_vbd_destroy(unsigned int domid, unsigned short vbdid);
int xi_vbd_add_extent(unsigned int domid, 
                      unsigned short vbdid,
                      unsigned short real_device,
                      unsigned long start_sector,
                      unsigned long nr_sectors);
int xi_vbd_delete_extent(unsigned int domid, 
                         unsigned short vbdid,
                         unsigned short real_device,
                         unsigned long start_sector,
                         unsigned long nr_sectors);
int xi_vbd_probe(unsigned int domid,
                 unsigned short vbdid,
                 unsigned int max_vbds,
                 xi_vbd_t *vbds);

/*
 * Request the contents of Xen's console ring into 'str', clearing the ring
 * if 'clear' is non-zero.
 */
int xi_readconsolering(char *str, unsigned int max_chars, int clear);


#endif /* __XI_H__ */