ia64/xen-unstable
changeset 13957:9529d667d042
merge with xen-unstable.hg
| author | awilliam@xenbuild2.aw |
| --- | --- |
| date | Thu Feb 15 14:09:39 2007 -0700 (2007-02-15) |
| parents | ac18d251df63 9af0c7e4ff51 |
| children | b0aeca575dfb |
| files | tools/libxc/xc_dom_powerpc64.c tools/python/xen/xend/FlatDeviceTree.py xen/arch/powerpc/elf32.c xen/arch/powerpc/of_handler/strncpy.c |
line diff
1.1 --- a/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Feb 15 13:13:36 2007 -0700 1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blkback/xenbus.c Thu Feb 15 14:09:39 2007 -0700 1.3 @@ -344,7 +344,7 @@ static void frontend_changed(struct xenb 1.4 switch (frontend_state) { 1.5 case XenbusStateInitialising: 1.6 if (dev->state == XenbusStateClosed) { 1.7 - printk("%s: %s: prepare for reconnect\n", 1.8 + printk(KERN_INFO "%s: %s: prepare for reconnect\n", 1.9 __FUNCTION__, dev->nodename); 1.10 xenbus_switch_state(dev, XenbusStateInitWait); 1.11 } 1.12 @@ -488,7 +488,8 @@ static int connect_ring(struct backend_i 1.13 xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); 1.14 return -1; 1.15 } 1.16 - printk("blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", 1.17 + printk(KERN_INFO 1.18 + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", 1.19 ring_ref, evtchn, be->blkif->blk_protocol, protocol); 1.20 1.21 /* Map the shared frame, irq etc. */
2.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Thu Feb 15 13:13:36 2007 -0700 2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/xenbus.c Thu Feb 15 14:09:39 2007 -0700 2.3 @@ -272,7 +272,7 @@ static void tap_frontend_changed(struct 2.4 switch (frontend_state) { 2.5 case XenbusStateInitialising: 2.6 if (dev->state == XenbusStateClosed) { 2.7 - printk("%s: %s: prepare for reconnect\n", 2.8 + printk(KERN_INFO "%s: %s: prepare for reconnect\n", 2.9 __FUNCTION__, dev->nodename); 2.10 xenbus_switch_state(dev, XenbusStateInitWait); 2.11 } 2.12 @@ -369,7 +369,8 @@ static int connect_ring(struct backend_i 2.13 xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); 2.14 return -1; 2.15 } 2.16 - printk("blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n", 2.17 + printk(KERN_INFO 2.18 + "blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n", 2.19 ring_ref, evtchn, be->blkif->blk_protocol, protocol); 2.20 2.21 /* Map the shared frame, irq etc. */
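The blkback and blktap hunks above (and the netback/netfront hunks below) only add explicit log levels to existing printk() calls. A minimal sketch of the idiom, with a hypothetical helper name, assuming a 2.6-era kernel where the KERN_* prefixes are plain string literals:

```c
#include <linux/kernel.h>       /* printk() and the KERN_* level prefixes */

/* Hypothetical helper, not part of the patch.  KERN_INFO is a string
 * literal ("<6>" in 2.6-era kernels), so it concatenates with the format
 * string at compile time; without a prefix the message falls back to the
 * kernel's default log level. */
static void announce_reconnect(const char *node)
{
	printk(KERN_INFO "%s: %s: prepare for reconnect\n", __FUNCTION__, node);
}
```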
3.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Thu Feb 15 13:13:36 2007 -0700 3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Thu Feb 15 14:09:39 2007 -0700 3.3 @@ -100,6 +100,7 @@ static void __shutdown_handler(void *unu 3.4 static void shutdown_handler(struct xenbus_watch *watch, 3.5 const char **vec, unsigned int len) 3.6 { 3.7 + extern void ctrl_alt_del(void); 3.8 char *str; 3.9 struct xenbus_transaction xbt; 3.10 int err; 3.11 @@ -129,7 +130,7 @@ static void shutdown_handler(struct xenb 3.12 if (strcmp(str, "poweroff") == 0) 3.13 shutting_down = SHUTDOWN_POWEROFF; 3.14 else if (strcmp(str, "reboot") == 0) 3.15 - kill_proc(1, SIGINT, 1); /* interrupt init */ 3.16 + ctrl_alt_del(); 3.17 else if (strcmp(str, "suspend") == 0) 3.18 shutting_down = SHUTDOWN_SUSPEND; 3.19 else if (strcmp(str, "halt") == 0)
4.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Feb 15 13:13:36 2007 -0700 4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Feb 15 14:09:39 2007 -0700 4.3 @@ -217,7 +217,7 @@ static void frontend_changed(struct xenb 4.4 switch (frontend_state) { 4.5 case XenbusStateInitialising: 4.6 if (dev->state == XenbusStateClosed) { 4.7 - printk("%s: %s: prepare for reconnect\n", 4.8 + printk(KERN_INFO "%s: %s: prepare for reconnect\n", 4.9 __FUNCTION__, dev->nodename); 4.10 if (be->netif) { 4.11 netif_disconnect(be->netif);
5.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Feb 15 13:13:36 2007 -0700 5.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Feb 15 14:09:39 2007 -0700 5.3 @@ -1505,7 +1505,7 @@ static void netif_release_rx_bufs(struct 5.4 int id, ref; 5.5 5.6 if (np->copying_receiver) { 5.7 - printk("%s: fix me for copying receiver.\n", __FUNCTION__); 5.8 + WPRINTK("%s: fix me for copying receiver.\n", __FUNCTION__); 5.9 return; 5.10 } 5.11 5.12 @@ -1555,8 +1555,8 @@ static void netif_release_rx_bufs(struct 5.13 xfer++; 5.14 } 5.15 5.16 - printk("%s: %d xfer, %d noxfer, %d unused\n", 5.17 - __FUNCTION__, xfer, noxfer, unused); 5.18 + IPRINTK("%s: %d xfer, %d noxfer, %d unused\n", 5.19 + __FUNCTION__, xfer, noxfer, unused); 5.20 5.21 if (xfer) { 5.22 /* Some pages are no longer absent... */
6.1 --- a/tools/libxc/Makefile Thu Feb 15 13:13:36 2007 -0700 6.2 +++ b/tools/libxc/Makefile Thu Feb 15 14:09:39 2007 -0700 6.3 @@ -49,15 +49,8 @@ GUEST_SRCS-y += $(LIBELF_SRCS) 6.4 GUEST_SRCS-y += xc_dom_core.c xc_dom_boot.c 6.5 GUEST_SRCS-y += xc_dom_elfloader.c 6.6 GUEST_SRCS-y += xc_dom_binloader.c 6.7 - 6.8 -ifeq ($(CONFIG_POWERPC),y) 6.9 -# big endian boxes 6.10 -GUEST_SRCS-y += xc_dom_powerpc64.c 6.11 -else 6.12 -# little endian boxes 6.13 GUEST_SRCS-y += xc_dom_x86.c 6.14 GUEST_SRCS-y += xc_dom_ia64.c 6.15 -endif 6.16 6.17 GUEST_SRCS-$(CONFIG_X86) += xc_dom_compat_linux.c 6.18 GUEST_SRCS-$(CONFIG_IA64) += xc_dom_compat_linux.c
7.1 --- a/tools/libxc/powerpc64/Makefile Thu Feb 15 13:13:36 2007 -0700 7.2 +++ b/tools/libxc/powerpc64/Makefile Thu Feb 15 14:09:39 2007 -0700 7.3 @@ -1,4 +1,5 @@ 7.4 GUEST_SRCS-y += powerpc64/flatdevtree.c 7.5 +GUEST_SRCS-y += powerpc64/mk_flatdevtree.c 7.6 GUEST_SRCS-y += powerpc64/xc_linux_build.c 7.7 GUEST_SRCS-y += powerpc64/xc_prose_build.c 7.8 GUEST_SRCS-y += powerpc64/utils.c
8.1 --- a/tools/libxc/powerpc64/flatdevtree.c Thu Feb 15 13:13:36 2007 -0700 8.2 +++ b/tools/libxc/powerpc64/flatdevtree.c Thu Feb 15 14:09:39 2007 -0700 8.3 @@ -14,7 +14,7 @@ 8.4 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 8.5 * 8.6 * Copyright Pantelis Antoniou 2006 8.7 - * Copyright (C) IBM Corporation 2006 8.8 + * Copyright IBM Corporation 2006, 2007 8.9 * 2006 (c) MontaVista, Software, Inc. 8.10 * 8.11 * Authors: Pantelis Antoniou <pantelis@embeddedalley.com> 8.12 @@ -209,7 +209,7 @@ void ft_add_rsvmap(struct ft_cxt *cxt, u 8.13 ((u64 *) cxt->pres)[0] = cpu_to_be64(physaddr); /* phys = 0, size = 0, terminate */ 8.14 ((u64 *) cxt->pres)[1] = cpu_to_be64(size); 8.15 8.16 - cxt->pres += 18; /* advance */ 8.17 + cxt->pres += 16; /* advance two u64s worth */ 8.18 8.19 ((u64 *) cxt->pres)[0] = 0; /* phys = 0, size = 0, terminate */ 8.20 ((u64 *) cxt->pres)[1] = 0; 8.21 @@ -318,6 +318,9 @@ int ft_end_tree(struct ft_cxt *cxt) 8.22 cxt->pstr_begin = cxt->p_begin + cxt->struct_size; 8.23 cxt->pstr = cxt->pstr_begin + cxt->strings_size; 8.24 8.25 + /* mark the size of string structure in bph */ 8.26 + bph->size_dt_strings = cxt->strings_size; 8.27 + 8.28 return 0; 8.29 } 8.30
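The `cxt->pres += 18` to `+= 16` hunk above is a byte-arithmetic fix: each reserve-map entry in a flattened device tree is a pair of big-endian u64 values (physical address and size), so the cursor must advance by exactly two u64s, i.e. 16 bytes. A tiny standalone sketch of that layout (the struct name is invented for illustration; the real code simply casts a byte pointer to `(u64 *)`):

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical view of one reserve-map entry; flatdevtree.c treats
 * cxt->pres as a raw byte pointer and writes the two fields directly. */
struct ft_reserve_entry {
    uint64_t physaddr;   /* stored big-endian in the blob */
    uint64_t size;       /* stored big-endian in the blob */
};

int main(void)
{
    /* Two u64s per entry, so the write pointer advances by 16 bytes. */
    printf("advance = %zu bytes\n", sizeof(struct ft_reserve_entry));
    return 0;
}
```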
9.1 --- a/tools/libxc/powerpc64/flatdevtree.h Thu Feb 15 13:13:36 2007 -0700 9.2 +++ b/tools/libxc/powerpc64/flatdevtree.h Thu Feb 15 14:09:39 2007 -0700 9.3 @@ -40,7 +40,7 @@ struct boot_param_header { 9.4 /* version 2 fields below */ 9.5 u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ 9.6 /* version 3 fields below */ 9.7 - u32 dt_strings_size; /* size of the DT strings block */ 9.8 + u32 size_dt_strings; /* size of the DT strings block */ 9.9 }; 9.10 9.11 struct ft_cxt {
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/tools/libxc/powerpc64/mk_flatdevtree.c Thu Feb 15 14:09:39 2007 -0700 10.3 @@ -0,0 +1,650 @@ 10.4 +/* 10.5 + * This program is free software; you can redistribute it and/or modify 10.6 + * it under the terms of the GNU General Public License as published by 10.7 + * the Free Software Foundation; either version 2 of the License, or 10.8 + * (at your option) any later version. 10.9 + * 10.10 + * This program is distributed in the hope that it will be useful, 10.11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10.12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10.13 + * GNU General Public License for more details. 10.14 + * 10.15 + * You should have received a copy of the GNU General Public License 10.16 + * along with this program; if not, write to the Free Software 10.17 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 10.18 + * 10.19 + * Copyright IBM Corporation 2007 10.20 + * 10.21 + * Authors: Ryan Harper <ryanh@us.ibm.com> 10.22 + */ 10.23 + 10.24 +#include <stdio.h> 10.25 +#include <stdlib.h> 10.26 +#include <string.h> 10.27 +#include <fcntl.h> 10.28 +#include <dirent.h> 10.29 +#include <unistd.h> 10.30 +#include <libgen.h> 10.31 +#include <inttypes.h> 10.32 +#include <math.h> 10.33 +#include <errno.h> 10.34 +#include <sys/types.h> 10.35 +#include <sys/dir.h> 10.36 +#include <sys/stat.h> 10.37 +#include <sys/param.h> 10.38 + 10.39 +#include <xc_private.h> /* for PERROR() */ 10.40 + 10.41 +#include "mk_flatdevtree.h" 10.42 + 10.43 +static uint32_t current_phandle = 0; 10.44 + 10.45 +static uint32_t get_phandle(void) 10.46 +{ 10.47 + return current_phandle++; 10.48 +} 10.49 + 10.50 +static int readfile(const char *fullpath, void *data, int len) 10.51 +{ 10.52 + struct stat st; 10.53 + int saved_errno; 10.54 + int rc = -1; 10.55 + int fd; 10.56 + 10.57 + if ((fd = open(fullpath, O_RDONLY)) == -1) { 10.58 + PERROR("%s: failed to open file %s", __func__, fullpath); 10.59 + return -1; 10.60 + } 10.61 + 10.62 + if ((rc = fstat(fd, &st)) == -1) { 10.63 + PERROR("%s: failed to stat fd %d", __func__, fd); 10.64 + goto error; 10.65 + } 10.66 + 10.67 + if (S_ISREG(st.st_mode)) 10.68 + rc = read(fd, data, len); 10.69 + 10.70 + close(fd); 10.71 + return rc; 10.72 + 10.73 +error: 10.74 + saved_errno = errno; 10.75 + close(fd); 10.76 + errno = saved_errno; 10.77 + return -1; 10.78 +} 10.79 + 10.80 +/* 10.81 + * @property - string to check against the filter list 10.82 + * @filter - NULL terminated list of strings 10.83 + * 10.84 + * compare @property string to each string in @filter 10.85 + * 10.86 + * return 1 if @property matches any filter, otherwise 0 10.87 + * 10.88 + */ 10.89 +static int match(const char *property, const char **filter) 10.90 +{ 10.91 + int i; 10.92 + 10.93 + for (i=0; filter[i] != NULL; i++) { 10.94 + /* compare the filter to property */ 10.95 + if (strncmp(property, filter[i], strlen(filter[i])) == 0) 10.96 + return 1; 10.97 + } 10.98 + 10.99 + return 0; 10.100 +} 10.101 + 10.102 +/* 10.103 + * copy the node at @dirpath filtering out any properties that match in @propfilter 10.104 + */ 10.105 +static int copynode(struct ft_cxt *cxt, const char *dirpath, const char **propfilter) 10.106 +{ 10.107 + struct dirent *tree; 10.108 + struct stat st; 10.109 + DIR *dir; 10.110 + char fullpath[MAX_PATH]; 10.111 + char *bname = NULL; 10.112 + char *basec = NULL; 10.113 + int saved_errno; 10.114 + 10.115 + if ((dir = opendir(dirpath)) == NULL) { 10.116 + 
PERROR("%s: failed to open dir %s", __func__, dirpath); 10.117 + return -1; 10.118 + } 10.119 + 10.120 + while (1) { 10.121 + if ((tree = readdir(dir)) == NULL) 10.122 + break; /* reached end of directory entries */ 10.123 + 10.124 + /* ignore . and .. */ 10.125 + if (strcmp(tree->d_name,"." ) == 0 || strcmp(tree->d_name,"..") == 0) 10.126 + continue; 10.127 + 10.128 + /* build full path name of the file, for stat() */ 10.129 + if (snprintf(fullpath, sizeof(fullpath), "%s/%s", dirpath, 10.130 + tree->d_name) >= sizeof(fullpath)) { 10.131 + PERROR("%s: failed to build full path", __func__); 10.132 + goto error; 10.133 + } 10.134 + 10.135 + /* stat the entry */ 10.136 + if (stat(fullpath, &st) < 0) { 10.137 + PERROR("%s: failed to stat file %s", __func__, fullpath); 10.138 + goto error; 10.139 + } 10.140 + 10.141 + if (S_ISDIR(st.st_mode)) { 10.142 + /* start a new node for a dir */ 10.143 + ft_begin_node(cxt, tree->d_name); 10.144 + 10.145 + /* copy everything in this dir */ 10.146 + if (copynode(cxt, fullpath, propfilter) < 0) { 10.147 + PERROR("%s: failed to copy node @ %s", __func__, fullpath); 10.148 + goto error; 10.149 + } 10.150 + 10.151 + /* end the node */ 10.152 + ft_end_node(cxt); 10.153 + } 10.154 + /* add files in dir as properties */ 10.155 + else if (S_ISREG(st.st_mode)) { 10.156 + 10.157 + if ((basec = strdup(fullpath)) == NULL) { 10.158 + PERROR("%s: failed to dupe string", __func__); 10.159 + goto error; 10.160 + } 10.161 + 10.162 + if ((bname = basename(basec)) == NULL) { 10.163 + PERROR("%s: basename() failed", __func__); 10.164 + goto error; 10.165 + } 10.166 + 10.167 + /* only add files that don't match the property filter string */ 10.168 + if (!match(bname, propfilter)) { 10.169 + char data[BUFSIZE]; 10.170 + int len; 10.171 + 10.172 + /* snarf the data and push into the property */ 10.173 + if ((len = readfile(fullpath, data, sizeof(data))) < 0) { 10.174 + PERROR("%s: failed to read data from file %s", __func__, 10.175 + fullpath); 10.176 + goto error; 10.177 + } 10.178 + ft_prop(cxt, tree->d_name, data, len); 10.179 + 10.180 + } 10.181 + 10.182 + /* strdup mallocs memory */ 10.183 + if (basec != NULL ) { 10.184 + free(basec); 10.185 + basec = NULL; 10.186 + } 10.187 + 10.188 + } 10.189 + } 10.190 + 10.191 + closedir(dir); 10.192 + return 0; 10.193 + 10.194 +error: 10.195 + saved_errno = errno; 10.196 + 10.197 + /* strdup mallocs memory */ 10.198 + if (basec != NULL ) { 10.199 + free(basec); 10.200 + basec = NULL; 10.201 + } 10.202 + 10.203 + closedir(dir); 10.204 + 10.205 + errno = saved_errno; 10.206 + return -1; 10.207 +} 10.208 + 10.209 +static int find_cpu0(char *cpupath, int len) 10.210 +{ 10.211 + const char path[] = "/proc/device-tree/cpus"; 10.212 + const char device[] = "device_type"; 10.213 + const char dev_cpu[] = "cpu"; 10.214 + const char reg[] = "reg"; 10.215 + char data[sizeof(dev_cpu)]; 10.216 + char prop[MAX_PATH]; 10.217 + char node[MAX_PATH]; 10.218 + struct dirent *tree; 10.219 + struct stat st; 10.220 + DIR* dir; 10.221 + int saved_errno; 10.222 + int found = 0; 10.223 + 10.224 + if ((dir = opendir(path)) == NULL) { 10.225 + PERROR("%s: failed to open directory %s", __func__, path); 10.226 + return -1; 10.227 + } 10.228 + 10.229 + while (!found) { 10.230 + 10.231 + if ((tree = readdir(dir)) == NULL) 10.232 + break; /* reached end of directory entries */ 10.233 + 10.234 + /* ignore ., .. */ 10.235 + if (strcmp(tree->d_name,"." 
) == 0 || strcmp(tree->d_name,"..") == 0) 10.236 + continue; 10.237 + 10.238 + /* build full path name of the file, for stat() */ 10.239 + if (snprintf(node, sizeof(node), "%s/%s", path, 10.240 + tree->d_name) >= sizeof(node)) { 10.241 + PERROR("%s: failed to concat strings", __func__); 10.242 + goto error; 10.243 + } 10.244 + 10.245 + /* stat the entry */ 10.246 + if (stat(node, &st) < 0) { 10.247 + PERROR("%s: failed to stat file %s", __func__, node); 10.248 + /* something funny happen in /proc/device-tree, but march onward */ 10.249 + continue; 10.250 + } 10.251 + 10.252 + /* for each dir, check the device_type property until we find 'cpu'*/ 10.253 + if (S_ISDIR(st.st_mode)) { 10.254 + 10.255 + /* construct path to device_type */ 10.256 + if (snprintf(prop, sizeof(prop), "%s/%s", node, 10.257 + device) >= sizeof(prop)) { 10.258 + PERROR("%s: failed to concat strings", __func__); 10.259 + goto error; 10.260 + } 10.261 + 10.262 + /* read device_type into buffer */ 10.263 + if ((readfile(prop, data, sizeof(data))) < 0) { 10.264 + PERROR("%s: failed to read data from file %s", __func__, prop); 10.265 + goto error; 10.266 + } 10.267 + 10.268 + /* if the device_type is 'cpu', and reg is 0 10.269 + * return the path where we found it */ 10.270 + if (strcmp(data, "cpu") == 0) { 10.271 + 10.272 + /* construct path to reg */ 10.273 + if (snprintf(prop, sizeof(prop), "%s/%s", node, 10.274 + reg) >= sizeof(prop)) { 10.275 + PERROR("%s: failed to concat strings", __func__); 10.276 + goto error; 10.277 + } 10.278 + 10.279 + /* using data buffer since reg and device_type values have same size */ 10.280 + if ((readfile(prop, data, sizeof(data))) < 0) { 10.281 + PERROR("%s: failed to read data from file %s", __func__, prop); 10.282 + goto error; 10.283 + } 10.284 + 10.285 + /* now check property "reg" for value 0 */ 10.286 + if ((u32)*data == 0) { 10.287 + if (snprintf(cpupath, len, "%s", node) >= len) { 10.288 + PERROR("%s: failed to copy cpupath", __func__); 10.289 + goto error; 10.290 + } 10.291 + found = 1; 10.292 + } 10.293 + } 10.294 + } 10.295 + } 10.296 + 10.297 + closedir(dir); 10.298 + return found; 10.299 + 10.300 +error: 10.301 + saved_errno = errno; 10.302 + closedir(dir); 10.303 + errno = saved_errno; 10.304 + return -1; 10.305 +} 10.306 + 10.307 +void free_devtree(struct ft_cxt *root) 10.308 +{ 10.309 + if ((root != NULL) && root->bph != NULL) { 10.310 + free(root->bph); 10.311 + root->bph = NULL; 10.312 + } 10.313 +} 10.314 + 10.315 +int make_devtree(struct ft_cxt *root, 10.316 + uint32_t domid, 10.317 + uint32_t mem_mb, 10.318 + unsigned long rma_bytes, 10.319 + unsigned long shadow_mb, 10.320 + unsigned long initrd_base, 10.321 + unsigned long initrd_len, 10.322 + const char *bootargs, 10.323 + uint64_t shared_info_paddr, 10.324 + unsigned long console_evtchn, 10.325 + uint64_t console_paddr, 10.326 + unsigned long store_evtchn, 10.327 + uint64_t store_paddr) 10.328 +{ 10.329 + struct boot_param_header *bph = NULL; 10.330 + uint64_t val[2]; 10.331 + uint32_t val32[2]; 10.332 + unsigned long remaining; 10.333 + unsigned long initrd_end = initrd_base + initrd_len; 10.334 + int64_t shadow_mb_log; 10.335 + uint64_t pft_size; 10.336 + char cpupath[MAX_PATH]; 10.337 + const char *propfilter[] = { "ibm", "linux,", NULL }; 10.338 + char *cpupath_copy = NULL; 10.339 + char *cpuname = NULL; 10.340 + int saved_errno; 10.341 + int dtb_fd = -1; 10.342 + uint32_t cpu0_phandle = get_phandle(); 10.343 + uint32_t xen_phandle = get_phandle(); 10.344 + uint32_t rma_phandle = get_phandle(); 10.345 + 
10.346 + /* initialize bph to prevent double free on error path */ 10.347 + root->bph = NULL; 10.348 + 10.349 + /* carve out space for bph */ 10.350 + if ((bph = (struct boot_param_header *)malloc(BPH_SIZE)) == NULL) { 10.351 + PERROR("%s: Failed to malloc bph buffer size", __func__); 10.352 + goto error; 10.353 + } 10.354 + 10.355 + /* NB: struct ft_cxt root defined at top of file */ 10.356 + /* root = Tree() */ 10.357 + ft_begin(root, bph, BPH_SIZE); 10.358 + 10.359 + /* you MUST set reservations BEFORE _starting_the_tree_ */ 10.360 + 10.361 + /* reserve shared_info_t page */ 10.362 + if (shared_info_paddr) { 10.363 + val[0] = cpu_to_be64((u64) shared_info_paddr); 10.364 + val[1] = cpu_to_be64((u64) PAGE_SIZE); 10.365 + ft_add_rsvmap(root, val[0], val[1]); 10.366 + } 10.367 + 10.368 + /* reserve console page for domU */ 10.369 + if (console_paddr) { 10.370 + val[0] = cpu_to_be64((u64) console_paddr); 10.371 + val[1] = cpu_to_be64((u64) PAGE_SIZE); 10.372 + ft_add_rsvmap(root, val[0], val[1]); 10.373 + } 10.374 + 10.375 + /* reserve xen store page for domU */ 10.376 + if (store_paddr) { 10.377 + val[0] = cpu_to_be64((u64) store_paddr); 10.378 + val[1] = cpu_to_be64((u64) PAGE_SIZE); 10.379 + ft_add_rsvmap(root, val[0], val[1]); 10.380 + } 10.381 + 10.382 + /* reserve space for initrd if needed */ 10.383 + if ( initrd_len > 0 ) { 10.384 + val[0] = cpu_to_be64((u64) initrd_base); 10.385 + val[1] = cpu_to_be64((u64) initrd_len); 10.386 + ft_add_rsvmap(root, val[0], val[1]); 10.387 + } 10.388 + 10.389 + /* NB: ft_add_rsvmap() already terminates with a NULL reservation for us */ 10.390 + 10.391 + /* done with reservations, _starting_the_tree_ */ 10.392 + ft_begin_tree(root); 10.393 + 10.394 + /* make root node */ 10.395 + ft_begin_node(root, ""); 10.396 + 10.397 + /* root.addprop('device_type', 'chrp-but-not-really\0') */ 10.398 + ft_prop_str(root, "device_type", "chrp-but-not-really"); 10.399 + 10.400 + /* root.addprop('#size-cells', 2) */ 10.401 + ft_prop_int(root, "#size-cells", 2); 10.402 + 10.403 + /* root.addprop('#address-cells', 2) */ 10.404 + ft_prop_int(root, "#address-cells", 2); 10.405 + 10.406 + /* root.addprop('model', 'Momentum,Maple-D\0') */ 10.407 + ft_prop_str(root, "model", "Momentum,Maple-D"); 10.408 + 10.409 + /* root.addprop('compatible', 'Momentum,Maple\0') */ 10.410 + ft_prop_str(root, "compatible", "Momentum,Maple"); 10.411 + 10.412 + /* start chosen node */ 10.413 + ft_begin_node(root, "chosen"); 10.414 + 10.415 + /* chosen.addprop('cpu', cpu0.get_phandle()) */ 10.416 + ft_prop_int(root, "cpu", cpu0_phandle); 10.417 + 10.418 + /* chosen.addprop('rma', rma.get_phandle()) */ 10.419 + ft_prop_int(root, "memory", rma_phandle); 10.420 + 10.421 + /* chosen.addprop('linux,stdout-path', '/xen/console\0') */ 10.422 + ft_prop_str(root, "linux,stdout-path", "/xen/console"); 10.423 + 10.424 + /* chosen.addprop('interrupt-controller, xen.get_phandle()) */ 10.425 + ft_prop_int(root, "interrupt-controller", xen_phandle); 10.426 + 10.427 + /* chosen.addprop('bootargs', imghandler.cmdline + '\0') */ 10.428 + if ( bootargs != NULL ) 10.429 + ft_prop_str(root, "bootargs", bootargs); 10.430 + 10.431 + /* mark where the initrd is, if present */ 10.432 + if ( initrd_len > 0 ) { 10.433 + val[0] = cpu_to_be64((u64) initrd_base); 10.434 + val[1] = cpu_to_be64((u64) initrd_end); 10.435 + ft_prop(root, "linux,initrd-start", &(val[0]), sizeof(val[0])); 10.436 + ft_prop(root, "linux,initrd-end", &(val[1]), sizeof(val[1])); 10.437 + } 10.438 + 10.439 + /* end chosen node */ 10.440 + 
ft_end_node(root); 10.441 + 10.442 + /* xen = root.addnode('xen') */ 10.443 + ft_begin_node(root, "xen"); 10.444 + 10.445 + /* xen.addprop('version', 'Xen-3.0-unstable\0') */ 10.446 + ft_prop_str(root, "compatible", "Xen-3.0-unstable"); 10.447 + 10.448 + /* xen.addprop('reg', long(imghandler.vm.domid), long(0)) */ 10.449 + val[0] = cpu_to_be64((u64) domid); 10.450 + val[1] = cpu_to_be64((u64) 0); 10.451 + ft_prop(root, "reg", val, sizeof(val)); 10.452 + 10.453 + /* point to shared_info_t page base addr */ 10.454 + val[0] = cpu_to_be64((u64) shared_info_paddr); 10.455 + val[1] = cpu_to_be64((u64) PAGE_SIZE); 10.456 + ft_prop(root, "shared-info", val, sizeof(val)); 10.457 + 10.458 + /* xen.addprop('domain-name', imghandler.vm.getName() + '\0') */ 10.459 + /* libxc doesn't know the domain name, that is purely a xend thing */ 10.460 + /* ft_prop_str(root, "domain-name", domain_name); */ 10.461 + 10.462 + /* add xen/linux,phandle for chosen/interrupt-controller */ 10.463 + ft_prop_int(root, "linux,phandle", xen_phandle); 10.464 + 10.465 + if (console_paddr != 0) { 10.466 + /* xencons = xen.addnode('console') */ 10.467 + ft_begin_node(root, "console"); 10.468 + 10.469 + /* console_paddr */ 10.470 + val[0] = cpu_to_be64((u64) console_paddr); 10.471 + val[1] = cpu_to_be64((u64) PAGE_SIZE); 10.472 + ft_prop(root, "reg", val, sizeof(val)); 10.473 + 10.474 + /* xencons.addprop('interrupts', console_evtchn, 0) */ 10.475 + val32[0] = cpu_to_be32((u32) console_evtchn); 10.476 + val32[1] = cpu_to_be32((u32) 0); 10.477 + ft_prop(root, "interrupts", val32, sizeof(val32)); 10.478 + 10.479 + /* end of console */ 10.480 + ft_end_node(root); 10.481 + } 10.482 + 10.483 + if (store_paddr != 0) { 10.484 + /* start store node */ 10.485 + ft_begin_node(root, "store"); 10.486 + 10.487 + /* store paddr */ 10.488 + val[0] = cpu_to_be64((u64) store_paddr); 10.489 + val[1] = cpu_to_be64((u64) PAGE_SIZE); 10.490 + ft_prop(root, "reg", val, sizeof(val)); 10.491 + 10.492 + /* store event channel */ 10.493 + val32[0] = cpu_to_be32((u32) store_evtchn); 10.494 + val32[1] = cpu_to_be32((u32) 0); 10.495 + ft_prop(root, "interrupts", val32, sizeof(val32)); 10.496 + 10.497 + /* end of store */ 10.498 + ft_end_node(root); 10.499 + } 10.500 + 10.501 + /* end of xen node */ 10.502 + ft_end_node(root); 10.503 + 10.504 + /* rma = root.addnode('memory@0') */ 10.505 + ft_begin_node(root, "memory@0"); 10.506 + 10.507 + /* rma.addprop('reg', long(0), long(rma_bytes)) */ 10.508 + val[0] = cpu_to_be64((u64) 0); 10.509 + val[1] = cpu_to_be64((u64) rma_bytes); 10.510 + ft_prop(root, "reg", val, sizeof(val)); 10.511 + 10.512 + /* rma.addprop('device_type', 'memory\0') */ 10.513 + ft_prop_str(root, "device_type", "memory"); 10.514 + 10.515 + /* add linux,phandle for chosen/rma node */ 10.516 + ft_prop_int(root, "linux,phandle", rma_phandle); 10.517 + 10.518 + /* end of memory@0 */ 10.519 + ft_end_node(root); 10.520 + 10.521 + /* calculate remaining bytes from total - rma size */ 10.522 + remaining = (mem_mb * 1024 * 1024) - rma_bytes; 10.523 + 10.524 + /* memory@<rma_bytes> is all remaining memory after RMA */ 10.525 + if (remaining > 0) 10.526 + { 10.527 + char mem[MAX_PATH]; 10.528 + 10.529 + if (snprintf(mem, sizeof(mem), "memory@%lx", 10.530 + rma_bytes) >= sizeof(mem)) { 10.531 + PERROR("%s: failed to build memory string", __func__); 10.532 + goto error; 10.533 + } 10.534 + 10.535 + /* memory@<rma_bytes> is all remaining memory after RMA */ 10.536 + ft_begin_node(root, mem); 10.537 + 10.538 + /* mem.addprop('reg', long(rma_bytes), 
long(remaining)) */ 10.539 + val[0] = cpu_to_be64((u64) rma_bytes); 10.540 + val[1] = cpu_to_be64((u64) remaining); 10.541 + ft_prop(root, "reg", val, sizeof(val)); 10.542 + 10.543 + /* mem.addprop('device_type', 'memory\0') */ 10.544 + ft_prop_str(root, "device_type", "memory"); 10.545 + 10.546 + /* end memory@<rma_bytes> node */ 10.547 + ft_end_node(root); 10.548 + } 10.549 + 10.550 + /* add CPU nodes */ 10.551 + /* cpus = root.addnode('cpus') */ 10.552 + ft_begin_node(root, "cpus"); 10.553 + 10.554 + /* cpus.addprop('smp-enabled') */ 10.555 + ft_prop(root, "smp-enabled", NULL, 0); 10.556 + 10.557 + /* cpus.addprop('#size-cells', 0) */ 10.558 + ft_prop_int(root, "#size-cells", 0); 10.559 + 10.560 + /* cpus.addprop('#address-cells', 1) */ 10.561 + ft_prop_int(root, "#address-cells", 1); 10.562 + 10.563 + /* 10.564 + * Copy all properties the system firmware gave us from a 10.565 + * CPU node in the device tree. 10.566 + */ 10.567 + if (find_cpu0(cpupath, sizeof(cpupath)) <= 0) { 10.568 + PERROR("%s: failed find cpu0 node in host devtree", __func__); 10.569 + goto error; 10.570 + } 10.571 + 10.572 + /* get the basename from path to cpu device */ 10.573 + if ((cpupath_copy = strdup(cpupath)) == NULL) { 10.574 + PERROR("%s: failed to dupe string", __func__); 10.575 + goto error; 10.576 + } 10.577 + if ((cpuname = basename(cpupath_copy)) == NULL) { 10.578 + PERROR("%s: basename() failed", __func__); 10.579 + goto error; 10.580 + } 10.581 + 10.582 + /* start node for the cpu */ 10.583 + ft_begin_node(root, cpuname); 10.584 + 10.585 + /* strdup() mallocs memory */ 10.586 + if ( cpupath_copy != NULL ) { 10.587 + free(cpupath_copy); 10.588 + cpupath_copy = NULL; 10.589 + } 10.590 + 10.591 + /* copy over most properties from host tree for cpu */ 10.592 + if (copynode(root, cpupath, propfilter) < 0) { 10.593 + PERROR("%s: failed to copy node", __func__); 10.594 + goto error; 10.595 + } 10.596 + 10.597 + /* calculate the pft-size */ 10.598 + shadow_mb_log = (int)log2((double)shadow_mb); 10.599 + pft_size = shadow_mb_log + 20; 10.600 + 10.601 + val32[0] = cpu_to_be32((u32) 0); 10.602 + val32[1] = cpu_to_be32((u32) pft_size); 10.603 + ft_prop(root, "ibm,pft-size", val32, sizeof(val32)); 10.604 + 10.605 + /* make phandle for cpu0 */ 10.606 + ft_prop_int(root, "linux,phandle", cpu0_phandle); 10.607 + 10.608 + /* end <cpuname> node */ 10.609 + ft_end_node(root); 10.610 + 10.611 + /* end cpus node */ 10.612 + ft_end_node(root); 10.613 + 10.614 + /* end root node */ 10.615 + ft_end_node(root); 10.616 + 10.617 + /* end of the tree */ 10.618 + if (ft_end_tree(root) != 0) { 10.619 + PERROR("%s: failed to end tree", __func__); 10.620 + goto error; 10.621 + } 10.622 + 10.623 + /* write a copy of the tree to a file */ 10.624 + if ((dtb_fd = open(DTB_FILE , O_RDWR)) == -1) { 10.625 + PERROR("%s: failed to open file %s", __func__, DTB_FILE); 10.626 + goto error; 10.627 + } 10.628 + 10.629 + if (write(dtb_fd, (const void *)bph, bph->totalsize) != bph->totalsize) { 10.630 + PERROR("%s: failed to write blob to file", __func__); 10.631 + goto error; 10.632 + } 10.633 + 10.634 + return 0; 10.635 + 10.636 +error: 10.637 + saved_errno = errno; 10.638 + 10.639 + /* strdup() mallocs memory */ 10.640 + if ( cpupath_copy != NULL ) { 10.641 + free(cpupath_copy); 10.642 + cpupath_copy = NULL; 10.643 + } 10.644 + 10.645 + /* free bph buffer */ 10.646 + free_devtree(root); 10.647 + 10.648 + if (dtb_fd) 10.649 + close(dtb_fd); 10.650 + 10.651 + errno = saved_errno; 10.652 + return -1; 10.653 +}
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/tools/libxc/powerpc64/mk_flatdevtree.h Thu Feb 15 14:09:39 2007 -0700 11.3 @@ -0,0 +1,47 @@ 11.4 +/* 11.5 + * This program is free software; you can redistribute it and/or modify 11.6 + * it under the terms of the GNU General Public License as published by 11.7 + * the Free Software Foundation; either version 2 of the License, or 11.8 + * (at your option) any later version. 11.9 + * 11.10 + * This program is distributed in the hope that it will be useful, 11.11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11.12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11.13 + * GNU General Public License for more details. 11.14 + * 11.15 + * You should have received a copy of the GNU General Public License 11.16 + * along with this program; if not, write to the Free Software 11.17 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 11.18 + * 11.19 + * Copyright IBM Corporation 2007 11.20 + * 11.21 + * Authors: Ryan Harper <ryanh@us.ibm.com> 11.22 + */ 11.23 + 11.24 +#ifndef MK_FLATDEVTREE_H 11.25 +#define MK_FLATDEVTREE_H 11.26 + 11.27 +#include "flatdevtree_env.h" 11.28 +#include "flatdevtree.h" 11.29 + 11.30 +extern void free_devtree(struct ft_cxt *root); 11.31 +extern int make_devtree(struct ft_cxt *root, 11.32 + uint32_t domid, 11.33 + uint32_t mem_mb, 11.34 + unsigned long rma_bytes, 11.35 + unsigned long shadow_mb, 11.36 + unsigned long initrd_base, 11.37 + unsigned long initrd_len, 11.38 + const char *bootargs, 11.39 + uint64_t shared_info_paddr, 11.40 + unsigned long console_evtchn, 11.41 + uint64_t console_paddr, 11.42 + unsigned long store_evtchn, 11.43 + uint64_t store_paddr); 11.44 + 11.45 +#define MAX_PATH 200 11.46 +#define BUFSIZE 1024 11.47 +#define BPH_SIZE 16*1024 11.48 +#define DTB_FILE "/tmp/domU.dtb" 11.49 + 11.50 +#endif /* MK_FLATDEVTREE_H */
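Taken together with the new mk_flatdevtree.c above, this header is the whole client-visible API. For orientation, a hedged sketch of how a caller might drive it, based purely on the prototypes above (it only compiles inside the libxc tree); every numeric value below is a placeholder, and the real caller is the reworked xc_linux_build() later in this changeset:

```c
#include "mk_flatdevtree.h"

/* Illustrative only: the addresses and event channels are made up. */
static int build_domain_devtree(uint32_t domid)
{
    struct ft_cxt devtree;

    if (make_devtree(&devtree,
                     domid,
                     256,                          /* mem_mb */
                     64UL << 20,                   /* rma_bytes: 64MB RMA */
                     4,                            /* shadow_mb */
                     0, 0,                         /* no initrd */
                     "root=/dev/xvda1",            /* bootargs */
                     (64UL << 20) - 4096,          /* shared_info_paddr */
                     1, (64UL << 20) - 3 * 4096,   /* console evtchn/paddr */
                     2, (64UL << 20) - 2 * 4096)   /* store evtchn/paddr */
        < 0)
        return -1;

    /* ... install devtree.bph->totalsize bytes into the new domain ... */

    free_devtree(&devtree);
    return 0;
}
```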
12.1 --- a/tools/libxc/powerpc64/utils.c Thu Feb 15 13:13:36 2007 -0700 12.2 +++ b/tools/libxc/powerpc64/utils.c Thu Feb 15 14:09:39 2007 -0700 12.3 @@ -13,7 +13,7 @@ 12.4 * along with this program; if not, write to the Free Software 12.5 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 12.6 * 12.7 - * Copyright (C) IBM Corporation 2006 12.8 + * Copyright IBM Corporation 2006, 2007 12.9 * 12.10 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 12.11 * Jimi Xenidis <jimix@watson.ibm.com> 12.12 @@ -37,29 +37,6 @@ 12.13 #include "flatdevtree.h" 12.14 #include "utils.h" 12.15 12.16 -unsigned long get_rma_pages(void *devtree) 12.17 -{ 12.18 - void *rma; 12.19 - uint64_t rma_reg[2]; 12.20 - int rc; 12.21 - 12.22 - rma = ft_find_node(devtree, "/memory@0"); 12.23 - if (rma == NULL) { 12.24 - DPRINTF("couldn't find /memory@0\n"); 12.25 - return 0; 12.26 - } 12.27 - rc = ft_get_prop(devtree, rma, "reg", rma_reg, sizeof(rma_reg)); 12.28 - if (rc < 0) { 12.29 - DPRINTF("couldn't get /memory@0/reg\n"); 12.30 - return 0; 12.31 - } 12.32 - if (rma_reg[0] != 0) { 12.33 - DPRINTF("RMA did not start at 0\n"); 12.34 - return 0; 12.35 - } 12.36 - return rma_reg[1] >> PAGE_SHIFT; 12.37 -} 12.38 - 12.39 int get_rma_page_array(int xc_handle, int domid, xen_pfn_t **page_array, 12.40 unsigned long nr_pages) 12.41 {
13.1 --- a/tools/libxc/powerpc64/utils.h Thu Feb 15 13:13:36 2007 -0700 13.2 +++ b/tools/libxc/powerpc64/utils.h Thu Feb 15 14:09:39 2007 -0700 13.3 @@ -13,13 +13,12 @@ 13.4 * along with this program; if not, write to the Free Software 13.5 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 13.6 * 13.7 - * Copyright (C) IBM Corporation 2006 13.8 + * Copyright IBM Corporation 2006, 2007 13.9 * 13.10 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 13.11 * Jimi Xenidis <jimix@watson.ibm.com> 13.12 */ 13.13 13.14 -extern unsigned long get_rma_pages(void *devtree); 13.15 extern int get_rma_page_array(int xc_handle, int domid, xen_pfn_t **page_array, 13.16 unsigned long nr_pages); 13.17 extern int install_image(int xc_handle, int domid, xen_pfn_t *page_array,
14.1 --- a/tools/libxc/powerpc64/xc_linux_build.c Thu Feb 15 13:13:36 2007 -0700 14.2 +++ b/tools/libxc/powerpc64/xc_linux_build.c Thu Feb 15 14:09:39 2007 -0700 14.3 @@ -13,9 +13,10 @@ 14.4 * along with this program; if not, write to the Free Software 14.5 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 14.6 * 14.7 - * Copyright (C) IBM Corporation 2006 14.8 + * Copyright IBM Corporation 2006, 2007 14.9 * 14.10 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 14.11 + * Ryan Harper <ryanh@us.ibm.com> 14.12 */ 14.13 14.14 #include <stdio.h> 14.15 @@ -32,10 +33,12 @@ 14.16 #include <xc_private.h> 14.17 #include <xg_private.h> 14.18 #include <xenctrl.h> 14.19 +#include <xen/arch-powerpc.h> 14.20 14.21 #include "flatdevtree_env.h" 14.22 #include "flatdevtree.h" 14.23 #include "utils.h" 14.24 +#include "mk_flatdevtree.h" 14.25 14.26 #define INITRD_ADDR (24UL << 20) 14.27 #define DEVTREE_ADDR (16UL << 20) 14.28 @@ -78,85 +81,6 @@ static int init_boot_vcpu( 14.29 return rc; 14.30 } 14.31 14.32 -static int load_devtree( 14.33 - int xc_handle, 14.34 - int domid, 14.35 - xen_pfn_t *page_array, 14.36 - void *devtree, 14.37 - unsigned long devtree_addr, 14.38 - uint64_t initrd_base, 14.39 - unsigned long initrd_len, 14.40 - start_info_t *start_info __attribute__((unused)), 14.41 - unsigned long start_info_addr) 14.42 -{ 14.43 - uint32_t si[4] = {0, start_info_addr, 0, 0x1000}; 14.44 - struct boot_param_header *header; 14.45 - void *chosen; 14.46 - void *xen; 14.47 - uint64_t initrd_end = initrd_base + initrd_len; 14.48 - unsigned int devtree_size; 14.49 - int rc = 0; 14.50 - 14.51 - DPRINTF("adding initrd props\n"); 14.52 - 14.53 - chosen = ft_find_node(devtree, "/chosen"); 14.54 - if (chosen == NULL) { 14.55 - DPRINTF("couldn't find /chosen\n"); 14.56 - return -1; 14.57 - } 14.58 - 14.59 - xen = ft_find_node(devtree, "/xen"); 14.60 - if (xen == NULL) { 14.61 - DPRINTF("couldn't find /xen\n"); 14.62 - return -1; 14.63 - } 14.64 - 14.65 - /* initrd-start */ 14.66 - rc = ft_set_prop(&devtree, chosen, "linux,initrd-start", 14.67 - &initrd_base, sizeof(initrd_base)); 14.68 - if (rc < 0) { 14.69 - DPRINTF("couldn't set /chosen/linux,initrd-start\n"); 14.70 - return rc; 14.71 - } 14.72 - 14.73 - /* initrd-end */ 14.74 - rc = ft_set_prop(&devtree, chosen, "linux,initrd-end", 14.75 - &initrd_end, sizeof(initrd_end)); 14.76 - if (rc < 0) { 14.77 - DPRINTF("couldn't set /chosen/linux,initrd-end\n"); 14.78 - return rc; 14.79 - } 14.80 - 14.81 - rc = ft_set_rsvmap(devtree, 1, initrd_base, initrd_len); 14.82 - if (rc < 0) { 14.83 - DPRINTF("couldn't set initrd reservation\n"); 14.84 - return ~0UL; 14.85 - } 14.86 - 14.87 - /* start-info (XXX being removed soon) */ 14.88 - rc = ft_set_prop(&devtree, xen, "start-info", si, sizeof(si)); 14.89 - if (rc < 0) { 14.90 - DPRINTF("couldn't set /xen/start-info\n"); 14.91 - return rc; 14.92 - } 14.93 - 14.94 - header = devtree; 14.95 - devtree_size = header->totalsize; 14.96 - { 14.97 - static const char dtb[] = "/tmp/xc_domU.dtb"; 14.98 - int dfd = creat(dtb, 0666); 14.99 - if (dfd != -1) { 14.100 - write(dfd, devtree, devtree_size); 14.101 - close(dfd); 14.102 - } else 14.103 - DPRINTF("could not open(\"%s\")\n", dtb); 14.104 - } 14.105 - 14.106 - DPRINTF("copying device tree to 0x%lx[0x%x]\n", DEVTREE_ADDR, devtree_size); 14.107 - return install_image(xc_handle, domid, page_array, devtree, DEVTREE_ADDR, 14.108 - devtree_size); 14.109 -} 14.110 - 14.111 static int load_initrd( 14.112 int xc_handle, 14.113 int domid, 14.114 @@ -186,46 +110,46 @@ 
out: 14.115 return rc; 14.116 } 14.117 14.118 -static unsigned long create_start_info( 14.119 - void *devtree, start_info_t *start_info, 14.120 - unsigned int console_evtchn, unsigned int store_evtchn, 14.121 - unsigned long nr_pages, unsigned long rma_pages) 14.122 -{ 14.123 - unsigned long start_info_addr; 14.124 - uint64_t rma_top; 14.125 - int rc; 14.126 - 14.127 - memset(start_info, 0, sizeof(*start_info)); 14.128 - snprintf(start_info->magic, sizeof(start_info->magic), 14.129 - "xen-%d.%d-powerpc64HV", 3, 0); 14.130 - 14.131 - rma_top = rma_pages << PAGE_SHIFT; 14.132 - DPRINTF("RMA top = 0x%"PRIX64"\n", rma_top); 14.133 - 14.134 - start_info->nr_pages = nr_pages; 14.135 - start_info->shared_info = rma_top - PAGE_SIZE; 14.136 - start_info->store_mfn = (rma_top >> PAGE_SHIFT) - 2; 14.137 - start_info->store_evtchn = store_evtchn; 14.138 - start_info->console.domU.mfn = (rma_top >> PAGE_SHIFT) - 3; 14.139 - start_info->console.domU.evtchn = console_evtchn; 14.140 - start_info_addr = rma_top - 4*PAGE_SIZE; 14.141 - 14.142 - rc = ft_set_rsvmap(devtree, 0, start_info_addr, 4*PAGE_SIZE); 14.143 - if (rc < 0) { 14.144 - DPRINTF("couldn't set start_info reservation\n"); 14.145 - return ~0UL; 14.146 - } 14.147 - 14.148 - 14.149 - return start_info_addr; 14.150 -} 14.151 - 14.152 static void free_page_array(xen_pfn_t *page_array) 14.153 { 14.154 free(page_array); 14.155 } 14.156 14.157 +static int check_memory_config(int rma_log, unsigned int mem_mb) 14.158 +{ 14.159 + u64 mem_kb = (mem_mb << 10); 14.160 + u64 rma_kb = (1 << rma_log) >> 10; 14.161 14.162 + switch(rma_log) 14.163 + { 14.164 + case 26: 14.165 + case 27: 14.166 + case 28: 14.167 + case 30: 14.168 + case 34: 14.169 + case 38: 14.170 + if (mem_kb < rma_kb) { 14.171 + DPRINTF("Domain memory must be at least %dMB\n", 14.172 + (1 << rma_log)>>20); 14.173 + break; 14.174 + } 14.175 + 14.176 + if (mem_kb % (16 << 10)) { 14.177 + DPRINTF("Domain memory %dMB must be a multiple of 16MB\n", 14.178 + mem_mb); 14.179 + 14.180 + break; 14.181 + } 14.182 + 14.183 + /* rma_log and mem_mb OK */ 14.184 + return 0; 14.185 + 14.186 + default: 14.187 + DPRINTF("Invalid rma_log (%d)\n", rma_log); 14.188 + } 14.189 + 14.190 + return 1; 14.191 +} 14.192 14.193 int xc_linux_build(int xc_handle, 14.194 uint32_t domid, 14.195 @@ -238,10 +162,8 @@ int xc_linux_build(int xc_handle, 14.196 unsigned int store_evtchn, 14.197 unsigned long *store_mfn, 14.198 unsigned int console_evtchn, 14.199 - unsigned long *console_mfn, 14.200 - void *devtree) 14.201 + unsigned long *console_mfn) 14.202 { 14.203 - start_info_t start_info; 14.204 struct domain_setup_info dsi; 14.205 xen_pfn_t *page_array = NULL; 14.206 unsigned long nr_pages; 14.207 @@ -249,19 +171,59 @@ int xc_linux_build(int xc_handle, 14.208 unsigned long kern_addr; 14.209 unsigned long initrd_base = 0; 14.210 unsigned long initrd_len = 0; 14.211 - unsigned long start_info_addr; 14.212 unsigned long rma_pages; 14.213 + unsigned long shadow_mb; 14.214 + u64 shared_info_paddr; 14.215 + u64 store_paddr; 14.216 + u64 console_paddr; 14.217 + u32 remaining_kb; 14.218 + u32 extent_order; 14.219 + u64 nr_extents; 14.220 + int rma_log = 26; /* 64MB RMA */ 14.221 int rc = 0; 14.222 + int op; 14.223 + struct ft_cxt devtree; 14.224 14.225 DPRINTF("%s\n", __func__); 14.226 14.227 nr_pages = mem_mb << (20 - PAGE_SHIFT); 14.228 DPRINTF("nr_pages 0x%lx\n", nr_pages); 14.229 14.230 - rma_pages = get_rma_pages(devtree); 14.231 + rma_pages = (1 << rma_log) >> PAGE_SHIFT; 14.232 if (rma_pages == 0) { 14.233 - rc = -1; 14.234 - 
goto out; 14.235 + rc = -1; 14.236 + goto out; 14.237 + } 14.238 + 14.239 + /* validate rma_log and domain memory config */ 14.240 + if (check_memory_config(rma_log, mem_mb)) { 14.241 + rc = -1; 14.242 + goto out; 14.243 + } 14.244 + 14.245 + /* alloc RMA */ 14.246 + if (xc_alloc_real_mode_area(xc_handle, domid, rma_log)) { 14.247 + rc = -1; 14.248 + goto out; 14.249 + } 14.250 + 14.251 + /* subtract already allocated RMA to determine remaining KB to alloc */ 14.252 + remaining_kb = (nr_pages - rma_pages) * (PAGE_SIZE / 1024); 14.253 + DPRINTF("totalmem - RMA = %dKB\n", remaining_kb); 14.254 + 14.255 + /* to allocate in 16MB chunks, we need to determine the order of 14.256 + * the number of PAGE_SIZE pages contained in 16MB. */ 14.257 + extent_order = 24 - 12; /* extent_order = log2((1 << 24) - (1 << 12)) */ 14.258 + nr_extents = (remaining_kb / (PAGE_SIZE/1024)) >> extent_order; 14.259 + DPRINTF("allocating memory in %llu chunks of %luMB\n", nr_extents, 14.260 + (((1 << extent_order) >> 10) * PAGE_SIZE) >> 10); 14.261 + 14.262 + /* now allocate the remaining memory as large-order allocations */ 14.263 + DPRINTF("increase_reservation(%u, %llu, %u)\n", domid, nr_extents, extent_order); 14.264 + if (xc_domain_memory_increase_reservation(xc_handle, domid, nr_extents, 14.265 + extent_order, 0, NULL)) { 14.266 + rc = -1; 14.267 + goto out; 14.268 } 14.269 14.270 if (get_rma_page_array(xc_handle, domid, &page_array, rma_pages)) { 14.271 @@ -285,27 +247,46 @@ int xc_linux_build(int xc_handle, 14.272 } 14.273 } 14.274 14.275 - /* start_info stuff: about to be removed */ 14.276 - start_info_addr = create_start_info(devtree, &start_info, console_evtchn, 14.277 - store_evtchn, nr_pages, rma_pages); 14.278 - *console_mfn = page_array[start_info.console.domU.mfn]; 14.279 - *store_mfn = page_array[start_info.store_mfn]; 14.280 - if (install_image(xc_handle, domid, page_array, &start_info, 14.281 - start_info_addr, sizeof(start_info_t))) { 14.282 + /* fetch the current shadow_memory value for this domain */ 14.283 + op = XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION; 14.284 + if (xc_shadow_control(xc_handle, domid, op, NULL, 0, 14.285 + &shadow_mb, 0, NULL) < 0 ) { 14.286 rc = -1; 14.287 goto out; 14.288 } 14.289 14.290 - if (devtree) { 14.291 - DPRINTF("loading flattened device tree\n"); 14.292 - devtree_addr = DEVTREE_ADDR; 14.293 - if (load_devtree(xc_handle, domid, page_array, devtree, devtree_addr, 14.294 - initrd_base, initrd_len, &start_info, 14.295 - start_info_addr)) { 14.296 - DPRINTF("couldn't load flattened device tree.\n"); 14.297 - rc = -1; 14.298 - goto out; 14.299 - } 14.300 + /* determine shared_info, console, and store paddr */ 14.301 + shared_info_paddr = (rma_pages << PAGE_SHIFT) - 14.302 + (RMA_SHARED_INFO * PAGE_SIZE); 14.303 + console_paddr = (rma_pages << PAGE_SHIFT) - (RMA_CONSOLE * PAGE_SIZE); 14.304 + store_paddr = (rma_pages << PAGE_SHIFT) - (RMA_STORE * PAGE_SIZE); 14.305 + 14.306 + /* map paddrs to mfns */ 14.307 + *store_mfn = page_array[(xen_pfn_t)(store_paddr >> PAGE_SHIFT)]; 14.308 + *console_mfn = page_array[(xen_pfn_t)(console_paddr >> PAGE_SHIFT)]; 14.309 + DPRINTF("console_mfn->%08lx store_mfn->%08lx\n", *console_mfn, 14.310 + *store_mfn); 14.311 + 14.312 + /* build the devtree here */ 14.313 + DPRINTF("constructing devtree\n"); 14.314 + if (make_devtree(&devtree, domid, mem_mb, (rma_pages << PAGE_SHIFT), 14.315 + shadow_mb, initrd_base, initrd_len, cmdline, 14.316 + shared_info_paddr, console_evtchn, console_paddr, 14.317 + store_evtchn, store_paddr) < 0) { 14.318 + 
DPRINTF("failed to create flattened device tree\n"); 14.319 + rc = -1; 14.320 + goto out; 14.321 + } 14.322 + 14.323 + devtree_addr = DEVTREE_ADDR; 14.324 + DPRINTF("loading flattened device tree to 0x%lx[0x%x]\n", 14.325 + devtree_addr, devtree.bph->totalsize); 14.326 + 14.327 + if (install_image(xc_handle, domid, page_array, (void *)devtree.bph, 14.328 + devtree_addr, devtree.bph->totalsize)) { 14.329 + DPRINTF("couldn't load flattened device tree.\n"); 14.330 + rc = -1; 14.331 + goto out; 14.332 } 14.333 14.334 if (init_boot_vcpu(xc_handle, domid, &dsi, devtree_addr, kern_addr)) { 14.335 @@ -314,6 +295,7 @@ int xc_linux_build(int xc_handle, 14.336 } 14.337 14.338 out: 14.339 + free_devtree(&devtree); 14.340 free_page_array(page_array); 14.341 return rc; 14.342 }
15.1 --- a/tools/libxc/powerpc64/xc_prose_build.c Thu Feb 15 13:13:36 2007 -0700 15.2 +++ b/tools/libxc/powerpc64/xc_prose_build.c Thu Feb 15 14:09:39 2007 -0700 15.3 @@ -13,7 +13,7 @@ 15.4 * along with this program; if not, write to the Free Software 15.5 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 15.6 * 15.7 - * Copyright (C) IBM Corporation 2006 15.8 + * Copyright IBM Corporation 2006, 2007 15.9 * 15.10 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 15.11 * Jonathan Appavoo <jappavoo@us.ibm.com> 15.12 @@ -34,18 +34,14 @@ 15.13 #include <xg_private.h> 15.14 #include <xenctrl.h> 15.15 15.16 -#include "flatdevtree_env.h" 15.17 -#include "flatdevtree.h" 15.18 #include "utils.h" 15.19 15.20 #define INITRD_ADDR (24UL << 20) 15.21 -#define DEVTREE_ADDR (16UL << 20) 15.22 15.23 static int init_boot_vcpu( 15.24 int xc_handle, 15.25 int domid, 15.26 struct domain_setup_info *dsi, 15.27 - unsigned long devtree_addr, 15.28 unsigned long kern_addr) 15.29 { 15.30 vcpu_guest_context_t ctxt; 15.31 @@ -55,7 +51,7 @@ static int init_boot_vcpu( 15.32 ctxt.user_regs.pc = dsi->v_kernentry; 15.33 ctxt.user_regs.msr = 0; 15.34 ctxt.user_regs.gprs[1] = 0; /* Linux uses its own stack */ 15.35 - ctxt.user_regs.gprs[3] = devtree_addr; 15.36 + ctxt.user_regs.gprs[3] = 0; 15.37 ctxt.user_regs.gprs[4] = kern_addr; 15.38 ctxt.user_regs.gprs[5] = 0; /* reserved for specifying OF handler */ 15.39 /* There is a buggy kernel that does not zero the "local_paca", so 15.40 @@ -79,85 +75,6 @@ static int init_boot_vcpu( 15.41 return rc; 15.42 } 15.43 15.44 -static int load_devtree( 15.45 - int xc_handle, 15.46 - int domid, 15.47 - xen_pfn_t *page_array, 15.48 - void *devtree, 15.49 - unsigned long devtree_addr, 15.50 - uint64_t initrd_base, 15.51 - unsigned long initrd_len, 15.52 - start_info_t *start_info __attribute__((unused)), 15.53 - unsigned long start_info_addr) 15.54 -{ 15.55 - uint32_t si[4] = {0, start_info_addr, 0, 0x1000}; 15.56 - struct boot_param_header *header; 15.57 - void *chosen; 15.58 - void *xen; 15.59 - uint64_t initrd_end = initrd_base + initrd_len; 15.60 - unsigned int devtree_size; 15.61 - int rc = 0; 15.62 - 15.63 - DPRINTF("adding initrd props\n"); 15.64 - 15.65 - chosen = ft_find_node(devtree, "/chosen"); 15.66 - if (chosen == NULL) { 15.67 - DPRINTF("couldn't find /chosen\n"); 15.68 - return -1; 15.69 - } 15.70 - 15.71 - xen = ft_find_node(devtree, "/xen"); 15.72 - if (xen == NULL) { 15.73 - DPRINTF("couldn't find /xen\n"); 15.74 - return -1; 15.75 - } 15.76 - 15.77 - /* initrd-start */ 15.78 - rc = ft_set_prop(&devtree, chosen, "linux,initrd-start", 15.79 - &initrd_base, sizeof(initrd_base)); 15.80 - if (rc < 0) { 15.81 - DPRINTF("couldn't set /chosen/linux,initrd-start\n"); 15.82 - return rc; 15.83 - } 15.84 - 15.85 - /* initrd-end */ 15.86 - rc = ft_set_prop(&devtree, chosen, "linux,initrd-end", 15.87 - &initrd_end, sizeof(initrd_end)); 15.88 - if (rc < 0) { 15.89 - DPRINTF("couldn't set /chosen/linux,initrd-end\n"); 15.90 - return rc; 15.91 - } 15.92 - 15.93 - rc = ft_set_rsvmap(devtree, 1, initrd_base, initrd_len); 15.94 - if (rc < 0) { 15.95 - DPRINTF("couldn't set initrd reservation\n"); 15.96 - return ~0UL; 15.97 - } 15.98 - 15.99 - /* start-info (XXX being removed soon) */ 15.100 - rc = ft_set_prop(&devtree, xen, "start-info", si, sizeof(si)); 15.101 - if (rc < 0) { 15.102 - DPRINTF("couldn't set /xen/start-info\n"); 15.103 - return rc; 15.104 - } 15.105 - 15.106 - header = devtree; 15.107 - devtree_size = header->totalsize; 15.108 - { 15.109 - static 
const char dtb[] = "/tmp/xc_domU.dtb"; 15.110 - int dfd = creat(dtb, 0666); 15.111 - if (dfd != -1) { 15.112 - write(dfd, devtree, devtree_size); 15.113 - close(dfd); 15.114 - } else 15.115 - DPRINTF("could not open(\"%s\")\n", dtb); 15.116 - } 15.117 - 15.118 - DPRINTF("copying device tree to 0x%lx[0x%x]\n", DEVTREE_ADDR, devtree_size); 15.119 - return install_image(xc_handle, domid, page_array, devtree, DEVTREE_ADDR, 15.120 - devtree_size); 15.121 -} 15.122 - 15.123 static int load_initrd( 15.124 int xc_handle, 15.125 int domid, 15.126 @@ -188,13 +105,12 @@ out: 15.127 } 15.128 15.129 static unsigned long create_start_info( 15.130 - void *devtree, start_info_t *start_info, 15.131 + start_info_t *start_info, 15.132 unsigned int console_evtchn, unsigned int store_evtchn, 15.133 unsigned long nr_pages, unsigned long rma_pages, const char *cmdline) 15.134 { 15.135 unsigned long start_info_addr; 15.136 uint64_t rma_top; 15.137 - int rc; 15.138 15.139 memset(start_info, 0, sizeof(*start_info)); 15.140 snprintf(start_info->magic, sizeof(start_info->magic), 15.141 @@ -214,12 +130,6 @@ static unsigned long create_start_info( 15.142 start_info->cmd_line[MAX_GUEST_CMDLINE-1]=0; 15.143 start_info_addr = rma_top - 4*PAGE_SIZE; 15.144 15.145 - rc = ft_set_rsvmap(devtree, 0, start_info_addr, 4*PAGE_SIZE); 15.146 - if (rc < 0) { 15.147 - DPRINTF("couldn't set start_info reservation\n"); 15.148 - return ~0UL; 15.149 - } 15.150 - 15.151 return start_info_addr; 15.152 } 15.153 15.154 @@ -239,14 +149,12 @@ int xc_prose_build(int xc_handle, 15.155 unsigned int store_evtchn, 15.156 unsigned long *store_mfn, 15.157 unsigned int console_evtchn, 15.158 - unsigned long *console_mfn, 15.159 - void *devtree) 15.160 + unsigned long *console_mfn) 15.161 { 15.162 start_info_t start_info; 15.163 struct domain_setup_info dsi; 15.164 xen_pfn_t *page_array = NULL; 15.165 unsigned long nr_pages; 15.166 - unsigned long devtree_addr = 0; 15.167 unsigned long kern_addr; 15.168 unsigned long initrd_base = 0; 15.169 unsigned long initrd_len = 0; 15.170 @@ -261,11 +169,7 @@ int xc_prose_build(int xc_handle, 15.171 nr_pages = mem_mb << (20 - PAGE_SHIFT); 15.172 DPRINTF("nr_pages 0x%lx\n", nr_pages); 15.173 15.174 - rma_pages = get_rma_pages(devtree); 15.175 - if (rma_pages == 0) { 15.176 - rc = -1; 15.177 - goto out; 15.178 - } 15.179 + rma_pages = (1 << 26) >> PAGE_SHIFT; /* 64 MB */ 15.180 15.181 if (get_rma_page_array(xc_handle, domid, &page_array, rma_pages)) { 15.182 rc = -1; 15.183 @@ -289,9 +193,9 @@ int xc_prose_build(int xc_handle, 15.184 } 15.185 15.186 /* start_info stuff: about to be removed */ 15.187 - start_info_addr = create_start_info(devtree, &start_info, console_evtchn, 15.188 + start_info_addr = create_start_info(&start_info, console_evtchn, 15.189 store_evtchn, nr_pages, 15.190 - rma_pages, cmdline); 15.191 + rma_pages, cmdline); 15.192 *console_mfn = page_array[start_info.console.domU.mfn]; 15.193 *store_mfn = page_array[start_info.store_mfn]; 15.194 if (install_image(xc_handle, domid, page_array, &start_info, 15.195 @@ -300,19 +204,7 @@ int xc_prose_build(int xc_handle, 15.196 goto out; 15.197 } 15.198 15.199 - if (devtree) { 15.200 - DPRINTF("loading flattened device tree\n"); 15.201 - devtree_addr = DEVTREE_ADDR; 15.202 - if (load_devtree(xc_handle, domid, page_array, devtree, devtree_addr, 15.203 - initrd_base, initrd_len, &start_info, 15.204 - start_info_addr)) { 15.205 - DPRINTF("couldn't load flattened device tree.\n"); 15.206 - rc = -1; 15.207 - goto out; 15.208 - } 15.209 - } 15.210 - 15.211 - if 
(init_boot_vcpu(xc_handle, domid, &dsi, devtree_addr, kern_addr)) { 15.212 + if (init_boot_vcpu(xc_handle, domid, &dsi, kern_addr)) { 15.213 rc = -1; 15.214 goto out; 15.215 }
16.1 --- a/tools/libxc/xc_dom.h Thu Feb 15 13:13:36 2007 -0700 16.2 +++ b/tools/libxc/xc_dom.h Thu Feb 15 14:09:39 2007 -0700 16.3 @@ -7,11 +7,6 @@ 16.4 typedef uint64_t xen_vaddr_t; 16.5 typedef uint64_t xen_paddr_t; 16.6 16.7 -/* FIXME: temporary hack ... */ 16.8 -#ifndef PRIpfn 16.9 -#define PRIpfn "lx" 16.10 -#endif 16.11 - 16.12 struct xc_dom_seg { 16.13 xen_vaddr_t vstart; 16.14 xen_vaddr_t vend;
17.1 --- a/tools/libxc/xc_dom_powerpc64.c Thu Feb 15 13:13:36 2007 -0700 17.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 17.3 @@ -1,100 +0,0 @@ 17.4 -/* 17.5 - * Xen domain builder -- ia64 bits. 17.6 - * 17.7 - * Most architecture-specific code for ia64 goes here. 17.8 - * - fill architecture-specific structs. 17.9 - * 17.10 - * This code is licenced under the GPL. 17.11 - * written 2006 by Gerd Hoffmann <kraxel@suse.de>. 17.12 - * 17.13 - */ 17.14 -#include <stdio.h> 17.15 -#include <stdlib.h> 17.16 -#include <string.h> 17.17 -#include <inttypes.h> 17.18 - 17.19 -#include <xen/xen.h> 17.20 - 17.21 -#include "xg_private.h" 17.22 -#include "xc_dom.h" 17.23 - 17.24 -/* ------------------------------------------------------------------------ */ 17.25 - 17.26 -static int alloc_magic_pages(struct xc_dom_image *dom) 17.27 -{ 17.28 - /* allocate special pages */ 17.29 - dom->low_top--; /* shared_info */ 17.30 - dom->xenstore_pfn = --dom->low_top; 17.31 - dom->console_pfn = --dom->low_top; 17.32 - dom->start_info_pfn = --dom->low_top; 17.33 - return 0; 17.34 -} 17.35 - 17.36 -static int start_info(struct xc_dom_image *dom) 17.37 -{ 17.38 - start_info_t *si = 17.39 - xc_dom_pfn_to_ptr(dom, dom->start_info_pfn, 1); 17.40 - 17.41 - xc_dom_printf("%s\n", __FUNCTION__); 17.42 - 17.43 - snprintf(si->magic, sizeof(si->magic), "xen-%d.%d-powerpc64HV", 3, 0); 17.44 - 17.45 - si->nr_pages = dom->total_pages; 17.46 - si->shared_info = (dom->total_pages - 1) << PAGE_SHIFT; 17.47 - si->store_mfn = dom->xenstore_pfn; 17.48 - si->store_evtchn = dom->store_evtchn; 17.49 - si->console.domU.mfn = dom->console_pfn; 17.50 - si->console.domU.evtchn = dom->console_evtchn; 17.51 - return 0; 17.52 -} 17.53 - 17.54 -static int shared_info(struct xc_dom_image *dom, void *ptr) 17.55 -{ 17.56 - shared_info_t *shared_info = ptr; 17.57 - int i; 17.58 - 17.59 - xc_dom_printf("%s: called\n", __FUNCTION__); 17.60 - 17.61 - memset(shared_info, 0, sizeof(*shared_info)); 17.62 - return 0; 17.63 -} 17.64 - 17.65 -static int vcpu(struct xc_dom_image *dom, void *ptr) 17.66 -{ 17.67 - vcpu_guest_context_t *ctxt = ptr; 17.68 - 17.69 - xc_dom_printf("%s: called\n", __FUNCTION__); 17.70 - 17.71 - /* clear everything */ 17.72 - memset(ctxt, 0, sizeof(*ctxt)); 17.73 - 17.74 - memset(&ctxt->user_regs, 0x55, sizeof(ctxt.user_regs)); 17.75 - ctxt->user_regs.pc = dsi->v_kernentry; 17.76 - ctxt->user_regs.msr = 0; 17.77 - ctxt->user_regs.gprs[1] = 0; /* Linux uses its own stack */ 17.78 - ctxt->user_regs.gprs[3] = devtree_addr; 17.79 - ctxt->user_regs.gprs[4] = kern_addr; 17.80 - ctxt->user_regs.gprs[5] = 0; 17.81 - 17.82 - /* There is a buggy kernel that does not zero the "local_paca", so 17.83 - * we must make sure this register is 0 */ 17.84 - ctxt->user_regs.gprs[13] = 0; 17.85 - 17.86 - return 0; 17.87 -} 17.88 - 17.89 -/* ------------------------------------------------------------------------ */ 17.90 - 17.91 -static struct xc_dom_arch xc_dom_arch = { 17.92 - .guest_type = "xen-3.0-powerpc64", 17.93 - .page_shift = FIXME, 17.94 - .alloc_magic_pages = alloc_magic_pages, 17.95 - .start_info = start_info, 17.96 - .shared_info = shared_info, 17.97 - .vcpu = vcpu, 17.98 -}; 17.99 - 17.100 -static void __init register_arch_hooks(void) 17.101 -{ 17.102 - xc_dom_register_arch_hooks(&xc_dom_arch); 17.103 -}
18.1 --- a/tools/libxc/xenguest.h Thu Feb 15 13:13:36 2007 -0700 18.2 +++ b/tools/libxc/xenguest.h Thu Feb 15 14:09:39 2007 -0700 18.3 @@ -153,7 +153,6 @@ int xc_prose_build(int xc_handle, 18.4 unsigned int store_evtchn, 18.5 unsigned long *store_mfn, 18.6 unsigned int console_evtchn, 18.7 - unsigned long *console_mfn, 18.8 - void *arch_args); 18.9 + unsigned long *console_mfn); 18.10 18.11 #endif /* XENGUEST_H */
19.1 --- a/tools/libxc/xg_private.c Thu Feb 15 13:13:36 2007 -0700 19.2 +++ b/tools/libxc/xg_private.c Thu Feb 15 14:09:39 2007 -0700 19.3 @@ -216,3 +216,15 @@ int xc_hvm_restore(int xc_handle, int io 19.4 errno = ENOSYS; 19.5 return -1; 19.6 } 19.7 + 19.8 +__attribute__((weak)) int xc_get_hvm_param( 19.9 + int handle, domid_t dom, int param, unsigned long *value) 19.10 +{ 19.11 + return -ENOSYS; 19.12 +} 19.13 + 19.14 +__attribute__((weak)) int xc_set_hvm_param( 19.15 + int handle, domid_t dom, int param, unsigned long value) 19.16 +{ 19.17 + return -ENOSYS; 19.18 +}
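The two stubs added above rely on GCC's weak-symbol mechanism: a definition marked `__attribute__((weak))` is used only when no ordinary (strong) definition of the same symbol is present at link time, so ports that really implement the HVM-param calls override the -ENOSYS fallbacks without any #ifdefs. A minimal, self-contained sketch of that behaviour with a made-up function name:

```c
/* fallback.c -- hypothetical example, not part of the changeset */
#include <stdio.h>

__attribute__((weak)) int get_param(int param, unsigned long *value)
{
    (void)param; (void)value;
    return -38;                 /* -ENOSYS: no real implementation linked in */
}

/* If another object file in the link provides a strong get_param(),
 * the linker picks that one and this weak body is discarded. */
int main(void)
{
    unsigned long v = 0;
    printf("get_param -> %d\n", get_param(0, &v));
    return 0;
}
```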
20.1 --- a/tools/python/xen/lowlevel/xc/xc.c Thu Feb 15 13:13:36 2007 -0700 20.2 +++ b/tools/python/xen/lowlevel/xc/xc.c Thu Feb 15 14:09:39 2007 -0700 20.3 @@ -966,7 +966,7 @@ static PyObject *pyxc_alloc_real_mode_ar 20.4 return NULL; 20.5 20.6 if ( xc_alloc_real_mode_area(self->xc_handle, dom, log) ) 20.7 - return PyErr_SetFromErrno(xc_error); 20.8 + return pyxc_error_to_exception(); 20.9 20.10 Py_INCREF(zero); 20.11 return zero; 20.12 @@ -980,33 +980,32 @@ static PyObject *pyxc_prose_build(XcObje 20.13 char *image, *ramdisk = NULL, *cmdline = "", *features = NULL; 20.14 int flags = 0; 20.15 int store_evtchn, console_evtchn; 20.16 + unsigned int mem_mb; 20.17 unsigned long store_mfn = 0; 20.18 unsigned long console_mfn = 0; 20.19 - void *arch_args = NULL; 20.20 int unused; 20.21 20.22 static char *kwd_list[] = { "dom", "store_evtchn", 20.23 - "console_evtchn", "image", 20.24 + "console_evtchn", "image", "memsize", 20.25 /* optional */ 20.26 "ramdisk", "cmdline", "flags", 20.27 - "features", "arch_args", NULL }; 20.28 + "features", NULL }; 20.29 20.30 - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiis|ssiss#", kwd_list, 20.31 - &dom, &store_evtchn, 20.32 + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iiiis|ssis#", kwd_list, 20.33 + &dom, &store_evtchn, &mem_mb, 20.34 &console_evtchn, &image, 20.35 /* optional */ 20.36 &ramdisk, &cmdline, &flags, 20.37 - &features, &arch_args, &unused) ) 20.38 + &features, &unused) ) 20.39 return NULL; 20.40 20.41 - if ( xc_prose_build(self->xc_handle, dom, image, 20.42 + if ( xc_prose_build(self->xc_handle, dom, mem_mb, image, 20.43 ramdisk, cmdline, features, flags, 20.44 store_evtchn, &store_mfn, 20.45 - console_evtchn, &console_mfn, 20.46 - arch_args) != 0 ) { 20.47 + console_evtchn, &console_mfn) != 0 ) { 20.48 if (!errno) 20.49 errno = EINVAL; 20.50 - return PyErr_SetFromErrno(xc_error); 20.51 + return pyxc_error_to_exception(); 20.52 } 20.53 return Py_BuildValue("{s:i,s:i}", 20.54 "store_mfn", store_mfn,
21.1 --- a/tools/python/xen/xend/FlatDeviceTree.py Thu Feb 15 13:13:36 2007 -0700 21.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 21.3 @@ -1,359 +0,0 @@ 21.4 -#!/usr/bin/env python 21.5 -# 21.6 -# This library is free software; you can redistribute it and/or 21.7 -# modify it under the terms of version 2.1 of the GNU Lesser General Public 21.8 -# License as published by the Free Software Foundation. 21.9 -# 21.10 -# This library is distributed in the hope that it will be useful, 21.11 -# but WITHOUT ANY WARRANTY; without even the implied warranty of 21.12 -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21.13 -# Lesser General Public License for more details. 21.14 -# 21.15 -# You should have received a copy of the GNU Lesser General Public 21.16 -# License along with this library; if not, write to the Free Software 21.17 -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 21.18 -# 21.19 -# Copyright (C) IBM Corp. 2006 21.20 -# 21.21 -# Authors: Hollis Blanchard <hollisb@us.ibm.com> 21.22 - 21.23 -import os 21.24 -import sys 21.25 -import struct 21.26 -import stat 21.27 -import re 21.28 -import glob 21.29 -import math 21.30 - 21.31 -_host_devtree_root = '/proc/device-tree' 21.32 - 21.33 -_OF_DT_HEADER = int("d00dfeed", 16) # avoid signed/unsigned FutureWarning 21.34 -_OF_DT_BEGIN_NODE = 0x1 21.35 -_OF_DT_END_NODE = 0x2 21.36 -_OF_DT_PROP = 0x3 21.37 -_OF_DT_END = 0x9 21.38 - 21.39 -def _bincat(seq, separator=''): 21.40 - '''Concatenate the contents of seq into a bytestream.''' 21.41 - strs = [] 21.42 - for item in seq: 21.43 - if isinstance(item, int): 21.44 - strs.append(struct.pack(">I", item)) 21.45 - elif isinstance(item, long): 21.46 - strs.append(struct.pack(">Q", item)) 21.47 - else: 21.48 - try: 21.49 - strs.append(item.to_bin()) 21.50 - except AttributeError, e: 21.51 - strs.append(item) 21.52 - return separator.join(strs) 21.53 - 21.54 -def _alignup(val, alignment): 21.55 - return (val + alignment - 1) & ~(alignment - 1) 21.56 - 21.57 -def _pad(buf, alignment): 21.58 - '''Pad bytestream with NULLs to specified alignment.''' 21.59 - padlen = _alignup(len(buf), alignment) 21.60 - return buf + '\0' * (padlen - len(buf)) 21.61 - # not present in Python 2.3: 21.62 - #return buf.ljust(_padlen, '\0') 21.63 - 21.64 -def _indent(item): 21.65 - indented = [] 21.66 - for line in str(item).splitlines(True): 21.67 - indented.append(' ' + line) 21.68 - return ''.join(indented) 21.69 - 21.70 -class _Property: 21.71 - _nonprint = re.compile('[\000-\037\200-\377]') 21.72 - def __init__(self, node, name, value): 21.73 - self.node = node 21.74 - self.value = value 21.75 - self.name = name 21.76 - self.node.tree.stradd(name) 21.77 - 21.78 - def __str__(self): 21.79 - result = self.name 21.80 - if self.value: 21.81 - searchtext = self.value 21.82 - # it's ok for a string to end in NULL 21.83 - if searchtext.find('\000') == len(searchtext)-1: 21.84 - searchtext = searchtext[:-1] 21.85 - m = self._nonprint.search(searchtext) 21.86 - if m: 21.87 - bytes = struct.unpack("B" * len(self.value), self.value) 21.88 - hexbytes = [ '%02x' % b for b in bytes ] 21.89 - words = [] 21.90 - for i in range(0, len(self.value), 4): 21.91 - words.append(''.join(hexbytes[i:i+4])) 21.92 - v = '<' + ' '.join(words) + '>' 21.93 - else: 21.94 - v = '"%s"' % self.value 21.95 - result += ': ' + v 21.96 - return result 21.97 - 21.98 - def to_bin(self): 21.99 - offset = self.node.tree.stroffset(self.name) 21.100 - return struct.pack('>III', _OF_DT_PROP, len(self.value), offset) \ 
21.101 - + _pad(self.value, 4) 21.102 - 21.103 -class _Node: 21.104 - def __init__(self, tree, name): 21.105 - self.tree = tree 21.106 - self.name = name 21.107 - self.props = {} 21.108 - self.children = {} 21.109 - self.phandle = 0 21.110 - 21.111 - def __str__(self): 21.112 - propstrs = [ _indent(prop) for prop in self.props.values() ] 21.113 - childstrs = [ _indent(child) for child in self.children.values() ] 21.114 - return '%s:\n%s\n%s' % (self.name, '\n'.join(propstrs), 21.115 - '\n'.join(childstrs)) 21.116 - 21.117 - def to_bin(self): 21.118 - name = _pad(self.name + '\0', 4) 21.119 - return struct.pack('>I', _OF_DT_BEGIN_NODE) + \ 21.120 - name + \ 21.121 - _bincat(self.props.values()) + \ 21.122 - _bincat(self.children.values()) + \ 21.123 - struct.pack('>I', _OF_DT_END_NODE) 21.124 - 21.125 - def addprop(self, propname, *cells): 21.126 - '''setprop with duplicate error-checking.''' 21.127 - if propname in self.props: 21.128 - raise AttributeError('%s/%s already exists' % (self.name, propname)) 21.129 - self.setprop(propname, *cells) 21.130 - 21.131 - def setprop(self, propname, *cells): 21.132 - self.props[propname] = _Property(self, propname, _bincat(cells)) 21.133 - 21.134 - def addnode(self, nodename): 21.135 - '''newnode with duplicate error-checking.''' 21.136 - if nodename in self.children: 21.137 - raise AttributeError('%s/%s already exists' % (self.name, nodename)) 21.138 - return self.newnode(nodename) 21.139 - 21.140 - def newnode(self, nodename): 21.141 - node = _Node(self.tree, nodename) 21.142 - self.children[nodename] = node 21.143 - return node 21.144 - 21.145 - def getprop(self, propname): 21.146 - return self.props[propname] 21.147 - 21.148 - def getchild(self, nodename): 21.149 - return self.children[nodename] 21.150 - 21.151 - def get_phandle(self): 21.152 - if self.phandle: 21.153 - return self.phandle 21.154 - self.phandle = self.tree.alloc_phandle() 21.155 - self.addprop('linux,phandle', self.phandle) 21.156 - return self.phandle 21.157 - 21.158 -class _Header: 21.159 - def __init__(self): 21.160 - self.magic = 0 21.161 - self.totalsize = 0 21.162 - self.off_dt_struct = 0 21.163 - self.off_dt_strings = 0 21.164 - self.off_mem_rsvmap = 0 21.165 - self.version = 0 21.166 - self.last_comp_version = 0 21.167 - self.boot_cpuid_phys = 0 21.168 - self.size_dt_strings = 0 21.169 - def to_bin(self): 21.170 - return struct.pack('>9I', 21.171 - self.magic, 21.172 - self.totalsize, 21.173 - self.off_dt_struct, 21.174 - self.off_dt_strings, 21.175 - self.off_mem_rsvmap, 21.176 - self.version, 21.177 - self.last_comp_version, 21.178 - self.boot_cpuid_phys, 21.179 - self.size_dt_strings) 21.180 - 21.181 -class _StringBlock: 21.182 - def __init__(self): 21.183 - self.table = [] 21.184 - def to_bin(self): 21.185 - return _bincat(self.table, '\0') + '\0' 21.186 - def add(self, str): 21.187 - self.table.append(str) 21.188 - def getoffset(self, str): 21.189 - return self.to_bin().index(str + '\0') 21.190 - 21.191 -class Tree(_Node): 21.192 - def __init__(self): 21.193 - self.last_phandle = 0 21.194 - self.strings = _StringBlock() 21.195 - self.reserved = [(0, 0)] 21.196 - _Node.__init__(self, self, '\0') 21.197 - 21.198 - def alloc_phandle(self): 21.199 - self.last_phandle += 1 21.200 - return self.last_phandle 21.201 - 21.202 - def stradd(self, str): 21.203 - return self.strings.add(str) 21.204 - 21.205 - def stroffset(self, str): 21.206 - return self.strings.getoffset(str) 21.207 - 21.208 - def reserve(self, start, len): 21.209 - self.reserved.insert(0, (start, len)) 21.210 - 
21.211 - def to_bin(self): 21.212 - # layout: 21.213 - # header 21.214 - # reservation map 21.215 - # string block 21.216 - # data block 21.217 - 21.218 - datablock = _Node.to_bin(self) 21.219 - 21.220 - r = [ struct.pack('>QQ', rsrv[0], rsrv[1]) for rsrv in self.reserved ] 21.221 - reserved = _bincat(r) 21.222 - 21.223 - strblock = _pad(self.strings.to_bin(), 4) 21.224 - strblocklen = len(strblock) 21.225 - 21.226 - header = _Header() 21.227 - header.magic = _OF_DT_HEADER 21.228 - header.off_mem_rsvmap = _alignup(len(header.to_bin()), 8) 21.229 - header.off_dt_strings = header.off_mem_rsvmap + len(reserved) 21.230 - header.off_dt_struct = header.off_dt_strings + strblocklen 21.231 - header.version = 0x10 21.232 - header.last_comp_version = 0x10 21.233 - header.boot_cpuid_phys = 0 21.234 - header.size_dt_strings = strblocklen 21.235 - 21.236 - payload = reserved + \ 21.237 - strblock + \ 21.238 - datablock + \ 21.239 - struct.pack('>I', _OF_DT_END) 21.240 - header.totalsize = len(payload) + _alignup(len(header.to_bin()), 8) 21.241 - return _pad(header.to_bin(), 8) + payload 21.242 - 21.243 -def _readfile(fullpath): 21.244 - '''Return full contents of a file.''' 21.245 - f = file(fullpath, 'r') 21.246 - data = f.read() 21.247 - f.close() 21.248 - return data 21.249 - 21.250 -def _find_first_cpu(dirpath): 21.251 - '''Find the first node of type 'cpu' in a directory tree.''' 21.252 - cpulist = glob.glob(os.path.join(dirpath, 'cpus', '*')) 21.253 - for node in cpulist: 21.254 - try: 21.255 - data = _readfile(os.path.join(node, 'device_type')) 21.256 - except IOError: 21.257 - continue 21.258 - if 'cpu' in data: 21.259 - return node 21.260 - raise IOError("couldn't find any CPU nodes under " + dirpath) 21.261 - 21.262 -def _copynode(node, dirpath, propfilter): 21.263 - '''Copy all properties and children nodes from a directory tree.''' 21.264 - dirents = os.listdir(dirpath) 21.265 - for dirent in dirents: 21.266 - fullpath = os.path.join(dirpath, dirent) 21.267 - st = os.lstat(fullpath) 21.268 - if stat.S_ISDIR(st.st_mode): 21.269 - child = node.addnode(dirent) 21.270 - _copynode(child, fullpath, propfilter) 21.271 - elif stat.S_ISREG(st.st_mode) and propfilter(fullpath): 21.272 - node.addprop(dirent, _readfile(fullpath)) 21.273 - 21.274 -def build(imghandler): 21.275 - '''Construct a device tree by combining the domain's configuration and 21.276 - the host's device tree.''' 21.277 - root = Tree() 21.278 - 21.279 - # 1st reseravtion entry used for start_info, console, store, shared_info 21.280 - root.reserve(0x3ffc000, 0x4000) 21.281 - 21.282 - # 2nd reservation enrty used for initrd, later on when we load the 21.283 - # initrd we may fill this in with zeroes which signifies the end 21.284 - # of the reservation map. So as to avoid adding a zero map now we 21.285 - # put some bogus yet sensible numbers here. 
21.286 - root.reserve(0x1000000, 0x1000) 21.287 - 21.288 - root.addprop('device_type', 'chrp-but-not-really\0') 21.289 - root.addprop('#size-cells', 2) 21.290 - root.addprop('#address-cells', 2) 21.291 - root.addprop('model', 'Momentum,Maple-D\0') 21.292 - root.addprop('compatible', 'Momentum,Maple\0') 21.293 - 21.294 - xen = root.addnode('xen') 21.295 - xen.addprop('start-info', long(0x3ffc000), long(0x1000)) 21.296 - xen.addprop('version', 'Xen-3.0-unstable\0') 21.297 - xen.addprop('reg', long(imghandler.vm.domid), long(0)) 21.298 - xen.addprop('domain-name', imghandler.vm.getName() + '\0') 21.299 - xencons = xen.addnode('console') 21.300 - xencons.addprop('interrupts', 1, 0) 21.301 - 21.302 - # add memory nodes 21.303 - totalmem = imghandler.vm.getMemoryTarget() * 1024 21.304 - rma_log = 26 ### imghandler.vm.info.get('powerpc_rma_log') 21.305 - rma_bytes = 1 << rma_log 21.306 - 21.307 - # RMA node 21.308 - rma = root.addnode('memory@0') 21.309 - rma.addprop('reg', long(0), long(rma_bytes)) 21.310 - rma.addprop('device_type', 'memory\0') 21.311 - 21.312 - # all the rest in a single node 21.313 - remaining = totalmem - rma_bytes 21.314 - if remaining > 0: 21.315 - mem = root.addnode('memory@1') 21.316 - mem.addprop('reg', long(rma_bytes), long(remaining)) 21.317 - mem.addprop('device_type', 'memory\0') 21.318 - 21.319 - # add CPU nodes 21.320 - cpus = root.addnode('cpus') 21.321 - cpus.addprop('smp-enabled') 21.322 - cpus.addprop('#size-cells', 0) 21.323 - cpus.addprop('#address-cells', 1) 21.324 - 21.325 - # Copy all properties the system firmware gave us, except for 'linux,' 21.326 - # properties, from the first CPU node in the device tree. Do this once for 21.327 - # every vcpu. Hopefully all cpus are identical... 21.328 - cpu0 = None 21.329 - cpu0path = _find_first_cpu(_host_devtree_root) 21.330 - def _nolinuxprops(fullpath): 21.331 - return not os.path.basename(fullpath).startswith('linux,') 21.332 - for i in range(imghandler.vm.getVCpuCount()): 21.333 - # create new node and copy all properties 21.334 - cpu = cpus.addnode('PowerPC,970@%d' % i) 21.335 - _copynode(cpu, cpu0path, _nolinuxprops) 21.336 - 21.337 - # overwrite what we need to 21.338 - shadow_mb = imghandler.vm.info.get('shadow_memory', 1) 21.339 - shadow_mb_log = int(math.log(shadow_mb, 2)) 21.340 - pft_size = shadow_mb_log + 20 21.341 - cpu.setprop('ibm,pft-size', 0, pft_size) 21.342 - 21.343 - # set default CPU 21.344 - if cpu0 == None: 21.345 - cpu0 = cpu 21.346 - 21.347 - chosen = root.addnode('chosen') 21.348 - chosen.addprop('cpu', cpu0.get_phandle()) 21.349 - chosen.addprop('memory', rma.get_phandle()) 21.350 - chosen.addprop('linux,stdout-path', '/xen/console\0') 21.351 - chosen.addprop('interrupt-controller', xen.get_phandle()) 21.352 - chosen.addprop('bootargs', imghandler.cmdline + '\0') 21.353 - # xc_linux_load.c will overwrite these 64-bit properties later 21.354 - chosen.addprop('linux,initrd-start', long(0)) 21.355 - chosen.addprop('linux,initrd-end', long(0)) 21.356 - 21.357 - if 1: 21.358 - f = file('/tmp/domU.dtb', 'w') 21.359 - f.write(root.to_bin()) 21.360 - f.close() 21.361 - 21.362 - return root
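The module deleted above serialized an Open Firmware flattened device tree; its _Header.to_bin() packed nine big-endian 32-bit words. The same layout rendered as a C struct, purely for reference: the struct name is illustrative, the field names follow the removed Python class, and every field is stored big-endian in the emitted blob.

#include <stdint.h>

struct fdt_header_sketch {
    uint32_t magic;             /* _OF_DT_HEADER == 0xd00dfeed */
    uint32_t totalsize;         /* header + reserve map + strings + struct */
    uint32_t off_dt_struct;
    uint32_t off_dt_strings;
    uint32_t off_mem_rsvmap;
    uint32_t version;           /* 0x10 in the removed builder */
    uint32_t last_comp_version; /* 0x10 */
    uint32_t boot_cpuid_phys;
    uint32_t size_dt_strings;
};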
22.1 --- a/tools/python/xen/xend/image.py Thu Feb 15 13:13:36 2007 -0700 22.2 +++ b/tools/python/xen/xend/image.py Thu Feb 15 14:09:39 2007 -0700 22.3 @@ -30,7 +30,6 @@ from xen.xend.XendOptions import instanc 22.4 from xen.xend.server.netif import randomMAC 22.5 from xen.xend.xenstore.xswatch import xswatch 22.6 from xen.xend import arch 22.7 -from xen.xend import FlatDeviceTree 22.8 22.9 xc = xen.lowlevel.xc.xc() 22.10 22.11 @@ -219,39 +218,7 @@ class LinuxImageHandler(ImageHandler): 22.12 class PPC_LinuxImageHandler(LinuxImageHandler): 22.13 22.14 ostype = "linux" 22.15 - 22.16 - def configure(self, vmConfig, imageConfig, deviceConfig): 22.17 - LinuxImageHandler.configure(self, vmConfig, imageConfig, deviceConfig) 22.18 - self.imageConfig = imageConfig 22.19 - 22.20 - def buildDomain(self): 22.21 - store_evtchn = self.vm.getStorePort() 22.22 - console_evtchn = self.vm.getConsolePort() 22.23 - 22.24 - mem_mb = self.getRequiredInitialReservation() / 1024 22.25 - 22.26 - log.debug("domid = %d", self.vm.getDomid()) 22.27 - log.debug("memsize = %d", mem_mb) 22.28 - log.debug("image = %s", self.kernel) 22.29 - log.debug("store_evtchn = %d", store_evtchn) 22.30 - log.debug("console_evtchn = %d", console_evtchn) 22.31 - log.debug("cmdline = %s", self.cmdline) 22.32 - log.debug("ramdisk = %s", self.ramdisk) 22.33 - log.debug("vcpus = %d", self.vm.getVCpuCount()) 22.34 - log.debug("features = %s", self.vm.getFeatures()) 22.35 - 22.36 - devtree = FlatDeviceTree.build(self) 22.37 - 22.38 - return xc.linux_build(domid = self.vm.getDomid(), 22.39 - memsize = mem_mb, 22.40 - image = self.kernel, 22.41 - store_evtchn = store_evtchn, 22.42 - console_evtchn = console_evtchn, 22.43 - cmdline = self.cmdline, 22.44 - ramdisk = self.ramdisk, 22.45 - features = self.vm.getFeatures(), 22.46 - arch_args = devtree.to_bin()) 22.47 - 22.48 + 22.49 def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): 22.50 """@param shadow_mem_kb The configured shadow memory, in KiB. 22.51 @param maxmem_kb The configured maxmem, in KiB. 22.52 @@ -261,14 +228,11 @@ class PPC_LinuxImageHandler(LinuxImageHa 22.53 return max(maxmem_kb / 64, shadow_mem_kb) 22.54 22.55 22.56 -class PPC_ProseImageHandler(LinuxImageHandler): 22.57 + 22.58 +class PPC_ProseImageHandler(PPC_LinuxImageHandler): 22.59 22.60 ostype = "prose" 22.61 22.62 - def configure(self, imageConfig, deviceConfig): 22.63 - LinuxImageHandler.configure(self, imageConfig, deviceConfig) 22.64 - self.imageConfig = imageConfig 22.65 - 22.66 def buildDomain(self): 22.67 store_evtchn = self.vm.getStorePort() 22.68 console_evtchn = self.vm.getConsolePort() 22.69 @@ -285,8 +249,6 @@ class PPC_ProseImageHandler(LinuxImageHa 22.70 log.debug("vcpus = %d", self.vm.getVCpuCount()) 22.71 log.debug("features = %s", self.vm.getFeatures()) 22.72 22.73 - devtree = FlatDeviceTree.build(self) 22.74 - 22.75 return xc.arch_prose_build(dom = self.vm.getDomid(), 22.76 memsize = mem_mb, 22.77 image = self.kernel, 22.78 @@ -294,17 +256,7 @@ class PPC_ProseImageHandler(LinuxImageHa 22.79 console_evtchn = console_evtchn, 22.80 cmdline = self.cmdline, 22.81 ramdisk = self.ramdisk, 22.82 - features = self.vm.getFeatures(), 22.83 - arch_args = devtree.to_bin()) 22.84 - 22.85 - def getRequiredShadowMemory(self, shadow_mem_kb, maxmem_kb): 22.86 - """@param shadow_mem_kb The configured shadow memory, in KiB. 22.87 - @param maxmem_kb The configured maxmem, in KiB. 22.88 - @return The corresponding required amount of shadow memory, also in 22.89 - KiB. 
22.90 - PowerPC currently uses "shadow memory" to refer to the hash table.""" 22.91 - return max(maxmem_kb / 64, shadow_mem_kb) 22.92 - 22.93 + features = self.vm.getFeatures()) 22.94 22.95 class HVMImageHandler(ImageHandler): 22.96
23.1 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c Thu Feb 15 13:13:36 2007 -0700 23.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-compat.c Thu Feb 15 14:09:39 2007 -0700 23.3 @@ -13,6 +13,12 @@ static int system_state = 1; 23.4 EXPORT_SYMBOL(system_state); 23.5 #endif 23.6 23.7 +static inline void ctrl_alt_del(void) 23.8 +{ 23.9 + kill_proc(1, SIGINT, 1); /* interrupt init */ 23.10 +} 23.11 +EXPORT_SYMBOL(ctrl_alt_del); 23.12 + 23.13 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) 23.14 size_t strcspn(const char *s, const char *reject) 23.15 {
24.1 --- a/xen/arch/powerpc/Makefile Thu Feb 15 13:13:36 2007 -0700 24.2 +++ b/xen/arch/powerpc/Makefile Thu Feb 15 14:09:39 2007 -0700 24.3 @@ -51,8 +51,6 @@ obj-$(builtin_dom0) += dom0.o 24.4 24.5 obj-y += firmware_image.o 24.6 24.7 -obj-y += elf32.o 24.8 - 24.9 # These are extra warnings like for the arch/ppc directory but may not 24.10 # allow the rest of the tree to build. 24.11 PPC_C_WARNINGS += -Wundef -Wmissing-prototypes -Wmissing-declarations 24.12 @@ -64,7 +62,7 @@ CFLAGS += $(PPC_C_WARNINGS) 24.13 # objects into a single ELF segment and to not link in any additional 24.14 # objects that gcc would normally like to 24.15 # 24.16 -OMAGIC = -N -nodefaultlibs -nostartfiles 24.17 +OMAGIC = -nodefaultlibs -nostartfiles 24.18 24.19 firmware: of_handler/built_in.o $(TARGET_SUBARCH)/memcpy.o of-devtree.o 24.20 $(CC) $(CFLAGS) $(OMAGIC) -e __ofh_start -Wl,-Ttext,0x0 $^ -o $@
25.1 --- a/xen/arch/powerpc/backtrace.c Thu Feb 15 13:13:36 2007 -0700 25.2 +++ b/xen/arch/powerpc/backtrace.c Thu Feb 15 14:09:39 2007 -0700 25.3 @@ -198,7 +198,6 @@ void show_backtrace_regs(struct cpu_user 25.4 console_start_sync(); 25.5 25.6 show_registers(regs); 25.7 - printk("dar 0x%016lx, dsisr 0x%08x\n", mfdar(), mfdsisr()); 25.8 printk("hid4 0x%016lx\n", regs->hid4); 25.9 printk("---[ backtrace ]---\n"); 25.10 show_backtrace(regs->gprs[1], regs->lr, regs->pc);
26.1 --- a/xen/arch/powerpc/boot_of.c Thu Feb 15 13:13:36 2007 -0700 26.2 +++ b/xen/arch/powerpc/boot_of.c Thu Feb 15 14:09:39 2007 -0700 26.3 @@ -13,7 +13,7 @@ 26.4 * along with this program; if not, write to the Free Software 26.5 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 26.6 * 26.7 - * Copyright (C) IBM Corp. 2005, 2006 26.8 + * Copyright IBM Corp. 2005, 2006, 2007 26.9 * 26.10 * Authors: Jimi Xenidis <jimix@watson.ibm.com> 26.11 * Hollis Blanchard <hollisb@us.ibm.com> 26.12 @@ -43,6 +43,14 @@ static ulong of_msr; 26.13 static int of_out; 26.14 static ulong eomem; 26.15 26.16 +/* Track memory during early boot with a limited per-page bitmap. We need an 26.17 + * allocator to tell us where we can place RTAS, our copy of the device tree. 26.18 + * We could examine the "available" properties in memory nodes, but we 26.19 + * apparently can't depend on firmware to update those when we call "claim". So 26.20 + * we need to track it ourselves. 26.21 + * We can't dynamically allocate the bitmap, because we would need something 26.22 + * to tell us where it's safe to allocate... 26.23 + */ 26.24 #define MEM_AVAILABLE_PAGES ((32 << 20) >> PAGE_SHIFT) 26.25 static DECLARE_BITMAP(mem_available_pages, MEM_AVAILABLE_PAGES); 26.26 26.27 @@ -532,6 +540,37 @@ static ulong boot_of_alloc(ulong size) 26.28 } 26.29 } 26.30 26.31 +int boot_of_mem_avail(int pos, ulong *startpage, ulong *endpage) 26.32 +{ 26.33 + ulong freebit; 26.34 + ulong usedbit; 26.35 + 26.36 + if (pos >= MEM_AVAILABLE_PAGES) 26.37 + /* Stop iterating. */ 26.38 + return -1; 26.39 + 26.40 + /* Find first free page. */ 26.41 + freebit = find_next_zero_bit(mem_available_pages, MEM_AVAILABLE_PAGES, pos); 26.42 + if (freebit >= MEM_AVAILABLE_PAGES) { 26.43 + /* We know everything after MEM_AVAILABLE_PAGES is still free. */ 26.44 + *startpage = MEM_AVAILABLE_PAGES << PAGE_SHIFT; 26.45 + *endpage = ~0UL; 26.46 + return freebit; 26.47 + } 26.48 + *startpage = freebit << PAGE_SHIFT; 26.49 + 26.50 + /* Now find first used page after that. */ 26.51 + usedbit = find_next_bit(mem_available_pages, MEM_AVAILABLE_PAGES, freebit); 26.52 + if (usedbit >= MEM_AVAILABLE_PAGES) { 26.53 + /* We know everything after MEM_AVAILABLE_PAGES is still free. */ 26.54 + *endpage = ~0UL; 26.55 + return usedbit; 26.56 + } 26.57 + 26.58 + *endpage = usedbit << PAGE_SHIFT; 26.59 + return usedbit; 26.60 +} 26.61 + 26.62 static ulong boot_of_mem_init(void) 26.63 { 26.64 int root; 26.65 @@ -1302,7 +1341,7 @@ multiboot_info_t __init *boot_of_init( 26.66 __func__, 26.67 r3, r4, vec, r6, r7, orig_msr); 26.68 26.69 - if ((vec >= (ulong)_start) && (vec <= (ulong)_end)) { 26.70 + if (is_kernel(vec)) { 26.71 of_panic("Hmm.. OF[0x%lx] seems to have stepped on our image " 26.72 "that ranges: %p .. %p.\n", 26.73 vec, _start, _end);
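boot_of_mem_avail(), added above, is a resumable iterator over the early-boot availability bitmap: call it with the position returned by the previous call, it fills in the next free [start, end) range, and it returns -1 once the bitmap is exhausted (the memory.c hunk later in this changeset drives it exactly that way). A self-contained sketch of the same contract against a toy bitmap; the sizes and names are illustrative, not the hypervisor's.

#include <stdio.h>

#define TOY_PAGES      16
#define TOY_PAGE_SHIFT 12

/* 1 = page already claimed, 0 = free. */
static const int used[TOY_PAGES] = { 1, 1, 0, 0, 0, 1, 0, 0,
                                     1, 1, 1, 0, 0, 0, 0, 1 };

/* Fill in the next free [start, end) byte range and return the position to
 * resume from, or -1 when done -- the same shape as boot_of_mem_avail(). */
static int toy_mem_avail(int pos, unsigned long *start, unsigned long *end)
{
    int i = pos;

    while (i < TOY_PAGES && used[i])
        i++;
    if (i >= TOY_PAGES)
        return -1;
    *start = (unsigned long)i << TOY_PAGE_SHIFT;
    while (i < TOY_PAGES && !used[i])
        i++;
    *end = (unsigned long)i << TOY_PAGE_SHIFT;
    return i;
}

int main(void)
{
    unsigned long start, end;
    int pos = toy_mem_avail(0, &start, &end);

    while (pos >= 0) {                 /* same driving loop as memory.c */
        printf("free: %#lx - %#lx\n", start, end);
        pos = toy_mem_avail(pos, &start, &end);
    }
    return 0;
}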
27.1 --- a/xen/arch/powerpc/domain.c Thu Feb 15 13:13:36 2007 -0700 27.2 +++ b/xen/arch/powerpc/domain.c Thu Feb 15 14:09:39 2007 -0700 27.3 @@ -13,7 +13,7 @@ 27.4 * along with this program; if not, write to the Free Software 27.5 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 27.6 * 27.7 - * Copyright (C) IBM Corp. 2005, 2006 27.8 + * Copyright IBM Corp. 2005, 2006, 2007 27.9 * 27.10 * Authors: Jimi Xenidis <jimix@watson.ibm.com> 27.11 */ 27.12 @@ -105,13 +105,13 @@ void arch_domain_destroy(struct domain * 27.13 27.14 static void machine_fail(const char *s) 27.15 { 27.16 - printk("%s failed, manual powercycle required!\n", s); 27.17 + printk("%s failed, manual powercycle required!\n" 27.18 + " spinning....\n", s); 27.19 for (;;) 27.20 sleep(); 27.21 } 27.22 void machine_halt(void) 27.23 { 27.24 - printk("machine_halt called: spinning....\n"); 27.25 console_start_sync(); 27.26 printk("%s called\n", __func__); 27.27 rtas_halt(); 27.28 @@ -121,7 +121,6 @@ void machine_halt(void) 27.29 27.30 void machine_restart(char * __unused) 27.31 { 27.32 - printk("machine_restart called: spinning....\n"); 27.33 console_start_sync(); 27.34 printk("%s called\n", __func__); 27.35 rtas_reboot(); 27.36 @@ -152,17 +151,20 @@ void vcpu_destroy(struct vcpu *v) 27.37 27.38 int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_u c) 27.39 { 27.40 + struct domain *d = v->domain; 27.41 + 27.42 memcpy(&v->arch.ctxt, &c.nat->user_regs, sizeof(c.nat->user_regs)); 27.43 27.44 - printk("Domain[%d].%d: initializing\n", 27.45 - v->domain->domain_id, v->vcpu_id); 27.46 + printk("Domain[%d].%d: initializing\n", d->domain_id, v->vcpu_id); 27.47 27.48 - if (v->domain->arch.htab.order == 0) 27.49 - panic("Page table never allocated for Domain: %d\n", 27.50 - v->domain->domain_id); 27.51 - if (v->domain->arch.rma_order == 0) 27.52 - panic("RMA never allocated for Domain: %d\n", 27.53 - v->domain->domain_id); 27.54 + if (d->arch.htab.order == 0) 27.55 + panic("Page table never allocated for Domain: %d\n", d->domain_id); 27.56 + if (d->arch.rma_order == 0) 27.57 + panic("RMA never allocated for Domain: %d\n", d->domain_id); 27.58 + 27.59 + d->shared_info->wc_sec = dom0->shared_info->wc_sec; 27.60 + d->shared_info->wc_nsec = dom0->shared_info->wc_nsec; 27.61 + d->shared_info->arch.boot_timebase = dom0->shared_info->arch.boot_timebase; 27.62 27.63 set_bit(_VCPUF_initialised, &v->vcpu_flags); 27.64 27.65 @@ -171,6 +173,13 @@ int arch_set_info_guest(struct vcpu *v, 27.66 return 0; 27.67 } 27.68 27.69 +int arch_vcpu_reset(struct vcpu *v) 27.70 +{ 27.71 + panic("%s: called for Dom%d[%d]\n", 27.72 + __func__, v->domain->domain_id, v->vcpu_id); 27.73 + return 0; 27.74 +} 27.75 + 27.76 void dump_pageframe_info(struct domain *d) 27.77 { 27.78 struct page_info *page;
28.1 --- a/xen/arch/powerpc/domain_build.c Thu Feb 15 13:13:36 2007 -0700 28.2 +++ b/xen/arch/powerpc/domain_build.c Thu Feb 15 14:09:39 2007 -0700 28.3 @@ -20,20 +20,19 @@ 28.4 28.5 #include <xen/config.h> 28.6 #include <xen/lib.h> 28.7 -#include <xen/elf.h> 28.8 #include <xen/sched.h> 28.9 #include <xen/init.h> 28.10 #include <xen/ctype.h> 28.11 #include <xen/iocap.h> 28.12 #include <xen/shadow.h> 28.13 +#include <xen/domain.h> 28.14 #include <xen/version.h> 28.15 #include <asm/processor.h> 28.16 #include <asm/papr.h> 28.17 +#include <public/arch-powerpc.h> 28.18 +#include <public/libelf.h> 28.19 #include "oftree.h" 28.20 28.21 -extern int parseelfimage_32(struct domain_setup_info *dsi); 28.22 -extern int loadelfimage_32(struct domain_setup_info *dsi); 28.23 - 28.24 /* opt_dom0_mem: memory allocated to domain 0. */ 28.25 static unsigned int dom0_nrpages; 28.26 static void parse_dom0_mem(char *s) 28.27 @@ -51,63 +50,18 @@ integer_param("dom0_max_vcpus", opt_dom0 28.28 static unsigned int opt_dom0_shadow; 28.29 boolean_param("dom0_shadow", opt_dom0_shadow); 28.30 28.31 -int elf_sanity_check(const Elf_Ehdr *ehdr) 28.32 -{ 28.33 - if (IS_ELF(*ehdr)) 28.34 - /* we are happy with either */ 28.35 - if ((ehdr->e_ident[EI_CLASS] == ELFCLASS32 28.36 - && ehdr->e_machine == EM_PPC) 28.37 - || (ehdr->e_ident[EI_CLASS] == ELFCLASS64 28.38 - && ehdr->e_machine == EM_PPC64)) { 28.39 - if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB 28.40 - && ehdr->e_type == ET_EXEC) 28.41 - return 1; 28.42 - } 28.43 - printk("DOM0 image is not a Xen-compatible Elf image.\n"); 28.44 - return 0; 28.45 -} 28.46 - 28.47 /* adapted from common/elf.c */ 28.48 #define RM_MASK(a,l) ((a) & ((1UL << (l)) - 1)) 28.49 28.50 -static int rm_loadelfimage_64(struct domain_setup_info *dsi, ulong rma) 28.51 -{ 28.52 - char *elfbase = (char *)dsi->image_addr; 28.53 - Elf64_Ehdr *ehdr = (Elf64_Ehdr *)dsi->image_addr; 28.54 - Elf64_Phdr *phdr; 28.55 - int h; 28.56 - 28.57 - for (h = 0; h < ehdr->e_phnum; h++ ) 28.58 - { 28.59 - phdr = (Elf64_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize)); 28.60 - if (!((phdr->p_type == PT_LOAD) && 28.61 - ((phdr->p_flags & (PF_W|PF_X)) != 0))) 28.62 - continue; 28.63 - 28.64 - if (phdr->p_filesz != 0) 28.65 - memcpy((char *)(rma + RM_MASK(phdr->p_paddr, 42)), 28.66 - elfbase + phdr->p_offset, 28.67 - phdr->p_filesz); 28.68 - if (phdr->p_memsz > phdr->p_filesz) 28.69 - memset((char *)(rma + RM_MASK(phdr->p_paddr, 42) + phdr->p_filesz), 28.70 - 0, phdr->p_memsz - phdr->p_filesz); 28.71 - } 28.72 - 28.73 -#ifdef NOT_YET 28.74 - loadelfsymtab(dsi, 1); 28.75 -#endif 28.76 - 28.77 - return 0; 28.78 -} 28.79 - 28.80 int construct_dom0(struct domain *d, 28.81 unsigned long image_start, unsigned long image_len, 28.82 unsigned long initrd_start, unsigned long initrd_len, 28.83 char *cmdline) 28.84 { 28.85 + struct elf_binary elf; 28.86 + struct elf_dom_parms parms; 28.87 int rc; 28.88 struct vcpu *v = d->vcpu[0]; 28.89 - struct domain_setup_info dsi; 28.90 ulong dst; 28.91 u64 *ofh_tree; 28.92 uint rma_nrpages = 1 << d->arch.rma_order; 28.93 @@ -115,11 +69,8 @@ int construct_dom0(struct domain *d, 28.94 ulong rma = page_to_maddr(d->arch.rma_page); 28.95 start_info_t *si; 28.96 ulong eomem; 28.97 - int am64 = 1; 28.98 int preempt = 0; 28.99 - ulong msr; 28.100 - ulong pc; 28.101 - ulong r2; 28.102 + int vcpu; 28.103 28.104 /* Sanity! 
*/ 28.105 BUG_ON(d->domain_id != 0); 28.106 @@ -130,26 +81,27 @@ int construct_dom0(struct domain *d, 28.107 28.108 cpu_init_vcpu(v); 28.109 28.110 - memset(&dsi, 0, sizeof(struct domain_setup_info)); 28.111 - dsi.image_addr = image_start; 28.112 - dsi.image_len = image_len; 28.113 + printk("*** LOADING DOMAIN 0 ***\n"); 28.114 28.115 - printk("Trying Dom0 as 64bit ELF\n"); 28.116 - if ((rc = parseelfimage(&dsi)) != 0) { 28.117 - printk("Trying Dom0 as 32bit ELF\n"); 28.118 - if ((rc = parseelfimage_32(&dsi)) != 0) 28.119 - return rc; 28.120 - am64 = 0; 28.121 - } 28.122 + rc = elf_init(&elf, (void *)image_start, image_len); 28.123 + if (rc) 28.124 + return rc; 28.125 +#ifdef VERBOSE 28.126 + elf_set_verbose(&elf); 28.127 +#endif 28.128 + elf_parse_binary(&elf); 28.129 + if (0 != (elf_xen_parse(&elf, &parms))) 28.130 + return rc; 28.131 + 28.132 + printk("Dom0 kernel: %s, paddr 0x%" PRIx64 " -> 0x%" PRIx64 "\n", 28.133 + elf_64bit(&elf) ? "64-bit" : "32-bit", 28.134 + elf.pstart, elf.pend); 28.135 28.136 /* elf contains virtual addresses that can have the upper bits 28.137 * masked while running in real mode, so we do the masking as well 28.138 * as well */ 28.139 - dsi.v_kernstart = RM_MASK(dsi.v_kernstart, 42); 28.140 - dsi.v_kernend = RM_MASK(dsi.v_kernend, 42); 28.141 - dsi.v_kernentry = RM_MASK(dsi.v_kernentry, 42); 28.142 - 28.143 - printk("*** LOADING DOMAIN 0 ***\n"); 28.144 + parms.virt_kend = RM_MASK(parms.virt_kend, 42); 28.145 + parms.virt_entry = RM_MASK(parms.virt_entry, 42); 28.146 28.147 /* By default DOM0 is allocated all available memory. */ 28.148 d->max_pages = ~0U; 28.149 @@ -210,6 +162,27 @@ int construct_dom0(struct domain *d, 28.150 /* put stack below everything */ 28.151 v->arch.ctxt.gprs[1] = dst - STACK_FRAME_OVERHEAD; 28.152 28.153 + /* startup secondary processors */ 28.154 + if ( opt_dom0_max_vcpus == 0 ) 28.155 + opt_dom0_max_vcpus = num_online_cpus(); 28.156 + if ( opt_dom0_max_vcpus > num_online_cpus() ) 28.157 + opt_dom0_max_vcpus = num_online_cpus(); 28.158 + if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS ) 28.159 + opt_dom0_max_vcpus = MAX_VIRT_CPUS; 28.160 +#ifdef BITS_PER_GUEST_LONG 28.161 + if ( opt_dom0_max_vcpus > BITS_PER_GUEST_LONG(d) ) 28.162 + opt_dom0_max_vcpus = BITS_PER_GUEST_LONG(d); 28.163 +#endif 28.164 + printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus); 28.165 + 28.166 + for (vcpu = 1; vcpu < opt_dom0_max_vcpus; vcpu++) { 28.167 + if (NULL == alloc_vcpu(dom0, vcpu, vcpu)) 28.168 + panic("Error creating domain 0 vcpu %d\n", vcpu); 28.169 + /* for now we pin Dom0 VCPUs to their coresponding CPUs */ 28.170 + if (cpu_isset(vcpu, cpu_online_map)) 28.171 + dom0->vcpu[vcpu]->cpu_affinity = cpumask_of_cpu(vcpu); 28.172 + } 28.173 + 28.174 /* copy relative to Xen */ 28.175 dst += rma; 28.176 28.177 @@ -229,75 +202,56 @@ int construct_dom0(struct domain *d, 28.178 printk("loading OFD: 0x%lx RMA: 0x%lx, 0x%lx\n", dst, dst - rma, 28.179 oftree_len); 28.180 memcpy((void *)dst, (void *)oftree, oftree_len); 28.181 - 28.182 dst = ALIGN_UP(dst + oftree_len, PAGE_SIZE); 28.183 28.184 - if (am64) { 28.185 - ulong kbase; 28.186 - ulong *fdesc; 28.187 - 28.188 - printk("loading 64-bit Dom0: 0x%lx, in RMA:0x%lx\n", dst, dst - rma); 28.189 - rm_loadelfimage_64(&dsi, dst); 28.190 + /* Load the dom0 kernel. 
*/ 28.191 + elf.dest = (void *)dst; 28.192 + elf_load_binary(&elf); 28.193 + v->arch.ctxt.pc = dst - rma; 28.194 + dst = ALIGN_UP(dst + parms.virt_kend, PAGE_SIZE); 28.195 28.196 - kbase = dst; 28.197 - /* move dst to end of bss */ 28.198 - dst = ALIGN_UP(dsi.v_kernend + dst, PAGE_SIZE); 28.199 - 28.200 - if ( initrd_len > 0 ) { 28.201 - ASSERT( (dst - rma) + image_len < eomem ); 28.202 - 28.203 - printk("loading initrd: 0x%lx, 0x%lx\n", dst, initrd_len); 28.204 - memcpy((void *)dst, (void *)initrd_start, initrd_len); 28.205 - 28.206 - si->mod_start = dst - rma; 28.207 - si->mod_len = image_len; 28.208 + /* Load the initrd. */ 28.209 + if (initrd_len > 0) { 28.210 + ASSERT((dst - rma) + image_len < eomem); 28.211 28.212 - dst = ALIGN_UP(dst + initrd_len, PAGE_SIZE); 28.213 - } else { 28.214 - printk("no initrd\n"); 28.215 - si->mod_start = 0; 28.216 - si->mod_len = 0; 28.217 - } 28.218 - /* it may be a function descriptor */ 28.219 - fdesc = (ulong *)(dsi.v_kernstart + dsi.v_kernentry + kbase); 28.220 + printk("loading initrd: 0x%lx, 0x%lx\n", dst, initrd_len); 28.221 + memcpy((void *)dst, (void *)initrd_start, initrd_len); 28.222 + 28.223 + si->mod_start = dst - rma; 28.224 + si->mod_len = image_len; 28.225 28.226 - if (fdesc[2] == 0 28.227 - && ((fdesc[0] >= dsi.v_kernstart) 28.228 - && (fdesc[0] < dsi.v_kernend)) /* text entry is in range */ 28.229 - && ((fdesc[1] >= dsi.v_kernstart) /* toc can be > image */ 28.230 - && (fdesc[1] < (dsi.v_kernend + (0x7fff * sizeof (ulong)))))) { 28.231 - /* it is almost certainly a function descriptor */ 28.232 - pc = RM_MASK(fdesc[0], 42) + kbase - rma; 28.233 - r2 = RM_MASK(fdesc[1], 42) + kbase - rma; 28.234 - } else { 28.235 - pc = ((ulong)fdesc) - rma; 28.236 - r2 = 0; 28.237 - } 28.238 - msr = MSR_SF; 28.239 + dst = ALIGN_UP(dst + initrd_len, PAGE_SIZE); 28.240 } else { 28.241 - printk("loading 32-bit Dom0: 0x%lx, in RMA:0x%lx\n", 28.242 - dsi.v_kernstart + rma, dsi.v_kernstart); 28.243 - dsi.v_start = rma; 28.244 - loadelfimage_32(&dsi); 28.245 - 28.246 - pc = dsi.v_kernentry; 28.247 - r2 = 0; 28.248 - msr = 0; 28.249 + printk("no initrd\n"); 28.250 + si->mod_start = 0; 28.251 + si->mod_len = 0; 28.252 } 28.253 28.254 + if (elf_64bit(&elf)) { 28.255 + v->arch.ctxt.msr = MSR_SF; 28.256 + } else { 28.257 + v->arch.ctxt.msr = 0; 28.258 + } 28.259 + v->arch.ctxt.gprs[2] = 0; 28.260 v->arch.ctxt.gprs[3] = si->mod_start; 28.261 v->arch.ctxt.gprs[4] = si->mod_len; 28.262 28.263 + printk("dom0 initial register state:\n" 28.264 + " pc %016lx msr %016lx\n" 28.265 + " r1 %016lx r2 %016lx r3 %016lx\n" 28.266 + " r4 %016lx r5 %016lx\n", 28.267 + v->arch.ctxt.pc, 28.268 + v->arch.ctxt.msr, 28.269 + v->arch.ctxt.gprs[1], 28.270 + v->arch.ctxt.gprs[2], 28.271 + v->arch.ctxt.gprs[3], 28.272 + v->arch.ctxt.gprs[4], 28.273 + v->arch.ctxt.gprs[5]); 28.274 + 28.275 memset(si->cmd_line, 0, sizeof(si->cmd_line)); 28.276 if ( cmdline != NULL ) 28.277 strlcpy((char *)si->cmd_line, cmdline, sizeof(si->cmd_line)); 28.278 28.279 - v->arch.ctxt.msr = msr; 28.280 - v->arch.ctxt.pc = pc; 28.281 - v->arch.ctxt.gprs[2] = r2; 28.282 - 28.283 - printk("DOM: pc = 0x%lx, r2 = 0x%lx\n", pc, r2); 28.284 - 28.285 ofd_dom0_fixup(d, *ofh_tree + rma, si); 28.286 28.287 set_bit(_VCPUF_initialised, &v->vcpu_flags);
29.1 --- a/xen/arch/powerpc/elf32.c Thu Feb 15 13:13:36 2007 -0700 29.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 29.3 @@ -1,7 +0,0 @@ 29.4 -#define parseelfimage parseelfimage_32 29.5 -#define loadelfimage loadelfimage_32 29.6 -#define xen_elfnote_string xen_elfnote_string32 29.7 -#define xen_elfnote_numeric xen_elfnote_numeric32 29.8 -#define ELFSIZE 32 29.9 -#include "../../common/elf.c" 29.10 -
30.1 --- a/xen/arch/powerpc/hcalls.c Thu Feb 15 13:13:36 2007 -0700 30.2 +++ b/xen/arch/powerpc/hcalls.c Thu Feb 15 14:09:39 2007 -0700 30.3 @@ -13,7 +13,7 @@ 30.4 * along with this program; if not, write to the Free Software 30.5 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 30.6 * 30.7 - * Copyright (C) IBM Corp. 2005 30.8 + * Copyright IBM Corp. 2005, 2006, 2007 30.9 * 30.10 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 30.11 */ 30.12 @@ -130,7 +130,7 @@ static void register_papr_hcall(ulong nu 30.13 30.14 static void init_papr_hcalls(void) 30.15 { 30.16 - inithcall_t *hcall; 30.17 + init_hcall_t *hcall; 30.18 int i; 30.19 30.20 /* initialize PAPR hcall table */ 30.21 @@ -140,7 +140,7 @@ static void init_papr_hcalls(void) 30.22 register_papr_hcall(i, do_ni_papr_hypercall); 30.23 30.24 /* register the PAPR hcalls */ 30.25 - for (hcall = &__inithcall_start; hcall < &__inithcall_end; hcall++) { 30.26 + for (hcall = &__init_hcall_start; hcall < &__init_hcall_end; hcall++) { 30.27 register_papr_hcall(hcall->number, hcall->handler); 30.28 } 30.29 }
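The loop in init_papr_hcalls() walks a table bounded by the linker symbols __init_hcall_start and __init_hcall_end, whose section is laid out by the xen.lds.S hunk further down. A minimal sketch of that linker-delimited-array pattern; a plain array stands in for the section bounds so the walk can actually run, and all names are illustrative.

#include <stdio.h>

typedef struct {
    unsigned long number;
    void (*handler)(void);
} init_entry_t;

static void handler_a(void) { }
static void handler_b(void) { }

/* With a real linker script these bounds would be section start/end symbols;
 * a plain array is used here so the example is self-contained. */
static init_entry_t table[] = {
    { 0x100, handler_a },
    { 0x104, handler_b },
};
static init_entry_t *const table_start = &table[0];
static init_entry_t *const table_end   = &table[2];

int main(void)
{
    init_entry_t *e;

    for (e = table_start; e < table_end; e++)   /* same loop shape as hcalls.c */
        printf("registering hcall 0x%lx\n", e->number);
    return 0;
}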
31.1 --- a/xen/arch/powerpc/memory.c Thu Feb 15 13:13:36 2007 -0700 31.2 +++ b/xen/arch/powerpc/memory.c Thu Feb 15 14:09:39 2007 -0700 31.3 @@ -13,7 +13,7 @@ 31.4 * along with this program; if not, write to the Free Software 31.5 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 31.6 * 31.7 - * Copyright (C) IBM Corp. 2006 31.8 + * Copyright IBM Corp. 2006, 2007 31.9 * 31.10 * Authors: Dan Poff <poff@us.ibm.com> 31.11 * Jimi Xenidis <jimix@watson.ibm.com> 31.12 @@ -25,7 +25,7 @@ 31.13 #include "oftree.h" 31.14 #include "rtas.h" 31.15 31.16 -#undef DEBUG 31.17 +#define DEBUG 31.18 #ifdef DEBUG 31.19 #define DBG(fmt...) printk(fmt) 31.20 #else 31.21 @@ -42,8 +42,6 @@ integer_param("xenheap_megabytes", opt_x 31.22 unsigned long xenheap_phys_end; 31.23 static uint nr_pages; 31.24 static ulong xenheap_size; 31.25 -static ulong save_start; 31.26 -static ulong save_end; 31.27 31.28 struct membuf { 31.29 ulong start; 31.30 @@ -52,30 +50,6 @@ struct membuf { 31.31 31.32 typedef void (*walk_mem_fn)(struct membuf *, uint); 31.33 31.34 -static ulong free_xenheap(ulong start, ulong end) 31.35 -{ 31.36 - start = ALIGN_UP(start, PAGE_SIZE); 31.37 - end = ALIGN_DOWN(end, PAGE_SIZE); 31.38 - 31.39 - DBG("%s: 0x%lx - 0x%lx\n", __func__, start, end); 31.40 - 31.41 - /* need to do this better */ 31.42 - if (save_start <= end && save_start >= start) { 31.43 - DBG("%s: Go around the saved area: 0x%lx - 0x%lx\n", 31.44 - __func__, save_start, save_end); 31.45 - init_xenheap_pages(start, ALIGN_DOWN(save_start, PAGE_SIZE)); 31.46 - xenheap_size += ALIGN_DOWN(save_start, PAGE_SIZE) - start; 31.47 - 31.48 - init_xenheap_pages(ALIGN_UP(save_end, PAGE_SIZE), end); 31.49 - xenheap_size += end - ALIGN_UP(save_end, PAGE_SIZE); 31.50 - } else { 31.51 - init_xenheap_pages(start, end); 31.52 - xenheap_size += end - start; 31.53 - } 31.54 - 31.55 - return ALIGN_UP(end, PAGE_SIZE); 31.56 -} 31.57 - 31.58 static void set_max_page(struct membuf *mb, uint entries) 31.59 { 31.60 int i; 31.61 @@ -113,6 +87,7 @@ static void heap_init(struct membuf *mb, 31.62 start_blk = xenheap_phys_end; 31.63 } 31.64 31.65 + DBG("boot free: %016lx - %016lx\n", start_blk, end_blk); 31.66 init_boot_pages(start_blk, end_blk); 31.67 total_pages += (end_blk - start_blk) >> PAGE_SHIFT; 31.68 } 31.69 @@ -141,72 +116,31 @@ static void ofd_walk_mem(void *m, walk_m 31.70 } 31.71 } 31.72 31.73 -static void setup_xenheap(module_t *mod, int mcount) 31.74 -{ 31.75 - int i; 31.76 - ulong freemem; 31.77 - 31.78 - freemem = ALIGN_UP((ulong)_end, PAGE_SIZE); 31.79 - 31.80 - for (i = 0; i < mcount; i++) { 31.81 - u32 s; 31.82 - 31.83 - if (mod[i].mod_end == mod[i].mod_start) 31.84 - continue; 31.85 - 31.86 - s = ALIGN_DOWN(mod[i].mod_start, PAGE_SIZE); 31.87 - 31.88 - if (mod[i].mod_start > (ulong)_start && 31.89 - mod[i].mod_start < (ulong)_end) { 31.90 - /* mod was linked in */ 31.91 - continue; 31.92 - } 31.93 - 31.94 - if (s < freemem) 31.95 - panic("module addresses must assend\n"); 31.96 - 31.97 - free_xenheap(freemem, s); 31.98 - freemem = ALIGN_UP(mod[i].mod_end, PAGE_SIZE); 31.99 - 31.100 - } 31.101 - 31.102 - /* the rest of the xenheap, starting at the end of modules */ 31.103 - free_xenheap(freemem, xenheap_phys_end); 31.104 -} 31.105 - 31.106 void memory_init(module_t *mod, int mcount) 31.107 { 31.108 ulong eomem; 31.109 - ulong heap_start; 31.110 + ulong bitmap_start = ~0UL; 31.111 + ulong bitmap_end = 0; 31.112 + ulong bitmap_size; 31.113 ulong xh_pages; 31.114 + ulong start; 31.115 + ulong end; 31.116 + int pos; 31.117 31.118 
/* lets find out how much memory there is and set max_page */ 31.119 max_page = 0; 31.120 printk("Physical RAM map:\n"); 31.121 ofd_walk_mem((void *)oftree, set_max_page); 31.122 eomem = max_page << PAGE_SHIFT; 31.123 - 31.124 - if (eomem == 0){ 31.125 + if (eomem == 0) { 31.126 panic("ofd_walk_mem() failed\n"); 31.127 } 31.128 31.129 - /* find the portion of memory we need to keep safe */ 31.130 - save_start = oftree; 31.131 - save_end = oftree_end; 31.132 - if (rtas_base) { 31.133 - if (save_start > rtas_base) 31.134 - save_start = rtas_base; 31.135 - if (save_end < rtas_end) 31.136 - save_end = rtas_end; 31.137 - } 31.138 - 31.139 - /* minimum heap has to reach to the end of all Xen required memory */ 31.140 - xh_pages = ALIGN_UP(save_end, PAGE_SIZE) >> PAGE_SHIFT; 31.141 - xh_pages += opt_xenheap_megabytes << (20 - PAGE_SHIFT); 31.142 + xh_pages = opt_xenheap_megabytes << (20 - PAGE_SHIFT); 31.143 31.144 /* While we are allocating HTABS from The Xen Heap we need it to 31.145 * be larger */ 31.146 - xh_pages += nr_pages >> 5; 31.147 + xh_pages += nr_pages >> 5; 31.148 31.149 xenheap_phys_end = xh_pages << PAGE_SHIFT; 31.150 printk("End of Xen Area: %luMiB (%luKiB)\n", 31.151 @@ -214,17 +148,20 @@ void memory_init(module_t *mod, int mcou 31.152 31.153 printk("End of RAM: %luMiB (%luKiB)\n", eomem >> 20, eomem >> 10); 31.154 31.155 - /* Architecturally the first 4 pages are exception hendlers, we 31.156 - * will also be copying down some code there */ 31.157 - heap_start = 4 << PAGE_SHIFT; 31.158 - if (oftree < (ulong)_start) 31.159 - heap_start = ALIGN_UP(oftree_end, PAGE_SIZE); 31.160 - 31.161 - heap_start = init_boot_allocator(heap_start); 31.162 - if (heap_start > (ulong)_start) { 31.163 - panic("space below _start (%p) is not enough memory " 31.164 - "for heap (0x%lx)\n", _start, heap_start); 31.165 + /* The boot allocator requires one bit per page. Find a spot for it. */ 31.166 + bitmap_size = max_page / 8; 31.167 + pos = boot_of_mem_avail(0, &start, &end); 31.168 + while (pos >= 0) { 31.169 + if (end - start >= bitmap_size) { 31.170 + bitmap_start = start; 31.171 + bitmap_end = init_boot_allocator(bitmap_start); 31.172 + printk("boot allocator @ %lx - %lx\n", bitmap_start, bitmap_end); 31.173 + break; 31.174 + } 31.175 + pos = boot_of_mem_avail(pos, &start, &end); 31.176 } 31.177 + if (bitmap_start == ~0UL) 31.178 + panic("Couldn't find 0x%lx bytes for boot allocator.", bitmap_size); 31.179 31.180 /* allow everything else to be allocated */ 31.181 total_pages = 0; 31.182 @@ -242,12 +179,39 @@ void memory_init(module_t *mod, int mcou 31.183 31.184 numa_initmem_init(0, max_page); 31.185 31.186 + /* Domain heap gets all the unclaimed memory. */ 31.187 end_boot_allocator(); 31.188 31.189 - /* Add memory between the beginning of the heap and the beginning 31.190 - * of our text */ 31.191 - free_xenheap(heap_start, (ulong)_start); 31.192 - setup_xenheap(mod, mcount); 31.193 + /* Create initial xen heap by finding non-reserved memory. */ 31.194 + pos = boot_of_mem_avail(0, &start, &end); 31.195 + while (pos >= 0) { 31.196 + if (end == ~0UL) 31.197 + end = xenheap_phys_end; 31.198 + 31.199 + /* Problem: the bitmap itself is not reserved. */ 31.200 + if ((start >= bitmap_start) && (start < bitmap_end)) { 31.201 + /* Start is inside bitmap. */ 31.202 + start = bitmap_end; 31.203 + } 31.204 + if ((end > bitmap_start) && (end <= bitmap_end)) { 31.205 + /* End is inside bitmap. 
*/ 31.206 + end = bitmap_start; 31.207 + } 31.208 + if ((start < bitmap_start) && (end > bitmap_end)) { 31.209 + /* Range encompasses bitmap. First free low part, then high. */ 31.210 + xenheap_size += bitmap_start - start; 31.211 + DBG("xenheap: %016lx - %016lx\n", start, bitmap_start); 31.212 + init_xenheap_pages(start, bitmap_start); 31.213 + start = bitmap_end; 31.214 + } 31.215 + 31.216 + xenheap_size += end - start; 31.217 + DBG("xenheap: %016lx - %016lx\n", start, end); 31.218 + init_xenheap_pages(start, end); 31.219 + 31.220 + pos = boot_of_mem_avail(pos, &start, &end); 31.221 + } 31.222 + 31.223 printk("Xen Heap: %luMiB (%luKiB)\n", 31.224 xenheap_size >> 20, xenheap_size >> 10); 31.225
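In the rewritten memory_init(), each free range reported by boot_of_mem_avail() is clipped against the boot-allocator bitmap before being handed to init_xenheap_pages(), so the bitmap itself never lands in the heap. A compact sketch of that clipping; free_range() is an illustrative stand-in for the real page-freeing call.

#include <stdio.h>

static void free_range(unsigned long s, unsigned long e)
{
    printf("xenheap: %#lx - %#lx\n", s, e);   /* init_xenheap_pages(s, e) in Xen */
}

/* Free the parts of [start, end) that do not overlap [rsv_start, rsv_end). */
static void add_avoiding(unsigned long start, unsigned long end,
                         unsigned long rsv_start, unsigned long rsv_end)
{
    if (end <= rsv_start || start >= rsv_end) {
        free_range(start, end);               /* no overlap: free it all */
        return;
    }
    if (start < rsv_start)
        free_range(start, rsv_start);         /* part below the reserved area */
    if (end > rsv_end)
        free_range(rsv_end, end);             /* part above the reserved area */
}

int main(void)
{
    /* range that fully contains the reserved bitmap */
    add_avoiding(0x1000, 0x9000, 0x3000, 0x4000);
    /* range entirely below it */
    add_avoiding(0x0000, 0x2000, 0x3000, 0x4000);
    return 0;
}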
32.1 --- a/xen/arch/powerpc/mm.c Thu Feb 15 13:13:36 2007 -0700 32.2 +++ b/xen/arch/powerpc/mm.c Thu Feb 15 14:09:39 2007 -0700 32.3 @@ -28,6 +28,7 @@ 32.4 #include <asm/init.h> 32.5 #include <asm/page.h> 32.6 #include <asm/string.h> 32.7 +#include <public/arch-powerpc.h> 32.8 32.9 #ifdef VERBOSE 32.10 #define MEM_LOG(_f, _a...) \
33.1 --- a/xen/arch/powerpc/of-devtree.c Thu Feb 15 13:13:36 2007 -0700 33.2 +++ b/xen/arch/powerpc/of-devtree.c Thu Feb 15 14:09:39 2007 -0700 33.3 @@ -358,7 +358,7 @@ static ofdn_t ofd_node_create( 33.4 n->on_io = 0; 33.5 n->on_pathlen = pathlen; 33.6 n->on_last = ofd_pathsplit_left(path, '/', pathlen); 33.7 - strlcpy(n->on_path, path, pathlen); 33.8 + strlcpy(n->on_path, path, pathlen + 1); 33.9 33.10 return pos; 33.11 }
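The one-character fix above matters because a size-bounded string copy reserves one byte of its size argument for the terminating NUL, so copying a pathlen-character component needs pathlen + 1. A small demonstration of the truncation the old call produced, using a simplified local helper rather than the OF handler's strlcpy:

#include <stdio.h>
#include <string.h>

/* Simplified size-bounded copy: at most size-1 payload bytes, always NUL-terminated. */
static void bounded_copy(char *dst, const char *src, size_t size)
{
    if (size == 0)
        return;
    strncpy(dst, src, size - 1);
    dst[size - 1] = '\0';
}

int main(void)
{
    char buf[8];
    const char *path = "cpus";                  /* pathlen == 4 */

    bounded_copy(buf, path, strlen(path));      /* old call: truncates to "cpu" */
    printf("%s\n", buf);
    bounded_copy(buf, path, strlen(path) + 1);  /* fixed call: copies "cpus" */
    printf("%s\n", buf);
    return 0;
}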
34.1 --- a/xen/arch/powerpc/of-devtree.h Thu Feb 15 13:13:36 2007 -0700 34.2 +++ b/xen/arch/powerpc/of-devtree.h Thu Feb 15 14:09:39 2007 -0700 34.3 @@ -13,7 +13,7 @@ 34.4 * along with this program; if not, write to the Free Software 34.5 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 34.6 * 34.7 - * Copyright (C) IBM Corp. 2005 34.8 + * Copyright IBM Corp. 2005, 2006, 2007 34.9 * 34.10 * Authors: Jimi Xenidis <jimix@watson.ibm.com> 34.11 */ 34.12 @@ -23,6 +23,7 @@ 34.13 34.14 #include <xen/types.h> 34.15 #include <xen/string.h> 34.16 +#include <xen/kernel.h> 34.17 #include <public/xen.h> 34.18 34.19 enum {
35.1 --- a/xen/arch/powerpc/of_handler/Makefile Thu Feb 15 13:13:36 2007 -0700 35.2 +++ b/xen/arch/powerpc/of_handler/Makefile Thu Feb 15 14:09:39 2007 -0700 35.3 @@ -27,5 +27,5 @@ obj-y += snprintf.o 35.4 obj-y += strcmp.o 35.5 obj-y += strlen.o 35.6 obj-y += strncmp.o 35.7 -obj-y += strncpy.o 35.8 +obj-y += strlcpy.o 35.9 obj-y += strnlen.o
36.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 36.2 +++ b/xen/arch/powerpc/of_handler/strlcpy.c Thu Feb 15 14:09:39 2007 -0700 36.3 @@ -0,0 +1,58 @@ 36.4 +/* 36.5 + * This program is free software; you can redistribute it and/or modify 36.6 + * it under the terms of the GNU General Public License as published by 36.7 + * the Free Software Foundation; either version 2 of the License, or 36.8 + * (at your option) any later version. 36.9 + * 36.10 + * This program is distributed in the hope that it will be useful, 36.11 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 36.12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 36.13 + * GNU General Public License for more details. 36.14 + * 36.15 + * You should have received a copy of the GNU General Public License 36.16 + * along with this program; if not, write to the Free Software 36.17 + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 36.18 + * 36.19 + * Copyright IBM Corp. 2005, 2007 36.20 + * 36.21 + * Authors: Jimi Xenidis <jimix@watson.ibm.com> 36.22 + * Hollis Blanchard <hollisb@us.ibm.com> 36.23 + */ 36.24 + 36.25 +#include <xen/string.h> 36.26 + 36.27 +size_t 36.28 +strlcpy(char *dest, const char *src, size_t n) 36.29 +{ 36.30 + size_t ret; 36.31 + char *dp; 36.32 + 36.33 + /* cases to consider: 36.34 + * dest is NULL, s is NULL; 36.35 + * src is empty (0); 36.36 + * src is not empty, less than n; 36.37 + * src is not empty, equal to n; 36.38 + * src is not empty, greater than n; 36.39 + */ 36.40 + 36.41 + if (n <= 0) { 36.42 + return 0; 36.43 + } 36.44 + 36.45 + dp = dest; 36.46 + 36.47 + do { 36.48 + *dp++ = *src; 36.49 + --n; 36.50 + ++src; 36.51 + } while ((*src != '\0') && (n > 1)); 36.52 + 36.53 + ret = n; 36.54 + 36.55 + /* clear remainder of buffer (if any); ANSI semantics */ 36.56 + while (n > 0) { 36.57 + *dp++ = '\0'; 36.58 + --n; 36.59 + } 36.60 + return ret; 36.61 +}
37.1 --- a/xen/arch/powerpc/of_handler/strncpy.c Thu Feb 15 13:13:36 2007 -0700 37.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 37.3 @@ -1,54 +0,0 @@ 37.4 -/* 37.5 - * This program is free software; you can redistribute it and/or modify 37.6 - * it under the terms of the GNU General Public License as published by 37.7 - * the Free Software Foundation; either version 2 of the License, or 37.8 - * (at your option) any later version. 37.9 - * 37.10 - * This program is distributed in the hope that it will be useful, 37.11 - * but WITHOUT ANY WARRANTY; without even the implied warranty of 37.12 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 37.13 - * GNU General Public License for more details. 37.14 - * 37.15 - * You should have received a copy of the GNU General Public License 37.16 - * along with this program; if not, write to the Free Software 37.17 - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 37.18 - * 37.19 - * Copyright (C) IBM Corp. 2005 37.20 - * 37.21 - * Authors: Jimi Xenidis <jimix@watson.ibm.com> 37.22 - */ 37.23 - 37.24 -#include <xen/string.h> 37.25 - 37.26 -char * 37.27 -strncpy(char *dest, const char *src, size_t n) 37.28 -{ 37.29 - char *dp; 37.30 - 37.31 - /* cases to consider: 37.32 - * dest is NULL, s is NULL; 37.33 - * src is empty (0); 37.34 - * src is not empty, less than n; 37.35 - * src is not empty, equal to n; 37.36 - * src is not empty, greater than n; 37.37 - */ 37.38 - 37.39 - if (n <= 0) { 37.40 - return dest; 37.41 - } 37.42 - 37.43 - dp = dest; 37.44 - 37.45 - do { 37.46 - *dp++ = *src; 37.47 - --n; 37.48 - ++src; 37.49 - } while ((*src != '\0') && (n > 0)); 37.50 - 37.51 - /* clear remainder of buffer (if any); ANSI semantics */ 37.52 - while (n > 0) { 37.53 - *dp++ = '\0'; 37.54 - --n; 37.55 - } 37.56 - return dest; 37.57 -}
38.1 --- a/xen/arch/powerpc/ofd_fixup.c Thu Feb 15 13:13:36 2007 -0700 38.2 +++ b/xen/arch/powerpc/ofd_fixup.c Thu Feb 15 14:09:39 2007 -0700 38.3 @@ -178,12 +178,21 @@ static ofdn_t ofd_cpus_props(void *m, st 38.4 if (ofd_boot_cpu == -1) 38.5 ofd_boot_cpu = c; 38.6 while (c > 0) { 38.7 - /* Since we are not MP yet we prune all but the booting cpu */ 38.8 + /* We do not use the OF tree to identify secondary processors 38.9 + * so we must prune them from the tree */ 38.10 if (c == ofd_boot_cpu) { 38.11 + ofdn_t p; 38.12 + 38.13 ibm_pft_size[1] = d->arch.htab.log_num_ptes + LOG_PTE_SIZE; 38.14 ofd_prop_add(m, c, "ibm,pft-size", 38.15 ibm_pft_size, sizeof (ibm_pft_size)); 38.16 38.17 + /* get rid of non-standard properties */ 38.18 + p = ofd_prop_find(m, c, "cpu#"); 38.19 + if (p > 0) { 38.20 + ofd_prop_remove(m, c, p); 38.21 + } 38.22 + 38.23 /* FIXME: Check the the "l2-cache" property who's 38.24 * contents is an orphaned phandle? */ 38.25 } else
39.1 --- a/xen/arch/powerpc/papr/xlate.c Thu Feb 15 13:13:36 2007 -0700 39.2 +++ b/xen/arch/powerpc/papr/xlate.c Thu Feb 15 14:09:39 2007 -0700 39.3 @@ -72,6 +72,20 @@ static inline void pte_insert(union pte 39.4 } 39.5 #endif 39.6 39.7 +/* 39.8 + * POWER Arch 2.03 Sec 4.12.1 (Yes 970 is one) 39.9 + * 39.10 + * when a tlbsync instruction has been executed by a processor in a 39.11 + * given partition, a ptesync instruction must be executed by that 39.12 + * processor before a tlbie or tlbsync instruction is executed by 39.13 + * another processor in that partition. 39.14 + * 39.15 + * So for now, here is a BFLock to deal with it, the lock should be per-domain. 39.16 + * 39.17 + * XXX Will need to audit all tlb usege soon enough. 39.18 + */ 39.19 + 39.20 +static DEFINE_SPINLOCK(native_tlbie_lock); 39.21 static void pte_tlbie(union pte volatile *pte, ulong ptex) 39.22 { 39.23 ulong va; 39.24 @@ -91,6 +105,7 @@ static void pte_tlbie(union pte volatile 39.25 va = (pi << 12) | (vsid << 28); 39.26 va &= ~(0xffffULL << 48); 39.27 39.28 + spin_lock(&native_tlbie_lock); 39.29 #ifndef FLUSH_THE_WHOLE_THING 39.30 if (pte->bits.l) { 39.31 va |= (pte->bits.rpn & 1); 39.32 @@ -114,7 +129,7 @@ static void pte_tlbie(union pte volatile 39.33 } 39.34 } 39.35 #endif 39.36 - 39.37 + spin_unlock(&native_tlbie_lock); 39.38 } 39.39 39.40 long pte_enter(ulong flags, ulong ptex, ulong vsid, ulong rpn)
40.1 --- a/xen/arch/powerpc/powerpc64/asm-offsets.c Thu Feb 15 13:13:36 2007 -0700 40.2 +++ b/xen/arch/powerpc/powerpc64/asm-offsets.c Thu Feb 15 14:09:39 2007 -0700 40.3 @@ -48,6 +48,8 @@ void __dummy__(void) 40.4 OFFSET(UREGS_ctr, struct cpu_user_regs, ctr); 40.5 OFFSET(UREGS_xer, struct cpu_user_regs, xer); 40.6 OFFSET(UREGS_hid4, struct cpu_user_regs, hid4); 40.7 + OFFSET(UREGS_dar, struct cpu_user_regs, dar); 40.8 + OFFSET(UREGS_dsisr, struct cpu_user_regs, dsisr); 40.9 OFFSET(UREGS_cr, struct cpu_user_regs, cr); 40.10 OFFSET(UREGS_fpscr, struct cpu_user_regs, fpscr); 40.11 DEFINE(UREGS_sizeof, sizeof(struct cpu_user_regs));
41.1 --- a/xen/arch/powerpc/powerpc64/exceptions.S Thu Feb 15 13:13:36 2007 -0700 41.2 +++ b/xen/arch/powerpc/powerpc64/exceptions.S Thu Feb 15 14:09:39 2007 -0700 41.3 @@ -373,9 +373,15 @@ ex_machcheck_continued: 41.4 * a better way, but this works for now. */ 41.5 ex_program_continued: 41.6 SAVE_GPRS r14, r31, r1 /* save all the non-volatiles */ 41.7 - /* save hid4 for debug */ 41.8 + 41.9 + /* save these for debug, no needed for restore */ 41.10 mfspr r14, SPRN_HID4 41.11 std r14, UREGS_hid4(r1) 41.12 + mfdar r14 41.13 + std r14, UREGS_dar(r1) 41.14 + mfdsisr r14 41.15 + stw r14, UREGS_dsisr(r1) 41.16 + 41.17 mr r14, r0 41.18 EXCEPTION_SAVE_STATE r1 41.19 mr r4, r14
42.1 --- a/xen/arch/powerpc/powerpc64/ppc970.c Thu Feb 15 13:13:36 2007 -0700 42.2 +++ b/xen/arch/powerpc/powerpc64/ppc970.c Thu Feb 15 14:09:39 2007 -0700 42.3 @@ -129,7 +129,6 @@ unsigned int cpu_extent_order(void) 42.4 return log_large_page_sizes[0] - PAGE_SHIFT; 42.5 } 42.6 42.7 - 42.8 /* This is more a platform thing than a CPU thing, but we only have 42.9 * one platform now */ 42.10 int cpu_io_mfn(ulong mfn) 42.11 @@ -142,6 +141,12 @@ int cpu_io_mfn(ulong mfn) 42.12 return 0; 42.13 } 42.14 42.15 +int cpu_threads(int cpuid) 42.16 +{ 42.17 + return 1; 42.18 +} 42.19 + 42.20 + 42.21 static u64 cpu0_hids[6]; 42.22 static u64 cpu0_hior; 42.23
43.1 --- a/xen/arch/powerpc/powerpc64/ppc970_scom.c Thu Feb 15 13:13:36 2007 -0700 43.2 +++ b/xen/arch/powerpc/powerpc64/ppc970_scom.c Thu Feb 15 14:09:39 2007 -0700 43.3 @@ -158,7 +158,7 @@ void cpu_scom_init(void) 43.4 { 43.5 #ifdef CONFIG_SCOM 43.6 ulong val; 43.7 - if (PVR_REV(mfpvr()) == 0x0300) { 43.8 + if (PVR_REV(mfpvr()) == PV_970FX) { 43.9 /* these address are only good for 970FX */ 43.10 console_start_sync(); 43.11 if (!cpu_scom_read(SCOM_PTSR, &val)) 43.12 @@ -174,7 +174,7 @@ void cpu_scom_AMCR(void) 43.13 #ifdef CONFIG_SCOM 43.14 ulong val; 43.15 43.16 - if (PVR_REV(mfpvr()) == 0x0300) { 43.17 + if (PVR_REV(mfpvr()) == PV_970FX) { 43.18 /* these address are only good for 970FX */ 43.19 cpu_scom_read(SCOM_AMC_REG, &val); 43.20 printk("SCOM AMCR: 0x%016lx\n", val);
44.1 --- a/xen/arch/powerpc/powerpc64/traps.c Thu Feb 15 13:13:36 2007 -0700 44.2 +++ b/xen/arch/powerpc/powerpc64/traps.c Thu Feb 15 14:09:39 2007 -0700 44.3 @@ -41,7 +41,15 @@ void show_registers(struct cpu_user_regs 44.4 regs->pc, regs->msr, 44.5 regs->lr, regs->ctr, 44.6 regs->srr0, regs->srr1); 44.7 - for (i=0; i<32; i+=4) { 44.8 + 44.9 + /* These come in handy for debugging but are not always saved, so 44.10 + * what is "actually" in the register should be good */ 44.11 + printk("dar %016lx dsisr %08x *** saved\n" 44.12 + "dar %016lx dsisr %08x *** actual\n", 44.13 + regs->dar, regs->dsisr, 44.14 + mfdar(), mfdsisr()); 44.15 + 44.16 + for (i = 0; i < 32; i += 4) { 44.17 printk("r%02i: %016lx %016lx %016lx %016lx\n", i, 44.18 regs->gprs[i], regs->gprs[i+1], regs->gprs[i+2], regs->gprs[i+3]); 44.19 }
45.1 --- a/xen/arch/powerpc/setup.c Thu Feb 15 13:13:36 2007 -0700 45.2 +++ b/xen/arch/powerpc/setup.c Thu Feb 15 14:09:39 2007 -0700 45.3 @@ -36,6 +36,7 @@ 45.4 #include <xen/symbols.h> 45.5 #include <xen/keyhandler.h> 45.6 #include <xen/numa.h> 45.7 +#include <xen/rcupdate.h> 45.8 #include <acm/acm_hooks.h> 45.9 #include <public/version.h> 45.10 #include <asm/mpic.h> 45.11 @@ -166,9 +167,6 @@ static void __init start_of_day(void) 45.12 set_current(idle_domain->vcpu[0]); 45.13 idle_vcpu[0] = current; 45.14 45.15 - /* for some reason we need to set our own bit in the thread map */ 45.16 - cpu_set(0, cpu_sibling_map[0]); 45.17 - 45.18 initialize_keytable(); 45.19 /* Register another key that will allow for the the Harware Probe 45.20 * to be contacted, this works with RiscWatch probes and should 45.21 @@ -179,6 +177,7 @@ static void __init start_of_day(void) 45.22 register_keyhandler('D', key_ofdump , "Dump OF Devtree"); 45.23 45.24 timer_init(); 45.25 + rcu_init(); 45.26 serial_init_postirq(); 45.27 do_initcalls(); 45.28 } 45.29 @@ -234,6 +233,21 @@ static int kick_secondary_cpus(int maxcp 45.30 int cpuid; 45.31 45.32 for_each_present_cpu(cpuid) { 45.33 + int threads; 45.34 + int i; 45.35 + 45.36 + threads = cpu_threads(cpuid); 45.37 + for (i = 0; i < threads; i++) 45.38 + cpu_set(i, cpu_sibling_map[cpuid]); 45.39 + 45.40 + /* For now everything is single core */ 45.41 + cpu_set(cpuid, cpu_core_map[cpuid]); 45.42 + 45.43 + rcu_online_cpu(cpuid); 45.44 + 45.45 + numa_set_node(cpuid, 0); 45.46 + numa_add_cpu(cpuid); 45.47 + 45.48 if (cpuid == 0) 45.49 continue; 45.50 if (cpuid >= maxcpus) 45.51 @@ -244,9 +258,6 @@ static int kick_secondary_cpus(int maxcp 45.52 /* wait for it */ 45.53 while (!cpu_online(cpuid)) 45.54 cpu_relax(); 45.55 - 45.56 - numa_set_node(cpuid, 0); 45.57 - numa_add_cpu(cpuid); 45.58 } 45.59 45.60 return 0;
46.1 --- a/xen/arch/powerpc/sysctl.c Thu Feb 15 13:13:36 2007 -0700 46.2 +++ b/xen/arch/powerpc/sysctl.c Thu Feb 15 14:09:39 2007 -0700 46.3 @@ -41,9 +41,13 @@ long arch_do_sysctl(struct xen_sysctl *s 46.4 { 46.5 xen_sysctl_physinfo_t *pi = &sysctl->u.physinfo; 46.6 46.7 - pi->threads_per_core = 1; 46.8 - pi->cores_per_socket = 1; 46.9 - pi->sockets_per_node = 1; 46.10 + pi->threads_per_core = 46.11 + cpus_weight(cpu_sibling_map[0]); 46.12 + pi->cores_per_socket = 46.13 + cpus_weight(cpu_core_map[0]) / pi->threads_per_core; 46.14 + pi->sockets_per_node = 46.15 + num_online_cpus() / cpus_weight(cpu_core_map[0]); 46.16 + 46.17 pi->nr_nodes = 1; 46.18 pi->total_pages = total_pages; 46.19 pi->free_pages = avail_domheap_pages();
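The new physinfo code derives topology counts from CPU-mask weights: threads_per_core is the size of CPU0's sibling mask, cores_per_socket divides the core-mask weight by that, and sockets_per_node divides the online CPU count by the core-mask weight. A worked example with made-up numbers (16 online CPUs, 2-way SMT, 8 threads sharing a socket), showing the same arithmetic:

#include <stdio.h>

int main(void)
{
    unsigned int online          = 16;  /* num_online_cpus()               */
    unsigned int sibling_weight  = 2;   /* cpus_weight(cpu_sibling_map[0]) */
    unsigned int core_map_weight = 8;   /* cpus_weight(cpu_core_map[0])    */

    unsigned int threads_per_core = sibling_weight;
    unsigned int cores_per_socket = core_map_weight / threads_per_core;
    unsigned int sockets_per_node = online / core_map_weight;

    printf("%u threads/core, %u cores/socket, %u sockets/node\n",
           threads_per_core, cores_per_socket, sockets_per_node);
    return 0;
}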
47.1 --- a/xen/arch/powerpc/time.c Thu Feb 15 13:13:36 2007 -0700 47.2 +++ b/xen/arch/powerpc/time.c Thu Feb 15 14:09:39 2007 -0700 47.3 @@ -85,12 +85,6 @@ void send_timer_event(struct vcpu *v) 47.4 vcpu_unblock(v); 47.5 } 47.6 47.7 -/* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */ 47.8 -void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base) 47.9 -{ 47.10 - unimplemented(); 47.11 -} 47.12 - 47.13 void update_vcpu_system_time(struct vcpu *v) 47.14 { 47.15 }
48.1 --- a/xen/arch/powerpc/xen.lds.S Thu Feb 15 13:13:36 2007 -0700 48.2 +++ b/xen/arch/powerpc/xen.lds.S Thu Feb 15 14:09:39 2007 -0700 48.3 @@ -17,6 +17,8 @@ PHDRS 48.4 SECTIONS 48.5 { 48.6 . = 0x00400000; 48.7 + PROVIDE(_text = .); 48.8 + PROVIDE(_stext = .); 48.9 /* Read-only sections, merged into text segment: */ 48.10 .interp : { *(.interp) } :text 48.11 .hash : { *(.hash) } 48.12 @@ -111,17 +113,26 @@ SECTIONS 48.13 SORT(CONSTRUCTORS) 48.14 } 48.15 48.16 + . = ALIGN(4096); 48.17 + __init_begin = .; 48.18 + _sinittext = .; 48.19 + .init.text : { *(.init.text) } : text 48.20 + _einittext = .; 48.21 + .init.data : { *(.init.data) } : text 48.22 . = ALIGN(32); 48.23 __setup_start = .; 48.24 - .init.setup : { *(.init.setup) } 48.25 + .init.setup : { *(.init.setup) } : text 48.26 __setup_end = .; 48.27 __initcall_start = .; 48.28 - .initcall.init : { *(.initcall1.init) } 48.29 + .initcall.init : { *(.initcall1.init) } : text 48.30 __initcall_end = .; 48.31 - __inithcall_start = .; 48.32 - .inithcall.text : { *(.inithcall.text) } 48.33 - __inithcall_end = .; 48.34 - 48.35 + __init_hcall_start = .; 48.36 + .init_hcall.init : { *(.init_hcall.init) } : text 48.37 + __init_hcall_end = .; 48.38 + __builtin_cmdline : { *(__builtin_cmdline) } : text 48.39 + . = ALIGN(4096); 48.40 + __init_end = .; 48.41 + 48.42 __per_cpu_start = .; 48.43 .data.percpu : { *(.data.percpu) } 48.44 __per_cpu_data_end = .;
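The linker-script hunk groups the init-only sections between new __init_begin/__init_end markers (and brackets init text with _sinittext/_einittext) so that C code can locate, and eventually discard, the region. A typical consumer pattern is sketched below; it is illustrative only, with the linker-provided symbols faked by an ordinary array so the snippet builds standalone.

#include <stdio.h>
#include <stddef.h>

/* In Xen these bounds come from the linker script; here they are stood in
 * by an ordinary array so the example is self-contained. */
static char fake_init_region[4096];
static char *__init_begin = fake_init_region;
static char *__init_end   = fake_init_region + sizeof(fake_init_region);

static int in_init_region(const char *p)
{
    return p >= __init_begin && p < __init_end;
}

int main(void)
{
    char outside;
    printf("init region is %zu bytes; inside=%d outside=%d\n",
           (size_t)(__init_end - __init_begin),
           in_init_region(&fake_init_region[16]), in_init_region(&outside));
    return 0;
}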
49.1 --- a/xen/arch/x86/domain.c Thu Feb 15 13:13:36 2007 -0700 49.2 +++ b/xen/arch/x86/domain.c Thu Feb 15 14:09:39 2007 -0700 49.3 @@ -37,7 +37,7 @@ 49.4 #include <asm/i387.h> 49.5 #include <asm/mpspec.h> 49.6 #include <asm/ldt.h> 49.7 -#include <asm/shadow.h> 49.8 +#include <asm/paging.h> 49.9 #include <asm/hvm/hvm.h> 49.10 #include <asm/hvm/support.h> 49.11 #include <asm/msr.h> 49.12 @@ -331,6 +331,7 @@ int vcpu_initialise(struct vcpu *v) 49.13 49.14 pae_l3_cache_init(&v->arch.pae_l3_cache); 49.15 49.16 + paging_vcpu_init(v); 49.17 49.18 if ( is_hvm_domain(d) ) 49.19 { 49.20 @@ -424,7 +425,7 @@ int arch_domain_create(struct domain *d) 49.21 HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START; 49.22 #endif 49.23 49.24 - shadow_domain_init(d); 49.25 + paging_domain_init(d); 49.26 49.27 if ( !is_idle_domain(d) ) 49.28 { 49.29 @@ -464,7 +465,7 @@ void arch_domain_destroy(struct domain * 49.30 hvm_domain_destroy(d); 49.31 } 49.32 49.33 - shadow_final_teardown(d); 49.34 + paging_final_teardown(d); 49.35 49.36 free_xenheap_pages( 49.37 d->arch.mm_perdomain_pt, 49.38 @@ -613,7 +614,7 @@ int arch_set_info_guest( 49.39 { 49.40 cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[3])); 49.41 49.42 - if ( shadow_mode_refcounts(d) 49.43 + if ( paging_mode_refcounts(d) 49.44 ? !get_page(mfn_to_page(cr3_pfn), d) 49.45 : !get_page_and_type(mfn_to_page(cr3_pfn), d, 49.46 PGT_base_page_table) ) 49.47 @@ -631,7 +632,7 @@ int arch_set_info_guest( 49.48 49.49 cr3_pfn = gmfn_to_mfn(d, compat_cr3_to_pfn(c.cmp->ctrlreg[3])); 49.50 49.51 - if ( shadow_mode_refcounts(d) 49.52 + if ( paging_mode_refcounts(d) 49.53 ? !get_page(mfn_to_page(cr3_pfn), d) 49.54 : !get_page_and_type(mfn_to_page(cr3_pfn), d, 49.55 PGT_l3_page_table) ) 49.56 @@ -652,8 +653,8 @@ int arch_set_info_guest( 49.57 /* Don't redo final setup */ 49.58 set_bit(_VCPUF_initialised, &v->vcpu_flags); 49.59 49.60 - if ( shadow_mode_enabled(d) ) 49.61 - shadow_update_paging_modes(v); 49.62 + if ( paging_mode_enabled(d) ) 49.63 + paging_update_paging_modes(v); 49.64 49.65 update_cr3(v); 49.66 49.67 @@ -1406,7 +1407,7 @@ static void vcpu_destroy_pagetables(stru 49.68 49.69 if ( pfn != 0 ) 49.70 { 49.71 - if ( shadow_mode_refcounts(d) ) 49.72 + if ( paging_mode_refcounts(d) ) 49.73 put_page(mfn_to_page(pfn)); 49.74 else 49.75 put_page_and_type(mfn_to_page(pfn)); 49.76 @@ -1427,7 +1428,7 @@ static void vcpu_destroy_pagetables(stru 49.77 pfn = pagetable_get_pfn(v->arch.guest_table); 49.78 if ( pfn != 0 ) 49.79 { 49.80 - if ( shadow_mode_refcounts(d) ) 49.81 + if ( paging_mode_refcounts(d) ) 49.82 put_page(mfn_to_page(pfn)); 49.83 else 49.84 put_page_and_type(mfn_to_page(pfn)); 49.85 @@ -1443,7 +1444,7 @@ static void vcpu_destroy_pagetables(stru 49.86 pfn = pagetable_get_pfn(v->arch.guest_table_user); 49.87 if ( pfn != 0 ) 49.88 { 49.89 - if ( shadow_mode_refcounts(d) ) 49.90 + if ( paging_mode_refcounts(d) ) 49.91 put_page(mfn_to_page(pfn)); 49.92 else 49.93 put_page_and_type(mfn_to_page(pfn)); 49.94 @@ -1464,8 +1465,8 @@ void domain_relinquish_resources(struct 49.95 for_each_vcpu ( d, v ) 49.96 vcpu_destroy_pagetables(v); 49.97 49.98 - /* Tear down shadow mode stuff. */ 49.99 - shadow_teardown(d); 49.100 + /* Tear down paging-assistance stuff. */ 49.101 + paging_teardown(d); 49.102 49.103 /* 49.104 * Relinquish GDT mappings. 
No need for explicit unmapping of the LDT as 49.105 @@ -1484,35 +1485,12 @@ void domain_relinquish_resources(struct 49.106 49.107 void arch_dump_domain_info(struct domain *d) 49.108 { 49.109 - if ( shadow_mode_enabled(d) ) 49.110 - { 49.111 - printk(" shadow mode: "); 49.112 - if ( d->arch.shadow.mode & SHM2_enable ) 49.113 - printk("enabled "); 49.114 - if ( shadow_mode_refcounts(d) ) 49.115 - printk("refcounts "); 49.116 - if ( shadow_mode_log_dirty(d) ) 49.117 - printk("log_dirty "); 49.118 - if ( shadow_mode_translate(d) ) 49.119 - printk("translate "); 49.120 - if ( shadow_mode_external(d) ) 49.121 - printk("external "); 49.122 - printk("\n"); 49.123 - } 49.124 + paging_dump_domain_info(d); 49.125 } 49.126 49.127 void arch_dump_vcpu_info(struct vcpu *v) 49.128 { 49.129 - if ( shadow_mode_enabled(v->domain) ) 49.130 - { 49.131 - if ( v->arch.shadow.mode ) 49.132 - printk(" shadowed %u-on-%u, %stranslated\n", 49.133 - v->arch.shadow.mode->guest_levels, 49.134 - v->arch.shadow.mode->shadow_levels, 49.135 - shadow_vcpu_mode_translate(v) ? "" : "not "); 49.136 - else 49.137 - printk(" not shadowed\n"); 49.138 - } 49.139 + paging_dump_vcpu_info(v); 49.140 } 49.141 49.142 /*
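From here on, common x86 code stops calling shadow_*() helpers directly and goes through paging_*() wrappers, so that other paging-assistance modes can later slot in behind the same interface. In this changeset the wrappers mostly forward straight to the shadow code (see the new paging.c further down), but the hvm.c hunk already reads v->arch.paging.mode, hinting at per-mode dispatch. The standalone sketch below shows the general shape of such an indirection; the types and function names are illustrative, not Xen's.

#include <stdio.h>

struct domain;

struct paging_ops {
    const char *name;
    void (*update_cr3)(struct domain *d);
};

static void shadow_update_cr3(struct domain *d) { (void)d; puts("shadow: rebuild shadow tables"); }
static void hap_update_cr3(struct domain *d)    { (void)d; puts("hap: use guest tables directly"); }

static const struct paging_ops shadow_ops = { "shadow", shadow_update_cr3 };
static const struct paging_ops hap_ops    = { "hap",    hap_update_cr3 };

struct domain { const struct paging_ops *paging; };

/* callers only ever see the paging_* entry point */
static void paging_update_cr3(struct domain *d) { d->paging->update_cr3(d); }

int main(void)
{
    struct domain d1 = { &shadow_ops }, d2 = { &hap_ops };
    paging_update_cr3(&d1);
    paging_update_cr3(&d2);
    return 0;
}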
50.1 --- a/xen/arch/x86/domain_build.c Thu Feb 15 13:13:36 2007 -0700 50.2 +++ b/xen/arch/x86/domain_build.c Thu Feb 15 14:09:39 2007 -0700 50.3 @@ -25,7 +25,7 @@ 50.4 #include <asm/processor.h> 50.5 #include <asm/desc.h> 50.6 #include <asm/i387.h> 50.7 -#include <asm/shadow.h> 50.8 +#include <asm/paging.h> 50.9 50.10 #include <public/version.h> 50.11 #include <public/libelf.h> 50.12 @@ -777,8 +777,8 @@ int construct_dom0(struct domain *d, 50.13 (void)alloc_vcpu(d, i, i); 50.14 50.15 /* Set up CR3 value for write_ptbase */ 50.16 - if ( shadow_mode_enabled(v->domain) ) 50.17 - shadow_update_paging_modes(v); 50.18 + if ( paging_mode_enabled(v->domain) ) 50.19 + paging_update_paging_modes(v); 50.20 else 50.21 update_cr3(v); 50.22 50.23 @@ -918,8 +918,8 @@ int construct_dom0(struct domain *d, 50.24 regs->eflags = X86_EFLAGS_IF; 50.25 50.26 if ( opt_dom0_shadow ) 50.27 - if ( shadow_enable(d, SHM2_enable) == 0 ) 50.28 - shadow_update_paging_modes(v); 50.29 + if ( paging_enable(d, PG_SH_enable) == 0 ) 50.30 + paging_update_paging_modes(v); 50.31 50.32 if ( supervisor_mode_kernel ) 50.33 {
51.1 --- a/xen/arch/x86/domctl.c Thu Feb 15 13:13:36 2007 -0700 51.2 +++ b/xen/arch/x86/domctl.c Thu Feb 15 14:09:39 2007 -0700 51.3 @@ -19,7 +19,7 @@ 51.4 #include <xen/trace.h> 51.5 #include <xen/console.h> 51.6 #include <xen/iocap.h> 51.7 -#include <asm/shadow.h> 51.8 +#include <asm/paging.h> 51.9 #include <asm/irq.h> 51.10 #include <asm/hvm/hvm.h> 51.11 #include <asm/hvm/support.h> 51.12 @@ -42,7 +42,7 @@ long arch_do_domctl( 51.13 d = get_domain_by_id(domctl->domain); 51.14 if ( d != NULL ) 51.15 { 51.16 - ret = shadow_domctl(d, 51.17 + ret = paging_domctl(d, 51.18 &domctl->u.shadow_op, 51.19 guest_handle_cast(u_domctl, void)); 51.20 put_domain(d); 51.21 @@ -398,6 +398,7 @@ long arch_do_domctl( 51.22 51.23 put_domain(d); 51.24 } 51.25 + break; 51.26 51.27 case XEN_DOMCTL_get_address_size: 51.28 { 51.29 @@ -411,7 +412,11 @@ long arch_do_domctl( 51.30 51.31 ret = 0; 51.32 put_domain(d); 51.33 + 51.34 + if ( copy_to_guest(u_domctl, domctl, 1) ) 51.35 + ret = -EFAULT; 51.36 } 51.37 + break; 51.38 51.39 default: 51.40 ret = -ENOSYS;
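Besides the rename, this hunk adds two missing break statements (the previous cases fell straight through into the next one) and copies the updated domctl back to the guest. A condensed, standalone illustration of the fall-through bug class that the added breaks close:

#include <stdio.h>

static long handle(int cmd)
{
    long ret = -1;

    switch (cmd) {
    case 1:
        ret = 0;       /* without the break, execution would continue   */
        break;         /* into case 2 and clobber ret / redo work       */
    case 2:
        ret = 2;
        break;
    default:
        ret = -38;     /* -ENOSYS */
    }
    return ret;
}

int main(void)
{
    printf("cmd 1 -> %ld, cmd 2 -> %ld\n", handle(1), handle(2));
    return 0;
}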
52.1 --- a/xen/arch/x86/hvm/hvm.c Thu Feb 15 13:13:36 2007 -0700 52.2 +++ b/xen/arch/x86/hvm/hvm.c Thu Feb 15 14:09:39 2007 -0700 52.3 @@ -30,11 +30,10 @@ 52.4 #include <xen/hypercall.h> 52.5 #include <xen/guest_access.h> 52.6 #include <xen/event.h> 52.7 -#include <xen/shadow.h> 52.8 #include <asm/current.h> 52.9 #include <asm/e820.h> 52.10 #include <asm/io.h> 52.11 -#include <asm/shadow.h> 52.12 +#include <asm/paging.h> 52.13 #include <asm/regs.h> 52.14 #include <asm/cpufeature.h> 52.15 #include <asm/processor.h> 52.16 @@ -155,7 +154,7 @@ int hvm_domain_initialise(struct domain 52.17 spin_lock_init(&d->arch.hvm_domain.buffered_io_lock); 52.18 spin_lock_init(&d->arch.hvm_domain.irq_lock); 52.19 52.20 - rc = shadow_enable(d, SHM2_refcounts|SHM2_translate|SHM2_external); 52.21 + rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external); 52.22 if ( rc != 0 ) 52.23 return rc; 52.24 52.25 @@ -383,7 +382,7 @@ static int __hvm_copy(void *buf, paddr_t 52.26 count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo); 52.27 52.28 if ( virt ) 52.29 - mfn = get_mfn_from_gpfn(shadow_gva_to_gfn(current, addr)); 52.30 + mfn = get_mfn_from_gpfn(paging_gva_to_gfn(current, addr)); 52.31 else 52.32 mfn = get_mfn_from_gpfn(addr >> PAGE_SHIFT); 52.33 52.34 @@ -600,7 +599,7 @@ void hvm_do_hypercall(struct cpu_user_re 52.35 return; 52.36 } 52.37 52.38 - if ( current->arch.shadow.mode->guest_levels == 4 ) 52.39 + if ( current->arch.paging.mode->guest_levels == 4 ) 52.40 { 52.41 pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi, 52.42 pregs->rsi,
53.1 --- a/xen/arch/x86/hvm/io.c Thu Feb 15 13:13:36 2007 -0700 53.2 +++ b/xen/arch/x86/hvm/io.c Thu Feb 15 14:09:39 2007 -0700 53.3 @@ -32,7 +32,7 @@ 53.4 #include <asm/processor.h> 53.5 #include <asm/msr.h> 53.6 #include <asm/apic.h> 53.7 -#include <asm/shadow.h> 53.8 +#include <asm/paging.h> 53.9 #include <asm/hvm/hvm.h> 53.10 #include <asm/hvm/support.h> 53.11 #include <asm/hvm/vpt.h>
54.1 --- a/xen/arch/x86/hvm/platform.c Thu Feb 15 13:13:36 2007 -0700 54.2 +++ b/xen/arch/x86/hvm/platform.c Thu Feb 15 14:09:39 2007 -0700 54.3 @@ -21,7 +21,6 @@ 54.4 #include <xen/config.h> 54.5 #include <xen/types.h> 54.6 #include <xen/mm.h> 54.7 -#include <xen/shadow.h> 54.8 #include <xen/domain_page.h> 54.9 #include <asm/page.h> 54.10 #include <xen/event.h> 54.11 @@ -29,6 +28,7 @@ 54.12 #include <xen/sched.h> 54.13 #include <asm/regs.h> 54.14 #include <asm/x86_emulate.h> 54.15 +#include <asm/paging.h> 54.16 #include <asm/hvm/hvm.h> 54.17 #include <asm/hvm/support.h> 54.18 #include <asm/hvm/io.h> 54.19 @@ -690,6 +690,39 @@ static int mmio_decode(int address_bytes 54.20 } else 54.21 return DECODE_failure; 54.22 54.23 + case 0xFE: 54.24 + case 0xFF: 54.25 + { 54.26 + unsigned char ins_subtype = (opcode[1] >> 3) & 7; 54.27 + 54.28 + if ( opcode[0] == 0xFE ) { 54.29 + *op_size = BYTE; 54.30 + GET_OP_SIZE_FOR_BYTE(size_reg); 54.31 + } else { 54.32 + GET_OP_SIZE_FOR_NONEBYTE(*op_size); 54.33 + size_reg = *op_size; 54.34 + } 54.35 + 54.36 + mmio_op->immediate = 1; 54.37 + mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE); 54.38 + mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY); 54.39 + 54.40 + switch ( ins_subtype ) { 54.41 + case 0: /* inc */ 54.42 + mmio_op->instr = INSTR_ADD; 54.43 + return DECODE_success; 54.44 + 54.45 + case 1: /* dec */ 54.46 + mmio_op->instr = INSTR_SUB; 54.47 + return DECODE_success; 54.48 + 54.49 + default: 54.50 + printk("%x/%x, This opcode isn't handled yet!\n", 54.51 + *opcode, ins_subtype); 54.52 + return DECODE_failure; 54.53 + } 54.54 + } 54.55 + 54.56 case 0x0F: 54.57 break; 54.58 54.59 @@ -809,7 +842,7 @@ void send_pio_req(unsigned long port, un 54.60 if ( value_is_ptr ) /* get physical address of data */ 54.61 { 54.62 if ( hvm_paging_enabled(current) ) 54.63 - p->data = shadow_gva_to_gpa(current, value); 54.64 + p->data = paging_gva_to_gpa(current, value); 54.65 else 54.66 p->data = value; /* guest VA == guest PA */ 54.67 } 54.68 @@ -865,7 +898,7 @@ static void send_mmio_req(unsigned char 54.69 if ( value_is_ptr ) 54.70 { 54.71 if ( hvm_paging_enabled(v) ) 54.72 - p->data = shadow_gva_to_gpa(v, value); 54.73 + p->data = paging_gva_to_gpa(v, value); 54.74 else 54.75 p->data = value; /* guest VA == guest PA */ 54.76 } 54.77 @@ -981,7 +1014,7 @@ void handle_mmio(unsigned long gpa) 54.78 if ( ad_size == WORD ) 54.79 addr &= 0xFFFF; 54.80 addr += hvm_get_segment_base(v, x86_seg_es); 54.81 - if ( shadow_gva_to_gpa(v, addr) == gpa ) 54.82 + if ( paging_gva_to_gpa(v, addr) == gpa ) 54.83 { 54.84 enum x86_segment seg; 54.85
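The platform.c hunk teaches the MMIO decoder about the x86 0xFE/0xFF opcode group, where bits 5:3 of the ModRM byte select the operation; inc and dec are emulated as add/sub of an immediate 1. A simplified, standalone version of that decode step (illustrative only):

#include <stdio.h>

enum instr { INSTR_ADD, INSTR_SUB, INSTR_UNKNOWN };

static enum instr decode_grp(unsigned char opcode, unsigned char modrm)
{
    if (opcode != 0xFE && opcode != 0xFF)
        return INSTR_UNKNOWN;

    switch ((modrm >> 3) & 7) {   /* ModRM "reg" field selects the op */
    case 0: return INSTR_ADD;     /* inc: emulated as add +1          */
    case 1: return INSTR_SUB;     /* dec: emulated as sub 1           */
    default: return INSTR_UNKNOWN;
    }
}

int main(void)
{
    printf("FF /0 -> %d (inc), FF /1 -> %d (dec)\n",
           decode_grp(0xFF, 0x00), decode_grp(0xFF, 0x08));
    return 0;
}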
55.1 --- a/xen/arch/x86/hvm/svm/intr.c Thu Feb 15 13:13:36 2007 -0700 55.2 +++ b/xen/arch/x86/hvm/svm/intr.c Thu Feb 15 14:09:39 2007 -0700 55.3 @@ -24,10 +24,10 @@ 55.4 #include <xen/lib.h> 55.5 #include <xen/trace.h> 55.6 #include <xen/errno.h> 55.7 -#include <xen/shadow.h> 55.8 #include <asm/cpufeature.h> 55.9 #include <asm/processor.h> 55.10 #include <asm/msr.h> 55.11 +#include <asm/paging.h> 55.12 #include <asm/hvm/hvm.h> 55.13 #include <asm/hvm/io.h> 55.14 #include <asm/hvm/support.h>
56.1 --- a/xen/arch/x86/hvm/svm/svm.c Thu Feb 15 13:13:36 2007 -0700 56.2 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Feb 15 14:09:39 2007 -0700 56.3 @@ -29,7 +29,8 @@ 56.4 #include <xen/domain_page.h> 56.5 #include <asm/current.h> 56.6 #include <asm/io.h> 56.7 -#include <asm/shadow.h> 56.8 +#include <asm/paging.h> 56.9 +#include <asm/p2m.h> 56.10 #include <asm/regs.h> 56.11 #include <asm/cpufeature.h> 56.12 #include <asm/processor.h> 56.13 @@ -491,9 +492,6 @@ int svm_vmcb_restore(struct vcpu *v, str 56.14 v->arch.guest_table = pagetable_from_pfn(mfn); 56.15 if (old_base_mfn) 56.16 put_page(mfn_to_page(old_base_mfn)); 56.17 - /* 56.18 - * arch.shadow_table should now hold the next CR3 for shadow 56.19 - */ 56.20 v->arch.hvm_svm.cpu_cr3 = c->cr3; 56.21 } 56.22 56.23 @@ -560,7 +558,7 @@ int svm_vmcb_restore(struct vcpu *v, str 56.24 vmcb->sysenter_esp = c->sysenter_esp; 56.25 vmcb->sysenter_eip = c->sysenter_eip; 56.26 56.27 - shadow_update_paging_modes(v); 56.28 + paging_update_paging_modes(v); 56.29 return 0; 56.30 56.31 bad_cr3: 56.32 @@ -1095,7 +1093,7 @@ static int svm_do_page_fault(unsigned lo 56.33 "svm_do_page_fault = 0x%lx, eip = %lx, error_code = %lx", 56.34 va, (unsigned long)current->arch.hvm_svm.vmcb->rip, 56.35 (unsigned long)regs->error_code); 56.36 - return shadow_fault(va, regs); 56.37 + return paging_fault(va, regs); 56.38 } 56.39 56.40 56.41 @@ -1730,7 +1728,7 @@ static int svm_set_cr0(unsigned long val 56.42 v->arch.guest_table = pagetable_from_pfn(mfn); 56.43 if ( old_base_mfn ) 56.44 put_page(mfn_to_page(old_base_mfn)); 56.45 - shadow_update_paging_modes(v); 56.46 + paging_update_paging_modes(v); 56.47 56.48 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 56.49 (unsigned long) (mfn << PAGE_SHIFT)); 56.50 @@ -1753,7 +1751,7 @@ static int svm_set_cr0(unsigned long val 56.51 svm_inject_exception(v, TRAP_gp_fault, 1, 0); 56.52 return 0; 56.53 } 56.54 - shadow_update_paging_modes(v); 56.55 + paging_update_paging_modes(v); 56.56 } 56.57 else if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) 56.58 { 56.59 @@ -1763,7 +1761,7 @@ static int svm_set_cr0(unsigned long val 56.60 clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); 56.61 } 56.62 /* we should take care of this kind of situation */ 56.63 - shadow_update_paging_modes(v); 56.64 + paging_update_paging_modes(v); 56.65 } 56.66 56.67 return 1; 56.68 @@ -1866,7 +1864,7 @@ static int mov_to_cr(int gpreg, int cr, 56.69 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); 56.70 if (mfn != pagetable_get_pfn(v->arch.guest_table)) 56.71 goto bad_cr3; 56.72 - shadow_update_cr3(v); 56.73 + paging_update_cr3(v); 56.74 } 56.75 else 56.76 { 56.77 @@ -1917,7 +1915,7 @@ static int mov_to_cr(int gpreg, int cr, 56.78 v->arch.guest_table = pagetable_from_pfn(mfn); 56.79 if ( old_base_mfn ) 56.80 put_page(mfn_to_page(old_base_mfn)); 56.81 - shadow_update_paging_modes(v); 56.82 + paging_update_paging_modes(v); 56.83 56.84 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 56.85 (unsigned long) (mfn << PAGE_SHIFT)); 56.86 @@ -1946,7 +1944,7 @@ static int mov_to_cr(int gpreg, int cr, 56.87 * all TLB entries except global entries. 
56.88 */ 56.89 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) 56.90 - shadow_update_paging_modes(v); 56.91 + paging_update_paging_modes(v); 56.92 break; 56.93 56.94 case 8: 56.95 @@ -2289,7 +2287,7 @@ void svm_handle_invlpg(const short invlp 56.96 __update_guest_eip (vmcb, inst_len); 56.97 } 56.98 56.99 - shadow_invlpg(v, g_vaddr); 56.100 + paging_invlpg(v, g_vaddr); 56.101 } 56.102 56.103 56.104 @@ -2660,7 +2658,7 @@ void walk_shadow_and_guest_pt(unsigned l 56.105 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 56.106 paddr_t gpa; 56.107 56.108 - gpa = shadow_gva_to_gpa(current, gva); 56.109 + gpa = paging_gva_to_gpa(current, gva); 56.110 printk("gva = %lx, gpa=%"PRIpaddr", gCR3=%x\n", gva, gpa, (u32)vmcb->cr3); 56.111 if( !svm_paging_enabled(v) || mmio_space(gpa) ) 56.112 return; 56.113 @@ -2681,7 +2679,7 @@ void walk_shadow_and_guest_pt(unsigned l 56.114 shadow_sync_va(v, gva); 56.115 56.116 gpte.l1 = 0; 56.117 - __copy_from_user(&gpte, &linear_pg_table[ l1_linear_offset(gva) ], 56.118 + __copy_from_user(&gpte, &__linear_l1_table[ l1_linear_offset(gva) ], 56.119 sizeof(gpte) ); 56.120 printk( "G-PTE = %x, flags=%x\n", gpte.l1, l1e_get_flags(gpte) ); 56.121 56.122 @@ -2726,7 +2724,7 @@ asmlinkage void svm_vmexit_handler(struc 56.123 if (svm_dbg_on && exit_reason == VMEXIT_EXCEPTION_PF) 56.124 { 56.125 if (svm_paging_enabled(v) && 56.126 - !mmio_space(shadow_gva_to_gpa(current, vmcb->exitinfo2))) 56.127 + !mmio_space(paging_gva_to_gpa(current, vmcb->exitinfo2))) 56.128 { 56.129 printk("I%08ld,ExC=%s(%d),IP=%x:%"PRIx64"," 56.130 "I1=%"PRIx64",I2=%"PRIx64",INT=%"PRIx64", " 56.131 @@ -2736,7 +2734,7 @@ asmlinkage void svm_vmexit_handler(struc 56.132 (u64)vmcb->exitinfo1, 56.133 (u64)vmcb->exitinfo2, 56.134 (u64)vmcb->exitintinfo.bytes, 56.135 - (u64)shadow_gva_to_gpa(current, vmcb->exitinfo2)); 56.136 + (u64)paging_gva_to_gpa(current, vmcb->exitinfo2)); 56.137 } 56.138 else 56.139 {
57.1 --- a/xen/arch/x86/hvm/svm/vmcb.c Thu Feb 15 13:13:36 2007 -0700 57.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c Thu Feb 15 14:09:39 2007 -0700 57.3 @@ -23,10 +23,10 @@ 57.4 #include <xen/mm.h> 57.5 #include <xen/lib.h> 57.6 #include <xen/errno.h> 57.7 -#include <xen/shadow.h> 57.8 #include <asm/cpufeature.h> 57.9 #include <asm/processor.h> 57.10 #include <asm/msr.h> 57.11 +#include <asm/paging.h> 57.12 #include <asm/hvm/hvm.h> 57.13 #include <asm/hvm/io.h> 57.14 #include <asm/hvm/support.h> 57.15 @@ -196,7 +196,7 @@ static int construct_vmcb(struct vcpu *v 57.16 read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE); 57.17 vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK; 57.18 57.19 - shadow_update_paging_modes(v); 57.20 + paging_update_paging_modes(v); 57.21 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 57.22 57.23 arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
58.1 --- a/xen/arch/x86/hvm/vlapic.c Thu Feb 15 13:13:36 2007 -0700 58.2 +++ b/xen/arch/x86/hvm/vlapic.c Thu Feb 15 14:09:39 2007 -0700 58.3 @@ -22,7 +22,6 @@ 58.4 #include <xen/types.h> 58.5 #include <xen/mm.h> 58.6 #include <xen/xmalloc.h> 58.7 -#include <xen/shadow.h> 58.8 #include <xen/domain_page.h> 58.9 #include <asm/page.h> 58.10 #include <xen/event.h>
59.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c Thu Feb 15 13:13:36 2007 -0700 59.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c Thu Feb 15 14:09:39 2007 -0700 59.3 @@ -448,7 +448,7 @@ static void construct_vmcs(struct vcpu * 59.4 59.5 vmx_vmcs_exit(v); 59.6 59.7 - shadow_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ 59.8 + paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ 59.9 } 59.10 59.11 int vmx_create_vmcs(struct vcpu *v)
60.1 --- a/xen/arch/x86/hvm/vmx/vmx.c Thu Feb 15 13:13:36 2007 -0700 60.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c Thu Feb 15 14:09:39 2007 -0700 60.3 @@ -35,12 +35,13 @@ 60.4 #include <asm/types.h> 60.5 #include <asm/msr.h> 60.6 #include <asm/spinlock.h> 60.7 +#include <asm/paging.h> 60.8 +#include <asm/p2m.h> 60.9 #include <asm/hvm/hvm.h> 60.10 #include <asm/hvm/support.h> 60.11 #include <asm/hvm/vmx/vmx.h> 60.12 #include <asm/hvm/vmx/vmcs.h> 60.13 #include <asm/hvm/vmx/cpu.h> 60.14 -#include <asm/shadow.h> 60.15 #include <public/sched.h> 60.16 #include <public/hvm/ioreq.h> 60.17 #include <asm/hvm/vpic.h> 60.18 @@ -484,9 +485,6 @@ int vmx_vmcs_restore(struct vcpu *v, str 60.19 v->arch.guest_table = pagetable_from_pfn(mfn); 60.20 if (old_base_mfn) 60.21 put_page(mfn_to_page(old_base_mfn)); 60.22 - /* 60.23 - * arch.shadow_table should now hold the next CR3 for shadow 60.24 - */ 60.25 v->arch.hvm_vmx.cpu_cr3 = c->cr3; 60.26 } 60.27 60.28 @@ -556,7 +554,7 @@ int vmx_vmcs_restore(struct vcpu *v, str 60.29 60.30 vmx_vmcs_exit(v); 60.31 60.32 - shadow_update_paging_modes(v); 60.33 + paging_update_paging_modes(v); 60.34 return 0; 60.35 60.36 bad_cr3: 60.37 @@ -1126,7 +1124,7 @@ static int vmx_do_page_fault(unsigned lo 60.38 } 60.39 #endif 60.40 60.41 - result = shadow_fault(va, regs); 60.42 + result = paging_fault(va, regs); 60.43 60.44 TRACE_VMEXIT(2, result); 60.45 #if 0 60.46 @@ -1277,7 +1275,7 @@ static void vmx_do_invlpg(unsigned long 60.47 * We do the safest things first, then try to update the shadow 60.48 * copying from guest 60.49 */ 60.50 - shadow_invlpg(v, va); 60.51 + paging_invlpg(v, va); 60.52 } 60.53 60.54 60.55 @@ -1691,9 +1689,6 @@ static int vmx_world_restore(struct vcpu 60.56 v->arch.guest_table = pagetable_from_pfn(mfn); 60.57 if (old_base_mfn) 60.58 put_page(mfn_to_page(old_base_mfn)); 60.59 - /* 60.60 - * arch.shadow_table should now hold the next CR3 for shadow 60.61 - */ 60.62 v->arch.hvm_vmx.cpu_cr3 = c->cr3; 60.63 } 60.64 60.65 @@ -1753,7 +1748,7 @@ static int vmx_world_restore(struct vcpu 60.66 __vmwrite(GUEST_LDTR_BASE, c->ldtr_base); 60.67 __vmwrite(GUEST_LDTR_AR_BYTES, c->ldtr_arbytes.bytes); 60.68 60.69 - shadow_update_paging_modes(v); 60.70 + paging_update_paging_modes(v); 60.71 return 0; 60.72 60.73 bad_cr3: 60.74 @@ -1906,14 +1901,11 @@ static int vmx_set_cr0(unsigned long val 60.75 v->arch.guest_table = pagetable_from_pfn(mfn); 60.76 if (old_base_mfn) 60.77 put_page(mfn_to_page(old_base_mfn)); 60.78 - shadow_update_paging_modes(v); 60.79 + paging_update_paging_modes(v); 60.80 60.81 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 60.82 (unsigned long) (mfn << PAGE_SHIFT)); 60.83 60.84 - /* 60.85 - * arch->shadow_table should hold the next CR3 for shadow 60.86 - */ 60.87 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", 60.88 v->arch.hvm_vmx.cpu_cr3, mfn); 60.89 } 60.90 @@ -1981,7 +1973,7 @@ static int vmx_set_cr0(unsigned long val 60.91 vm_entry_value &= ~VM_ENTRY_IA32E_MODE; 60.92 __vmwrite(VM_ENTRY_CONTROLS, vm_entry_value); 60.93 } 60.94 - shadow_update_paging_modes(v); 60.95 + paging_update_paging_modes(v); 60.96 } 60.97 60.98 return 1; 60.99 @@ -2070,7 +2062,7 @@ static int mov_to_cr(int gp, int cr, str 60.100 mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT); 60.101 if (mfn != pagetable_get_pfn(v->arch.guest_table)) 60.102 goto bad_cr3; 60.103 - shadow_update_cr3(v); 60.104 + paging_update_cr3(v); 60.105 } else { 60.106 /* 60.107 * If different, make a shadow. 
Check if the PDBR is valid 60.108 @@ -2084,9 +2076,6 @@ static int mov_to_cr(int gp, int cr, str 60.109 v->arch.guest_table = pagetable_from_pfn(mfn); 60.110 if (old_base_mfn) 60.111 put_page(mfn_to_page(old_base_mfn)); 60.112 - /* 60.113 - * arch.shadow_table should now hold the next CR3 for shadow 60.114 - */ 60.115 v->arch.hvm_vmx.cpu_cr3 = value; 60.116 update_cr3(v); 60.117 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx", value); 60.118 @@ -2120,9 +2109,6 @@ static int mov_to_cr(int gp, int cr, str 60.119 HVM_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 60.120 (unsigned long) (mfn << PAGE_SHIFT)); 60.121 60.122 - /* 60.123 - * arch->shadow_table should hold the next CR3 for shadow 60.124 - */ 60.125 HVM_DBG_LOG(DBG_LEVEL_VMMU, 60.126 "Update CR3 value = %lx, mfn = %lx", 60.127 v->arch.hvm_vmx.cpu_cr3, mfn); 60.128 @@ -2148,7 +2134,7 @@ static int mov_to_cr(int gp, int cr, str 60.129 * all TLB entries except global entries. 60.130 */ 60.131 if ( (old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE) ) 60.132 - shadow_update_paging_modes(v); 60.133 + paging_update_paging_modes(v); 60.134 break; 60.135 60.136 case 8:
61.1 --- a/xen/arch/x86/mm.c Thu Feb 15 13:13:36 2007 -0700 61.2 +++ b/xen/arch/x86/mm.c Thu Feb 15 14:09:39 2007 -0700 61.3 @@ -99,6 +99,7 @@ 61.4 #include <xen/event.h> 61.5 #include <xen/iocap.h> 61.6 #include <xen/guest_access.h> 61.7 +#include <asm/paging.h> 61.8 #include <asm/shadow.h> 61.9 #include <asm/page.h> 61.10 #include <asm/flushtlb.h> 61.11 @@ -373,9 +374,6 @@ void write_ptbase(struct vcpu *v) 61.12 /* Should be called after CR3 is updated. 61.13 * Updates vcpu->arch.cr3 and, for HVM guests, vcpu->arch.hvm_vcpu.cpu_cr3. 61.14 * 61.15 - * Also updates other state derived from CR3 (vcpu->arch.guest_vtable, 61.16 - * shadow_vtable, etc). 61.17 - * 61.18 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and 61.19 * for HVM guests, arch.monitor_table and hvm's guest CR3. 61.20 * 61.21 @@ -385,9 +383,9 @@ void update_cr3(struct vcpu *v) 61.22 { 61.23 unsigned long cr3_mfn=0; 61.24 61.25 - if ( shadow_mode_enabled(v->domain) ) 61.26 + if ( paging_mode_enabled(v->domain) ) 61.27 { 61.28 - shadow_update_cr3(v); 61.29 + paging_update_cr3(v); 61.30 return; 61.31 } 61.32 61.33 @@ -615,7 +613,7 @@ get_page_from_l1e( 61.34 * qemu-dm helper process in dom0 to map the domain's memory without 61.35 * messing up the count of "real" writable mappings.) */ 61.36 okay = (((l1e_get_flags(l1e) & _PAGE_RW) && 61.37 - !(unlikely(shadow_mode_external(d) && (d != current->domain)))) 61.38 + !(unlikely(paging_mode_external(d) && (d != current->domain)))) 61.39 ? get_page_and_type(page, d, PGT_writable_page) 61.40 : get_page(page, d)); 61.41 if ( !okay ) 61.42 @@ -804,9 +802,9 @@ void put_page_from_l1e(l1_pgentry_t l1e, 61.43 } 61.44 61.45 /* Remember we didn't take a type-count of foreign writable mappings 61.46 - * to shadow external domains */ 61.47 + * to paging-external domains */ 61.48 if ( (l1e_get_flags(l1e) & _PAGE_RW) && 61.49 - !(unlikely((e != d) && shadow_mode_external(e))) ) 61.50 + !(unlikely((e != d) && paging_mode_external(e))) ) 61.51 { 61.52 put_page_and_type(page); 61.53 } 61.54 @@ -976,6 +974,19 @@ static void pae_flush_pgd( 61.55 l3_pgentry_t *l3tab_ptr; 61.56 struct pae_l3_cache *cache; 61.57 61.58 + if ( unlikely(shadow_mode_enabled(d)) ) 61.59 + { 61.60 + cpumask_t m = CPU_MASK_NONE; 61.61 + /* Re-shadow this l3 table on any vcpus that are using it */ 61.62 + for_each_vcpu ( d, v ) 61.63 + if ( pagetable_get_pfn(v->arch.guest_table) == mfn ) 61.64 + { 61.65 + paging_update_cr3(v); 61.66 + cpus_or(m, m, v->vcpu_dirty_cpumask); 61.67 + } 61.68 + flush_tlb_mask(m); 61.69 + } 61.70 + 61.71 /* If below 4GB then the pgdir is not shadowed in low memory. 
*/ 61.72 if ( !l3tab_needs_shadow(mfn) ) 61.73 return; 61.74 @@ -1259,20 +1270,13 @@ static inline int update_intpte(intpte_t 61.75 { 61.76 int rv = 1; 61.77 #ifndef PTE_UPDATE_WITH_CMPXCHG 61.78 - if ( unlikely(shadow_mode_enabled(v->domain)) ) 61.79 - rv = shadow_write_guest_entry(v, p, new, _mfn(mfn)); 61.80 - else 61.81 - rv = (!__copy_to_user(p, &new, sizeof(new))); 61.82 + rv = paging_write_guest_entry(v, p, new, _mfn(mfn)); 61.83 #else 61.84 { 61.85 intpte_t t = old; 61.86 for ( ; ; ) 61.87 { 61.88 - if ( unlikely(shadow_mode_enabled(v->domain)) ) 61.89 - rv = shadow_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); 61.90 - else 61.91 - rv = (!cmpxchg_user(p, t, new)); 61.92 - 61.93 + rv = paging_cmpxchg_guest_entry(v, p, &t, new, _mfn(mfn)); 61.94 if ( unlikely(rv == 0) ) 61.95 { 61.96 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte 61.97 @@ -1310,7 +1314,7 @@ static int mod_l1_entry(l1_pgentry_t *pl 61.98 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) ) 61.99 return 0; 61.100 61.101 - if ( unlikely(shadow_mode_refcounts(d)) ) 61.102 + if ( unlikely(paging_mode_refcounts(d)) ) 61.103 return UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, current); 61.104 61.105 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT ) 61.106 @@ -1572,7 +1576,7 @@ void free_page_type(struct page_info *pa 61.107 */ 61.108 queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS); 61.109 61.110 - if ( unlikely(shadow_mode_enabled(owner)) ) 61.111 + if ( unlikely(paging_mode_enabled(owner)) ) 61.112 { 61.113 /* A page table is dirtied when its type count becomes zero. */ 61.114 mark_dirty(owner, page_to_mfn(page)); 61.115 @@ -1771,7 +1775,7 @@ int new_guest_cr3(unsigned long mfn) 61.116 #ifdef CONFIG_COMPAT 61.117 if ( IS_COMPAT(d) ) 61.118 { 61.119 - okay = shadow_mode_refcounts(d) 61.120 + okay = paging_mode_refcounts(d) 61.121 ? 0 /* Old code was broken, but what should it be? */ 61.122 : mod_l4_entry(__va(pagetable_get_paddr(v->arch.guest_table)), 61.123 l4e_from_pfn(mfn, (_PAGE_PRESENT|_PAGE_RW| 61.124 @@ -1788,7 +1792,7 @@ int new_guest_cr3(unsigned long mfn) 61.125 return 1; 61.126 } 61.127 #endif 61.128 - okay = shadow_mode_refcounts(d) 61.129 + okay = paging_mode_refcounts(d) 61.130 ? 
get_page_from_pagenr(mfn, d) 61.131 : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); 61.132 if ( unlikely(!okay) ) 61.133 @@ -1808,7 +1812,7 @@ int new_guest_cr3(unsigned long mfn) 61.134 61.135 if ( likely(old_base_mfn != 0) ) 61.136 { 61.137 - if ( shadow_mode_refcounts(d) ) 61.138 + if ( paging_mode_refcounts(d) ) 61.139 put_page(mfn_to_page(old_base_mfn)); 61.140 else 61.141 put_page_and_type(mfn_to_page(old_base_mfn)); 61.142 @@ -1861,7 +1865,7 @@ static int set_foreigndom(domid_t domid) 61.143 d->domain_id); 61.144 okay = 0; 61.145 } 61.146 - else if ( unlikely(shadow_mode_translate(d)) ) 61.147 + else if ( unlikely(paging_mode_translate(d)) ) 61.148 { 61.149 MEM_LOG("Cannot mix foreign mappings with translated domains"); 61.150 okay = 0; 61.151 @@ -2007,7 +2011,7 @@ int do_mmuext_op( 61.152 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) ) 61.153 break; 61.154 61.155 - if ( shadow_mode_refcounts(FOREIGNDOM) ) 61.156 + if ( paging_mode_refcounts(FOREIGNDOM) ) 61.157 break; 61.158 61.159 okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); 61.160 @@ -2032,7 +2036,7 @@ int do_mmuext_op( 61.161 break; 61.162 61.163 case MMUEXT_UNPIN_TABLE: 61.164 - if ( shadow_mode_refcounts(d) ) 61.165 + if ( paging_mode_refcounts(d) ) 61.166 break; 61.167 61.168 if ( unlikely(!(okay = get_page_from_pagenr(mfn, d))) ) 61.169 @@ -2070,7 +2074,7 @@ int do_mmuext_op( 61.170 } 61.171 if (likely(mfn != 0)) 61.172 { 61.173 - if ( shadow_mode_refcounts(d) ) 61.174 + if ( paging_mode_refcounts(d) ) 61.175 okay = get_page_from_pagenr(mfn, d); 61.176 else 61.177 okay = get_page_and_type_from_pagenr( 61.178 @@ -2087,7 +2091,7 @@ int do_mmuext_op( 61.179 v->arch.guest_table_user = pagetable_from_pfn(mfn); 61.180 if ( old_mfn != 0 ) 61.181 { 61.182 - if ( shadow_mode_refcounts(d) ) 61.183 + if ( paging_mode_refcounts(d) ) 61.184 put_page(mfn_to_page(old_mfn)); 61.185 else 61.186 put_page_and_type(mfn_to_page(old_mfn)); 61.187 @@ -2101,8 +2105,8 @@ int do_mmuext_op( 61.188 break; 61.189 61.190 case MMUEXT_INVLPG_LOCAL: 61.191 - if ( !shadow_mode_enabled(d) 61.192 - || shadow_invlpg(v, op.arg1.linear_addr) != 0 ) 61.193 + if ( !paging_mode_enabled(d) 61.194 + || paging_invlpg(v, op.arg1.linear_addr) != 0 ) 61.195 local_flush_tlb_one(op.arg1.linear_addr); 61.196 break; 61.197 61.198 @@ -2149,7 +2153,7 @@ int do_mmuext_op( 61.199 unsigned long ptr = op.arg1.linear_addr; 61.200 unsigned long ents = op.arg2.nr_ents; 61.201 61.202 - if ( shadow_mode_external(d) ) 61.203 + if ( paging_mode_external(d) ) 61.204 { 61.205 MEM_LOG("ignoring SET_LDT hypercall from external " 61.206 "domain %u", d->domain_id); 61.207 @@ -2298,9 +2302,9 @@ int do_mmu_update( 61.208 case PGT_l3_page_table: 61.209 case PGT_l4_page_table: 61.210 { 61.211 - if ( shadow_mode_refcounts(d) ) 61.212 + if ( paging_mode_refcounts(d) ) 61.213 { 61.214 - MEM_LOG("mmu update on shadow-refcounted domain!"); 61.215 + MEM_LOG("mmu update on auto-refcounted domain!"); 61.216 break; 61.217 } 61.218 61.219 @@ -2351,13 +2355,7 @@ int do_mmu_update( 61.220 if ( unlikely(!get_page_type(page, PGT_writable_page)) ) 61.221 break; 61.222 61.223 - if ( unlikely(shadow_mode_enabled(d)) ) 61.224 - okay = shadow_write_guest_entry(v, va, req.val, _mfn(mfn)); 61.225 - else 61.226 - { 61.227 - *(intpte_t *)va = req.val; 61.228 - okay = 1; 61.229 - } 61.230 + okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn)); 61.231 61.232 put_page_type(page); 61.233 } 61.234 @@ -2380,9 +2378,9 @@ int do_mmu_update( 61.235 break; 61.236 } 61.237 
61.238 - if ( unlikely(shadow_mode_translate(FOREIGNDOM)) ) 61.239 + if ( unlikely(paging_mode_translate(FOREIGNDOM)) ) 61.240 { 61.241 - MEM_LOG("Mach-phys update on shadow-translate guest"); 61.242 + MEM_LOG("Mach-phys update on auto-translate guest"); 61.243 break; 61.244 } 61.245 61.246 @@ -2472,7 +2470,7 @@ static int create_grant_pte_mapping( 61.247 goto failed; 61.248 } 61.249 61.250 - if ( !shadow_mode_refcounts(d) ) 61.251 + if ( !paging_mode_refcounts(d) ) 61.252 put_page_from_l1e(ol1e, d); 61.253 61.254 put_page_type(page); 61.255 @@ -2578,7 +2576,7 @@ static int create_grant_va_mapping( 61.256 if ( !okay ) 61.257 return GNTST_general_error; 61.258 61.259 - if ( !shadow_mode_refcounts(d) ) 61.260 + if ( !paging_mode_refcounts(d) ) 61.261 put_page_from_l1e(ol1e, d); 61.262 61.263 return GNTST_okay; 61.264 @@ -2704,7 +2702,7 @@ int do_update_va_mapping(unsigned long v 61.265 61.266 perfc_incrc(calls_to_update_va); 61.267 61.268 - if ( unlikely(!__addr_ok(va) && !shadow_mode_external(d)) ) 61.269 + if ( unlikely(!__addr_ok(va) && !paging_mode_external(d)) ) 61.270 return -EINVAL; 61.271 61.272 LOCK_BIGLOCK(d); 61.273 @@ -2744,8 +2742,8 @@ int do_update_va_mapping(unsigned long v 61.274 switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) ) 61.275 { 61.276 case UVMF_LOCAL: 61.277 - if ( !shadow_mode_enabled(d) 61.278 - || (shadow_invlpg(current, va) != 0) ) 61.279 + if ( !paging_mode_enabled(d) 61.280 + || (paging_invlpg(current, va) != 0) ) 61.281 local_flush_tlb_one(va); 61.282 break; 61.283 case UVMF_ALL: 61.284 @@ -2980,7 +2978,7 @@ long arch_memory_op(int op, XEN_GUEST_HA 61.285 break; 61.286 } 61.287 61.288 - if ( !shadow_mode_translate(d) || (mfn == 0) ) 61.289 + if ( !paging_mode_translate(d) || (mfn == 0) ) 61.290 { 61.291 put_domain(d); 61.292 return -EINVAL; 61.293 @@ -3235,17 +3233,12 @@ static int ptwr_emulated_update( 61.294 if ( do_cmpxchg ) 61.295 { 61.296 int okay; 61.297 + intpte_t t = old; 61.298 ol1e = l1e_from_intpte(old); 61.299 61.300 - if ( shadow_mode_enabled(d) ) 61.301 - { 61.302 - intpte_t t = old; 61.303 - okay = shadow_cmpxchg_guest_entry(v, (intpte_t *) pl1e, 61.304 - &t, val, _mfn(mfn)); 61.305 - okay = (okay && t == old); 61.306 - } 61.307 - else 61.308 - okay = (cmpxchg((intpte_t *)pl1e, old, val) == old); 61.309 + okay = paging_cmpxchg_guest_entry(v, (intpte_t *) pl1e, 61.310 + &t, val, _mfn(mfn)); 61.311 + okay = (okay && t == old); 61.312 61.313 if ( !okay ) 61.314 {
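The largest mm.c change is that guest PTE updates (update_intpte, the mmu_update writable-page path, and ptwr emulation) now always go through paging_write_guest_entry()/paging_cmpxchg_guest_entry() rather than branching on shadow_mode_enabled(), while keeping the same compare-and-swap retry pattern. A self-contained sketch of that retry pattern follows; it uses GCC's __sync builtin in place of Xen's cmpxchg_user and a made-up flags mask, so it is illustrative rather than a copy of the real helper.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t intpte_t;

static int update_intpte(intpte_t *p, intpte_t old, intpte_t new)
{
    intpte_t t = old;

    for ( ; ; )
    {
        intpte_t seen = __sync_val_compare_and_swap(p, t, new);
        if (seen == t)
            return 1;          /* swapped: entry now holds 'new'        */
        if ((seen & ~0xfffULL) != (old & ~0xfffULL))
            return 0;          /* frame changed underneath us: give up  */
        t = seen;              /* only flag bits raced: retry with them */
    }
}

int main(void)
{
    intpte_t pte = 0x1000 | 0x1;
    printf("updated=%d pte=%#llx\n",
           update_intpte(&pte, pte, 0x2000 | 0x3),
           (unsigned long long)pte);
    return 0;
}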
62.1 --- a/xen/arch/x86/mm/Makefile Thu Feb 15 13:13:36 2007 -0700 62.2 +++ b/xen/arch/x86/mm/Makefile Thu Feb 15 14:09:39 2007 -0700 62.3 @@ -1,1 +1,4 @@ 62.4 subdir-y += shadow 62.5 + 62.6 +obj-y += paging.o 62.7 +obj-y += p2m.o
63.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 63.2 +++ b/xen/arch/x86/mm/p2m.c Thu Feb 15 14:09:39 2007 -0700 63.3 @@ -0,0 +1,699 @@ 63.4 +/****************************************************************************** 63.5 + * arch/x86/mm/p2m.c 63.6 + * 63.7 + * physical-to-machine mappings for automatically-translated domains. 63.8 + * 63.9 + * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices. 63.10 + * Parts of this code are Copyright (c) 2006 by XenSource Inc. 63.11 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman 63.12 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. 63.13 + * 63.14 + * This program is free software; you can redistribute it and/or modify 63.15 + * it under the terms of the GNU General Public License as published by 63.16 + * the Free Software Foundation; either version 2 of the License, or 63.17 + * (at your option) any later version. 63.18 + * 63.19 + * This program is distributed in the hope that it will be useful, 63.20 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 63.21 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 63.22 + * GNU General Public License for more details. 63.23 + * 63.24 + * You should have received a copy of the GNU General Public License 63.25 + * along with this program; if not, write to the Free Software 63.26 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 63.27 + */ 63.28 + 63.29 +#include <asm/domain.h> 63.30 +#include <asm/page.h> 63.31 +#include <asm/paging.h> 63.32 +#include <asm/p2m.h> 63.33 + 63.34 +/* Debugging and auditing of the P2M code? */ 63.35 +#define P2M_AUDIT 0 63.36 +#define P2M_DEBUGGING 1 63.37 + 63.38 +/* The P2M lock. This protects all updates to the p2m table. 63.39 + * Updates are expected to be safe against concurrent reads, 63.40 + * which do *not* require the lock */ 63.41 + 63.42 +#define p2m_lock_init(_d) \ 63.43 + do { \ 63.44 + spin_lock_init(&(_d)->arch.p2m.lock); \ 63.45 + (_d)->arch.p2m.locker = -1; \ 63.46 + (_d)->arch.p2m.locker_function = "nobody"; \ 63.47 + } while (0) 63.48 + 63.49 +#define p2m_lock(_d) \ 63.50 + do { \ 63.51 + if ( unlikely((_d)->arch.p2m.locker == current->processor) )\ 63.52 + { \ 63.53 + printk("Error: p2m lock held by %s\n", \ 63.54 + (_d)->arch.p2m.locker_function); \ 63.55 + BUG(); \ 63.56 + } \ 63.57 + spin_lock(&(_d)->arch.p2m.lock); \ 63.58 + ASSERT((_d)->arch.p2m.locker == -1); \ 63.59 + (_d)->arch.p2m.locker = current->processor; \ 63.60 + (_d)->arch.p2m.locker_function = __func__; \ 63.61 + } while (0) 63.62 + 63.63 +#define p2m_unlock(_d) \ 63.64 + do { \ 63.65 + ASSERT((_d)->arch.p2m.locker == current->processor); \ 63.66 + (_d)->arch.p2m.locker = -1; \ 63.67 + (_d)->arch.p2m.locker_function = "nobody"; \ 63.68 + spin_unlock(&(_d)->arch.p2m.lock); \ 63.69 + } while (0) 63.70 + 63.71 + 63.72 + 63.73 +/* Printouts */ 63.74 +#define P2M_PRINTK(_f, _a...) \ 63.75 + debugtrace_printk("p2m: %s(): " _f, __func__, ##_a) 63.76 +#define P2M_ERROR(_f, _a...) \ 63.77 + printk("pg error: %s(): " _f, __func__, ##_a) 63.78 +#if P2M_DEBUGGING 63.79 +#define P2M_DEBUG(_f, _a...) \ 63.80 + debugtrace_printk("p2mdebug: %s(): " _f, __func__, ##_a) 63.81 +#else 63.82 +#define P2M_DEBUG(_f, _a...) 
do { (void)(_f); } while(0) 63.83 +#endif 63.84 + 63.85 + 63.86 +/* Override macros from asm/page.h to make them work with mfn_t */ 63.87 +#undef mfn_to_page 63.88 +#define mfn_to_page(_m) (frame_table + mfn_x(_m)) 63.89 +#undef mfn_valid 63.90 +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 63.91 +#undef page_to_mfn 63.92 +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) 63.93 + 63.94 + 63.95 + 63.96 +// Find the next level's P2M entry, checking for out-of-range gfn's... 63.97 +// Returns NULL on error. 63.98 +// 63.99 +static l1_pgentry_t * 63.100 +p2m_find_entry(void *table, unsigned long *gfn_remainder, 63.101 + unsigned long gfn, u32 shift, u32 max) 63.102 +{ 63.103 + u32 index; 63.104 + 63.105 + index = *gfn_remainder >> shift; 63.106 + if ( index >= max ) 63.107 + { 63.108 + P2M_DEBUG("gfn=0x%lx out of range " 63.109 + "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", 63.110 + gfn, *gfn_remainder, shift, index, max); 63.111 + return NULL; 63.112 + } 63.113 + *gfn_remainder &= (1 << shift) - 1; 63.114 + return (l1_pgentry_t *)table + index; 63.115 +} 63.116 + 63.117 +// Walk one level of the P2M table, allocating a new table if required. 63.118 +// Returns 0 on error. 63.119 +// 63.120 +static int 63.121 +p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, 63.122 + unsigned long *gfn_remainder, unsigned long gfn, u32 shift, 63.123 + u32 max, unsigned long type) 63.124 +{ 63.125 + l1_pgentry_t *p2m_entry; 63.126 + l1_pgentry_t new_entry; 63.127 + void *next; 63.128 + ASSERT(d->arch.p2m.alloc_page); 63.129 + 63.130 + if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, 63.131 + shift, max)) ) 63.132 + return 0; 63.133 + 63.134 + if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) 63.135 + { 63.136 + struct page_info *pg = d->arch.p2m.alloc_page(d); 63.137 + if ( pg == NULL ) 63.138 + return 0; 63.139 + list_add_tail(&pg->list, &d->arch.p2m.pages); 63.140 + pg->u.inuse.type_info = type | 1 | PGT_validated; 63.141 + pg->count_info = 1; 63.142 + 63.143 + new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)), 63.144 + __PAGE_HYPERVISOR|_PAGE_USER); 63.145 + 63.146 + switch ( type ) { 63.147 + case PGT_l3_page_table: 63.148 + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 4); 63.149 + break; 63.150 + case PGT_l2_page_table: 63.151 + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 3); 63.152 + break; 63.153 + case PGT_l1_page_table: 63.154 + paging_write_p2m_entry(d, gfn, p2m_entry, new_entry, 2); 63.155 + break; 63.156 + default: 63.157 + BUG(); 63.158 + break; 63.159 + } 63.160 + } 63.161 + *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); 63.162 + next = map_domain_page(mfn_x(*table_mfn)); 63.163 + unmap_domain_page(*table); 63.164 + *table = next; 63.165 + 63.166 + return 1; 63.167 +} 63.168 + 63.169 +// Returns 0 on error (out of memory) 63.170 +static int 63.171 +set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) 63.172 +{ 63.173 + // XXX -- this might be able to be faster iff current->domain == d 63.174 + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); 63.175 + void *table =map_domain_page(mfn_x(table_mfn)); 63.176 + unsigned long gfn_remainder = gfn; 63.177 + l1_pgentry_t *p2m_entry; 63.178 + l1_pgentry_t entry_content; 63.179 + int rv=0; 63.180 + 63.181 +#if CONFIG_PAGING_LEVELS >= 4 63.182 + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 63.183 + L4_PAGETABLE_SHIFT - PAGE_SHIFT, 63.184 + L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) 63.185 + goto out; 63.186 +#endif 63.187 +#if CONFIG_PAGING_LEVELS >= 3 63.188 + // 
When using PAE Xen, we only allow 33 bits of pseudo-physical 63.189 + // address in translated guests (i.e. 8 GBytes). This restriction 63.190 + // comes from wanting to map the P2M table into the 16MB RO_MPT hole 63.191 + // in Xen's address space for translated PV guests. 63.192 + // 63.193 + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 63.194 + L3_PAGETABLE_SHIFT - PAGE_SHIFT, 63.195 + (CONFIG_PAGING_LEVELS == 3 63.196 + ? 8 63.197 + : L3_PAGETABLE_ENTRIES), 63.198 + PGT_l2_page_table) ) 63.199 + goto out; 63.200 +#endif 63.201 + if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 63.202 + L2_PAGETABLE_SHIFT - PAGE_SHIFT, 63.203 + L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) 63.204 + goto out; 63.205 + 63.206 + p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, 63.207 + 0, L1_PAGETABLE_ENTRIES); 63.208 + ASSERT(p2m_entry); 63.209 + 63.210 + /* Track the highest gfn for which we have ever had a valid mapping */ 63.211 + if ( mfn_valid(mfn) && (gfn > d->arch.p2m.max_mapped_pfn) ) 63.212 + d->arch.p2m.max_mapped_pfn = gfn; 63.213 + 63.214 + if ( mfn_valid(mfn) ) 63.215 + entry_content = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); 63.216 + else 63.217 + entry_content = l1e_empty(); 63.218 + 63.219 + /* level 1 entry */ 63.220 + paging_write_p2m_entry(d, gfn, p2m_entry, entry_content, 1); 63.221 + 63.222 + /* Success */ 63.223 + rv = 1; 63.224 + 63.225 + out: 63.226 + unmap_domain_page(table); 63.227 + return rv; 63.228 +} 63.229 + 63.230 + 63.231 +/* Init the datastructures for later use by the p2m code */ 63.232 +void p2m_init(struct domain *d) 63.233 +{ 63.234 + p2m_lock_init(d); 63.235 + INIT_LIST_HEAD(&d->arch.p2m.pages); 63.236 +} 63.237 + 63.238 + 63.239 +// Allocate a new p2m table for a domain. 63.240 +// 63.241 +// The structure of the p2m table is that of a pagetable for xen (i.e. it is 63.242 +// controlled by CONFIG_PAGING_LEVELS). 63.243 +// 63.244 +// The alloc_page and free_page functions will be used to get memory to 63.245 +// build the p2m, and to release it again at the end of day. 63.246 +// 63.247 +// Returns 0 for success or -errno. 
63.248 +// 63.249 +int p2m_alloc_table(struct domain *d, 63.250 + struct page_info * (*alloc_page)(struct domain *d), 63.251 + void (*free_page)(struct domain *d, struct page_info *pg)) 63.252 + 63.253 +{ 63.254 + mfn_t mfn; 63.255 + struct list_head *entry; 63.256 + struct page_info *page, *p2m_top; 63.257 + unsigned int page_count = 0; 63.258 + unsigned long gfn; 63.259 + 63.260 + p2m_lock(d); 63.261 + 63.262 + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) 63.263 + { 63.264 + P2M_ERROR("p2m already allocated for this domain\n"); 63.265 + p2m_unlock(d); 63.266 + return -EINVAL; 63.267 + } 63.268 + 63.269 + P2M_PRINTK("allocating p2m table\n"); 63.270 + 63.271 + d->arch.p2m.alloc_page = alloc_page; 63.272 + d->arch.p2m.free_page = free_page; 63.273 + 63.274 + p2m_top = d->arch.p2m.alloc_page(d); 63.275 + if ( p2m_top == NULL ) 63.276 + { 63.277 + p2m_unlock(d); 63.278 + return -ENOMEM; 63.279 + } 63.280 +list_add_tail(&p2m_top->list, &d->arch.p2m.pages); 63.281 + 63.282 + p2m_top->count_info = 1; 63.283 + p2m_top->u.inuse.type_info = 63.284 +#if CONFIG_PAGING_LEVELS == 4 63.285 + PGT_l4_page_table 63.286 +#elif CONFIG_PAGING_LEVELS == 3 63.287 + PGT_l3_page_table 63.288 +#elif CONFIG_PAGING_LEVELS == 2 63.289 + PGT_l2_page_table 63.290 +#endif 63.291 + | 1 | PGT_validated; 63.292 + 63.293 + d->arch.phys_table = pagetable_from_mfn(page_to_mfn(p2m_top)); 63.294 + 63.295 + P2M_PRINTK("populating p2m table\n"); 63.296 + 63.297 + /* Initialise physmap tables for slot zero. Other code assumes this. */ 63.298 + gfn = 0; 63.299 +mfn = _mfn(INVALID_MFN); 63.300 + if ( !set_p2m_entry(d, gfn, mfn) ) 63.301 + goto error; 63.302 + 63.303 + for ( entry = d->page_list.next; 63.304 + entry != &d->page_list; 63.305 + entry = entry->next ) 63.306 + { 63.307 + page = list_entry(entry, struct page_info, list); 63.308 + mfn = page_to_mfn(page); 63.309 + gfn = get_gpfn_from_mfn(mfn_x(mfn)); 63.310 + page_count++; 63.311 + if ( 63.312 +#ifdef __x86_64__ 63.313 + (gfn != 0x5555555555555555L) 63.314 +#else 63.315 + (gfn != 0x55555555L) 63.316 +#endif 63.317 + && gfn != INVALID_M2P_ENTRY 63.318 + && !set_p2m_entry(d, gfn, mfn) ) 63.319 + goto error; 63.320 + } 63.321 + 63.322 + P2M_PRINTK("p2m table initialised (%u pages)\n", page_count); 63.323 + p2m_unlock(d); 63.324 + return 0; 63.325 + 63.326 + error: 63.327 + P2M_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" 63.328 + PRI_mfn "\n", gfn, mfn_x(mfn)); 63.329 + p2m_unlock(d); 63.330 + return -ENOMEM; 63.331 +} 63.332 + 63.333 +void p2m_teardown(struct domain *d) 63.334 +/* Return all the p2m pages to Xen. 
63.335 + * We know we don't have any extra mappings to these pages */ 63.336 +{ 63.337 + struct list_head *entry, *n; 63.338 + struct page_info *pg; 63.339 + 63.340 + p2m_lock(d); 63.341 + d->arch.phys_table = pagetable_null(); 63.342 + 63.343 + list_for_each_safe(entry, n, &d->arch.p2m.pages) 63.344 + { 63.345 + pg = list_entry(entry, struct page_info, list); 63.346 + list_del(entry); 63.347 + d->arch.p2m.free_page(d, pg); 63.348 + } 63.349 + p2m_unlock(d); 63.350 +} 63.351 + 63.352 +mfn_t 63.353 +gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) 63.354 +/* Read another domain's p2m entries */ 63.355 +{ 63.356 + mfn_t mfn; 63.357 + paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; 63.358 + l2_pgentry_t *l2e; 63.359 + l1_pgentry_t *l1e; 63.360 + 63.361 + ASSERT(paging_mode_translate(d)); 63.362 + mfn = pagetable_get_mfn(d->arch.phys_table); 63.363 + 63.364 + 63.365 + if ( gpfn > d->arch.p2m.max_mapped_pfn ) 63.366 + /* This pfn is higher than the highest the p2m map currently holds */ 63.367 + return _mfn(INVALID_MFN); 63.368 + 63.369 +#if CONFIG_PAGING_LEVELS >= 4 63.370 + { 63.371 + l4_pgentry_t *l4e = map_domain_page(mfn_x(mfn)); 63.372 + l4e += l4_table_offset(addr); 63.373 + if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) 63.374 + { 63.375 + unmap_domain_page(l4e); 63.376 + return _mfn(INVALID_MFN); 63.377 + } 63.378 + mfn = _mfn(l4e_get_pfn(*l4e)); 63.379 + unmap_domain_page(l4e); 63.380 + } 63.381 +#endif 63.382 +#if CONFIG_PAGING_LEVELS >= 3 63.383 + { 63.384 + l3_pgentry_t *l3e = map_domain_page(mfn_x(mfn)); 63.385 +#if CONFIG_PAGING_LEVELS == 3 63.386 + /* On PAE hosts the p2m has eight l3 entries, not four (see 63.387 + * shadow_set_p2m_entry()) so we can't use l3_table_offset. 63.388 + * Instead, just count the number of l3es from zero. It's safe 63.389 + * to do this because we already checked that the gfn is within 63.390 + * the bounds of the p2m. 
*/ 63.391 + l3e += (addr >> L3_PAGETABLE_SHIFT); 63.392 +#else 63.393 + l3e += l3_table_offset(addr); 63.394 +#endif 63.395 + if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) 63.396 + { 63.397 + unmap_domain_page(l3e); 63.398 + return _mfn(INVALID_MFN); 63.399 + } 63.400 + mfn = _mfn(l3e_get_pfn(*l3e)); 63.401 + unmap_domain_page(l3e); 63.402 + } 63.403 +#endif 63.404 + 63.405 + l2e = map_domain_page(mfn_x(mfn)); 63.406 + l2e += l2_table_offset(addr); 63.407 + if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) 63.408 + { 63.409 + unmap_domain_page(l2e); 63.410 + return _mfn(INVALID_MFN); 63.411 + } 63.412 + mfn = _mfn(l2e_get_pfn(*l2e)); 63.413 + unmap_domain_page(l2e); 63.414 + 63.415 + l1e = map_domain_page(mfn_x(mfn)); 63.416 + l1e += l1_table_offset(addr); 63.417 + if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) 63.418 + { 63.419 + unmap_domain_page(l1e); 63.420 + return _mfn(INVALID_MFN); 63.421 + } 63.422 + mfn = _mfn(l1e_get_pfn(*l1e)); 63.423 + unmap_domain_page(l1e); 63.424 + 63.425 + return mfn; 63.426 +} 63.427 + 63.428 +#if P2M_AUDIT 63.429 +static void audit_p2m(struct domain *d) 63.430 +{ 63.431 + struct list_head *entry; 63.432 + struct page_info *page; 63.433 + struct domain *od; 63.434 + unsigned long mfn, gfn, m2pfn, lp2mfn = 0; 63.435 + mfn_t p2mfn; 63.436 + unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; 63.437 + int test_linear; 63.438 + 63.439 + if ( !paging_mode_translate(d) ) 63.440 + return; 63.441 + 63.442 + //P2M_PRINTK("p2m audit starts\n"); 63.443 + 63.444 + test_linear = ( (d == current->domain) 63.445 + && !pagetable_is_null(current->arch.monitor_table) ); 63.446 + if ( test_linear ) 63.447 + local_flush_tlb(); 63.448 + 63.449 + /* Audit part one: walk the domain's page allocation list, checking 63.450 + * the m2p entries. */ 63.451 + for ( entry = d->page_list.next; 63.452 + entry != &d->page_list; 63.453 + entry = entry->next ) 63.454 + { 63.455 + page = list_entry(entry, struct page_info, list); 63.456 + mfn = mfn_x(page_to_mfn(page)); 63.457 + 63.458 + // P2M_PRINTK("auditing guest page, mfn=%#lx\n", mfn); 63.459 + 63.460 + od = page_get_owner(page); 63.461 + 63.462 + if ( od != d ) 63.463 + { 63.464 + P2M_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", 63.465 + mfn, od, (od?od->domain_id:-1), d, d->domain_id); 63.466 + continue; 63.467 + } 63.468 + 63.469 + gfn = get_gpfn_from_mfn(mfn); 63.470 + if ( gfn == INVALID_M2P_ENTRY ) 63.471 + { 63.472 + orphans_i++; 63.473 + //P2M_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", 63.474 + // mfn); 63.475 + continue; 63.476 + } 63.477 + 63.478 + if ( gfn == 0x55555555 ) 63.479 + { 63.480 + orphans_d++; 63.481 + //P2M_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", 63.482 + // mfn); 63.483 + continue; 63.484 + } 63.485 + 63.486 + p2mfn = gfn_to_mfn_foreign(d, gfn); 63.487 + if ( mfn_x(p2mfn) != mfn ) 63.488 + { 63.489 + mpbad++; 63.490 + P2M_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" 63.491 + " (-> gfn %#lx)\n", 63.492 + mfn, gfn, mfn_x(p2mfn), 63.493 + (mfn_valid(p2mfn) 63.494 + ? get_gpfn_from_mfn(mfn_x(p2mfn)) 63.495 + : -1u)); 63.496 + /* This m2p entry is stale: the domain has another frame in 63.497 + * this physical slot. No great disaster, but for neatness, 63.498 + * blow away the m2p entry. 
*/ 63.499 + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); 63.500 + } 63.501 + 63.502 + if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) ) 63.503 + { 63.504 + lp2mfn = mfn_x(gfn_to_mfn_current(gfn)); 63.505 + if ( lp2mfn != mfn_x(p2mfn) ) 63.506 + { 63.507 + P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " 63.508 + "(!= mfn %#lx)\n", gfn, lp2mfn, mfn_x(p2mfn)); 63.509 + } 63.510 + } 63.511 + 63.512 + // P2M_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", 63.513 + // mfn, gfn, p2mfn, lp2mfn); 63.514 + } 63.515 + 63.516 + /* Audit part two: walk the domain's p2m table, checking the entries. */ 63.517 + if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) 63.518 + { 63.519 + l2_pgentry_t *l2e; 63.520 + l1_pgentry_t *l1e; 63.521 + int i1, i2; 63.522 + 63.523 +#if CONFIG_PAGING_LEVELS == 4 63.524 + l4_pgentry_t *l4e; 63.525 + l3_pgentry_t *l3e; 63.526 + int i3, i4; 63.527 + l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); 63.528 +#elif CONFIG_PAGING_LEVELS == 3 63.529 + l3_pgentry_t *l3e; 63.530 + int i3; 63.531 + l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); 63.532 +#else /* CONFIG_PAGING_LEVELS == 2 */ 63.533 + l2e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table))); 63.534 +#endif 63.535 + 63.536 + gfn = 0; 63.537 +#if CONFIG_PAGING_LEVELS >= 3 63.538 +#if CONFIG_PAGING_LEVELS >= 4 63.539 + for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) 63.540 + { 63.541 + if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) 63.542 + { 63.543 + gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); 63.544 + continue; 63.545 + } 63.546 + l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4])))); 63.547 +#endif /* now at levels 3 or 4... */ 63.548 + for ( i3 = 0; 63.549 + i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8); 63.550 + i3++ ) 63.551 + { 63.552 + if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) 63.553 + { 63.554 + gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); 63.555 + continue; 63.556 + } 63.557 + l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3])))); 63.558 +#endif /* all levels... 
*/ 63.559 + for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) 63.560 + { 63.561 + if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) 63.562 + { 63.563 + gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); 63.564 + continue; 63.565 + } 63.566 + l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2])))); 63.567 + 63.568 + for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) 63.569 + { 63.570 + if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) 63.571 + continue; 63.572 + mfn = l1e_get_pfn(l1e[i1]); 63.573 + ASSERT(mfn_valid(_mfn(mfn))); 63.574 + m2pfn = get_gpfn_from_mfn(mfn); 63.575 + if ( m2pfn != gfn ) 63.576 + { 63.577 + pmbad++; 63.578 + P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx" 63.579 + " -> gfn %#lx\n", gfn, mfn, m2pfn); 63.580 + BUG(); 63.581 + } 63.582 + } 63.583 + unmap_domain_page(l1e); 63.584 + } 63.585 +#if CONFIG_PAGING_LEVELS >= 3 63.586 + unmap_domain_page(l2e); 63.587 + } 63.588 +#if CONFIG_PAGING_LEVELS >= 4 63.589 + unmap_domain_page(l3e); 63.590 + } 63.591 +#endif 63.592 +#endif 63.593 + 63.594 +#if CONFIG_PAGING_LEVELS == 4 63.595 + unmap_domain_page(l4e); 63.596 +#elif CONFIG_PAGING_LEVELS == 3 63.597 + unmap_domain_page(l3e); 63.598 +#else /* CONFIG_PAGING_LEVELS == 2 */ 63.599 + unmap_domain_page(l2e); 63.600 +#endif 63.601 + 63.602 + } 63.603 + 63.604 + //P2M_PRINTK("p2m audit complete\n"); 63.605 + //if ( orphans_i | orphans_d | mpbad | pmbad ) 63.606 + // P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", 63.607 + // orphans_i + orphans_d, orphans_i, orphans_d, 63.608 + if ( mpbad | pmbad ) 63.609 + P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", 63.610 + pmbad, mpbad); 63.611 +} 63.612 +#else 63.613 +#define audit_p2m(_d) do { (void)(_d); } while(0) 63.614 +#endif /* P2M_AUDIT */ 63.615 + 63.616 + 63.617 + 63.618 +static void 63.619 +p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) 63.620 +{ 63.621 + if ( !paging_mode_translate(d) ) 63.622 + return; 63.623 + P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); 63.624 + 63.625 + ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn); 63.626 + //ASSERT(mfn_to_gfn(d, mfn) == gfn); 63.627 + 63.628 + set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); 63.629 + set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); 63.630 +} 63.631 + 63.632 +void 63.633 +guest_physmap_remove_page(struct domain *d, unsigned long gfn, 63.634 + unsigned long mfn) 63.635 +{ 63.636 + p2m_lock(d); 63.637 + audit_p2m(d); 63.638 + p2m_remove_page(d, gfn, mfn); 63.639 + audit_p2m(d); 63.640 + p2m_unlock(d); 63.641 +} 63.642 + 63.643 +void 63.644 +guest_physmap_add_page(struct domain *d, unsigned long gfn, 63.645 + unsigned long mfn) 63.646 +{ 63.647 + unsigned long ogfn; 63.648 + mfn_t omfn; 63.649 + 63.650 + if ( !paging_mode_translate(d) ) 63.651 + return; 63.652 + 63.653 + p2m_lock(d); 63.654 + audit_p2m(d); 63.655 + 63.656 + P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn); 63.657 + 63.658 + omfn = gfn_to_mfn(d, gfn); 63.659 + if ( mfn_valid(omfn) ) 63.660 + { 63.661 + set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); 63.662 + set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); 63.663 + } 63.664 + 63.665 + ogfn = mfn_to_gfn(d, _mfn(mfn)); 63.666 + if ( 63.667 +#ifdef __x86_64__ 63.668 + (ogfn != 0x5555555555555555L) 63.669 +#else 63.670 + (ogfn != 0x55555555L) 63.671 +#endif 63.672 + && (ogfn != INVALID_M2P_ENTRY) 63.673 + && (ogfn != gfn) ) 63.674 + { 63.675 + /* This machine frame is already mapped at another physical address */ 63.676 + P2M_DEBUG("aliased! 
mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", 63.677 + mfn, ogfn, gfn); 63.678 + if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) ) 63.679 + { 63.680 + P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n", 63.681 + ogfn , mfn_x(omfn)); 63.682 + if ( mfn_x(omfn) == mfn ) 63.683 + p2m_remove_page(d, ogfn, mfn); 63.684 + } 63.685 + } 63.686 + 63.687 + set_p2m_entry(d, gfn, _mfn(mfn)); 63.688 + set_gpfn_from_mfn(mfn, gfn); 63.689 + 63.690 + audit_p2m(d); 63.691 + p2m_unlock(d); 63.692 +} 63.693 + 63.694 + 63.695 +/* 63.696 + * Local variables: 63.697 + * mode: C 63.698 + * c-set-style: "BSD" 63.699 + * c-basic-offset: 4 63.700 + * indent-tabs-mode: nil 63.701 + * End: 63.702 + */
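The new p2m.c above keeps every guest-physmap update behind the p2m lock and, when P2M_AUDIT is defined, cross-checks the p2m and m2p tables before and after each change. A minimal sketch of the resulting calling pattern, using only the functions defined in the hunk (the wrapper itself is illustrative, not part of the changeset):

/* Sketch: move a guest frame from old_mfn to new_mfn at the same gfn.
 * Both calls take the p2m lock and run audit_p2m() around the update. */
static void example_replace_backing_frame(struct domain *d, unsigned long gfn,
                                          unsigned long old_mfn,
                                          unsigned long new_mfn)
{
    /* Invalidate the old gfn->mfn mapping and its m2p entry. */
    guest_physmap_remove_page(d, gfn, old_mfn);
    /* Install the new frame; if new_mfn was already mapped at another
     * gfn, guest_physmap_add_page() detects the alias and removes it. */
    guest_physmap_add_page(d, gfn, new_mfn);
}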
64.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 64.2 +++ b/xen/arch/x86/mm/paging.c Thu Feb 15 14:09:39 2007 -0700 64.3 @@ -0,0 +1,143 @@ 64.4 +/****************************************************************************** 64.5 + * arch/x86/paging.c 64.6 + * 64.7 + * x86 specific paging support 64.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 64.9 + * Copyright (c) 2007 XenSource Inc. 64.10 + * 64.11 + * This program is free software; you can redistribute it and/or modify 64.12 + * it under the terms of the GNU General Public License as published by 64.13 + * the Free Software Foundation; either version 2 of the License, or 64.14 + * (at your option) any later version. 64.15 + * 64.16 + * This program is distributed in the hope that it will be useful, 64.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 64.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 64.19 + * GNU General Public License for more details. 64.20 + * 64.21 + * You should have received a copy of the GNU General Public License 64.22 + * along with this program; if not, write to the Free Software 64.23 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 64.24 + */ 64.25 + 64.26 +#include <xen/init.h> 64.27 +#include <asm/paging.h> 64.28 +#include <asm/shadow.h> 64.29 +#include <asm/p2m.h> 64.30 + 64.31 +/* Xen command-line option to enable hardware-assisted paging */ 64.32 +int opt_hap_enabled = 0; 64.33 +boolean_param("hap", opt_hap_enabled); 64.34 + 64.35 +/* Printouts */ 64.36 +#define PAGING_PRINTK(_f, _a...) \ 64.37 + debugtrace_printk("pg: %s(): " _f, __func__, ##_a) 64.38 +#define PAGING_ERROR(_f, _a...) \ 64.39 + printk("pg error: %s(): " _f, __func__, ##_a) 64.40 +#define PAGING_DEBUG(flag, _f, _a...) \ 64.41 + do { \ 64.42 + if (PAGING_DEBUG_ ## flag) \ 64.43 + debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \ 64.44 + } while (0) 64.45 + 64.46 + 64.47 +/* Domain paging struct initialization. */ 64.48 +void paging_domain_init(struct domain *d) 64.49 +{ 64.50 + p2m_init(d); 64.51 + shadow_domain_init(d); 64.52 +} 64.53 + 64.54 +/* vcpu paging struct initialization goes here */ 64.55 +void paging_vcpu_init(struct vcpu *v) 64.56 +{ 64.57 + shadow_vcpu_init(v); 64.58 +} 64.59 + 64.60 + 64.61 +int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, 64.62 + XEN_GUEST_HANDLE(void) u_domctl) 64.63 +{ 64.64 + /* Here, dispatch domctl to the appropriate paging code */ 64.65 + return shadow_domctl(d, sc, u_domctl); 64.66 +} 64.67 + 64.68 +/* Call when destroying a domain */ 64.69 +void paging_teardown(struct domain *d) 64.70 +{ 64.71 + shadow_teardown(d); 64.72 + /* Call other modes' teardown code here */ 64.73 +} 64.74 + 64.75 +/* Call once all of the references to the domain have gone away */ 64.76 +void paging_final_teardown(struct domain *d) 64.77 +{ 64.78 + shadow_teardown(d); 64.79 + /* Call other modes' final teardown code here */ 64.80 +} 64.81 + 64.82 +/* Enable an arbitrary paging-assistance mode. Call once at domain 64.83 + * creation. 
*/ 64.84 +int paging_enable(struct domain *d, u32 mode) 64.85 +{ 64.86 + if ( mode & PG_SH_enable ) 64.87 + return shadow_enable(d, mode); 64.88 + else 64.89 + /* No other modes supported yet */ 64.90 + return -EINVAL; 64.91 +} 64.92 + 64.93 +/* Print paging-assistance info to the console */ 64.94 +void paging_dump_domain_info(struct domain *d) 64.95 +{ 64.96 + if ( paging_mode_enabled(d) ) 64.97 + { 64.98 + printk(" paging assistance: "); 64.99 + if ( paging_mode_shadow(d) ) 64.100 + printk("shadow "); 64.101 + if ( paging_mode_hap(d) ) 64.102 + printk("hap "); 64.103 + if ( paging_mode_refcounts(d) ) 64.104 + printk("refcounts "); 64.105 + if ( paging_mode_log_dirty(d) ) 64.106 + printk("log_dirty "); 64.107 + if ( paging_mode_translate(d) ) 64.108 + printk("translate "); 64.109 + if ( paging_mode_external(d) ) 64.110 + printk("external "); 64.111 + printk("\n"); 64.112 + } 64.113 +} 64.114 + 64.115 +void paging_dump_vcpu_info(struct vcpu *v) 64.116 +{ 64.117 + if ( paging_mode_enabled(v->domain) ) 64.118 + { 64.119 + printk(" paging assistance: "); 64.120 + if ( paging_mode_shadow(v->domain) ) 64.121 + { 64.122 + if ( v->arch.paging.mode ) 64.123 + printk("shadowed %u-on-%u, %stranslated\n", 64.124 + v->arch.paging.mode->guest_levels, 64.125 + v->arch.paging.mode->shadow.shadow_levels, 64.126 + paging_vcpu_mode_translate(v) ? "" : "not "); 64.127 + else 64.128 + printk("not shadowed\n"); 64.129 + } 64.130 + else if ( paging_mode_hap(v->domain) && v->arch.paging.mode ) 64.131 + printk("hap, %u levels\n", 64.132 + v->arch.paging.mode->guest_levels); 64.133 + else 64.134 + printk("none\n"); 64.135 + } 64.136 +} 64.137 + 64.138 + 64.139 +/* 64.140 + * Local variables: 64.141 + * mode: C 64.142 + * c-set-style: "BSD" 64.143 + * c-basic-offset: 4 64.144 + * indent-tabs-mode: nil 64.145 + * End: 64.146 + */
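The new paging.c is deliberately thin: it is the single entry point for paging assistance, and every operation currently forwards to the shadow code, with the "hap" boot parameter reserved for hardware-assisted paging. A hedged sketch of how domain-creation code can drive it (the wrapper and its error handling are illustrative; only the paging_* calls and PG_* flags come from the changeset):

/* Sketch: bring up paging assistance for a translated (HVM-style) domain. */
static int example_setup_paging(struct domain *d, struct vcpu *v)
{
    paging_domain_init(d);   /* p2m_init() + shadow_domain_init() */
    paging_vcpu_init(v);     /* installs a default paging mode pointer */

    /* paging_enable() accepts only shadow modes for now and returns
     * -EINVAL otherwise; translate requires refcounts, and external
     * requires translate (see the sanity checks in shadow_enable()). */
    return paging_enable(d, PG_SH_enable | PG_refcounts |
                            PG_translate | PG_external);
}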
65.1 --- a/xen/arch/x86/mm/shadow/common.c Thu Feb 15 13:13:36 2007 -0700 65.2 +++ b/xen/arch/x86/mm/shadow/common.c Thu Feb 15 14:09:39 2007 -0700 65.3 @@ -47,12 +47,27 @@ void shadow_domain_init(struct domain *d 65.4 int i; 65.5 shadow_lock_init(d); 65.6 for ( i = 0; i <= SHADOW_MAX_ORDER; i++ ) 65.7 - INIT_LIST_HEAD(&d->arch.shadow.freelists[i]); 65.8 - INIT_LIST_HEAD(&d->arch.shadow.p2m_freelist); 65.9 - INIT_LIST_HEAD(&d->arch.shadow.p2m_inuse); 65.10 - INIT_LIST_HEAD(&d->arch.shadow.pinned_shadows); 65.11 + INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]); 65.12 + INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist); 65.13 + INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows); 65.14 } 65.15 65.16 +/* Setup the shadow-specfic parts of a vcpu struct. Note: The most important 65.17 + * job is to initialize the update_paging_modes() function pointer, which is 65.18 + * used to initialized the rest of resources. Therefore, it really does not 65.19 + * matter to have v->arch.paging.mode pointing to any mode, as long as it can 65.20 + * be compiled. 65.21 + */ 65.22 +void shadow_vcpu_init(struct vcpu *v) 65.23 +{ 65.24 +#if CONFIG_PAGING_LEVELS == 4 65.25 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.26 +#elif CONFIG_PAGING_LEVELS == 3 65.27 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.28 +#elif CONFIG_PAGING_LEVELS == 2 65.29 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 65.30 +#endif 65.31 +} 65.32 65.33 #if SHADOW_AUDIT 65.34 int shadow_audit_enable = 0; 65.35 @@ -265,7 +280,7 @@ hvm_emulate_write(enum x86_segment seg, 65.36 if ( rc ) 65.37 return rc; 65.38 65.39 - return v->arch.shadow.mode->x86_emulate_write( 65.40 + return v->arch.paging.mode->shadow.x86_emulate_write( 65.41 v, addr, &val, bytes, sh_ctxt); 65.42 } 65.43 65.44 @@ -288,7 +303,7 @@ hvm_emulate_cmpxchg(enum x86_segment seg 65.45 if ( rc ) 65.46 return rc; 65.47 65.48 - return v->arch.shadow.mode->x86_emulate_cmpxchg( 65.49 + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( 65.50 v, addr, old, new, bytes, sh_ctxt); 65.51 } 65.52 65.53 @@ -312,7 +327,7 @@ hvm_emulate_cmpxchg8b(enum x86_segment s 65.54 if ( rc ) 65.55 return rc; 65.56 65.57 - return v->arch.shadow.mode->x86_emulate_cmpxchg8b( 65.58 + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( 65.59 v, addr, old_lo, old_hi, new_lo, new_hi, sh_ctxt); 65.60 } 65.61 65.62 @@ -353,7 +368,7 @@ pv_emulate_write(enum x86_segment seg, 65.63 struct sh_emulate_ctxt *sh_ctxt = 65.64 container_of(ctxt, struct sh_emulate_ctxt, ctxt); 65.65 struct vcpu *v = current; 65.66 - return v->arch.shadow.mode->x86_emulate_write( 65.67 + return v->arch.paging.mode->shadow.x86_emulate_write( 65.68 v, offset, &val, bytes, sh_ctxt); 65.69 } 65.70 65.71 @@ -368,7 +383,7 @@ pv_emulate_cmpxchg(enum x86_segment seg, 65.72 struct sh_emulate_ctxt *sh_ctxt = 65.73 container_of(ctxt, struct sh_emulate_ctxt, ctxt); 65.74 struct vcpu *v = current; 65.75 - return v->arch.shadow.mode->x86_emulate_cmpxchg( 65.76 + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg( 65.77 v, offset, old, new, bytes, sh_ctxt); 65.78 } 65.79 65.80 @@ -384,7 +399,7 @@ pv_emulate_cmpxchg8b(enum x86_segment se 65.81 struct sh_emulate_ctxt *sh_ctxt = 65.82 container_of(ctxt, struct sh_emulate_ctxt, ctxt); 65.83 struct vcpu *v = current; 65.84 - return v->arch.shadow.mode->x86_emulate_cmpxchg8b( 65.85 + return v->arch.paging.mode->shadow.x86_emulate_cmpxchg8b( 65.86 v, offset, old_lo, old_hi, new_lo, new_hi, sh_ctxt); 65.87 } 65.88 65.89 @@ 
-721,7 +736,7 @@ static inline int chunk_is_available(str 65.90 int i; 65.91 65.92 for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) 65.93 - if ( !list_empty(&d->arch.shadow.freelists[i]) ) 65.94 + if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) 65.95 return 1; 65.96 return 0; 65.97 } 65.98 @@ -783,7 +798,7 @@ void shadow_prealloc(struct domain *d, u 65.99 65.100 /* Stage one: walk the list of pinned pages, unpinning them */ 65.101 perfc_incrc(shadow_prealloc_1); 65.102 - list_for_each_backwards_safe(l, t, &d->arch.shadow.pinned_shadows) 65.103 + list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows) 65.104 { 65.105 sp = list_entry(l, struct shadow_page_info, list); 65.106 smfn = shadow_page_to_mfn(sp); 65.107 @@ -823,9 +838,9 @@ void shadow_prealloc(struct domain *d, u 65.108 SHADOW_PRINTK("Can't pre-allocate %i shadow pages!\n" 65.109 " shadow pages total = %u, free = %u, p2m=%u\n", 65.110 1 << order, 65.111 - d->arch.shadow.total_pages, 65.112 - d->arch.shadow.free_pages, 65.113 - d->arch.shadow.p2m_pages); 65.114 + d->arch.paging.shadow.total_pages, 65.115 + d->arch.paging.shadow.free_pages, 65.116 + d->arch.paging.shadow.p2m_pages); 65.117 BUG(); 65.118 } 65.119 65.120 @@ -840,7 +855,7 @@ static void shadow_blow_tables(struct do 65.121 int i; 65.122 65.123 /* Pass one: unpin all pinned pages */ 65.124 - list_for_each_backwards_safe(l,t, &d->arch.shadow.pinned_shadows) 65.125 + list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows) 65.126 { 65.127 sp = list_entry(l, struct shadow_page_info, list); 65.128 smfn = shadow_page_to_mfn(sp); 65.129 @@ -905,9 +920,9 @@ mfn_t shadow_alloc(struct domain *d, 65.130 65.131 /* Find smallest order which can satisfy the request. */ 65.132 for ( i = order; i <= SHADOW_MAX_ORDER; i++ ) 65.133 - if ( !list_empty(&d->arch.shadow.freelists[i]) ) 65.134 + if ( !list_empty(&d->arch.paging.shadow.freelists[i]) ) 65.135 { 65.136 - sp = list_entry(d->arch.shadow.freelists[i].next, 65.137 + sp = list_entry(d->arch.paging.shadow.freelists[i].next, 65.138 struct shadow_page_info, list); 65.139 list_del(&sp->list); 65.140 65.141 @@ -916,10 +931,10 @@ mfn_t shadow_alloc(struct domain *d, 65.142 { 65.143 i--; 65.144 sp->order = i; 65.145 - list_add_tail(&sp->list, &d->arch.shadow.freelists[i]); 65.146 + list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]); 65.147 sp += 1 << i; 65.148 } 65.149 - d->arch.shadow.free_pages -= 1 << order; 65.150 + d->arch.paging.shadow.free_pages -= 1 << order; 65.151 65.152 /* Init page info fields and clear the pages */ 65.153 for ( i = 0; i < 1<<order ; i++ ) 65.154 @@ -976,7 +991,7 @@ void shadow_free(struct domain *d, mfn_t 65.155 ASSERT(shadow_type != SH_type_p2m_table); 65.156 order = shadow_order(shadow_type); 65.157 65.158 - d->arch.shadow.free_pages += 1 << order; 65.159 + d->arch.paging.shadow.free_pages += 1 << order; 65.160 65.161 for ( i = 0; i < 1<<order; i++ ) 65.162 { 65.163 @@ -985,8 +1000,8 @@ void shadow_free(struct domain *d, mfn_t 65.164 for_each_vcpu(d, v) 65.165 { 65.166 /* No longer safe to look for a writeable mapping in this shadow */ 65.167 - if ( v->arch.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) 65.168 - v->arch.shadow.last_writeable_pte_smfn = 0; 65.169 + if ( v->arch.paging.shadow.last_writeable_pte_smfn == mfn_x(smfn) + i ) 65.170 + v->arch.paging.shadow.last_writeable_pte_smfn = 0; 65.171 } 65.172 #endif 65.173 /* Strip out the type: this is now a free shadow page */ 65.174 @@ -1019,7 +1034,7 @@ void shadow_free(struct domain *d, mfn_t 65.175 } 
65.176 65.177 sp->order = order; 65.178 - list_add_tail(&sp->list, &d->arch.shadow.freelists[order]); 65.179 + list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]); 65.180 } 65.181 65.182 /* Divert some memory from the pool to be used by the p2m mapping. 65.183 @@ -1033,19 +1048,19 @@ void shadow_free(struct domain *d, mfn_t 65.184 * returns non-zero on success. 65.185 */ 65.186 static int 65.187 -shadow_alloc_p2m_pages(struct domain *d) 65.188 +sh_alloc_p2m_pages(struct domain *d) 65.189 { 65.190 struct page_info *pg; 65.191 u32 i; 65.192 ASSERT(shadow_locked_by_me(d)); 65.193 65.194 - if ( d->arch.shadow.total_pages 65.195 + if ( d->arch.paging.shadow.total_pages 65.196 < (shadow_min_acceptable_pages(d) + (1<<SHADOW_MAX_ORDER)) ) 65.197 return 0; /* Not enough shadow memory: need to increase it first */ 65.198 65.199 pg = mfn_to_page(shadow_alloc(d, SH_type_p2m_table, 0)); 65.200 - d->arch.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER); 65.201 - d->arch.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); 65.202 + d->arch.paging.shadow.p2m_pages += (1<<SHADOW_MAX_ORDER); 65.203 + d->arch.paging.shadow.total_pages -= (1<<SHADOW_MAX_ORDER); 65.204 for (i = 0; i < (1<<SHADOW_MAX_ORDER); i++) 65.205 { 65.206 /* Unlike shadow pages, mark p2m pages as owned by the domain. 65.207 @@ -1055,34 +1070,59 @@ shadow_alloc_p2m_pages(struct domain *d) 65.208 * believed to be a concern. 65.209 */ 65.210 page_set_owner(&pg[i], d); 65.211 - list_add_tail(&pg[i].list, &d->arch.shadow.p2m_freelist); 65.212 + pg->count_info = 1; 65.213 + list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist); 65.214 } 65.215 return 1; 65.216 } 65.217 65.218 // Returns 0 if no memory is available... 65.219 -mfn_t 65.220 +struct page_info * 65.221 shadow_alloc_p2m_page(struct domain *d) 65.222 { 65.223 struct list_head *entry; 65.224 struct page_info *pg; 65.225 mfn_t mfn; 65.226 void *p; 65.227 - 65.228 - if ( list_empty(&d->arch.shadow.p2m_freelist) && 65.229 - !shadow_alloc_p2m_pages(d) ) 65.230 - return _mfn(0); 65.231 - entry = d->arch.shadow.p2m_freelist.next; 65.232 + 65.233 + shadow_lock(d); 65.234 + 65.235 + if ( list_empty(&d->arch.paging.shadow.p2m_freelist) && 65.236 + !sh_alloc_p2m_pages(d) ) 65.237 + { 65.238 + shadow_unlock(d); 65.239 + return NULL; 65.240 + } 65.241 + entry = d->arch.paging.shadow.p2m_freelist.next; 65.242 list_del(entry); 65.243 - list_add_tail(entry, &d->arch.shadow.p2m_inuse); 65.244 + 65.245 + shadow_unlock(d); 65.246 + 65.247 pg = list_entry(entry, struct page_info, list); 65.248 - pg->count_info = 1; 65.249 mfn = page_to_mfn(pg); 65.250 p = sh_map_domain_page(mfn); 65.251 clear_page(p); 65.252 sh_unmap_domain_page(p); 65.253 65.254 - return mfn; 65.255 + return pg; 65.256 +} 65.257 + 65.258 +void 65.259 +shadow_free_p2m_page(struct domain *d, struct page_info *pg) 65.260 +{ 65.261 + ASSERT(page_get_owner(pg) == d); 65.262 + /* Should have just the one ref we gave it in alloc_p2m_page() */ 65.263 + if ( (pg->count_info & PGC_count_mask) != 1 ) 65.264 + { 65.265 + SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n", 65.266 + pg->count_info, pg->u.inuse.type_info); 65.267 + } 65.268 + /* Free should not decrement domain's total allocation, since 65.269 + * these pages were allocated without an owner. 
*/ 65.270 + page_set_owner(pg, NULL); 65.271 + free_domheap_pages(pg, 0); 65.272 + d->arch.paging.shadow.p2m_pages--; 65.273 + perfc_decr(shadow_alloc_count); 65.274 } 65.275 65.276 #if CONFIG_PAGING_LEVELS == 3 65.277 @@ -1130,344 +1170,6 @@ static void p2m_install_entry_in_monitor 65.278 } 65.279 #endif 65.280 65.281 -// Find the next level's P2M entry, checking for out-of-range gfn's... 65.282 -// Returns NULL on error. 65.283 -// 65.284 -static l1_pgentry_t * 65.285 -p2m_find_entry(void *table, unsigned long *gfn_remainder, 65.286 - unsigned long gfn, u32 shift, u32 max) 65.287 -{ 65.288 - u32 index; 65.289 - 65.290 - index = *gfn_remainder >> shift; 65.291 - if ( index >= max ) 65.292 - { 65.293 - SHADOW_DEBUG(P2M, "gfn=0x%lx out of range " 65.294 - "(gfn_remainder=0x%lx shift=%d index=0x%x max=0x%x)\n", 65.295 - gfn, *gfn_remainder, shift, index, max); 65.296 - return NULL; 65.297 - } 65.298 - *gfn_remainder &= (1 << shift) - 1; 65.299 - return (l1_pgentry_t *)table + index; 65.300 -} 65.301 - 65.302 -// Walk one level of the P2M table, allocating a new table if required. 65.303 -// Returns 0 on error. 65.304 -// 65.305 -static int 65.306 -p2m_next_level(struct domain *d, mfn_t *table_mfn, void **table, 65.307 - unsigned long *gfn_remainder, unsigned long gfn, u32 shift, 65.308 - u32 max, unsigned long type) 65.309 -{ 65.310 - l1_pgentry_t *p2m_entry; 65.311 - void *next; 65.312 - 65.313 - if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn, 65.314 - shift, max)) ) 65.315 - return 0; 65.316 - 65.317 - if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) ) 65.318 - { 65.319 - mfn_t mfn = shadow_alloc_p2m_page(d); 65.320 - if ( mfn_x(mfn) == 0 ) 65.321 - return 0; 65.322 - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); 65.323 - mfn_to_page(mfn)->u.inuse.type_info = type | 1 | PGT_validated; 65.324 - mfn_to_page(mfn)->count_info = 1; 65.325 -#if CONFIG_PAGING_LEVELS == 3 65.326 - if (type == PGT_l2_page_table) 65.327 - { 65.328 - struct vcpu *v; 65.329 - /* We have written to the p2m l3: need to sync the per-vcpu 65.330 - * copies of it in the monitor tables */ 65.331 - p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p2m_entry); 65.332 - /* Also, any vcpus running on shadows of the p2m need to 65.333 - * reload their CR3s so the change propagates to the shadow */ 65.334 - ASSERT(shadow_locked_by_me(d)); 65.335 - for_each_vcpu(d, v) 65.336 - { 65.337 - if ( pagetable_get_pfn(v->arch.guest_table) 65.338 - == pagetable_get_pfn(d->arch.phys_table) 65.339 - && v->arch.shadow.mode != NULL ) 65.340 - v->arch.shadow.mode->update_cr3(v, 0); 65.341 - } 65.342 - } 65.343 -#endif 65.344 - /* The P2M can be shadowed: keep the shadows synced */ 65.345 - if ( d->vcpu[0] != NULL ) 65.346 - (void)sh_validate_guest_entry(d->vcpu[0], *table_mfn, 65.347 - p2m_entry, sizeof *p2m_entry); 65.348 - } 65.349 - *table_mfn = _mfn(l1e_get_pfn(*p2m_entry)); 65.350 - next = sh_map_domain_page(*table_mfn); 65.351 - sh_unmap_domain_page(*table); 65.352 - *table = next; 65.353 - 65.354 - return 1; 65.355 -} 65.356 - 65.357 -// Returns 0 on error (out of memory) 65.358 -int 65.359 -shadow_set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn) 65.360 -{ 65.361 - // XXX -- this might be able to be faster iff current->domain == d 65.362 - mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); 65.363 - void *table = sh_map_domain_page(table_mfn); 65.364 - unsigned long gfn_remainder = gfn; 65.365 - l1_pgentry_t *p2m_entry; 65.366 - int rv=0; 65.367 - 65.368 -#if CONFIG_PAGING_LEVELS >= 
4 65.369 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 65.370 - L4_PAGETABLE_SHIFT - PAGE_SHIFT, 65.371 - L4_PAGETABLE_ENTRIES, PGT_l3_page_table) ) 65.372 - goto out; 65.373 -#endif 65.374 -#if CONFIG_PAGING_LEVELS >= 3 65.375 - // When using PAE Xen, we only allow 33 bits of pseudo-physical 65.376 - // address in translated guests (i.e. 8 GBytes). This restriction 65.377 - // comes from wanting to map the P2M table into the 16MB RO_MPT hole 65.378 - // in Xen's address space for translated PV guests. 65.379 - // 65.380 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 65.381 - L3_PAGETABLE_SHIFT - PAGE_SHIFT, 65.382 - (CONFIG_PAGING_LEVELS == 3 65.383 - ? 8 65.384 - : L3_PAGETABLE_ENTRIES), 65.385 - PGT_l2_page_table) ) 65.386 - goto out; 65.387 -#endif 65.388 - if ( !p2m_next_level(d, &table_mfn, &table, &gfn_remainder, gfn, 65.389 - L2_PAGETABLE_SHIFT - PAGE_SHIFT, 65.390 - L2_PAGETABLE_ENTRIES, PGT_l1_page_table) ) 65.391 - goto out; 65.392 - 65.393 - p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn, 65.394 - 0, L1_PAGETABLE_ENTRIES); 65.395 - ASSERT(p2m_entry); 65.396 - if ( mfn_valid(mfn) ) 65.397 - *p2m_entry = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER); 65.398 - else 65.399 - *p2m_entry = l1e_empty(); 65.400 - 65.401 - /* Track the highest gfn for which we have ever had a valid mapping */ 65.402 - if ( mfn_valid(mfn) && (gfn > d->arch.max_mapped_pfn) ) 65.403 - d->arch.max_mapped_pfn = gfn; 65.404 - 65.405 - /* The P2M can be shadowed: keep the shadows synced */ 65.406 - if ( d->vcpu[0] != NULL ) 65.407 - (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, 65.408 - p2m_entry, sizeof(*p2m_entry)); 65.409 - 65.410 - /* Success */ 65.411 - rv = 1; 65.412 - 65.413 - out: 65.414 - sh_unmap_domain_page(table); 65.415 - return rv; 65.416 -} 65.417 - 65.418 -// Allocate a new p2m table for a domain. 65.419 -// 65.420 -// The structure of the p2m table is that of a pagetable for xen (i.e. it is 65.421 -// controlled by CONFIG_PAGING_LEVELS). 65.422 -// 65.423 -// Returns 0 if p2m table could not be initialized 65.424 -// 65.425 -static int 65.426 -shadow_alloc_p2m_table(struct domain *d) 65.427 -{ 65.428 - mfn_t p2m_top, mfn; 65.429 - struct list_head *entry; 65.430 - struct page_info *page; 65.431 - unsigned int page_count = 0; 65.432 - unsigned long gfn; 65.433 - 65.434 - SHADOW_PRINTK("allocating p2m table\n"); 65.435 - ASSERT(pagetable_get_pfn(d->arch.phys_table) == 0); 65.436 - 65.437 - p2m_top = shadow_alloc_p2m_page(d); 65.438 - mfn_to_page(p2m_top)->count_info = 1; 65.439 - mfn_to_page(p2m_top)->u.inuse.type_info = 65.440 -#if CONFIG_PAGING_LEVELS == 4 65.441 - PGT_l4_page_table 65.442 -#elif CONFIG_PAGING_LEVELS == 3 65.443 - PGT_l3_page_table 65.444 -#elif CONFIG_PAGING_LEVELS == 2 65.445 - PGT_l2_page_table 65.446 -#endif 65.447 - | 1 | PGT_validated; 65.448 - 65.449 - if ( mfn_x(p2m_top) == 0 ) 65.450 - return 0; 65.451 - 65.452 - d->arch.phys_table = pagetable_from_mfn(p2m_top); 65.453 - 65.454 - SHADOW_PRINTK("populating p2m table\n"); 65.455 - 65.456 - /* Initialise physmap tables for slot zero. Other code assumes this. */ 65.457 - gfn = 0; 65.458 - mfn = _mfn(INVALID_MFN); 65.459 - if ( !shadow_set_p2m_entry(d, gfn, mfn) ) 65.460 - goto error; 65.461 - 65.462 - /* Build a p2m map that matches the m2p entries for this domain's 65.463 - * allocated pages. Skip any pages that have an explicitly invalid 65.464 - * or obviously bogus m2p entry. 
*/ 65.465 - for ( entry = d->page_list.next; 65.466 - entry != &d->page_list; 65.467 - entry = entry->next ) 65.468 - { 65.469 - page = list_entry(entry, struct page_info, list); 65.470 - mfn = page_to_mfn(page); 65.471 - gfn = get_gpfn_from_mfn(mfn_x(mfn)); 65.472 - page_count++; 65.473 - if ( 65.474 -#ifdef __x86_64__ 65.475 - (gfn != 0x5555555555555555L) 65.476 -#else 65.477 - (gfn != 0x55555555L) 65.478 -#endif 65.479 - && gfn != INVALID_M2P_ENTRY 65.480 - && (gfn < 65.481 - (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof (l1_pgentry_t)) 65.482 - && !shadow_set_p2m_entry(d, gfn, mfn) ) 65.483 - goto error; 65.484 - } 65.485 - 65.486 - SHADOW_PRINTK("p2m table initialised (%u pages)\n", page_count); 65.487 - return 1; 65.488 - 65.489 - error: 65.490 - SHADOW_PRINTK("failed to initialize p2m table, gfn=%05lx, mfn=%" 65.491 - SH_PRI_mfn "\n", gfn, mfn_x(mfn)); 65.492 - return 0; 65.493 -} 65.494 - 65.495 -mfn_t 65.496 -sh_gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn) 65.497 -/* Read another domain's p2m entries */ 65.498 -{ 65.499 - mfn_t mfn; 65.500 - paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT; 65.501 - l2_pgentry_t *l2e; 65.502 - l1_pgentry_t *l1e; 65.503 - 65.504 - ASSERT(shadow_mode_translate(d)); 65.505 - mfn = pagetable_get_mfn(d->arch.phys_table); 65.506 - 65.507 - 65.508 - if ( gpfn > d->arch.max_mapped_pfn ) 65.509 - /* This pfn is higher than the highest the p2m map currently holds */ 65.510 - return _mfn(INVALID_MFN); 65.511 - 65.512 -#if CONFIG_PAGING_LEVELS >= 4 65.513 - { 65.514 - l4_pgentry_t *l4e = sh_map_domain_page(mfn); 65.515 - l4e += l4_table_offset(addr); 65.516 - if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 ) 65.517 - { 65.518 - sh_unmap_domain_page(l4e); 65.519 - return _mfn(INVALID_MFN); 65.520 - } 65.521 - mfn = _mfn(l4e_get_pfn(*l4e)); 65.522 - sh_unmap_domain_page(l4e); 65.523 - } 65.524 -#endif 65.525 -#if CONFIG_PAGING_LEVELS >= 3 65.526 - { 65.527 - l3_pgentry_t *l3e = sh_map_domain_page(mfn); 65.528 -#if CONFIG_PAGING_LEVELS == 3 65.529 - /* On PAE hosts the p2m has eight l3 entries, not four (see 65.530 - * shadow_set_p2m_entry()) so we can't use l3_table_offset. 65.531 - * Instead, just count the number of l3es from zero. It's safe 65.532 - * to do this because we already checked that the gfn is within 65.533 - * the bounds of the p2m. 
*/ 65.534 - l3e += (addr >> L3_PAGETABLE_SHIFT); 65.535 -#else 65.536 - l3e += l3_table_offset(addr); 65.537 -#endif 65.538 - if ( (l3e_get_flags(*l3e) & _PAGE_PRESENT) == 0 ) 65.539 - { 65.540 - sh_unmap_domain_page(l3e); 65.541 - return _mfn(INVALID_MFN); 65.542 - } 65.543 - mfn = _mfn(l3e_get_pfn(*l3e)); 65.544 - sh_unmap_domain_page(l3e); 65.545 - } 65.546 -#endif 65.547 - 65.548 - l2e = sh_map_domain_page(mfn); 65.549 - l2e += l2_table_offset(addr); 65.550 - if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 ) 65.551 - { 65.552 - sh_unmap_domain_page(l2e); 65.553 - return _mfn(INVALID_MFN); 65.554 - } 65.555 - mfn = _mfn(l2e_get_pfn(*l2e)); 65.556 - sh_unmap_domain_page(l2e); 65.557 - 65.558 - l1e = sh_map_domain_page(mfn); 65.559 - l1e += l1_table_offset(addr); 65.560 - if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 ) 65.561 - { 65.562 - sh_unmap_domain_page(l1e); 65.563 - return _mfn(INVALID_MFN); 65.564 - } 65.565 - mfn = _mfn(l1e_get_pfn(*l1e)); 65.566 - sh_unmap_domain_page(l1e); 65.567 - 65.568 - return mfn; 65.569 -} 65.570 - 65.571 -unsigned long 65.572 -shadow_gfn_to_mfn_foreign(unsigned long gpfn) 65.573 -{ 65.574 - return mfn_x(sh_gfn_to_mfn_foreign(current->domain, gpfn)); 65.575 -} 65.576 - 65.577 - 65.578 -static void shadow_p2m_teardown(struct domain *d) 65.579 -/* Return all the p2m pages to Xen. 65.580 - * We know we don't have any extra mappings to these pages */ 65.581 -{ 65.582 - struct list_head *entry, *n; 65.583 - struct page_info *pg; 65.584 - 65.585 - d->arch.phys_table = pagetable_null(); 65.586 - 65.587 - list_for_each_safe(entry, n, &d->arch.shadow.p2m_inuse) 65.588 - { 65.589 - pg = list_entry(entry, struct page_info, list); 65.590 - list_del(entry); 65.591 - /* Should have just the one ref we gave it in alloc_p2m_page() */ 65.592 - if ( (pg->count_info & PGC_count_mask) != 1 ) 65.593 - { 65.594 - SHADOW_PRINTK("Odd p2m page count c=%#x t=%"PRtype_info"\n", 65.595 - pg->count_info, pg->u.inuse.type_info); 65.596 - } 65.597 - ASSERT(page_get_owner(pg) == d); 65.598 - /* Free should not decrement domain's total allocation, since 65.599 - * these pages were allocated without an owner. */ 65.600 - page_set_owner(pg, NULL); 65.601 - free_domheap_pages(pg, 0); 65.602 - d->arch.shadow.p2m_pages--; 65.603 - perfc_decr(shadow_alloc_count); 65.604 - } 65.605 - list_for_each_safe(entry, n, &d->arch.shadow.p2m_freelist) 65.606 - { 65.607 - list_del(entry); 65.608 - pg = list_entry(entry, struct page_info, list); 65.609 - ASSERT(page_get_owner(pg) == d); 65.610 - /* Free should not decrement domain's total allocation. */ 65.611 - page_set_owner(pg, NULL); 65.612 - free_domheap_pages(pg, 0); 65.613 - d->arch.shadow.p2m_pages--; 65.614 - perfc_decr(shadow_alloc_count); 65.615 - } 65.616 - ASSERT(d->arch.shadow.p2m_pages == 0); 65.617 -} 65.618 - 65.619 /* Set the pool of shadow pages to the required number of pages. 65.620 * Input will be rounded up to at least shadow_min_acceptable_pages(), 65.621 * plus space for the p2m table. 
65.622 @@ -1491,11 +1193,11 @@ static unsigned int sh_set_allocation(st 65.623 pages = (pages + ((1<<SHADOW_MAX_ORDER)-1)) & ~((1<<SHADOW_MAX_ORDER)-1); 65.624 65.625 SHADOW_PRINTK("current %i target %i\n", 65.626 - d->arch.shadow.total_pages, pages); 65.627 - 65.628 - while ( d->arch.shadow.total_pages != pages ) 65.629 + d->arch.paging.shadow.total_pages, pages); 65.630 + 65.631 + while ( d->arch.paging.shadow.total_pages != pages ) 65.632 { 65.633 - if ( d->arch.shadow.total_pages < pages ) 65.634 + if ( d->arch.paging.shadow.total_pages < pages ) 65.635 { 65.636 /* Need to allocate more memory from domheap */ 65.637 sp = (struct shadow_page_info *) 65.638 @@ -1505,8 +1207,8 @@ static unsigned int sh_set_allocation(st 65.639 SHADOW_PRINTK("failed to allocate shadow pages.\n"); 65.640 return -ENOMEM; 65.641 } 65.642 - d->arch.shadow.free_pages += 1<<SHADOW_MAX_ORDER; 65.643 - d->arch.shadow.total_pages += 1<<SHADOW_MAX_ORDER; 65.644 + d->arch.paging.shadow.free_pages += 1<<SHADOW_MAX_ORDER; 65.645 + d->arch.paging.shadow.total_pages += 1<<SHADOW_MAX_ORDER; 65.646 for ( j = 0; j < 1<<SHADOW_MAX_ORDER; j++ ) 65.647 { 65.648 sp[j].type = 0; 65.649 @@ -1518,18 +1220,18 @@ static unsigned int sh_set_allocation(st 65.650 } 65.651 sp->order = SHADOW_MAX_ORDER; 65.652 list_add_tail(&sp->list, 65.653 - &d->arch.shadow.freelists[SHADOW_MAX_ORDER]); 65.654 + &d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER]); 65.655 } 65.656 - else if ( d->arch.shadow.total_pages > pages ) 65.657 + else if ( d->arch.paging.shadow.total_pages > pages ) 65.658 { 65.659 /* Need to return memory to domheap */ 65.660 shadow_prealloc(d, SHADOW_MAX_ORDER); 65.661 - ASSERT(!list_empty(&d->arch.shadow.freelists[SHADOW_MAX_ORDER])); 65.662 - sp = list_entry(d->arch.shadow.freelists[SHADOW_MAX_ORDER].next, 65.663 + ASSERT(!list_empty(&d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER])); 65.664 + sp = list_entry(d->arch.paging.shadow.freelists[SHADOW_MAX_ORDER].next, 65.665 struct shadow_page_info, list); 65.666 list_del(&sp->list); 65.667 - d->arch.shadow.free_pages -= 1<<SHADOW_MAX_ORDER; 65.668 - d->arch.shadow.total_pages -= 1<<SHADOW_MAX_ORDER; 65.669 + d->arch.paging.shadow.free_pages -= 1<<SHADOW_MAX_ORDER; 65.670 + d->arch.paging.shadow.total_pages -= 1<<SHADOW_MAX_ORDER; 65.671 free_domheap_pages((struct page_info *)sp, SHADOW_MAX_ORDER); 65.672 } 65.673 65.674 @@ -1547,7 +1249,7 @@ static unsigned int sh_set_allocation(st 65.675 /* Return the size of the shadow pool, rounded up to the nearest MB */ 65.676 static unsigned int shadow_get_allocation(struct domain *d) 65.677 { 65.678 - unsigned int pg = d->arch.shadow.total_pages; 65.679 + unsigned int pg = d->arch.paging.shadow.total_pages; 65.680 return ((pg >> (20 - PAGE_SHIFT)) 65.681 + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); 65.682 } 65.683 @@ -1583,7 +1285,7 @@ static void sh_hash_audit_bucket(struct 65.684 if ( !(SHADOW_AUDIT_ENABLE) ) 65.685 return; 65.686 65.687 - sp = d->arch.shadow.hash_table[bucket]; 65.688 + sp = d->arch.paging.shadow.hash_table[bucket]; 65.689 while ( sp ) 65.690 { 65.691 /* Not a shadow? 
*/ 65.692 @@ -1608,7 +1310,7 @@ static void sh_hash_audit_bucket(struct 65.693 if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page 65.694 && (gpg->u.inuse.type_info & PGT_count_mask) != 0 ) 65.695 { 65.696 - SHADOW_ERROR("MFN %#lx shadowed (by %#"SH_PRI_mfn")" 65.697 + SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")" 65.698 " but has typecount %#lx\n", 65.699 sp->backpointer, mfn_x(shadow_page_to_mfn(sp)), 65.700 gpg->u.inuse.type_info); 65.701 @@ -1652,13 +1354,13 @@ static int shadow_hash_alloc(struct doma 65.702 struct shadow_page_info **table; 65.703 65.704 ASSERT(shadow_locked_by_me(d)); 65.705 - ASSERT(!d->arch.shadow.hash_table); 65.706 + ASSERT(!d->arch.paging.shadow.hash_table); 65.707 65.708 table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS); 65.709 if ( !table ) return 1; 65.710 memset(table, 0, 65.711 SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *)); 65.712 - d->arch.shadow.hash_table = table; 65.713 + d->arch.paging.shadow.hash_table = table; 65.714 return 0; 65.715 } 65.716 65.717 @@ -1667,10 +1369,10 @@ static int shadow_hash_alloc(struct doma 65.718 static void shadow_hash_teardown(struct domain *d) 65.719 { 65.720 ASSERT(shadow_locked_by_me(d)); 65.721 - ASSERT(d->arch.shadow.hash_table); 65.722 - 65.723 - xfree(d->arch.shadow.hash_table); 65.724 - d->arch.shadow.hash_table = NULL; 65.725 + ASSERT(d->arch.paging.shadow.hash_table); 65.726 + 65.727 + xfree(d->arch.paging.shadow.hash_table); 65.728 + d->arch.paging.shadow.hash_table = NULL; 65.729 } 65.730 65.731 65.732 @@ -1683,7 +1385,7 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 65.733 key_t key; 65.734 65.735 ASSERT(shadow_locked_by_me(d)); 65.736 - ASSERT(d->arch.shadow.hash_table); 65.737 + ASSERT(d->arch.paging.shadow.hash_table); 65.738 ASSERT(t); 65.739 65.740 sh_hash_audit(d); 65.741 @@ -1692,16 +1394,16 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 65.742 key = sh_hash(n, t); 65.743 sh_hash_audit_bucket(d, key); 65.744 65.745 - sp = d->arch.shadow.hash_table[key]; 65.746 + sp = d->arch.paging.shadow.hash_table[key]; 65.747 prev = NULL; 65.748 while(sp) 65.749 { 65.750 if ( sp->backpointer == n && sp->type == t ) 65.751 { 65.752 /* Pull-to-front if 'sp' isn't already the head item */ 65.753 - if ( unlikely(sp != d->arch.shadow.hash_table[key]) ) 65.754 + if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) ) 65.755 { 65.756 - if ( unlikely(d->arch.shadow.hash_walking != 0) ) 65.757 + if ( unlikely(d->arch.paging.shadow.hash_walking != 0) ) 65.758 /* Can't reorder: someone is walking the hash chains */ 65.759 return shadow_page_to_mfn(sp); 65.760 else 65.761 @@ -1710,8 +1412,8 @@ mfn_t shadow_hash_lookup(struct vcpu *v, 65.762 /* Delete sp from the list */ 65.763 prev->next_shadow = sp->next_shadow; 65.764 /* Re-insert it at the head of the list */ 65.765 - sp->next_shadow = d->arch.shadow.hash_table[key]; 65.766 - d->arch.shadow.hash_table[key] = sp; 65.767 + sp->next_shadow = d->arch.paging.shadow.hash_table[key]; 65.768 + d->arch.paging.shadow.hash_table[key] = sp; 65.769 } 65.770 } 65.771 else 65.772 @@ -1737,7 +1439,7 @@ void shadow_hash_insert(struct vcpu *v, 65.773 key_t key; 65.774 65.775 ASSERT(shadow_locked_by_me(d)); 65.776 - ASSERT(d->arch.shadow.hash_table); 65.777 + ASSERT(d->arch.paging.shadow.hash_table); 65.778 ASSERT(t); 65.779 65.780 sh_hash_audit(d); 65.781 @@ -1748,8 +1450,8 @@ void shadow_hash_insert(struct vcpu *v, 65.782 65.783 /* Insert this shadow at the top of the bucket */ 65.784 sp = mfn_to_shadow_page(smfn); 65.785 - sp->next_shadow = 
d->arch.shadow.hash_table[key]; 65.786 - d->arch.shadow.hash_table[key] = sp; 65.787 + sp->next_shadow = d->arch.paging.shadow.hash_table[key]; 65.788 + d->arch.paging.shadow.hash_table[key] = sp; 65.789 65.790 sh_hash_audit_bucket(d, key); 65.791 } 65.792 @@ -1763,7 +1465,7 @@ void shadow_hash_delete(struct vcpu *v, 65.793 key_t key; 65.794 65.795 ASSERT(shadow_locked_by_me(d)); 65.796 - ASSERT(d->arch.shadow.hash_table); 65.797 + ASSERT(d->arch.paging.shadow.hash_table); 65.798 ASSERT(t); 65.799 65.800 sh_hash_audit(d); 65.801 @@ -1773,13 +1475,13 @@ void shadow_hash_delete(struct vcpu *v, 65.802 sh_hash_audit_bucket(d, key); 65.803 65.804 sp = mfn_to_shadow_page(smfn); 65.805 - if ( d->arch.shadow.hash_table[key] == sp ) 65.806 + if ( d->arch.paging.shadow.hash_table[key] == sp ) 65.807 /* Easy case: we're deleting the head item. */ 65.808 - d->arch.shadow.hash_table[key] = sp->next_shadow; 65.809 + d->arch.paging.shadow.hash_table[key] = sp->next_shadow; 65.810 else 65.811 { 65.812 /* Need to search for the one we want */ 65.813 - x = d->arch.shadow.hash_table[key]; 65.814 + x = d->arch.paging.shadow.hash_table[key]; 65.815 while ( 1 ) 65.816 { 65.817 ASSERT(x); /* We can't have hit the end, since our target is 65.818 @@ -1818,15 +1520,15 @@ static void hash_foreach(struct vcpu *v, 65.819 65.820 /* Say we're here, to stop hash-lookups reordering the chains */ 65.821 ASSERT(shadow_locked_by_me(d)); 65.822 - ASSERT(d->arch.shadow.hash_walking == 0); 65.823 - d->arch.shadow.hash_walking = 1; 65.824 + ASSERT(d->arch.paging.shadow.hash_walking == 0); 65.825 + d->arch.paging.shadow.hash_walking = 1; 65.826 65.827 for ( i = 0; i < SHADOW_HASH_BUCKETS; i++ ) 65.828 { 65.829 /* WARNING: This is not safe against changes to the hash table. 65.830 * The callback *must* return non-zero if it has inserted or 65.831 * deleted anything from the hash (lookups are OK, though). 
*/ 65.832 - for ( x = d->arch.shadow.hash_table[i]; x; x = x->next_shadow ) 65.833 + for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow ) 65.834 { 65.835 if ( callback_mask & (1 << x->type) ) 65.836 { 65.837 @@ -1839,7 +1541,7 @@ static void hash_foreach(struct vcpu *v, 65.838 } 65.839 if ( done ) break; 65.840 } 65.841 - d->arch.shadow.hash_walking = 0; 65.842 + d->arch.paging.shadow.hash_walking = 0; 65.843 } 65.844 65.845 65.846 @@ -2008,27 +1710,27 @@ int sh_remove_write_access(struct vcpu * 65.847 * and that mapping is likely to be in the current pagetable, 65.848 * in the guest's linear map (on non-HIGHPTE linux and windows)*/ 65.849 65.850 -#define GUESS(_a, _h) do { \ 65.851 - if ( v->arch.shadow.mode->guess_wrmap(v, (_a), gmfn) ) \ 65.852 - perfc_incrc(shadow_writeable_h_ ## _h); \ 65.853 - if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ 65.854 - return 1; \ 65.855 +#define GUESS(_a, _h) do { \ 65.856 + if ( v->arch.paging.mode->shadow.guess_wrmap(v, (_a), gmfn) ) \ 65.857 + perfc_incrc(shadow_writeable_h_ ## _h); \ 65.858 + if ( (pg->u.inuse.type_info & PGT_count_mask) == 0 ) \ 65.859 + return 1; \ 65.860 } while (0) 65.861 65.862 65.863 - if ( v->arch.shadow.mode->guest_levels == 2 ) 65.864 + if ( v->arch.paging.mode->guest_levels == 2 ) 65.865 { 65.866 if ( level == 1 ) 65.867 /* 32bit non-PAE w2k3: linear map at 0xC0000000 */ 65.868 GUESS(0xC0000000UL + (fault_addr >> 10), 1); 65.869 65.870 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ 65.871 - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) 65.872 + if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) 65.873 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); 65.874 65.875 } 65.876 #if CONFIG_PAGING_LEVELS >= 3 65.877 - else if ( v->arch.shadow.mode->guest_levels == 3 ) 65.878 + else if ( v->arch.paging.mode->guest_levels == 3 ) 65.879 { 65.880 /* 32bit PAE w2k3: linear map at 0xC0000000 */ 65.881 switch ( level ) 65.882 @@ -2038,11 +1740,11 @@ int sh_remove_write_access(struct vcpu * 65.883 } 65.884 65.885 /* Linux lowmem: first 896MB is mapped 1-to-1 above 0xC0000000 */ 65.886 - if ((gfn = sh_mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) 65.887 + if ((gfn = mfn_to_gfn(v->domain, gmfn)) < 0x38000 ) 65.888 GUESS(0xC0000000UL + (gfn << PAGE_SHIFT), 4); 65.889 } 65.890 #if CONFIG_PAGING_LEVELS >= 4 65.891 - else if ( v->arch.shadow.mode->guest_levels == 4 ) 65.892 + else if ( v->arch.paging.mode->guest_levels == 4 ) 65.893 { 65.894 /* 64bit w2k3: linear map at 0x0000070000000000 */ 65.895 switch ( level ) 65.896 @@ -2054,7 +1756,7 @@ int sh_remove_write_access(struct vcpu * 65.897 65.898 /* 64bit Linux direct map at 0xffff810000000000; older kernels 65.899 * had it at 0x0000010000000000UL */ 65.900 - gfn = sh_mfn_to_gfn(v->domain, gmfn); 65.901 + gfn = mfn_to_gfn(v->domain, gmfn); 65.902 GUESS(0xffff810000000000UL + (gfn << PAGE_SHIFT), 4); 65.903 GUESS(0x0000010000000000UL + (gfn << PAGE_SHIFT), 4); 65.904 } 65.905 @@ -2073,10 +1775,10 @@ int sh_remove_write_access(struct vcpu * 65.906 * the writeable mapping by looking at the same MFN where the last 65.907 * brute-force search succeeded. 
*/ 65.908 65.909 - if ( v->arch.shadow.last_writeable_pte_smfn != 0 ) 65.910 + if ( v->arch.paging.shadow.last_writeable_pte_smfn != 0 ) 65.911 { 65.912 unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask); 65.913 - mfn_t last_smfn = _mfn(v->arch.shadow.last_writeable_pte_smfn); 65.914 + mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn); 65.915 int shtype = mfn_to_shadow_page(last_smfn)->type; 65.916 65.917 if ( callbacks[shtype] ) 65.918 @@ -2431,7 +2133,7 @@ sh_remove_all_shadows_and_parents(struct 65.919 static void sh_update_paging_modes(struct vcpu *v) 65.920 { 65.921 struct domain *d = v->domain; 65.922 - struct shadow_paging_mode *old_mode = v->arch.shadow.mode; 65.923 + struct paging_mode *old_mode = v->arch.paging.mode; 65.924 mfn_t old_guest_table; 65.925 65.926 ASSERT(shadow_locked_by_me(d)); 65.927 @@ -2446,8 +2148,8 @@ static void sh_update_paging_modes(struc 65.928 65.929 // First, tear down any old shadow tables held by this vcpu. 65.930 // 65.931 - if ( v->arch.shadow.mode ) 65.932 - v->arch.shadow.mode->detach_old_tables(v); 65.933 + if ( v->arch.paging.mode ) 65.934 + v->arch.paging.mode->shadow.detach_old_tables(v); 65.935 65.936 if ( !is_hvm_domain(d) ) 65.937 { 65.938 @@ -2456,17 +2158,17 @@ static void sh_update_paging_modes(struc 65.939 /// 65.940 #if CONFIG_PAGING_LEVELS == 4 65.941 if ( pv_32bit_guest(v) ) 65.942 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.943 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.944 else 65.945 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); 65.946 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,4,4); 65.947 #elif CONFIG_PAGING_LEVELS == 3 65.948 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.949 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.950 #elif CONFIG_PAGING_LEVELS == 2 65.951 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 65.952 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 65.953 #else 65.954 #error unexpected paging mode 65.955 #endif 65.956 - v->arch.shadow.translate_enabled = !!shadow_mode_translate(d); 65.957 + v->arch.paging.translate_enabled = !!shadow_mode_translate(d); 65.958 } 65.959 else 65.960 { 65.961 @@ -2476,8 +2178,8 @@ static void sh_update_paging_modes(struc 65.962 ASSERT(shadow_mode_translate(d)); 65.963 ASSERT(shadow_mode_external(d)); 65.964 65.965 - v->arch.shadow.translate_enabled = !!hvm_paging_enabled(v); 65.966 - if ( !v->arch.shadow.translate_enabled ) 65.967 + v->arch.paging.translate_enabled = !!hvm_paging_enabled(v); 65.968 + if ( !v->arch.paging.translate_enabled ) 65.969 { 65.970 /* Set v->arch.guest_table to use the p2m map, and choose 65.971 * the appropriate shadow mode */ 65.972 @@ -2485,11 +2187,11 @@ static void sh_update_paging_modes(struc 65.973 #if CONFIG_PAGING_LEVELS == 2 65.974 v->arch.guest_table = 65.975 pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); 65.976 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 65.977 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,2,2); 65.978 #elif CONFIG_PAGING_LEVELS == 3 65.979 v->arch.guest_table = 65.980 pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); 65.981 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.982 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.983 #else /* CONFIG_PAGING_LEVELS == 4 */ 65.984 { 65.985 l4_pgentry_t *l4e; 65.986 @@ -2501,7 
+2203,7 @@ static void sh_update_paging_modes(struc 65.987 pagetable_from_pfn(l4e_get_pfn(l4e[0])); 65.988 sh_unmap_domain_page(l4e); 65.989 } 65.990 - v->arch.shadow.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.991 + v->arch.paging.mode = &SHADOW_INTERNAL_NAME(sh_paging_mode,3,3); 65.992 #endif 65.993 /* Fix up refcounts on guest_table */ 65.994 get_page(mfn_to_page(pagetable_get_mfn(v->arch.guest_table)), d); 65.995 @@ -2514,7 +2216,7 @@ static void sh_update_paging_modes(struc 65.996 if ( hvm_long_mode_enabled(v) ) 65.997 { 65.998 // long mode guest... 65.999 - v->arch.shadow.mode = 65.1000 + v->arch.paging.mode = 65.1001 &SHADOW_INTERNAL_NAME(sh_paging_mode, 4, 4); 65.1002 } 65.1003 else 65.1004 @@ -2523,7 +2225,7 @@ static void sh_update_paging_modes(struc 65.1005 { 65.1006 #if CONFIG_PAGING_LEVELS >= 3 65.1007 // 32-bit PAE mode guest... 65.1008 - v->arch.shadow.mode = 65.1009 + v->arch.paging.mode = 65.1010 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 3); 65.1011 #else 65.1012 SHADOW_ERROR("PAE not supported in 32-bit Xen\n"); 65.1013 @@ -2535,10 +2237,10 @@ static void sh_update_paging_modes(struc 65.1014 { 65.1015 // 32-bit 2 level guest... 65.1016 #if CONFIG_PAGING_LEVELS >= 3 65.1017 - v->arch.shadow.mode = 65.1018 + v->arch.paging.mode = 65.1019 &SHADOW_INTERNAL_NAME(sh_paging_mode, 3, 2); 65.1020 #else 65.1021 - v->arch.shadow.mode = 65.1022 + v->arch.paging.mode = 65.1023 &SHADOW_INTERNAL_NAME(sh_paging_mode, 2, 2); 65.1024 #endif 65.1025 } 65.1026 @@ -2546,25 +2248,25 @@ static void sh_update_paging_modes(struc 65.1027 65.1028 if ( pagetable_is_null(v->arch.monitor_table) ) 65.1029 { 65.1030 - mfn_t mmfn = v->arch.shadow.mode->make_monitor_table(v); 65.1031 + mfn_t mmfn = v->arch.paging.mode->shadow.make_monitor_table(v); 65.1032 v->arch.monitor_table = pagetable_from_mfn(mmfn); 65.1033 make_cr3(v, mfn_x(mmfn)); 65.1034 hvm_update_host_cr3(v); 65.1035 } 65.1036 65.1037 - if ( v->arch.shadow.mode != old_mode ) 65.1038 + if ( v->arch.paging.mode != old_mode ) 65.1039 { 65.1040 SHADOW_PRINTK("new paging mode: d=%u v=%u pe=%d g=%u s=%u " 65.1041 "(was g=%u s=%u)\n", 65.1042 d->domain_id, v->vcpu_id, 65.1043 is_hvm_domain(d) ? !!hvm_paging_enabled(v) : 1, 65.1044 - v->arch.shadow.mode->guest_levels, 65.1045 - v->arch.shadow.mode->shadow_levels, 65.1046 + v->arch.paging.mode->guest_levels, 65.1047 + v->arch.paging.mode->shadow.shadow_levels, 65.1048 old_mode ? old_mode->guest_levels : 0, 65.1049 - old_mode ? old_mode->shadow_levels : 0); 65.1050 + old_mode ? 
old_mode->shadow.shadow_levels : 0); 65.1051 if ( old_mode && 65.1052 - (v->arch.shadow.mode->shadow_levels != 65.1053 - old_mode->shadow_levels) ) 65.1054 + (v->arch.paging.mode->shadow.shadow_levels != 65.1055 + old_mode->shadow.shadow_levels) ) 65.1056 { 65.1057 /* Need to make a new monitor table for the new mode */ 65.1058 mfn_t new_mfn, old_mfn; 65.1059 @@ -2584,9 +2286,9 @@ static void sh_update_paging_modes(struc 65.1060 65.1061 old_mfn = pagetable_get_mfn(v->arch.monitor_table); 65.1062 v->arch.monitor_table = pagetable_null(); 65.1063 - new_mfn = v->arch.shadow.mode->make_monitor_table(v); 65.1064 + new_mfn = v->arch.paging.mode->shadow.make_monitor_table(v); 65.1065 v->arch.monitor_table = pagetable_from_mfn(new_mfn); 65.1066 - SHADOW_PRINTK("new monitor table %"SH_PRI_mfn "\n", 65.1067 + SHADOW_PRINTK("new monitor table %"PRI_mfn "\n", 65.1068 mfn_x(new_mfn)); 65.1069 65.1070 /* Don't be running on the old monitor table when we 65.1071 @@ -2596,7 +2298,7 @@ static void sh_update_paging_modes(struc 65.1072 if ( v == current ) 65.1073 write_ptbase(v); 65.1074 hvm_update_host_cr3(v); 65.1075 - old_mode->destroy_monitor_table(v, old_mfn); 65.1076 + old_mode->shadow.destroy_monitor_table(v, old_mfn); 65.1077 } 65.1078 } 65.1079 65.1080 @@ -2606,7 +2308,7 @@ static void sh_update_paging_modes(struc 65.1081 // This *does* happen, at least for CR4.PGE... 65.1082 } 65.1083 65.1084 - v->arch.shadow.mode->update_cr3(v, 0); 65.1085 + v->arch.paging.mode->update_cr3(v, 0); 65.1086 } 65.1087 65.1088 void shadow_update_paging_modes(struct vcpu *v) 65.1089 @@ -2626,9 +2328,7 @@ static void sh_new_mode(struct domain *d 65.1090 65.1091 ASSERT(shadow_locked_by_me(d)); 65.1092 ASSERT(d != current->domain); 65.1093 - d->arch.shadow.mode = new_mode; 65.1094 - if ( new_mode & SHM2_translate ) 65.1095 - shadow_audit_p2m(d); 65.1096 + d->arch.paging.mode = new_mode; 65.1097 for_each_vcpu(d, v) 65.1098 sh_update_paging_modes(v); 65.1099 } 65.1100 @@ -2642,75 +2342,75 @@ int shadow_enable(struct domain *d, u32 65.1101 unsigned int old_pages; 65.1102 int rv = 0; 65.1103 65.1104 - mode |= SHM2_enable; 65.1105 + mode |= PG_SH_enable; 65.1106 65.1107 domain_pause(d); 65.1108 - shadow_lock(d); 65.1109 65.1110 /* Sanity check the arguments */ 65.1111 if ( (d == current->domain) || 65.1112 shadow_mode_enabled(d) || 65.1113 - ((mode & SHM2_translate) && !(mode & SHM2_refcounts)) || 65.1114 - ((mode & SHM2_external) && !(mode & SHM2_translate)) ) 65.1115 + ((mode & PG_translate) && !(mode & PG_refcounts)) || 65.1116 + ((mode & PG_external) && !(mode & PG_translate)) ) 65.1117 { 65.1118 rv = -EINVAL; 65.1119 - goto out; 65.1120 + goto out_unlocked; 65.1121 } 65.1122 65.1123 - // XXX -- eventually would like to require that all memory be allocated 65.1124 - // *after* shadow_enabled() is called... So here, we would test to make 65.1125 - // sure that d->page_list is empty. 
65.1126 -#if 0 65.1127 - spin_lock(&d->page_alloc_lock); 65.1128 - if ( !list_empty(&d->page_list) ) 65.1129 - { 65.1130 - spin_unlock(&d->page_alloc_lock); 65.1131 - rv = -EINVAL; 65.1132 - goto out; 65.1133 - } 65.1134 - spin_unlock(&d->page_alloc_lock); 65.1135 -#endif 65.1136 - 65.1137 /* Init the shadow memory allocation if the user hasn't done so */ 65.1138 - old_pages = d->arch.shadow.total_pages; 65.1139 + old_pages = d->arch.paging.shadow.total_pages; 65.1140 if ( old_pages == 0 ) 65.1141 - if ( sh_set_allocation(d, 256, NULL) != 0 ) /* Use at least 1MB */ 65.1142 + { 65.1143 + unsigned int r; 65.1144 + shadow_lock(d); 65.1145 + r = sh_set_allocation(d, 256, NULL); /* Use at least 1MB */ 65.1146 + shadow_unlock(d); 65.1147 + if ( r != 0 ) 65.1148 { 65.1149 sh_set_allocation(d, 0, NULL); 65.1150 rv = -ENOMEM; 65.1151 - goto out; 65.1152 - } 65.1153 + goto out_unlocked; 65.1154 + } 65.1155 + } 65.1156 + 65.1157 + /* Init the P2M table. Must be done before we take the shadow lock 65.1158 + * to avoid possible deadlock. */ 65.1159 + if ( mode & PG_translate ) 65.1160 + { 65.1161 + rv = p2m_alloc_table(d, shadow_alloc_p2m_page, shadow_free_p2m_page); 65.1162 + if (rv != 0) 65.1163 + goto out_unlocked; 65.1164 + } 65.1165 + 65.1166 + shadow_lock(d); 65.1167 + 65.1168 + /* Sanity check again with the lock held */ 65.1169 + if ( shadow_mode_enabled(d) ) 65.1170 + { 65.1171 + rv = -EINVAL; 65.1172 + goto out_locked; 65.1173 + } 65.1174 65.1175 /* Init the hash table */ 65.1176 if ( shadow_hash_alloc(d) != 0 ) 65.1177 { 65.1178 - sh_set_allocation(d, old_pages, NULL); 65.1179 rv = -ENOMEM; 65.1180 - goto out; 65.1181 + goto out_locked; 65.1182 } 65.1183 65.1184 - /* Init the P2M table */ 65.1185 - if ( mode & SHM2_translate ) 65.1186 - if ( !shadow_alloc_p2m_table(d) ) 65.1187 - { 65.1188 - shadow_hash_teardown(d); 65.1189 - sh_set_allocation(d, old_pages, NULL); 65.1190 - shadow_p2m_teardown(d); 65.1191 - rv = -ENOMEM; 65.1192 - goto out; 65.1193 - } 65.1194 - 65.1195 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 65.1196 /* We assume we're dealing with an older 64bit linux guest until we 65.1197 * see the guest use more than one l4 per vcpu. 
*/ 65.1198 - d->arch.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL; 65.1199 + d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL; 65.1200 #endif 65.1201 65.1202 /* Update the bits */ 65.1203 sh_new_mode(d, mode); 65.1204 - shadow_audit_p2m(d); 65.1205 - out: 65.1206 + 65.1207 + out_locked: 65.1208 shadow_unlock(d); 65.1209 + out_unlocked: 65.1210 + if ( rv != 0 && !pagetable_is_null(d->arch.phys_table) ) 65.1211 + p2m_teardown(d); 65.1212 domain_unpause(d); 65.1213 return rv; 65.1214 } 65.1215 @@ -2721,6 +2421,8 @@ void shadow_teardown(struct domain *d) 65.1216 { 65.1217 struct vcpu *v; 65.1218 mfn_t mfn; 65.1219 + struct list_head *entry, *n; 65.1220 + struct page_info *pg; 65.1221 65.1222 ASSERT(test_bit(_DOMF_dying, &d->domain_flags)); 65.1223 ASSERT(d != current->domain); 65.1224 @@ -2733,48 +2435,55 @@ void shadow_teardown(struct domain *d) 65.1225 /* Release the shadow and monitor tables held by each vcpu */ 65.1226 for_each_vcpu(d, v) 65.1227 { 65.1228 - if ( v->arch.shadow.mode ) 65.1229 + if ( v->arch.paging.mode ) 65.1230 { 65.1231 - v->arch.shadow.mode->detach_old_tables(v); 65.1232 + v->arch.paging.mode->shadow.detach_old_tables(v); 65.1233 if ( shadow_mode_external(d) ) 65.1234 { 65.1235 mfn = pagetable_get_mfn(v->arch.monitor_table); 65.1236 if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) 65.1237 - v->arch.shadow.mode->destroy_monitor_table(v, mfn); 65.1238 + v->arch.paging.mode->shadow.destroy_monitor_table(v, mfn); 65.1239 v->arch.monitor_table = pagetable_null(); 65.1240 } 65.1241 } 65.1242 } 65.1243 } 65.1244 65.1245 - if ( d->arch.shadow.total_pages != 0 ) 65.1246 + list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist) 65.1247 + { 65.1248 + list_del(entry); 65.1249 + pg = list_entry(entry, struct page_info, list); 65.1250 + shadow_free_p2m_page(d, pg); 65.1251 + } 65.1252 + 65.1253 + if ( d->arch.paging.shadow.total_pages != 0 ) 65.1254 { 65.1255 SHADOW_PRINTK("teardown of domain %u starts." 65.1256 " Shadow pages total = %u, free = %u, p2m=%u\n", 65.1257 d->domain_id, 65.1258 - d->arch.shadow.total_pages, 65.1259 - d->arch.shadow.free_pages, 65.1260 - d->arch.shadow.p2m_pages); 65.1261 + d->arch.paging.shadow.total_pages, 65.1262 + d->arch.paging.shadow.free_pages, 65.1263 + d->arch.paging.shadow.p2m_pages); 65.1264 /* Destroy all the shadows and release memory to domheap */ 65.1265 sh_set_allocation(d, 0, NULL); 65.1266 /* Release the hash table back to xenheap */ 65.1267 - if (d->arch.shadow.hash_table) 65.1268 + if (d->arch.paging.shadow.hash_table) 65.1269 shadow_hash_teardown(d); 65.1270 /* Release the log-dirty bitmap of dirtied pages */ 65.1271 sh_free_log_dirty_bitmap(d); 65.1272 /* Should not have any more memory held */ 65.1273 SHADOW_PRINTK("teardown done." 65.1274 " Shadow pages total = %u, free = %u, p2m=%u\n", 65.1275 - d->arch.shadow.total_pages, 65.1276 - d->arch.shadow.free_pages, 65.1277 - d->arch.shadow.p2m_pages); 65.1278 - ASSERT(d->arch.shadow.total_pages == 0); 65.1279 + d->arch.paging.shadow.total_pages, 65.1280 + d->arch.paging.shadow.free_pages, 65.1281 + d->arch.paging.shadow.p2m_pages); 65.1282 + ASSERT(d->arch.paging.shadow.total_pages == 0); 65.1283 } 65.1284 65.1285 /* We leave the "permanent" shadow modes enabled, but clear the 65.1286 * log-dirty mode bit. 
We don't want any more mark_dirty() 65.1287 * calls now that we've torn down the bitmap */ 65.1288 - d->arch.shadow.mode &= ~SHM2_log_dirty; 65.1289 + d->arch.paging.mode &= ~PG_log_dirty; 65.1290 65.1291 shadow_unlock(d); 65.1292 } 65.1293 @@ -2782,30 +2491,28 @@ void shadow_teardown(struct domain *d) 65.1294 void shadow_final_teardown(struct domain *d) 65.1295 /* Called by arch_domain_destroy(), when it's safe to pull down the p2m map. */ 65.1296 { 65.1297 - 65.1298 SHADOW_PRINTK("dom %u final teardown starts." 65.1299 " Shadow pages total = %u, free = %u, p2m=%u\n", 65.1300 d->domain_id, 65.1301 - d->arch.shadow.total_pages, 65.1302 - d->arch.shadow.free_pages, 65.1303 - d->arch.shadow.p2m_pages); 65.1304 + d->arch.paging.shadow.total_pages, 65.1305 + d->arch.paging.shadow.free_pages, 65.1306 + d->arch.paging.shadow.p2m_pages); 65.1307 65.1308 /* Double-check that the domain didn't have any shadow memory. 65.1309 * It is possible for a domain that never got domain_kill()ed 65.1310 * to get here with its shadow allocation intact. */ 65.1311 - if ( d->arch.shadow.total_pages != 0 ) 65.1312 + if ( d->arch.paging.shadow.total_pages != 0 ) 65.1313 shadow_teardown(d); 65.1314 65.1315 /* It is now safe to pull down the p2m map. */ 65.1316 - if ( d->arch.shadow.p2m_pages != 0 ) 65.1317 - shadow_p2m_teardown(d); 65.1318 + p2m_teardown(d); 65.1319 65.1320 SHADOW_PRINTK("dom %u final teardown done." 65.1321 " Shadow pages total = %u, free = %u, p2m=%u\n", 65.1322 d->domain_id, 65.1323 - d->arch.shadow.total_pages, 65.1324 - d->arch.shadow.free_pages, 65.1325 - d->arch.shadow.p2m_pages); 65.1326 + d->arch.paging.shadow.total_pages, 65.1327 + d->arch.paging.shadow.free_pages, 65.1328 + d->arch.paging.shadow.p2m_pages); 65.1329 } 65.1330 65.1331 static int shadow_one_bit_enable(struct domain *d, u32 mode) 65.1332 @@ -2814,12 +2521,14 @@ static int shadow_one_bit_enable(struct 65.1333 ASSERT(shadow_locked_by_me(d)); 65.1334 65.1335 /* Sanity check the call */ 65.1336 - if ( d == current->domain || (d->arch.shadow.mode & mode) ) 65.1337 + if ( d == current->domain || (d->arch.paging.mode & mode) ) 65.1338 { 65.1339 return -EINVAL; 65.1340 } 65.1341 65.1342 - if ( d->arch.shadow.mode == 0 ) 65.1343 + mode |= PG_SH_enable; 65.1344 + 65.1345 + if ( d->arch.paging.mode == 0 ) 65.1346 { 65.1347 /* Init the shadow memory allocation and the hash table */ 65.1348 if ( sh_set_allocation(d, 1, NULL) != 0 65.1349 @@ -2831,7 +2540,7 @@ static int shadow_one_bit_enable(struct 65.1350 } 65.1351 65.1352 /* Update the bits */ 65.1353 - sh_new_mode(d, d->arch.shadow.mode | mode); 65.1354 + sh_new_mode(d, d->arch.paging.mode | mode); 65.1355 65.1356 return 0; 65.1357 } 65.1358 @@ -2843,26 +2552,26 @@ static int shadow_one_bit_disable(struct 65.1359 ASSERT(shadow_locked_by_me(d)); 65.1360 65.1361 /* Sanity check the call */ 65.1362 - if ( d == current->domain || !(d->arch.shadow.mode & mode) ) 65.1363 + if ( d == current->domain || !(d->arch.paging.mode & mode) ) 65.1364 { 65.1365 return -EINVAL; 65.1366 } 65.1367 65.1368 /* Update the bits */ 65.1369 - sh_new_mode(d, d->arch.shadow.mode & ~mode); 65.1370 - if ( d->arch.shadow.mode == 0 ) 65.1371 + sh_new_mode(d, d->arch.paging.mode & ~mode); 65.1372 + if ( d->arch.paging.mode == 0 ) 65.1373 { 65.1374 /* Get this domain off shadows */ 65.1375 SHADOW_PRINTK("un-shadowing of domain %u starts." 
65.1376 " Shadow pages total = %u, free = %u, p2m=%u\n", 65.1377 d->domain_id, 65.1378 - d->arch.shadow.total_pages, 65.1379 - d->arch.shadow.free_pages, 65.1380 - d->arch.shadow.p2m_pages); 65.1381 + d->arch.paging.shadow.total_pages, 65.1382 + d->arch.paging.shadow.free_pages, 65.1383 + d->arch.paging.shadow.p2m_pages); 65.1384 for_each_vcpu(d, v) 65.1385 { 65.1386 - if ( v->arch.shadow.mode ) 65.1387 - v->arch.shadow.mode->detach_old_tables(v); 65.1388 + if ( v->arch.paging.mode ) 65.1389 + v->arch.paging.mode->shadow.detach_old_tables(v); 65.1390 #if CONFIG_PAGING_LEVELS == 4 65.1391 if ( !(v->arch.flags & TF_kernel_mode) ) 65.1392 make_cr3(v, pagetable_get_pfn(v->arch.guest_table_user)); 65.1393 @@ -2885,9 +2594,9 @@ static int shadow_one_bit_disable(struct 65.1394 SHADOW_PRINTK("un-shadowing of domain %u done." 65.1395 " Shadow pages total = %u, free = %u, p2m=%u\n", 65.1396 d->domain_id, 65.1397 - d->arch.shadow.total_pages, 65.1398 - d->arch.shadow.free_pages, 65.1399 - d->arch.shadow.p2m_pages); 65.1400 + d->arch.paging.shadow.total_pages, 65.1401 + d->arch.paging.shadow.free_pages, 65.1402 + d->arch.paging.shadow.p2m_pages); 65.1403 } 65.1404 65.1405 return 0; 65.1406 @@ -2909,7 +2618,7 @@ static int shadow_test_enable(struct dom 65.1407 goto out; 65.1408 } 65.1409 65.1410 - ret = shadow_one_bit_enable(d, SHM2_enable); 65.1411 + ret = shadow_one_bit_enable(d, PG_SH_enable); 65.1412 out: 65.1413 shadow_unlock(d); 65.1414 domain_unpause(d); 65.1415 @@ -2923,7 +2632,7 @@ static int shadow_test_disable(struct do 65.1416 65.1417 domain_pause(d); 65.1418 shadow_lock(d); 65.1419 - ret = shadow_one_bit_disable(d, SHM2_enable); 65.1420 + ret = shadow_one_bit_disable(d, PG_SH_enable); 65.1421 shadow_unlock(d); 65.1422 domain_unpause(d); 65.1423 65.1424 @@ -2933,19 +2642,19 @@ static int shadow_test_disable(struct do 65.1425 static int 65.1426 sh_alloc_log_dirty_bitmap(struct domain *d) 65.1427 { 65.1428 - ASSERT(d->arch.shadow.dirty_bitmap == NULL); 65.1429 - d->arch.shadow.dirty_bitmap_size = 65.1430 + ASSERT(d->arch.paging.shadow.dirty_bitmap == NULL); 65.1431 + d->arch.paging.shadow.dirty_bitmap_size = 65.1432 (arch_get_max_pfn(d) + (BITS_PER_LONG - 1)) & 65.1433 ~(BITS_PER_LONG - 1); 65.1434 - d->arch.shadow.dirty_bitmap = 65.1435 + d->arch.paging.shadow.dirty_bitmap = 65.1436 xmalloc_array(unsigned long, 65.1437 - d->arch.shadow.dirty_bitmap_size / BITS_PER_LONG); 65.1438 - if ( d->arch.shadow.dirty_bitmap == NULL ) 65.1439 + d->arch.paging.shadow.dirty_bitmap_size / BITS_PER_LONG); 65.1440 + if ( d->arch.paging.shadow.dirty_bitmap == NULL ) 65.1441 { 65.1442 - d->arch.shadow.dirty_bitmap_size = 0; 65.1443 + d->arch.paging.shadow.dirty_bitmap_size = 0; 65.1444 return -ENOMEM; 65.1445 } 65.1446 - memset(d->arch.shadow.dirty_bitmap, 0, d->arch.shadow.dirty_bitmap_size/8); 65.1447 + memset(d->arch.paging.shadow.dirty_bitmap, 0, d->arch.paging.shadow.dirty_bitmap_size/8); 65.1448 65.1449 return 0; 65.1450 } 65.1451 @@ -2953,11 +2662,11 @@ sh_alloc_log_dirty_bitmap(struct domain 65.1452 static void 65.1453 sh_free_log_dirty_bitmap(struct domain *d) 65.1454 { 65.1455 - d->arch.shadow.dirty_bitmap_size = 0; 65.1456 - if ( d->arch.shadow.dirty_bitmap ) 65.1457 + d->arch.paging.shadow.dirty_bitmap_size = 0; 65.1458 + if ( d->arch.paging.shadow.dirty_bitmap ) 65.1459 { 65.1460 - xfree(d->arch.shadow.dirty_bitmap); 65.1461 - d->arch.shadow.dirty_bitmap = NULL; 65.1462 + xfree(d->arch.paging.shadow.dirty_bitmap); 65.1463 + d->arch.paging.shadow.dirty_bitmap = NULL; 65.1464 } 65.1465 } 65.1466 
65.1467 @@ -2989,7 +2698,7 @@ static int shadow_log_dirty_enable(struc 65.1468 goto out; 65.1469 } 65.1470 65.1471 - ret = shadow_one_bit_enable(d, SHM2_log_dirty); 65.1472 + ret = shadow_one_bit_enable(d, PG_log_dirty); 65.1473 if ( ret != 0 ) 65.1474 sh_free_log_dirty_bitmap(d); 65.1475 65.1476 @@ -3005,7 +2714,7 @@ static int shadow_log_dirty_disable(stru 65.1477 65.1478 domain_pause(d); 65.1479 shadow_lock(d); 65.1480 - ret = shadow_one_bit_disable(d, SHM2_log_dirty); 65.1481 + ret = shadow_one_bit_disable(d, PG_log_dirty); 65.1482 if ( !shadow_mode_log_dirty(d) ) 65.1483 sh_free_log_dirty_bitmap(d); 65.1484 shadow_unlock(d); 65.1485 @@ -3017,100 +2726,52 @@ static int shadow_log_dirty_disable(stru 65.1486 /**************************************************************************/ 65.1487 /* P2M map manipulations */ 65.1488 65.1489 -static void 65.1490 -sh_p2m_remove_page(struct domain *d, unsigned long gfn, unsigned long mfn) 65.1491 -{ 65.1492 - struct vcpu *v; 65.1493 - 65.1494 - if ( !shadow_mode_translate(d) ) 65.1495 - return; 65.1496 - 65.1497 - v = current; 65.1498 - if ( v->domain != d ) 65.1499 - v = d->vcpu[0]; 65.1500 - 65.1501 - SHADOW_DEBUG(P2M, "removing gfn=%#lx mfn=%#lx\n", gfn, mfn); 65.1502 - 65.1503 - ASSERT(mfn_x(sh_gfn_to_mfn(d, gfn)) == mfn); 65.1504 - //ASSERT(sh_mfn_to_gfn(d, mfn) == gfn); 65.1505 - 65.1506 - if ( v != NULL ) 65.1507 - { 65.1508 - sh_remove_all_shadows_and_parents(v, _mfn(mfn)); 65.1509 - if ( sh_remove_all_mappings(v, _mfn(mfn)) ) 65.1510 - flush_tlb_mask(d->domain_dirty_cpumask); 65.1511 - } 65.1512 - 65.1513 - shadow_set_p2m_entry(d, gfn, _mfn(INVALID_MFN)); 65.1514 - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); 65.1515 -} 65.1516 - 65.1517 +/* shadow specific code which should be called when P2M table entry is updated 65.1518 + * with new content. It is responsible for update the entry, as well as other 65.1519 + * shadow processing jobs. 
65.1520 + */ 65.1521 void 65.1522 -shadow_guest_physmap_remove_page(struct domain *d, unsigned long gfn, 65.1523 - unsigned long mfn) 65.1524 -{ 65.1525 - shadow_lock(d); 65.1526 - shadow_audit_p2m(d); 65.1527 - sh_p2m_remove_page(d, gfn, mfn); 65.1528 - shadow_audit_p2m(d); 65.1529 - shadow_unlock(d); 65.1530 -} 65.1531 - 65.1532 -void 65.1533 -shadow_guest_physmap_add_page(struct domain *d, unsigned long gfn, 65.1534 - unsigned long mfn) 65.1535 +shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p, 65.1536 + l1_pgentry_t new, unsigned int level) 65.1537 { 65.1538 - unsigned long ogfn; 65.1539 - mfn_t omfn; 65.1540 - 65.1541 - if ( !shadow_mode_translate(d) ) 65.1542 - return; 65.1543 - 65.1544 + struct domain *d = v->domain; 65.1545 + mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table); 65.1546 + mfn_t mfn; 65.1547 + 65.1548 shadow_lock(d); 65.1549 - shadow_audit_p2m(d); 65.1550 - 65.1551 - SHADOW_DEBUG(P2M, "adding gfn=%#lx mfn=%#lx\n", gfn, mfn); 65.1552 - 65.1553 - omfn = sh_gfn_to_mfn(d, gfn); 65.1554 - if ( mfn_valid(omfn) ) 65.1555 - { 65.1556 - /* Get rid of the old mapping, especially any shadows */ 65.1557 - struct vcpu *v = current; 65.1558 - if ( v->domain != d ) 65.1559 - v = d->vcpu[0]; 65.1560 - if ( v != NULL ) 65.1561 - { 65.1562 - sh_remove_all_shadows_and_parents(v, omfn); 65.1563 - if ( sh_remove_all_mappings(v, omfn) ) 65.1564 - flush_tlb_mask(d->domain_dirty_cpumask); 65.1565 - } 65.1566 - set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY); 65.1567 + 65.1568 + /* handle physmap_add and physmap_remove */ 65.1569 + mfn = gfn_to_mfn(d, gfn); 65.1570 + if ( v != NULL && level == 1 && mfn_valid(mfn) ) { 65.1571 + sh_remove_all_shadows_and_parents(v, mfn); 65.1572 + if ( sh_remove_all_mappings(v, mfn) ) 65.1573 + flush_tlb_mask(d->domain_dirty_cpumask); 65.1574 } 65.1575 - 65.1576 - ogfn = sh_mfn_to_gfn(d, _mfn(mfn)); 65.1577 - if ( 65.1578 -#ifdef __x86_64__ 65.1579 - (ogfn != 0x5555555555555555L) 65.1580 -#else 65.1581 - (ogfn != 0x55555555L) 65.1582 -#endif 65.1583 - && (ogfn != INVALID_M2P_ENTRY) 65.1584 - && (ogfn != gfn) ) 65.1585 - { 65.1586 - /* This machine frame is already mapped at another physical address */ 65.1587 - SHADOW_DEBUG(P2M, "aliased! 
mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", 65.1588 - mfn, ogfn, gfn); 65.1589 - if ( mfn_valid(omfn = sh_gfn_to_mfn(d, ogfn)) ) 65.1590 - { 65.1591 - SHADOW_DEBUG(P2M, "old gfn=%#lx -> mfn %#lx\n", 65.1592 - ogfn , mfn_x(omfn)); 65.1593 - if ( mfn_x(omfn) == mfn ) 65.1594 - sh_p2m_remove_page(d, ogfn, mfn); 65.1595 + 65.1596 + /* update the entry with new content */ 65.1597 + safe_write_pte(p, new); 65.1598 + 65.1599 + /* The P2M can be shadowed: keep the shadows synced */ 65.1600 + if ( d->vcpu[0] != NULL ) 65.1601 + (void)sh_validate_guest_entry(d->vcpu[0], table_mfn, p, sizeof(*p)); 65.1602 + 65.1603 + /* install P2M in monitors for PAE Xen */ 65.1604 +#if CONFIG_PAGING_LEVELS == 3 65.1605 + if ( level == 3 ) { 65.1606 + struct vcpu *v; 65.1607 + /* We have written to the p2m l3: need to sync the per-vcpu 65.1608 + * copies of it in the monitor tables */ 65.1609 + p2m_install_entry_in_monitors(d, (l3_pgentry_t *)p); 65.1610 + /* Also, any vcpus running on shadows of the p2m need to 65.1611 + * reload their CR3s so the change propagates to the shadow */ 65.1612 + for_each_vcpu(d, v) { 65.1613 + if ( pagetable_get_pfn(v->arch.guest_table) 65.1614 + == pagetable_get_pfn(d->arch.phys_table) 65.1615 + && v->arch.paging.mode != NULL ) 65.1616 + v->arch.paging.mode->update_cr3(v, 0); 65.1617 } 65.1618 } 65.1619 - 65.1620 - shadow_set_p2m_entry(d, gfn, _mfn(mfn)); 65.1621 - set_gpfn_from_mfn(mfn, gfn); 65.1622 +#endif 65.1623 65.1624 #if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH) 65.1625 /* If we're doing FAST_FAULT_PATH, then shadow mode may have 65.1626 @@ -3122,7 +2783,6 @@ shadow_guest_physmap_add_page(struct dom 65.1627 shadow_blow_tables(d); 65.1628 #endif 65.1629 65.1630 - shadow_audit_p2m(d); 65.1631 shadow_unlock(d); 65.1632 } 65.1633 65.1634 @@ -3151,11 +2811,11 @@ static int shadow_log_dirty_op( 65.1635 SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n", 65.1636 (clean) ? "clean" : "peek", 65.1637 d->domain_id, 65.1638 - d->arch.shadow.fault_count, 65.1639 - d->arch.shadow.dirty_count); 65.1640 - 65.1641 - sc->stats.fault_count = d->arch.shadow.fault_count; 65.1642 - sc->stats.dirty_count = d->arch.shadow.dirty_count; 65.1643 + d->arch.paging.shadow.fault_count, 65.1644 + d->arch.paging.shadow.dirty_count); 65.1645 + 65.1646 + sc->stats.fault_count = d->arch.paging.shadow.fault_count; 65.1647 + sc->stats.dirty_count = d->arch.paging.shadow.dirty_count; 65.1648 65.1649 if ( clean ) 65.1650 { 65.1651 @@ -3164,22 +2824,22 @@ static int shadow_log_dirty_op( 65.1652 * but for now, we just unshadow everything except Xen. */ 65.1653 shadow_blow_tables(d); 65.1654 65.1655 - d->arch.shadow.fault_count = 0; 65.1656 - d->arch.shadow.dirty_count = 0; 65.1657 + d->arch.paging.shadow.fault_count = 0; 65.1658 + d->arch.paging.shadow.dirty_count = 0; 65.1659 } 65.1660 65.1661 if ( guest_handle_is_null(sc->dirty_bitmap) ) 65.1662 /* caller may have wanted just to clean the state or access stats. */ 65.1663 peek = 0; 65.1664 65.1665 - if ( (peek || clean) && (d->arch.shadow.dirty_bitmap == NULL) ) 65.1666 + if ( (peek || clean) && (d->arch.paging.shadow.dirty_bitmap == NULL) ) 65.1667 { 65.1668 rv = -EINVAL; /* perhaps should be ENOMEM? 
*/ 65.1669 goto out; 65.1670 } 65.1671 65.1672 - if ( sc->pages > d->arch.shadow.dirty_bitmap_size ) 65.1673 - sc->pages = d->arch.shadow.dirty_bitmap_size; 65.1674 + if ( sc->pages > d->arch.paging.shadow.dirty_bitmap_size ) 65.1675 + sc->pages = d->arch.paging.shadow.dirty_bitmap_size; 65.1676 65.1677 #define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */ 65.1678 for ( i = 0; i < sc->pages; i += CHUNK ) 65.1679 @@ -3192,7 +2852,7 @@ static int shadow_log_dirty_op( 65.1680 { 65.1681 if ( copy_to_guest_offset( 65.1682 sc->dirty_bitmap, i/8, 65.1683 - (uint8_t *)d->arch.shadow.dirty_bitmap + (i/8), bytes) ) 65.1684 + (uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), bytes) ) 65.1685 { 65.1686 rv = -EFAULT; 65.1687 goto out; 65.1688 @@ -3200,7 +2860,7 @@ static int shadow_log_dirty_op( 65.1689 } 65.1690 65.1691 if ( clean ) 65.1692 - memset((uint8_t *)d->arch.shadow.dirty_bitmap + (i/8), 0, bytes); 65.1693 + memset((uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), 0, bytes); 65.1694 } 65.1695 #undef CHUNK 65.1696 65.1697 @@ -3221,7 +2881,7 @@ void sh_mark_dirty(struct domain *d, mfn 65.1698 if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) ) 65.1699 return; 65.1700 65.1701 - ASSERT(d->arch.shadow.dirty_bitmap != NULL); 65.1702 + ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); 65.1703 65.1704 /* We /really/ mean PFN here, even for non-translated guests. */ 65.1705 pfn = get_gpfn_from_mfn(mfn_x(gmfn)); 65.1706 @@ -3235,24 +2895,24 @@ void sh_mark_dirty(struct domain *d, mfn 65.1707 return; 65.1708 65.1709 /* N.B. Can use non-atomic TAS because protected by shadow_lock. */ 65.1710 - if ( likely(pfn < d->arch.shadow.dirty_bitmap_size) ) 65.1711 + if ( likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) ) 65.1712 { 65.1713 - if ( !__test_and_set_bit(pfn, d->arch.shadow.dirty_bitmap) ) 65.1714 + if ( !__test_and_set_bit(pfn, d->arch.paging.shadow.dirty_bitmap) ) 65.1715 { 65.1716 SHADOW_DEBUG(LOGDIRTY, 65.1717 - "marked mfn %" SH_PRI_mfn " (pfn=%lx), dom %d\n", 65.1718 + "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n", 65.1719 mfn_x(gmfn), pfn, d->domain_id); 65.1720 - d->arch.shadow.dirty_count++; 65.1721 + d->arch.paging.shadow.dirty_count++; 65.1722 } 65.1723 } 65.1724 else 65.1725 { 65.1726 SHADOW_PRINTK("mark_dirty OOR! " 65.1727 - "mfn=%" SH_PRI_mfn " pfn=%lx max=%x (dom %d)\n" 65.1728 + "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n" 65.1729 "owner=%d c=%08x t=%" PRtype_info "\n", 65.1730 mfn_x(gmfn), 65.1731 pfn, 65.1732 - d->arch.shadow.dirty_bitmap_size, 65.1733 + d->arch.paging.shadow.dirty_bitmap_size, 65.1734 d->domain_id, 65.1735 (page_get_owner(mfn_to_page(gmfn)) 65.1736 ? 
page_get_owner(mfn_to_page(gmfn))->domain_id 65.1737 @@ -3292,7 +2952,7 @@ int shadow_domctl(struct domain *d, 65.1738 return rc; 65.1739 if ( is_hvm_domain(d) ) 65.1740 return -EINVAL; 65.1741 - if ( d->arch.shadow.mode & SHM2_enable ) 65.1742 + if ( d->arch.paging.mode & PG_SH_enable ) 65.1743 if ( (rc = shadow_test_disable(d)) != 0 ) 65.1744 return rc; 65.1745 return 0; 65.1746 @@ -3304,7 +2964,7 @@ int shadow_domctl(struct domain *d, 65.1747 return shadow_log_dirty_enable(d); 65.1748 65.1749 case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE: 65.1750 - return shadow_enable(d, SHM2_refcounts|SHM2_translate); 65.1751 + return shadow_enable(d, PG_refcounts|PG_translate); 65.1752 65.1753 case XEN_DOMCTL_SHADOW_OP_CLEAN: 65.1754 case XEN_DOMCTL_SHADOW_OP_PEEK: 65.1755 @@ -3313,7 +2973,7 @@ int shadow_domctl(struct domain *d, 65.1756 case XEN_DOMCTL_SHADOW_OP_ENABLE: 65.1757 if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY ) 65.1758 return shadow_log_dirty_enable(d); 65.1759 - return shadow_enable(d, sc->mode << SHM2_shift); 65.1760 + return shadow_enable(d, sc->mode << PG_mode_shift); 65.1761 65.1762 case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: 65.1763 sc->mb = shadow_get_allocation(d); 65.1764 @@ -3390,7 +3050,7 @@ void shadow_audit_tables(struct vcpu *v) 65.1765 else 65.1766 { 65.1767 /* Audit only the current mode's tables */ 65.1768 - switch ( v->arch.shadow.mode->guest_levels ) 65.1769 + switch ( v->arch.paging.mode->guest_levels ) 65.1770 { 65.1771 case 2: mask = (SHF_L1_32|SHF_FL1_32|SHF_L2_32); break; 65.1772 case 3: mask = (SHF_L1_PAE|SHF_FL1_PAE|SHF_L2_PAE 65.1773 @@ -3406,199 +3066,6 @@ void shadow_audit_tables(struct vcpu *v) 65.1774 65.1775 #endif /* Shadow audit */ 65.1776 65.1777 - 65.1778 -/**************************************************************************/ 65.1779 -/* Auditing p2m tables */ 65.1780 - 65.1781 -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M 65.1782 - 65.1783 -void shadow_audit_p2m(struct domain *d) 65.1784 -{ 65.1785 - struct list_head *entry; 65.1786 - struct page_info *page; 65.1787 - struct domain *od; 65.1788 - unsigned long mfn, gfn, m2pfn, lp2mfn = 0; 65.1789 - mfn_t p2mfn; 65.1790 - unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0; 65.1791 - int test_linear; 65.1792 - 65.1793 - if ( !(SHADOW_AUDIT_ENABLE) || !shadow_mode_translate(d) ) 65.1794 - return; 65.1795 - 65.1796 - //SHADOW_PRINTK("p2m audit starts\n"); 65.1797 - 65.1798 - test_linear = ( (d == current->domain) 65.1799 - && !pagetable_is_null(current->arch.monitor_table) ); 65.1800 - if ( test_linear ) 65.1801 - local_flush_tlb(); 65.1802 - 65.1803 - /* Audit part one: walk the domain's page allocation list, checking 65.1804 - * the m2p entries. 
*/ 65.1805 - for ( entry = d->page_list.next; 65.1806 - entry != &d->page_list; 65.1807 - entry = entry->next ) 65.1808 - { 65.1809 - page = list_entry(entry, struct page_info, list); 65.1810 - mfn = mfn_x(page_to_mfn(page)); 65.1811 - 65.1812 - // SHADOW_PRINTK("auditing guest page, mfn=%#lx\n", mfn); 65.1813 - 65.1814 - od = page_get_owner(page); 65.1815 - 65.1816 - if ( od != d ) 65.1817 - { 65.1818 - SHADOW_PRINTK("wrong owner %#lx -> %p(%u) != %p(%u)\n", 65.1819 - mfn, od, (od?od->domain_id:-1), d, d->domain_id); 65.1820 - continue; 65.1821 - } 65.1822 - 65.1823 - gfn = get_gpfn_from_mfn(mfn); 65.1824 - if ( gfn == INVALID_M2P_ENTRY ) 65.1825 - { 65.1826 - orphans_i++; 65.1827 - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has invalid gfn\n", 65.1828 - // mfn); 65.1829 - continue; 65.1830 - } 65.1831 - 65.1832 - if ( gfn == 0x55555555 ) 65.1833 - { 65.1834 - orphans_d++; 65.1835 - //SHADOW_PRINTK("orphaned guest page: mfn=%#lx has debug gfn\n", 65.1836 - // mfn); 65.1837 - continue; 65.1838 - } 65.1839 - 65.1840 - p2mfn = sh_gfn_to_mfn_foreign(d, gfn); 65.1841 - if ( mfn_x(p2mfn) != mfn ) 65.1842 - { 65.1843 - mpbad++; 65.1844 - SHADOW_PRINTK("map mismatch mfn %#lx -> gfn %#lx -> mfn %#lx" 65.1845 - " (-> gfn %#lx)\n", 65.1846 - mfn, gfn, mfn_x(p2mfn), 65.1847 - (mfn_valid(p2mfn) 65.1848 - ? get_gpfn_from_mfn(mfn_x(p2mfn)) 65.1849 - : -1u)); 65.1850 - /* This m2p entry is stale: the domain has another frame in 65.1851 - * this physical slot. No great disaster, but for neatness, 65.1852 - * blow away the m2p entry. */ 65.1853 - set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY); 65.1854 - } 65.1855 - 65.1856 - if ( test_linear && (gfn <= d->arch.max_mapped_pfn) ) 65.1857 - { 65.1858 - lp2mfn = gfn_to_mfn_current(gfn); 65.1859 - if ( mfn_x(lp2mfn) != mfn_x(p2mfn) ) 65.1860 - { 65.1861 - SHADOW_PRINTK("linear mismatch gfn %#lx -> mfn %#lx " 65.1862 - "(!= mfn %#lx)\n", gfn, 65.1863 - mfn_x(lp2mfn), mfn_x(p2mfn)); 65.1864 - } 65.1865 - } 65.1866 - 65.1867 - // SHADOW_PRINTK("OK: mfn=%#lx, gfn=%#lx, p2mfn=%#lx, lp2mfn=%#lx\n", 65.1868 - // mfn, gfn, p2mfn, lp2mfn); 65.1869 - } 65.1870 - 65.1871 - /* Audit part two: walk the domain's p2m table, checking the entries. */ 65.1872 - if ( pagetable_get_pfn(d->arch.phys_table) != 0 ) 65.1873 - { 65.1874 - l2_pgentry_t *l2e; 65.1875 - l1_pgentry_t *l1e; 65.1876 - int i1, i2; 65.1877 - 65.1878 -#if CONFIG_PAGING_LEVELS == 4 65.1879 - l4_pgentry_t *l4e; 65.1880 - l3_pgentry_t *l3e; 65.1881 - int i3, i4; 65.1882 - l4e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); 65.1883 -#elif CONFIG_PAGING_LEVELS == 3 65.1884 - l3_pgentry_t *l3e; 65.1885 - int i3; 65.1886 - l3e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); 65.1887 -#else /* CONFIG_PAGING_LEVELS == 2 */ 65.1888 - l2e = sh_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); 65.1889 -#endif 65.1890 - 65.1891 - gfn = 0; 65.1892 -#if CONFIG_PAGING_LEVELS >= 3 65.1893 -#if CONFIG_PAGING_LEVELS >= 4 65.1894 - for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ ) 65.1895 - { 65.1896 - if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) ) 65.1897 - { 65.1898 - gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT); 65.1899 - continue; 65.1900 - } 65.1901 - l3e = sh_map_domain_page(_mfn(l4e_get_pfn(l4e[i4]))); 65.1902 -#endif /* now at levels 3 or 4... */ 65.1903 - for ( i3 = 0; 65.1904 - i3 < ((CONFIG_PAGING_LEVELS==4) ? 
L3_PAGETABLE_ENTRIES : 8); 65.1905 - i3++ ) 65.1906 - { 65.1907 - if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) ) 65.1908 - { 65.1909 - gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT); 65.1910 - continue; 65.1911 - } 65.1912 - l2e = sh_map_domain_page(_mfn(l3e_get_pfn(l3e[i3]))); 65.1913 -#endif /* all levels... */ 65.1914 - for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ ) 65.1915 - { 65.1916 - if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) ) 65.1917 - { 65.1918 - gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT); 65.1919 - continue; 65.1920 - } 65.1921 - l1e = sh_map_domain_page(_mfn(l2e_get_pfn(l2e[i2]))); 65.1922 - 65.1923 - for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ ) 65.1924 - { 65.1925 - if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) ) 65.1926 - continue; 65.1927 - mfn = l1e_get_pfn(l1e[i1]); 65.1928 - ASSERT(mfn_valid(_mfn(mfn))); 65.1929 - m2pfn = get_gpfn_from_mfn(mfn); 65.1930 - if ( m2pfn != gfn ) 65.1931 - { 65.1932 - pmbad++; 65.1933 - SHADOW_PRINTK("mismatch: gfn %#lx -> mfn %#lx" 65.1934 - " -> gfn %#lx\n", gfn, mfn, m2pfn); 65.1935 - BUG(); 65.1936 - } 65.1937 - } 65.1938 - sh_unmap_domain_page(l1e); 65.1939 - } 65.1940 -#if CONFIG_PAGING_LEVELS >= 3 65.1941 - sh_unmap_domain_page(l2e); 65.1942 - } 65.1943 -#if CONFIG_PAGING_LEVELS >= 4 65.1944 - sh_unmap_domain_page(l3e); 65.1945 - } 65.1946 -#endif 65.1947 -#endif 65.1948 - 65.1949 -#if CONFIG_PAGING_LEVELS == 4 65.1950 - sh_unmap_domain_page(l4e); 65.1951 -#elif CONFIG_PAGING_LEVELS == 3 65.1952 - sh_unmap_domain_page(l3e); 65.1953 -#else /* CONFIG_PAGING_LEVELS == 2 */ 65.1954 - sh_unmap_domain_page(l2e); 65.1955 -#endif 65.1956 - 65.1957 - } 65.1958 - 65.1959 - //SHADOW_PRINTK("p2m audit complete\n"); 65.1960 - //if ( orphans_i | orphans_d | mpbad | pmbad ) 65.1961 - // SHADOW_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n", 65.1962 - // orphans_i + orphans_d, orphans_i, orphans_d, 65.1963 - if ( mpbad | pmbad ) 65.1964 - SHADOW_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n", 65.1965 - pmbad, mpbad); 65.1966 -} 65.1967 - 65.1968 -#endif /* p2m audit */ 65.1969 - 65.1970 /* 65.1971 * Local variables: 65.1972 * mode: C
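The common.c hunks above replace the shadow-specific physmap add/remove helpers (and the p2m audit) with a single shadow_write_p2m_entry() callback that the generic p2m code invokes for every entry update. Below is a minimal stand-alone sketch of that dispatch pattern; the hook table and helper names are illustrative stand-ins, not the definitions added elsewhere in this changeset.

    /* Sketch of a per-domain write_p2m_entry hook of the kind introduced
     * above.  pte_t, struct vcpu and the hook table are simplified
     * stand-ins for Xen's real types. */
    #include <stddef.h>
    #include <stdint.h>

    typedef uint64_t pte_t;               /* stand-in for l1_pgentry_t */
    struct vcpu;                          /* opaque here */

    struct p2m_hooks {
        /* Called for every p2m entry write so the paging code can unshadow
         * the old frame, flush TLBs and revalidate the entry, as
         * shadow_write_p2m_entry() does in the diff above. */
        void (*write_p2m_entry)(struct vcpu *v, unsigned long gfn,
                                pte_t *p, pte_t new_e, unsigned int level);
    };

    static void set_p2m_entry(struct vcpu *v, const struct p2m_hooks *h,
                              unsigned long gfn, pte_t *slot,
                              pte_t new_e, unsigned int level)
    {
        if ( h != NULL && h->write_p2m_entry != NULL )
            h->write_p2m_entry(v, gfn, slot, new_e, level); /* paging-assisted */
        else
            *slot = new_e;                                  /* plain write */
    }

Compared with the removed shadow_guest_physmap_*() pair, this funnels all p2m writes, not just page add/remove, through one place where the shadows can be kept in sync.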
66.1 --- a/xen/arch/x86/mm/shadow/multi.c Thu Feb 15 13:13:36 2007 -0700 66.2 +++ b/xen/arch/x86/mm/shadow/multi.c Thu Feb 15 14:09:39 2007 -0700 66.3 @@ -237,7 +237,8 @@ guest_walk_tables(struct vcpu *v, unsign 66.4 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ 66.5 /* Get l4e from the top level table */ 66.6 gw->l4mfn = pagetable_get_mfn(v->arch.guest_table); 66.7 - gw->l4e = (guest_l4e_t *)v->arch.guest_vtable + guest_l4_table_offset(va); 66.8 + gw->l4e = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable 66.9 + + guest_l4_table_offset(va); 66.10 /* Walk down to the l3e */ 66.11 if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0; 66.12 gw->l3mfn = vcpu_gfn_to_mfn(v, guest_l4e_get_gfn(*gw->l4e)); 66.13 @@ -248,9 +249,8 @@ guest_walk_tables(struct vcpu *v, unsign 66.14 gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn)) 66.15 + guest_l3_table_offset(va); 66.16 #else /* PAE only... */ 66.17 - /* Get l3e from the top level table */ 66.18 - gw->l3mfn = pagetable_get_mfn(v->arch.guest_table); 66.19 - gw->l3e = (guest_l3e_t *)v->arch.guest_vtable + guest_l3_table_offset(va); 66.20 + /* Get l3e from the cache of the guest's top level table */ 66.21 + gw->l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)]; 66.22 #endif /* PAE or 64... */ 66.23 /* Walk down to the l2e */ 66.24 if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0; 66.25 @@ -264,7 +264,8 @@ guest_walk_tables(struct vcpu *v, unsign 66.26 #else /* 32-bit only... */ 66.27 /* Get l2e from the top level table */ 66.28 gw->l2mfn = pagetable_get_mfn(v->arch.guest_table); 66.29 - gw->l2e = (guest_l2e_t *)v->arch.guest_vtable + guest_l2_table_offset(va); 66.30 + gw->l2e = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable 66.31 + + guest_l2_table_offset(va); 66.32 #endif /* All levels... */ 66.33 66.34 if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0; 66.35 @@ -353,21 +354,21 @@ static inline void print_gw(walk_t *gw) 66.36 SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va); 66.37 #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */ 66.38 #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */ 66.39 - SHADOW_PRINTK(" l4mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l4mfn)); 66.40 + SHADOW_PRINTK(" l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn)); 66.41 SHADOW_PRINTK(" l4e=%p\n", gw->l4e); 66.42 if ( gw->l4e ) 66.43 SHADOW_PRINTK(" *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4); 66.44 + SHADOW_PRINTK(" l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn)); 66.45 #endif /* PAE or 64... */ 66.46 - SHADOW_PRINTK(" l3mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l3mfn)); 66.47 SHADOW_PRINTK(" l3e=%p\n", gw->l3e); 66.48 if ( gw->l3e ) 66.49 SHADOW_PRINTK(" *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3); 66.50 #endif /* All levels... 
*/ 66.51 - SHADOW_PRINTK(" l2mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l2mfn)); 66.52 + SHADOW_PRINTK(" l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn)); 66.53 SHADOW_PRINTK(" l2e=%p\n", gw->l2e); 66.54 if ( gw->l2e ) 66.55 SHADOW_PRINTK(" *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2); 66.56 - SHADOW_PRINTK(" l1mfn=%" SH_PRI_mfn "\n", mfn_x(gw->l1mfn)); 66.57 + SHADOW_PRINTK(" l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn)); 66.58 SHADOW_PRINTK(" l1e=%p\n", gw->l1e); 66.59 if ( gw->l1e ) 66.60 SHADOW_PRINTK(" *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1); 66.61 @@ -1572,7 +1573,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 66.62 #if GUEST_PAGING_LEVELS == 4 66.63 #if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL) 66.64 if ( shadow_type == SH_type_l4_64_shadow && 66.65 - unlikely(v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) 66.66 + unlikely(v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) ) 66.67 { 66.68 /* We're shadowing a new l4, but we've been assuming the guest uses 66.69 * only one l4 per vcpu and context switches using an l4 entry. 66.70 @@ -1584,7 +1585,7 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 66.71 struct shadow_page_info *sp; 66.72 struct vcpu *v2; 66.73 int l4count = 0, vcpus = 0; 66.74 - list_for_each(l, &v->domain->arch.shadow.pinned_shadows) 66.75 + list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows) 66.76 { 66.77 sp = list_entry(l, struct shadow_page_info, list); 66.78 if ( sp->type == SH_type_l4_64_shadow ) 66.79 @@ -1595,13 +1596,13 @@ sh_make_shadow(struct vcpu *v, mfn_t gmf 66.80 if ( l4count > 2 * vcpus ) 66.81 { 66.82 /* Unpin all the pinned l3 tables, and don't pin any more. */ 66.83 - list_for_each_safe(l, t, &v->domain->arch.shadow.pinned_shadows) 66.84 + list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows) 66.85 { 66.86 sp = list_entry(l, struct shadow_page_info, list); 66.87 if ( sp->type == SH_type_l3_64_shadow ) 66.88 sh_unpin(v, shadow_page_to_mfn(sp)); 66.89 } 66.90 - v->domain->arch.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; 66.91 + v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL; 66.92 } 66.93 } 66.94 #endif 66.95 @@ -1641,7 +1642,7 @@ make_fl1_shadow(struct vcpu *v, gfn_t gf 66.96 mfn_t smfn = shadow_alloc(v->domain, SH_type_fl1_shadow, 66.97 (unsigned long) gfn_x(gfn)); 66.98 66.99 - SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" SH_PRI_mfn "\n", 66.100 + SHADOW_DEBUG(MAKE_SHADOW, "(%" SH_PRI_gfn ")=>%" PRI_mfn "\n", 66.101 gfn_x(gfn), mfn_x(smfn)); 66.102 66.103 set_fl1_shadow_status(v, gfn, smfn); 66.104 @@ -1851,7 +1852,7 @@ static shadow_l2e_t * shadow_get_and_cre 66.105 #elif GUEST_PAGING_LEVELS == 3 /* PAE... */ 66.106 /* We never demand-shadow PAE l3es: they are only created in 66.107 * sh_update_cr3(). Check if the relevant sl3e is present. 
*/ 66.108 - shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.shadow.l3table) 66.109 + shadow_l3e_t *sl3e = ((shadow_l3e_t *)&v->arch.paging.shadow.l3table) 66.110 + shadow_l3_linear_offset(gw->va); 66.111 if ( !(shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT) ) 66.112 return NULL; 66.113 @@ -2358,7 +2359,7 @@ static int validate_gl1e(struct vcpu *v, 66.114 gfn = guest_l1e_get_gfn(*new_gl1e); 66.115 gmfn = vcpu_gfn_to_mfn(v, gfn); 66.116 66.117 - mmio = (is_hvm_vcpu(v) && shadow_vcpu_mode_translate(v) && !mfn_valid(gmfn)); 66.118 + mmio = (is_hvm_vcpu(v) && paging_vcpu_mode_translate(v) && !mfn_valid(gmfn)); 66.119 l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 66.120 ft_prefetch, mmio); 66.121 66.122 @@ -2506,7 +2507,7 @@ sh_map_and_validate_gl1e(struct vcpu *v, 66.123 static inline void check_for_early_unshadow(struct vcpu *v, mfn_t gmfn) 66.124 { 66.125 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW 66.126 - if ( v->arch.shadow.last_emulated_mfn == mfn_x(gmfn) && 66.127 + if ( v->arch.paging.shadow.last_emulated_mfn == mfn_x(gmfn) && 66.128 sh_mfn_is_a_page_table(gmfn) ) 66.129 { 66.130 u32 flags = mfn_to_page(gmfn)->shadow_flags; 66.131 @@ -2516,7 +2517,7 @@ static inline void check_for_early_unsha 66.132 sh_remove_shadows(v, gmfn, 1, 0 /* Fast, can fail to unshadow */ ); 66.133 } 66.134 } 66.135 - v->arch.shadow.last_emulated_mfn = mfn_x(gmfn); 66.136 + v->arch.paging.shadow.last_emulated_mfn = mfn_x(gmfn); 66.137 #endif 66.138 } 66.139 66.140 @@ -2524,7 +2525,7 @@ static inline void check_for_early_unsha 66.141 static inline void reset_early_unshadow(struct vcpu *v) 66.142 { 66.143 #if SHADOW_OPTIMIZATIONS & SHOPT_EARLY_UNSHADOW 66.144 - v->arch.shadow.last_emulated_mfn = INVALID_MFN; 66.145 + v->arch.paging.shadow.last_emulated_mfn = INVALID_MFN; 66.146 #endif 66.147 } 66.148 66.149 @@ -2589,7 +2590,7 @@ static void sh_prefetch(struct vcpu *v, 66.150 gfn = guest_l1e_get_gfn(gl1e); 66.151 gmfn = vcpu_gfn_to_mfn(v, gfn); 66.152 mmio = ( is_hvm_vcpu(v) 66.153 - && shadow_vcpu_mode_translate(v) 66.154 + && paging_vcpu_mode_translate(v) 66.155 && mmio_space(gfn_to_paddr(gfn)) ); 66.156 66.157 /* Propagate the entry. Safe to use a pointer to our local 66.158 @@ -2631,6 +2632,7 @@ static int sh_page_fault(struct vcpu *v, 66.159 SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n", 66.160 v->domain->domain_id, v->vcpu_id, va, regs->error_code); 66.161 66.162 + perfc_incrc(shadow_fault); 66.163 // 66.164 // XXX: Need to think about eventually mapping superpages directly in the 66.165 // shadow (when possible), as opposed to splintering them into a 66.166 @@ -2651,7 +2653,7 @@ static int sh_page_fault(struct vcpu *v, 66.167 if ( sh_l1e_is_gnp(sl1e) ) 66.168 { 66.169 if ( likely(!is_hvm_domain(d) || 66.170 - shadow_vcpu_mode_translate(v)) ) 66.171 + paging_vcpu_mode_translate(v)) ) 66.172 { 66.173 /* Not-present in a guest PT: pass to the guest as 66.174 * a not-present fault (by flipping two bits). 
*/ 66.175 @@ -2701,7 +2703,7 @@ static int sh_page_fault(struct vcpu *v, 66.176 if ( unlikely(shadow_locked_by_me(d)) ) 66.177 { 66.178 SHADOW_ERROR("Recursive shadow fault: lock was taken by %s\n", 66.179 - d->arch.shadow.locker_function); 66.180 + d->arch.paging.shadow.locker_function); 66.181 return 0; 66.182 } 66.183 66.184 @@ -2726,7 +2728,7 @@ static int sh_page_fault(struct vcpu *v, 66.185 // 66.186 if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) ) 66.187 { 66.188 - if ( is_hvm_domain(d) && !shadow_vcpu_mode_translate(v) ) 66.189 + if ( is_hvm_domain(d) && !paging_vcpu_mode_translate(v) ) 66.190 { 66.191 /* Not present in p2m map, means this is mmio */ 66.192 gpa = va; 66.193 @@ -2784,13 +2786,13 @@ static int sh_page_fault(struct vcpu *v, 66.194 gfn = guest_l1e_get_gfn(gw.eff_l1e); 66.195 gmfn = vcpu_gfn_to_mfn(v, gfn); 66.196 mmio = (is_hvm_domain(d) 66.197 - && shadow_vcpu_mode_translate(v) 66.198 + && paging_vcpu_mode_translate(v) 66.199 && mmio_space(gfn_to_paddr(gfn))); 66.200 66.201 if ( !mmio && !mfn_valid(gmfn) ) 66.202 { 66.203 perfc_incrc(shadow_fault_bail_bad_gfn); 66.204 - SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"SH_PRI_mfn"\n", 66.205 + SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 66.206 gfn_x(gfn), mfn_x(gmfn)); 66.207 goto not_a_shadow_fault; 66.208 } 66.209 @@ -2848,7 +2850,7 @@ static int sh_page_fault(struct vcpu *v, 66.210 } 66.211 66.212 perfc_incrc(shadow_fault_fixed); 66.213 - d->arch.shadow.fault_count++; 66.214 + d->arch.paging.shadow.fault_count++; 66.215 reset_early_unshadow(v); 66.216 66.217 done: 66.218 @@ -2949,7 +2951,7 @@ sh_invlpg(struct vcpu *v, unsigned long 66.219 return 0; 66.220 } 66.221 #elif SHADOW_PAGING_LEVELS == 3 66.222 - if ( !(l3e_get_flags(v->arch.shadow.l3table[shadow_l3_linear_offset(va)]) 66.223 + if ( !(l3e_get_flags(v->arch.paging.shadow.l3table[shadow_l3_linear_offset(va)]) 66.224 & _PAGE_PRESENT) ) 66.225 // no need to flush anything if there's no SL2... 66.226 return 0; 66.227 @@ -3120,7 +3122,7 @@ sh_update_linear_entries(struct vcpu *v) 66.228 } 66.229 66.230 /* Shadow l3 tables are made up by sh_update_cr3 */ 66.231 - sl3e = v->arch.shadow.l3table; 66.232 + sl3e = v->arch.paging.shadow.l3table; 66.233 66.234 for ( i = 0; i < SHADOW_L3_PAGETABLE_ENTRIES; i++ ) 66.235 { 66.236 @@ -3161,15 +3163,14 @@ sh_update_linear_entries(struct vcpu *v) 66.237 #if GUEST_PAGING_LEVELS == 2 66.238 /* Shadow l3 tables were built by sh_update_cr3 */ 66.239 if ( shadow_mode_external(d) ) 66.240 - shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; 66.241 + shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table; 66.242 else 66.243 BUG(); /* PV 2-on-3 is not supported yet */ 66.244 66.245 #else /* GUEST_PAGING_LEVELS == 3 */ 66.246 66.247 - shadow_l3e = (shadow_l3e_t *)&v->arch.shadow.l3table; 66.248 - /* Always safe to use guest_vtable, because it's globally mapped */ 66.249 - guest_l3e = v->arch.guest_vtable; 66.250 + shadow_l3e = (shadow_l3e_t *)&v->arch.paging.shadow.l3table; 66.251 + guest_l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e; 66.252 66.253 #endif /* GUEST_PAGING_LEVELS */ 66.254 66.255 @@ -3267,39 +3268,37 @@ sh_update_linear_entries(struct vcpu *v) 66.256 } 66.257 66.258 66.259 -/* Removes vcpu->arch.guest_vtable and vcpu->arch.shadow_table[]. 66.260 +/* Removes vcpu->arch.paging.shadow.guest_vtable and vcpu->arch.shadow_table[]. 66.261 * Does all appropriate management/bookkeeping/refcounting/etc... 
66.262 */ 66.263 static void 66.264 sh_detach_old_tables(struct vcpu *v) 66.265 { 66.266 - struct domain *d = v->domain; 66.267 mfn_t smfn; 66.268 int i = 0; 66.269 66.270 //// 66.271 - //// vcpu->arch.guest_vtable 66.272 + //// vcpu->arch.paging.shadow.guest_vtable 66.273 //// 66.274 - if ( v->arch.guest_vtable ) 66.275 + 66.276 +#if GUEST_PAGING_LEVELS == 3 66.277 + /* PAE guests don't have a mapping of the guest top-level table */ 66.278 + ASSERT(v->arch.paging.shadow.guest_vtable == NULL); 66.279 +#else 66.280 + if ( v->arch.paging.shadow.guest_vtable ) 66.281 { 66.282 -#if GUEST_PAGING_LEVELS == 4 66.283 + struct domain *d = v->domain; 66.284 if ( shadow_mode_external(d) || shadow_mode_translate(d) ) 66.285 - sh_unmap_domain_page_global(v->arch.guest_vtable); 66.286 -#elif GUEST_PAGING_LEVELS == 3 66.287 - if ( 1 || shadow_mode_external(d) || shadow_mode_translate(d) ) 66.288 - sh_unmap_domain_page_global(v->arch.guest_vtable); 66.289 -#elif GUEST_PAGING_LEVELS == 2 66.290 - if ( shadow_mode_external(d) || shadow_mode_translate(d) ) 66.291 - sh_unmap_domain_page_global(v->arch.guest_vtable); 66.292 + sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); 66.293 + v->arch.paging.shadow.guest_vtable = NULL; 66.294 + } 66.295 #endif 66.296 - v->arch.guest_vtable = NULL; 66.297 - } 66.298 + 66.299 66.300 //// 66.301 //// vcpu->arch.shadow_table[] 66.302 //// 66.303 66.304 - 66.305 #if GUEST_PAGING_LEVELS == 3 66.306 /* PAE guests have four shadow_table entries */ 66.307 for ( i = 0 ; i < 4 ; i++ ) 66.308 @@ -3370,7 +3369,7 @@ sh_set_toplevel_shadow(struct vcpu *v, 66.309 66.310 install_new_entry: 66.311 /* Done. Install it */ 66.312 - SHADOW_PRINTK("%u/%u [%u] gmfn %#"SH_PRI_mfn" smfn %#"SH_PRI_mfn"\n", 66.313 + SHADOW_PRINTK("%u/%u [%u] gmfn %#"PRI_mfn" smfn %#"PRI_mfn"\n", 66.314 GUEST_PAGING_LEVELS, SHADOW_PAGING_LEVELS, slot, 66.315 mfn_x(gmfn), mfn_x(pagetable_get_mfn(new_entry))); 66.316 v->arch.shadow_table[slot] = new_entry; 66.317 @@ -3397,7 +3396,9 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.318 struct domain *d = v->domain; 66.319 mfn_t gmfn; 66.320 #if GUEST_PAGING_LEVELS == 3 66.321 + guest_l3e_t *gl3e; 66.322 u32 guest_idx=0; 66.323 + int i; 66.324 #endif 66.325 66.326 /* Don't do anything on an uninitialised vcpu */ 66.327 @@ -3410,7 +3411,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.328 if ( do_locking ) shadow_lock(v->domain); 66.329 66.330 ASSERT(shadow_locked_by_me(v->domain)); 66.331 - ASSERT(v->arch.shadow.mode); 66.332 + ASSERT(v->arch.paging.mode); 66.333 66.334 //// 66.335 //// vcpu->arch.guest_table is already set 66.336 @@ -3425,7 +3426,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.337 ASSERT(shadow_mode_external(d)); 66.338 66.339 // Is paging enabled on this vcpu? 
66.340 - if ( shadow_vcpu_mode_translate(v) ) 66.341 + if ( paging_vcpu_mode_translate(v) ) 66.342 { 66.343 gfn = _gfn(paddr_to_pfn(hvm_get_guest_ctrl_reg(v, 3))); 66.344 gmfn = vcpu_gfn_to_mfn(v, gfn); 66.345 @@ -3456,55 +3457,54 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.346 66.347 66.348 //// 66.349 - //// vcpu->arch.guest_vtable 66.350 + //// vcpu->arch.paging.shadow.guest_vtable 66.351 //// 66.352 #if GUEST_PAGING_LEVELS == 4 66.353 if ( shadow_mode_external(d) || shadow_mode_translate(d) ) 66.354 { 66.355 - if ( v->arch.guest_vtable ) 66.356 - sh_unmap_domain_page_global(v->arch.guest_vtable); 66.357 - v->arch.guest_vtable = sh_map_domain_page_global(gmfn); 66.358 + if ( v->arch.paging.shadow.guest_vtable ) 66.359 + sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); 66.360 + v->arch.paging.shadow.guest_vtable = sh_map_domain_page_global(gmfn); 66.361 } 66.362 else 66.363 - v->arch.guest_vtable = __linear_l4_table; 66.364 + v->arch.paging.shadow.guest_vtable = __linear_l4_table; 66.365 #elif GUEST_PAGING_LEVELS == 3 66.366 - if ( v->arch.guest_vtable ) 66.367 - sh_unmap_domain_page_global(v->arch.guest_vtable); 66.368 - if ( shadow_mode_external(d) ) 66.369 - { 66.370 - if ( shadow_vcpu_mode_translate(v) ) 66.371 - /* Paging enabled: find where in the page the l3 table is */ 66.372 - guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3)); 66.373 - else 66.374 - /* Paging disabled: l3 is at the start of a page (in the p2m) */ 66.375 - guest_idx = 0; 66.376 - 66.377 - // Ignore the low 2 bits of guest_idx -- they are really just 66.378 - // cache control. 66.379 - guest_idx &= ~3; 66.380 - 66.381 - // XXX - why does this need a global map? 66.382 - v->arch.guest_vtable = 66.383 - (guest_l3e_t *)sh_map_domain_page_global(gmfn) + guest_idx; 66.384 - } 66.385 + /* On PAE guests we don't use a mapping of the guest's own top-level 66.386 + * table. We cache the current state of that table and shadow that, 66.387 + * until the next CR3 write makes us refresh our cache. */ 66.388 + ASSERT(v->arch.paging.shadow.guest_vtable == NULL); 66.389 + 66.390 + if ( shadow_mode_external(d) && paging_vcpu_mode_translate(v) ) 66.391 + /* Paging enabled: find where in the page the l3 table is */ 66.392 + guest_idx = guest_index((void *)hvm_get_guest_ctrl_reg(v, 3)); 66.393 else 66.394 - v->arch.guest_vtable = sh_map_domain_page_global(gmfn); 66.395 + /* Paging disabled or PV: l3 is at the start of a page */ 66.396 + guest_idx = 0; 66.397 + 66.398 + // Ignore the low 2 bits of guest_idx -- they are really just 66.399 + // cache control. 
66.400 + guest_idx &= ~3; 66.401 + 66.402 + gl3e = ((guest_l3e_t *)sh_map_domain_page(gmfn)) + guest_idx; 66.403 + for ( i = 0; i < 4 ; i++ ) 66.404 + v->arch.paging.shadow.gl3e[i] = gl3e[i]; 66.405 + sh_unmap_domain_page(gl3e); 66.406 #elif GUEST_PAGING_LEVELS == 2 66.407 if ( shadow_mode_external(d) || shadow_mode_translate(d) ) 66.408 { 66.409 - if ( v->arch.guest_vtable ) 66.410 - sh_unmap_domain_page_global(v->arch.guest_vtable); 66.411 - v->arch.guest_vtable = sh_map_domain_page_global(gmfn); 66.412 + if ( v->arch.paging.shadow.guest_vtable ) 66.413 + sh_unmap_domain_page_global(v->arch.paging.shadow.guest_vtable); 66.414 + v->arch.paging.shadow.guest_vtable = sh_map_domain_page_global(gmfn); 66.415 } 66.416 else 66.417 - v->arch.guest_vtable = __linear_l2_table; 66.418 + v->arch.paging.shadow.guest_vtable = __linear_l2_table; 66.419 #else 66.420 #error this should never happen 66.421 #endif 66.422 66.423 #if 0 66.424 - printk("%s %s %d gmfn=%05lx guest_vtable=%p\n", 66.425 - __func__, __FILE__, __LINE__, gmfn, v->arch.guest_vtable); 66.426 + printk("%s %s %d gmfn=%05lx shadow.guest_vtable=%p\n", 66.427 + __func__, __FILE__, __LINE__, gmfn, v->arch.paging.shadow.guest_vtable); 66.428 #endif 66.429 66.430 //// 66.431 @@ -3522,10 +3522,10 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.432 /* PAE guests have four shadow_table entries, based on the 66.433 * current values of the guest's four l3es. */ 66.434 { 66.435 - int i, flush = 0; 66.436 + int flush = 0; 66.437 gfn_t gl2gfn; 66.438 mfn_t gl2mfn; 66.439 - guest_l3e_t *gl3e = (guest_l3e_t*)v->arch.guest_vtable; 66.440 + guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e; 66.441 /* First, make all four entries read-only. */ 66.442 for ( i = 0; i < 4; i++ ) 66.443 { 66.444 @@ -3566,7 +3566,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.445 #endif 66.446 66.447 /// 66.448 - /// v->arch.shadow.l3table 66.449 + /// v->arch.paging.shadow.l3table 66.450 /// 66.451 #if SHADOW_PAGING_LEVELS == 3 66.452 { 66.453 @@ -3581,7 +3581,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.454 /* 3-on-3: make a PAE l3 that points at the four l2 pages */ 66.455 smfn = pagetable_get_mfn(v->arch.shadow_table[i]); 66.456 #endif 66.457 - v->arch.shadow.l3table[i] = 66.458 + v->arch.paging.shadow.l3table[i] = 66.459 (mfn_x(smfn) == 0) 66.460 ? shadow_l3e_empty() 66.461 : shadow_l3e_from_mfn(smfn, _PAGE_PRESENT); 66.462 @@ -3605,8 +3605,8 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.463 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated. 
66.464 * Don't use make_cr3 because (a) we know it's below 4GB, and 66.465 * (b) it's not necessarily page-aligned, and make_cr3 takes a pfn */ 66.466 - ASSERT(virt_to_maddr(&v->arch.shadow.l3table) <= 0xffffffe0ULL); 66.467 - v->arch.cr3 = virt_to_maddr(&v->arch.shadow.l3table); 66.468 + ASSERT(virt_to_maddr(&v->arch.paging.shadow.l3table) <= 0xffffffe0ULL); 66.469 + v->arch.cr3 = virt_to_maddr(&v->arch.paging.shadow.l3table); 66.470 #else 66.471 /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ 66.472 make_cr3(v, pagetable_get_pfn(v->arch.shadow_table[0])); 66.473 @@ -3622,7 +3622,7 @@ sh_update_cr3(struct vcpu *v, int do_loc 66.474 ASSERT(is_hvm_domain(d)); 66.475 #if SHADOW_PAGING_LEVELS == 3 66.476 /* 2-on-3 or 3-on-3: Use the PAE shadow l3 table we just fabricated */ 66.477 - hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.shadow.l3table)); 66.478 + hvm_update_guest_cr3(v, virt_to_maddr(&v->arch.paging.shadow.l3table)); 66.479 #else 66.480 /* 2-on-2 or 4-on-4: Just use the shadow top-level directly */ 66.481 hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.shadow_table[0])); 66.482 @@ -3665,7 +3665,7 @@ static int sh_guess_wrmap(struct vcpu *v 66.483 if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) 66.484 return 0; 66.485 #elif SHADOW_PAGING_LEVELS == 3 66.486 - sl3p = ((shadow_l3e_t *) v->arch.shadow.l3table) 66.487 + sl3p = ((shadow_l3e_t *) v->arch.paging.shadow.l3table) 66.488 + shadow_l3_linear_offset(vaddr); 66.489 if ( !(shadow_l3e_get_flags(*sl3p) & _PAGE_PRESENT) ) 66.490 return 0; 66.491 @@ -3709,7 +3709,7 @@ int sh_rm_write_access_from_l1(struct vc 66.492 (void) shadow_set_l1e(v, sl1e, ro_sl1e, sl1mfn); 66.493 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 66.494 /* Remember the last shadow that we shot a writeable mapping in */ 66.495 - v->arch.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); 66.496 + v->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn); 66.497 #endif 66.498 if ( (mfn_to_page(readonly_mfn)->u.inuse.type_info 66.499 & PGT_count_mask) == 0 ) 66.500 @@ -4050,8 +4050,8 @@ sh_x86_emulate_cmpxchg8b(struct vcpu *v, 66.501 66.502 #define AUDIT_FAIL(_level, _fmt, _a...) 
do { \ 66.503 printk("Shadow %u-on-%u audit failed at level %i, index %i\n" \ 66.504 - "gl" #_level "mfn = %" SH_PRI_mfn \ 66.505 - " sl" #_level "mfn = %" SH_PRI_mfn \ 66.506 + "gl" #_level "mfn = %" PRI_mfn \ 66.507 + " sl" #_level "mfn = %" PRI_mfn \ 66.508 " &gl" #_level "e = %p &sl" #_level "e = %p" \ 66.509 " gl" #_level "e = %" SH_PRI_gpte \ 66.510 " sl" #_level "e = %" SH_PRI_pte "\nError: " _fmt "\n", \ 66.511 @@ -4105,7 +4105,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g 66.512 != PGT_writable_page ) 66.513 return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */ 66.514 else 66.515 - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); 66.516 + return gfn_to_mfn(v->domain, gfn_x(gfn)); 66.517 } 66.518 66.519 66.520 @@ -4156,7 +4156,7 @@ int sh_audit_l1_table(struct vcpu *v, mf 66.521 gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn); 66.522 if ( mfn_x(gmfn) != mfn_x(mfn) ) 66.523 AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn 66.524 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 66.525 + " --> %" PRI_mfn " != mfn %" PRI_mfn, 66.526 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 66.527 } 66.528 } 66.529 @@ -4219,8 +4219,8 @@ int sh_audit_l2_table(struct vcpu *v, mf 66.530 SH_type_l1_shadow); 66.531 if ( mfn_x(gmfn) != mfn_x(mfn) ) 66.532 AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn 66.533 - " (--> %" SH_PRI_mfn ")" 66.534 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 66.535 + " (--> %" PRI_mfn ")" 66.536 + " --> %" PRI_mfn " != mfn %" PRI_mfn, 66.537 gfn_x(gfn), 66.538 (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0 66.539 : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)), 66.540 @@ -4262,7 +4262,7 @@ int sh_audit_l3_table(struct vcpu *v, mf 66.541 : SH_type_l2_shadow); 66.542 if ( mfn_x(gmfn) != mfn_x(mfn) ) 66.543 AUDIT_FAIL(3, "bad translation: gfn %" SH_PRI_gfn 66.544 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 66.545 + " --> %" PRI_mfn " != mfn %" PRI_mfn, 66.546 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 66.547 } 66.548 }); 66.549 @@ -4297,7 +4297,7 @@ int sh_audit_l4_table(struct vcpu *v, mf 66.550 SH_type_l3_shadow); 66.551 if ( mfn_x(gmfn) != mfn_x(mfn) ) 66.552 AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn 66.553 - " --> %" SH_PRI_mfn " != mfn %" SH_PRI_mfn, 66.554 + " --> %" PRI_mfn " != mfn %" PRI_mfn, 66.555 gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); 66.556 } 66.557 }); 66.558 @@ -4314,30 +4314,29 @@ int sh_audit_l4_table(struct vcpu *v, mf 66.559 /**************************************************************************/ 66.560 /* Entry points into this mode of the shadow code. 66.561 * This will all be mangled by the preprocessor to uniquify everything. 
*/ 66.562 -struct shadow_paging_mode sh_paging_mode = { 66.563 - .page_fault = sh_page_fault, 66.564 - .invlpg = sh_invlpg, 66.565 - .gva_to_gpa = sh_gva_to_gpa, 66.566 - .gva_to_gfn = sh_gva_to_gfn, 66.567 - .update_cr3 = sh_update_cr3, 66.568 - .map_and_validate_gl1e = sh_map_and_validate_gl1e, 66.569 - .map_and_validate_gl2e = sh_map_and_validate_gl2e, 66.570 - .map_and_validate_gl2he = sh_map_and_validate_gl2he, 66.571 - .map_and_validate_gl3e = sh_map_and_validate_gl3e, 66.572 - .map_and_validate_gl4e = sh_map_and_validate_gl4e, 66.573 - .detach_old_tables = sh_detach_old_tables, 66.574 - .x86_emulate_write = sh_x86_emulate_write, 66.575 - .x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, 66.576 - .x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, 66.577 - .make_monitor_table = sh_make_monitor_table, 66.578 - .destroy_monitor_table = sh_destroy_monitor_table, 66.579 - .guest_map_l1e = sh_guest_map_l1e, 66.580 - .guest_get_eff_l1e = sh_guest_get_eff_l1e, 66.581 +struct paging_mode sh_paging_mode = { 66.582 + .page_fault = sh_page_fault, 66.583 + .invlpg = sh_invlpg, 66.584 + .gva_to_gpa = sh_gva_to_gpa, 66.585 + .gva_to_gfn = sh_gva_to_gfn, 66.586 + .update_cr3 = sh_update_cr3, 66.587 + .update_paging_modes = shadow_update_paging_modes, 66.588 + .write_p2m_entry = shadow_write_p2m_entry, 66.589 + .write_guest_entry = shadow_write_guest_entry, 66.590 + .cmpxchg_guest_entry = shadow_cmpxchg_guest_entry, 66.591 + .guest_map_l1e = sh_guest_map_l1e, 66.592 + .guest_get_eff_l1e = sh_guest_get_eff_l1e, 66.593 + .guest_levels = GUEST_PAGING_LEVELS, 66.594 + .shadow.detach_old_tables = sh_detach_old_tables, 66.595 + .shadow.x86_emulate_write = sh_x86_emulate_write, 66.596 + .shadow.x86_emulate_cmpxchg = sh_x86_emulate_cmpxchg, 66.597 + .shadow.x86_emulate_cmpxchg8b = sh_x86_emulate_cmpxchg8b, 66.598 + .shadow.make_monitor_table = sh_make_monitor_table, 66.599 + .shadow.destroy_monitor_table = sh_destroy_monitor_table, 66.600 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC 66.601 - .guess_wrmap = sh_guess_wrmap, 66.602 + .shadow.guess_wrmap = sh_guess_wrmap, 66.603 #endif 66.604 - .guest_levels = GUEST_PAGING_LEVELS, 66.605 - .shadow_levels = SHADOW_PAGING_LEVELS, 66.606 + .shadow.shadow_levels = SHADOW_PAGING_LEVELS, 66.607 }; 66.608 66.609 /*
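The rewritten sh_paging_mode initialiser above moves the common entry points to the top level of struct paging_mode and tucks the shadow-only ones under a nested .shadow sub-structure. The sketch below shows the dispatch shape this implies for callers; the wrapper names and cut-down structures are assumptions for illustration, not the real asm/paging.h definitions.

    /* Cut-down ops table mirroring the shape of struct paging_mode above. */
    #include <stddef.h>

    struct vcpu;

    struct shadow_ops {
        void (*detach_old_tables)(struct vcpu *v);   /* shadow-only op */
        unsigned int shadow_levels;
    };

    struct paging_ops {
        void (*update_cr3)(struct vcpu *v, int do_locking);  /* common op */
        unsigned int guest_levels;
        struct shadow_ops shadow;          /* shadow-specific ops nest here */
    };

    /* What a generic wrapper presumably reduces to: common operations go
     * straight through the mode pointer, shadow internals go one level
     * further down. */
    static void update_cr3_via_mode(struct vcpu *v, const struct paging_ops *m)
    {
        if ( m != NULL )
            m->update_cr3(v, 1 /* take the paging lock */);
    }

    static void detach_via_mode(struct vcpu *v, const struct paging_ops *m)
    {
        if ( m != NULL && m->shadow.detach_old_tables != NULL )
            m->shadow.detach_old_tables(v);
    }

This matches the call sites earlier in the diff, for example v->arch.paging.mode->shadow.detach_old_tables(v) in shadow_teardown().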
67.1 --- a/xen/arch/x86/mm/shadow/multi.h Thu Feb 15 13:13:36 2007 -0700 67.2 +++ b/xen/arch/x86/mm/shadow/multi.h Thu Feb 15 14:09:39 2007 -0700 67.3 @@ -115,5 +115,5 @@ SHADOW_INTERNAL_NAME(sh_destroy_monitor_ 67.4 (struct vcpu *v, mfn_t mmfn); 67.5 #endif 67.6 67.7 -extern struct shadow_paging_mode 67.8 +extern struct paging_mode 67.9 SHADOW_INTERNAL_NAME(sh_paging_mode, SHADOW_LEVELS, GUEST_LEVELS);
68.1 --- a/xen/arch/x86/mm/shadow/page-guest32.h Thu Feb 15 13:13:36 2007 -0700 68.2 +++ b/xen/arch/x86/mm/shadow/page-guest32.h Thu Feb 15 14:09:39 2007 -0700 68.3 @@ -87,11 +87,6 @@ static inline l2_pgentry_32_t l2e_from_p 68.4 #define l2_table_offset_32(a) \ 68.5 (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1)) 68.6 68.7 -#define linear_l1_table_32 \ 68.8 - ((l1_pgentry_32_t *)(LINEAR_PT_VIRT_START)) 68.9 - 68.10 -#define linear_pg_table_32 linear_l1_table_32 68.11 - 68.12 #endif /* __X86_PAGE_GUEST_H__ */ 68.13 68.14 /*
69.1 --- a/xen/arch/x86/mm/shadow/private.h Thu Feb 15 13:13:36 2007 -0700 69.2 +++ b/xen/arch/x86/mm/shadow/private.h Thu Feb 15 14:09:39 2007 -0700 69.3 @@ -41,13 +41,12 @@ 69.4 #define SHADOW_AUDIT_ENTRIES 0x04 /* Check this walk's shadows */ 69.5 #define SHADOW_AUDIT_ENTRIES_FULL 0x08 /* Check every shadow */ 69.6 #define SHADOW_AUDIT_ENTRIES_MFNS 0x10 /* Check gfn-mfn map in shadows */ 69.7 -#define SHADOW_AUDIT_P2M 0x20 /* Check the p2m table */ 69.8 69.9 #ifdef NDEBUG 69.10 #define SHADOW_AUDIT 0 69.11 #define SHADOW_AUDIT_ENABLE 0 69.12 #else 69.13 -#define SHADOW_AUDIT 0x15 /* Basic audit of all except p2m. */ 69.14 +#define SHADOW_AUDIT 0x15 /* Basic audit of all */ 69.15 #define SHADOW_AUDIT_ENABLE shadow_audit_enable 69.16 extern int shadow_audit_enable; 69.17 #endif 69.18 @@ -84,9 +83,9 @@ extern int shadow_audit_enable; 69.19 #define SHADOW_DEBUG_PROPAGATE 1 69.20 #define SHADOW_DEBUG_MAKE_SHADOW 1 69.21 #define SHADOW_DEBUG_DESTROY_SHADOW 1 69.22 -#define SHADOW_DEBUG_P2M 0 69.23 #define SHADOW_DEBUG_A_AND_D 1 69.24 #define SHADOW_DEBUG_EMULATE 1 69.25 +#define SHADOW_DEBUG_P2M 1 69.26 #define SHADOW_DEBUG_LOGDIRTY 0 69.27 69.28 /****************************************************************************** 69.29 @@ -108,36 +107,36 @@ extern int shadow_audit_enable; 69.30 #error shadow.h currently requires CONFIG_SMP 69.31 #endif 69.32 69.33 -#define shadow_lock_init(_d) \ 69.34 - do { \ 69.35 - spin_lock_init(&(_d)->arch.shadow.lock); \ 69.36 - (_d)->arch.shadow.locker = -1; \ 69.37 - (_d)->arch.shadow.locker_function = "nobody"; \ 69.38 +#define shadow_lock_init(_d) \ 69.39 + do { \ 69.40 + spin_lock_init(&(_d)->arch.paging.shadow.lock); \ 69.41 + (_d)->arch.paging.shadow.locker = -1; \ 69.42 + (_d)->arch.paging.shadow.locker_function = "nobody"; \ 69.43 } while (0) 69.44 69.45 #define shadow_locked_by_me(_d) \ 69.46 - (current->processor == (_d)->arch.shadow.locker) 69.47 + (current->processor == (_d)->arch.paging.shadow.locker) 69.48 69.49 -#define shadow_lock(_d) \ 69.50 - do { \ 69.51 - if ( unlikely((_d)->arch.shadow.locker == current->processor) ) \ 69.52 - { \ 69.53 - printk("Error: shadow lock held by %s\n", \ 69.54 - (_d)->arch.shadow.locker_function); \ 69.55 - BUG(); \ 69.56 - } \ 69.57 - spin_lock(&(_d)->arch.shadow.lock); \ 69.58 - ASSERT((_d)->arch.shadow.locker == -1); \ 69.59 - (_d)->arch.shadow.locker = current->processor; \ 69.60 - (_d)->arch.shadow.locker_function = __func__; \ 69.61 +#define shadow_lock(_d) \ 69.62 + do { \ 69.63 + if ( unlikely((_d)->arch.paging.shadow.locker == current->processor) )\ 69.64 + { \ 69.65 + printk("Error: shadow lock held by %s\n", \ 69.66 + (_d)->arch.paging.shadow.locker_function); \ 69.67 + BUG(); \ 69.68 + } \ 69.69 + spin_lock(&(_d)->arch.paging.shadow.lock); \ 69.70 + ASSERT((_d)->arch.paging.shadow.locker == -1); \ 69.71 + (_d)->arch.paging.shadow.locker = current->processor; \ 69.72 + (_d)->arch.paging.shadow.locker_function = __func__; \ 69.73 } while (0) 69.74 69.75 -#define shadow_unlock(_d) \ 69.76 - do { \ 69.77 - ASSERT((_d)->arch.shadow.locker == current->processor); \ 69.78 - (_d)->arch.shadow.locker = -1; \ 69.79 - (_d)->arch.shadow.locker_function = "nobody"; \ 69.80 - spin_unlock(&(_d)->arch.shadow.lock); \ 69.81 +#define shadow_unlock(_d) \ 69.82 + do { \ 69.83 + ASSERT((_d)->arch.paging.shadow.locker == current->processor); \ 69.84 + (_d)->arch.paging.shadow.locker = -1; \ 69.85 + (_d)->arch.paging.shadow.locker_function = "nobody"; \ 69.86 + spin_unlock(&(_d)->arch.paging.shadow.lock); \ 69.87 } 
while (0) 69.88 69.89 69.90 @@ -152,13 +151,6 @@ extern void shadow_audit_tables(struct v 69.91 #define shadow_audit_tables(_v) do {} while(0) 69.92 #endif 69.93 69.94 -#if SHADOW_AUDIT & SHADOW_AUDIT_P2M 69.95 -extern void shadow_audit_p2m(struct domain *d); 69.96 -#else 69.97 -#define shadow_audit_p2m(_d) do {} while(0) 69.98 -#endif 69.99 - 69.100 - 69.101 /****************************************************************************** 69.102 * Macro for dealing with the naming of the internal names of the 69.103 * shadow code's external entry points. 69.104 @@ -304,7 +296,7 @@ static inline int sh_type_is_pinnable(st 69.105 * page. When we're shadowing those kernels, we have to pin l3 69.106 * shadows so they don't just evaporate on every context switch. 69.107 * For all other guests, we'd rather use the up-pointer field in l3s. */ 69.108 - if ( unlikely((v->domain->arch.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) 69.109 + if ( unlikely((v->domain->arch.paging.shadow.opt_flags & SHOPT_LINUX_L3_TOPLEVEL) 69.110 && CONFIG_PAGING_LEVELS >= 4 69.111 && t == SH_type_l3_64_shadow) ) 69.112 return 1; 69.113 @@ -379,12 +371,11 @@ void sh_install_xen_entries_in_l2h(struc 69.114 void sh_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn); 69.115 69.116 /* Update the shadows in response to a pagetable write from Xen */ 69.117 -extern int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, 69.118 - void *entry, u32 size); 69.119 +int sh_validate_guest_entry(struct vcpu *v, mfn_t gmfn, void *entry, u32 size); 69.120 69.121 /* Update the shadows in response to a pagetable write from a HVM guest */ 69.122 -extern void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 69.123 - void *entry, u32 size); 69.124 +void sh_validate_guest_pt_write(struct vcpu *v, mfn_t gmfn, 69.125 + void *entry, u32 size); 69.126 69.127 /* Remove all writeable mappings of a guest frame from the shadows. 69.128 * Returns non-zero if we need to flush TLBs. 69.129 @@ -394,6 +385,21 @@ extern int sh_remove_write_access(struct 69.130 unsigned int level, 69.131 unsigned long fault_addr); 69.132 69.133 +/* Allocate/free functions for passing to the P2M code. */ 69.134 +struct page_info *shadow_alloc_p2m_page(struct domain *d); 69.135 +void shadow_free_p2m_page(struct domain *d, struct page_info *pg); 69.136 + 69.137 +/* Functions that atomically write PT/P2M entries and update state */ 69.138 +void shadow_write_p2m_entry(struct vcpu *v, unsigned long gfn, 69.139 + l1_pgentry_t *p, l1_pgentry_t new, 69.140 + unsigned int level); 69.141 +int shadow_write_guest_entry(struct vcpu *v, intpte_t *p, 69.142 + intpte_t new, mfn_t gmfn); 69.143 +int shadow_cmpxchg_guest_entry(struct vcpu *v, intpte_t *p, 69.144 + intpte_t *old, intpte_t new, mfn_t gmfn); 69.145 + 69.146 + 69.147 + 69.148 /****************************************************************************** 69.149 * Flags used in the return value of the shadow_set_lXe() functions... 
69.150 */ 69.151 @@ -477,19 +483,6 @@ sh_unmap_domain_page_global(void *p) 69.152 unmap_domain_page_global(p); 69.153 } 69.154 69.155 -static inline mfn_t 69.156 -pagetable_get_mfn(pagetable_t pt) 69.157 -{ 69.158 - return _mfn(pagetable_get_pfn(pt)); 69.159 -} 69.160 - 69.161 -static inline pagetable_t 69.162 -pagetable_from_mfn(mfn_t mfn) 69.163 -{ 69.164 - return pagetable_from_pfn(mfn_x(mfn)); 69.165 -} 69.166 - 69.167 - 69.168 /****************************************************************************** 69.169 * Log-dirty mode bitmap handling 69.170 */ 69.171 @@ -502,13 +495,13 @@ sh_mfn_is_dirty(struct domain *d, mfn_t 69.172 { 69.173 unsigned long pfn; 69.174 ASSERT(shadow_mode_log_dirty(d)); 69.175 - ASSERT(d->arch.shadow.dirty_bitmap != NULL); 69.176 + ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); 69.177 69.178 /* We /really/ mean PFN here, even for non-translated guests. */ 69.179 pfn = get_gpfn_from_mfn(mfn_x(gmfn)); 69.180 if ( likely(VALID_M2P(pfn)) 69.181 - && likely(pfn < d->arch.shadow.dirty_bitmap_size) 69.182 - && test_bit(pfn, d->arch.shadow.dirty_bitmap) ) 69.183 + && likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) 69.184 + && test_bit(pfn, d->arch.paging.shadow.dirty_bitmap) ) 69.185 return 1; 69.186 69.187 return 0; 69.188 @@ -612,7 +605,7 @@ static inline int sh_pin(struct vcpu *v, 69.189 sp->pinned = 1; 69.190 } 69.191 /* Put it at the head of the list of pinned shadows */ 69.192 - list_add(&sp->list, &v->domain->arch.shadow.pinned_shadows); 69.193 + list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows); 69.194 return 1; 69.195 } 69.196
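The shadow_lock()/shadow_unlock() macros above record the locking CPU and the locking function's name so that a recursive entry can be diagnosed; sh_page_fault() prints locker_function for exactly this case earlier in the diff. A purely illustrative usage sketch follows, with a made-up helper name.

    /* Illustrative only: the shape of a critical section under the macros
     * above.  shadow_do_work() is a hypothetical helper. */
    static void shadow_do_work(struct domain *d)
    {
        shadow_lock(d);    /* records current->processor and __func__ */
        /* ... modify d->arch.paging.shadow state under the lock ... */
        shadow_unlock(d);  /* resets locker to -1 / "nobody" */
    }
    /* If the shadow code faults and re-enters itself while the lock is held,
     * the handler can report which function already owns it instead of
     * spinning forever on its own CPU. */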
70.1 --- a/xen/arch/x86/mm/shadow/types.h Thu Feb 15 13:13:36 2007 -0700 70.2 +++ b/xen/arch/x86/mm/shadow/types.h Thu Feb 15 14:09:39 2007 -0700 70.3 @@ -414,15 +414,9 @@ valid_gfn(gfn_t m) 70.4 static inline mfn_t 70.5 vcpu_gfn_to_mfn(struct vcpu *v, gfn_t gfn) 70.6 { 70.7 - if ( !shadow_vcpu_mode_translate(v) ) 70.8 + if ( !paging_vcpu_mode_translate(v) ) 70.9 return _mfn(gfn_x(gfn)); 70.10 - return sh_gfn_to_mfn(v->domain, gfn_x(gfn)); 70.11 -} 70.12 - 70.13 -static inline gfn_t 70.14 -mfn_to_gfn(struct domain *d, mfn_t mfn) 70.15 -{ 70.16 - return _gfn(sh_mfn_to_gfn(d, mfn)); 70.17 + return gfn_to_mfn(v->domain, gfn_x(gfn)); 70.18 } 70.19 70.20 static inline paddr_t 70.21 @@ -453,10 +447,8 @@ struct shadow_walk_t 70.22 guest_l2e_t *l2e; /* Pointer to guest's level 2 entry */ 70.23 guest_l1e_t *l1e; /* Pointer to guest's level 1 entry */ 70.24 guest_l1e_t eff_l1e; /* Effective level 1 entry */ 70.25 -#if GUEST_PAGING_LEVELS >= 3 70.26 #if GUEST_PAGING_LEVELS >= 4 70.27 mfn_t l4mfn; /* MFN that the level 4 entry is in */ 70.28 -#endif 70.29 mfn_t l3mfn; /* MFN that the level 3 entry is in */ 70.30 #endif 70.31 mfn_t l2mfn; /* MFN that the level 2 entry is in */
71.1 --- a/xen/arch/x86/setup.c Thu Feb 15 13:13:36 2007 -0700 71.2 +++ b/xen/arch/x86/setup.c Thu Feb 15 14:09:39 2007 -0700 71.3 @@ -29,7 +29,7 @@ 71.4 #include <asm/mpspec.h> 71.5 #include <asm/apic.h> 71.6 #include <asm/desc.h> 71.7 -#include <asm/shadow.h> 71.8 +#include <asm/paging.h> 71.9 #include <asm/e820.h> 71.10 #include <acm/acm_hooks.h> 71.11 #include <xen/kexec.h>
72.1 --- a/xen/arch/x86/sysctl.c Thu Feb 15 13:13:36 2007 -0700 72.2 +++ b/xen/arch/x86/sysctl.c Thu Feb 15 14:09:39 2007 -0700 72.3 @@ -19,7 +19,6 @@ 72.4 #include <xen/trace.h> 72.5 #include <xen/console.h> 72.6 #include <xen/iocap.h> 72.7 -#include <asm/shadow.h> 72.8 #include <asm/irq.h> 72.9 #include <asm/hvm/hvm.h> 72.10 #include <asm/hvm/support.h>
73.1 --- a/xen/arch/x86/traps.c Thu Feb 15 13:13:36 2007 -0700 73.2 +++ b/xen/arch/x86/traps.c Thu Feb 15 14:09:39 2007 -0700 73.3 @@ -46,7 +46,7 @@ 73.4 #include <xen/nmi.h> 73.5 #include <xen/version.h> 73.6 #include <xen/kexec.h> 73.7 -#include <asm/shadow.h> 73.8 +#include <asm/paging.h> 73.9 #include <asm/system.h> 73.10 #include <asm/io.h> 73.11 #include <asm/atomic.h> 73.12 @@ -860,8 +860,8 @@ static int fixup_page_fault(unsigned lon 73.13 73.14 if ( unlikely(IN_HYPERVISOR_RANGE(addr)) ) 73.15 { 73.16 - if ( shadow_mode_external(d) && guest_mode(regs) ) 73.17 - return shadow_fault(addr, regs); 73.18 + if ( paging_mode_external(d) && guest_mode(regs) ) 73.19 + return paging_fault(addr, regs); 73.20 if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) 73.21 return handle_gdt_ldt_mapping_fault( 73.22 addr - GDT_LDT_VIRT_START, regs); 73.23 @@ -876,8 +876,8 @@ static int fixup_page_fault(unsigned lon 73.24 ptwr_do_page_fault(v, addr, regs) ) 73.25 return EXCRET_fault_fixed; 73.26 73.27 - if ( shadow_mode_enabled(d) ) 73.28 - return shadow_fault(addr, regs); 73.29 + if ( paging_mode_enabled(d) ) 73.30 + return paging_fault(addr, regs); 73.31 73.32 return 0; 73.33 }
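Note: the fixup_page_fault() changes above swap direct shadow_fault()/shadow_mode_*() calls for the new mode-neutral paging_fault()/paging_mode_*() layer, so callers no longer need to know which paging-assistance implementation a domain uses. A stand-alone sketch of the dispatch idea (every name below is invented for the illustration; this is not the Xen implementation):

#include <stdio.h>

struct regs { unsigned long error_code; };

/* Per-domain table of paging entry points; shadow is one implementation. */
struct paging_mode {
    int (*page_fault)(unsigned long va, struct regs *r);
};

static int shadow_page_fault(unsigned long va, struct regs *r)
{
    printf("shadow handler fixing fault at %#lx\n", va);
    return 1;                                  /* fault fixed */
}

static struct paging_mode shadow_mode = { .page_fault = shadow_page_fault };

/* What used to be a hard-wired shadow_fault() call becomes a dispatch
 * through whichever mode the domain currently runs. */
static int paging_fault(struct paging_mode *mode, unsigned long va,
                        struct regs *r)
{
    return mode->page_fault(va, r);
}

int main(void)
{
    struct regs r = { .error_code = 2 };
    return paging_fault(&shadow_mode, 0xdeadbeefUL, &r) == 1 ? 0 : 1;
}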
74.1 --- a/xen/arch/x86/x86_32/domain_page.c Thu Feb 15 13:13:36 2007 -0700 74.2 +++ b/xen/arch/x86/x86_32/domain_page.c Thu Feb 15 14:09:39 2007 -0700 74.3 @@ -11,7 +11,6 @@ 74.4 #include <xen/mm.h> 74.5 #include <xen/perfc.h> 74.6 #include <xen/domain_page.h> 74.7 -#include <xen/shadow.h> 74.8 #include <asm/current.h> 74.9 #include <asm/flushtlb.h> 74.10 #include <asm/hardirq.h>
75.1 --- a/xen/arch/x86/x86_64/traps.c Thu Feb 15 13:13:36 2007 -0700 75.2 +++ b/xen/arch/x86/x86_64/traps.c Thu Feb 15 14:09:39 2007 -0700 75.3 @@ -16,7 +16,6 @@ 75.4 #include <asm/flushtlb.h> 75.5 #include <asm/msr.h> 75.6 #include <asm/page.h> 75.7 -#include <asm/shadow.h> 75.8 #include <asm/shared.h> 75.9 #include <asm/hvm/hvm.h> 75.10 #include <asm/hvm/support.h>
76.1 --- a/xen/common/libelf/libelf-dominfo.c Thu Feb 15 13:13:36 2007 -0700 76.2 +++ b/xen/common/libelf/libelf-dominfo.c Thu Feb 15 14:09:39 2007 -0700 76.3 @@ -15,61 +15,63 @@ const char *elf_xen_feature_names[] = { 76.4 [XENFEAT_pae_pgdir_above_4gb] = "pae_pgdir_above_4gb" 76.5 }; 76.6 const int elf_xen_features = 76.7 - sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]); 76.8 +sizeof(elf_xen_feature_names) / sizeof(elf_xen_feature_names[0]); 76.9 76.10 int elf_xen_parse_features(const char *features, 76.11 - uint32_t *supported, 76.12 - uint32_t *required) 76.13 + uint32_t *supported, 76.14 + uint32_t *required) 76.15 { 76.16 char feature[64]; 76.17 int pos, len, i; 76.18 76.19 - if (NULL == features) 76.20 - return 0; 76.21 - for (pos = 0; features[pos] != '\0'; pos += len) 76.22 + if ( features == NULL ) 76.23 + return 0; 76.24 + 76.25 + for ( pos = 0; features[pos] != '\0'; pos += len ) 76.26 { 76.27 - memset(feature, 0, sizeof(feature)); 76.28 - for (len = 0;; len++) 76.29 - { 76.30 - if (len >= sizeof(feature)-1) 76.31 - break; 76.32 - if (features[pos + len] == '\0') 76.33 - break; 76.34 - if (features[pos + len] == '|') 76.35 - { 76.36 - len++; 76.37 - break; 76.38 - } 76.39 - feature[len] = features[pos + len]; 76.40 - } 76.41 + memset(feature, 0, sizeof(feature)); 76.42 + for ( len = 0;; len++ ) 76.43 + { 76.44 + if ( len >= sizeof(feature)-1 ) 76.45 + break; 76.46 + if ( features[pos + len] == '\0' ) 76.47 + break; 76.48 + if ( features[pos + len] == '|' ) 76.49 + { 76.50 + len++; 76.51 + break; 76.52 + } 76.53 + feature[len] = features[pos + len]; 76.54 + } 76.55 76.56 - for (i = 0; i < elf_xen_features; i++) 76.57 - { 76.58 - if (!elf_xen_feature_names[i]) 76.59 - continue; 76.60 - if (NULL != required && feature[0] == '!') 76.61 - { 76.62 - /* required */ 76.63 - if (0 == strcmp(feature + 1, elf_xen_feature_names[i])) 76.64 - { 76.65 - elf_xen_feature_set(i, supported); 76.66 - elf_xen_feature_set(i, required); 76.67 - break; 76.68 - } 76.69 - } 76.70 - else 76.71 - { 76.72 - /* supported */ 76.73 - if (0 == strcmp(feature, elf_xen_feature_names[i])) 76.74 - { 76.75 - elf_xen_feature_set(i, supported); 76.76 - break; 76.77 - } 76.78 - } 76.79 - } 76.80 - if (i == elf_xen_features) 76.81 - return -1; 76.82 + for ( i = 0; i < elf_xen_features; i++ ) 76.83 + { 76.84 + if ( !elf_xen_feature_names[i] ) 76.85 + continue; 76.86 + if ( (required != NULL) && (feature[0] == '!') ) 76.87 + { 76.88 + /* required */ 76.89 + if ( !strcmp(feature + 1, elf_xen_feature_names[i]) ) 76.90 + { 76.91 + elf_xen_feature_set(i, supported); 76.92 + elf_xen_feature_set(i, required); 76.93 + break; 76.94 + } 76.95 + } 76.96 + else 76.97 + { 76.98 + /* supported */ 76.99 + if ( !strcmp(feature, elf_xen_feature_names[i]) ) 76.100 + { 76.101 + elf_xen_feature_set(i, supported); 76.102 + break; 76.103 + } 76.104 + } 76.105 + } 76.106 + if ( i == elf_xen_features ) 76.107 + return -1; 76.108 } 76.109 + 76.110 return 0; 76.111 } 76.112 76.113 @@ -77,26 +79,26 @@ int elf_xen_parse_features(const char *f 76.114 /* xen elf notes */ 76.115 76.116 int elf_xen_parse_note(struct elf_binary *elf, 76.117 - struct elf_dom_parms *parms, 76.118 - const elf_note *note) 76.119 + struct elf_dom_parms *parms, 76.120 + const elf_note *note) 76.121 { 76.122 /* *INDENT-OFF* */ 76.123 static const struct { 76.124 - char *name; 76.125 - int str; 76.126 + char *name; 76.127 + int str; 76.128 } note_desc[] = { 76.129 - [XEN_ELFNOTE_ENTRY] = { "ENTRY", 0}, 76.130 - [XEN_ELFNOTE_HYPERCALL_PAGE] = { 
"HYPERCALL_PAGE", 0}, 76.131 - [XEN_ELFNOTE_VIRT_BASE] = { "VIRT_BASE", 0}, 76.132 - [XEN_ELFNOTE_PADDR_OFFSET] = { "PADDR_OFFSET", 0}, 76.133 - [XEN_ELFNOTE_HV_START_LOW] = { "HV_START_LOW", 0}, 76.134 - [XEN_ELFNOTE_XEN_VERSION] = { "XEN_VERSION", 1}, 76.135 - [XEN_ELFNOTE_GUEST_OS] = { "GUEST_OS", 1}, 76.136 - [XEN_ELFNOTE_GUEST_VERSION] = { "GUEST_VERSION", 1}, 76.137 - [XEN_ELFNOTE_LOADER] = { "LOADER", 1}, 76.138 - [XEN_ELFNOTE_PAE_MODE] = { "PAE_MODE", 1}, 76.139 - [XEN_ELFNOTE_FEATURES] = { "FEATURES", 1}, 76.140 - [XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1}, 76.141 + [XEN_ELFNOTE_ENTRY] = { "ENTRY", 0}, 76.142 + [XEN_ELFNOTE_HYPERCALL_PAGE] = { "HYPERCALL_PAGE", 0}, 76.143 + [XEN_ELFNOTE_VIRT_BASE] = { "VIRT_BASE", 0}, 76.144 + [XEN_ELFNOTE_PADDR_OFFSET] = { "PADDR_OFFSET", 0}, 76.145 + [XEN_ELFNOTE_HV_START_LOW] = { "HV_START_LOW", 0}, 76.146 + [XEN_ELFNOTE_XEN_VERSION] = { "XEN_VERSION", 1}, 76.147 + [XEN_ELFNOTE_GUEST_OS] = { "GUEST_OS", 1}, 76.148 + [XEN_ELFNOTE_GUEST_VERSION] = { "GUEST_VERSION", 1}, 76.149 + [XEN_ELFNOTE_LOADER] = { "LOADER", 1}, 76.150 + [XEN_ELFNOTE_PAE_MODE] = { "PAE_MODE", 1}, 76.151 + [XEN_ELFNOTE_FEATURES] = { "FEATURES", 1}, 76.152 + [XEN_ELFNOTE_BSD_SYMTAB] = { "BSD_SYMTAB", 1}, 76.153 }; 76.154 /* *INDENT-ON* */ 76.155 76.156 @@ -104,73 +106,73 @@ int elf_xen_parse_note(struct elf_binary 76.157 uint64_t val = 0; 76.158 int type = elf_uval(elf, note, type); 76.159 76.160 - if ((type >= sizeof(note_desc) / sizeof(note_desc[0])) || 76.161 - (NULL == note_desc[type].name)) 76.162 + if ( (type >= sizeof(note_desc) / sizeof(note_desc[0])) || 76.163 + (note_desc[type].name == NULL) ) 76.164 { 76.165 - elf_err(elf, "%s: unknown xen elf note (0x%x)\n", 76.166 - __FUNCTION__, type); 76.167 - return -1; 76.168 + elf_msg(elf, "%s: unknown xen elf note (0x%x)\n", 76.169 + __FUNCTION__, type); 76.170 + return 0; 76.171 } 76.172 76.173 - if (note_desc[type].str) 76.174 + if ( note_desc[type].str ) 76.175 { 76.176 - str = elf_note_desc(elf, note); 76.177 - elf_msg(elf, "%s: %s = \"%s\"\n", __FUNCTION__, 76.178 - note_desc[type].name, str); 76.179 + str = elf_note_desc(elf, note); 76.180 + elf_msg(elf, "%s: %s = \"%s\"\n", __FUNCTION__, 76.181 + note_desc[type].name, str); 76.182 } 76.183 else 76.184 { 76.185 - val = elf_note_numeric(elf, note); 76.186 - elf_msg(elf, "%s: %s = 0x%" PRIx64 "\n", __FUNCTION__, 76.187 - note_desc[type].name, val); 76.188 + val = elf_note_numeric(elf, note); 76.189 + elf_msg(elf, "%s: %s = 0x%" PRIx64 "\n", __FUNCTION__, 76.190 + note_desc[type].name, val); 76.191 } 76.192 76.193 - switch (type) 76.194 + switch ( type ) 76.195 { 76.196 case XEN_ELFNOTE_LOADER: 76.197 - safe_strcpy(parms->loader, str); 76.198 - break; 76.199 + safe_strcpy(parms->loader, str); 76.200 + break; 76.201 case XEN_ELFNOTE_GUEST_OS: 76.202 - safe_strcpy(parms->guest_os, str); 76.203 - break; 76.204 + safe_strcpy(parms->guest_os, str); 76.205 + break; 76.206 case XEN_ELFNOTE_GUEST_VERSION: 76.207 - safe_strcpy(parms->guest_ver, str); 76.208 - break; 76.209 + safe_strcpy(parms->guest_ver, str); 76.210 + break; 76.211 case XEN_ELFNOTE_XEN_VERSION: 76.212 - safe_strcpy(parms->xen_ver, str); 76.213 - break; 76.214 + safe_strcpy(parms->xen_ver, str); 76.215 + break; 76.216 case XEN_ELFNOTE_PAE_MODE: 76.217 - if (0 == strcmp(str, "yes")) 76.218 - parms->pae = 2 /* extended_cr3 */; 76.219 - if (strstr(str, "bimodal")) 76.220 - parms->pae = 3 /* bimodal */; 76.221 - break; 76.222 + if ( !strcmp(str, "yes") ) 76.223 + parms->pae = 2 /* extended_cr3 */; 76.224 + if ( 
strstr(str, "bimodal") ) 76.225 + parms->pae = 3 /* bimodal */; 76.226 + break; 76.227 case XEN_ELFNOTE_BSD_SYMTAB: 76.228 - if (0 == strcmp(str, "yes")) 76.229 - parms->bsd_symtab = 1; 76.230 - break; 76.231 + if ( !strcmp(str, "yes") ) 76.232 + parms->bsd_symtab = 1; 76.233 + break; 76.234 76.235 case XEN_ELFNOTE_VIRT_BASE: 76.236 - parms->virt_base = val; 76.237 - break; 76.238 + parms->virt_base = val; 76.239 + break; 76.240 case XEN_ELFNOTE_ENTRY: 76.241 - parms->virt_entry = val; 76.242 - break; 76.243 + parms->virt_entry = val; 76.244 + break; 76.245 case XEN_ELFNOTE_PADDR_OFFSET: 76.246 - parms->elf_paddr_offset = val; 76.247 - break; 76.248 + parms->elf_paddr_offset = val; 76.249 + break; 76.250 case XEN_ELFNOTE_HYPERCALL_PAGE: 76.251 - parms->virt_hypercall = val; 76.252 - break; 76.253 + parms->virt_hypercall = val; 76.254 + break; 76.255 case XEN_ELFNOTE_HV_START_LOW: 76.256 - parms->virt_hv_start_low = val; 76.257 - break; 76.258 + parms->virt_hv_start_low = val; 76.259 + break; 76.260 76.261 case XEN_ELFNOTE_FEATURES: 76.262 - if (0 != elf_xen_parse_features(str, parms->f_supported, 76.263 - parms->f_required)) 76.264 - return -1; 76.265 - break; 76.266 + if ( elf_xen_parse_features(str, parms->f_supported, 76.267 + parms->f_required) ) 76.268 + return -1; 76.269 + break; 76.270 76.271 } 76.272 return 0; 76.273 @@ -180,83 +182,85 @@ int elf_xen_parse_note(struct elf_binary 76.274 /* __xen_guest section */ 76.275 76.276 int elf_xen_parse_guest_info(struct elf_binary *elf, 76.277 - struct elf_dom_parms *parms) 76.278 + struct elf_dom_parms *parms) 76.279 { 76.280 const char *h; 76.281 char name[32], value[128]; 76.282 int len; 76.283 76.284 h = parms->guest_info; 76.285 - while (*h) 76.286 + while ( *h ) 76.287 { 76.288 - memset(name, 0, sizeof(name)); 76.289 - memset(value, 0, sizeof(value)); 76.290 - for (len = 0;; len++, h++) { 76.291 - if (len >= sizeof(name)-1) 76.292 - break; 76.293 - if (*h == '\0') 76.294 - break; 76.295 - if (*h == ',') 76.296 - { 76.297 - h++; 76.298 - break; 76.299 - } 76.300 - if (*h == '=') 76.301 - { 76.302 - h++; 76.303 - for (len = 0;; len++, h++) { 76.304 - if (len >= sizeof(value)-1) 76.305 - break; 76.306 - if (*h == '\0') 76.307 - break; 76.308 - if (*h == ',') 76.309 - { 76.310 - h++; 76.311 - break; 76.312 - } 76.313 - value[len] = *h; 76.314 - } 76.315 - break; 76.316 - } 76.317 - name[len] = *h; 76.318 - } 76.319 - elf_msg(elf, "%s: %s=\"%s\"\n", __FUNCTION__, name, value); 76.320 + memset(name, 0, sizeof(name)); 76.321 + memset(value, 0, sizeof(value)); 76.322 + for ( len = 0;; len++, h++ ) 76.323 + { 76.324 + if ( len >= sizeof(name)-1 ) 76.325 + break; 76.326 + if ( *h == '\0' ) 76.327 + break; 76.328 + if ( *h == ',' ) 76.329 + { 76.330 + h++; 76.331 + break; 76.332 + } 76.333 + if ( *h == '=' ) 76.334 + { 76.335 + h++; 76.336 + for ( len = 0;; len++, h++ ) 76.337 + { 76.338 + if ( len >= sizeof(value)-1 ) 76.339 + break; 76.340 + if ( *h == '\0' ) 76.341 + break; 76.342 + if ( *h == ',' ) 76.343 + { 76.344 + h++; 76.345 + break; 76.346 + } 76.347 + value[len] = *h; 76.348 + } 76.349 + break; 76.350 + } 76.351 + name[len] = *h; 76.352 + } 76.353 + elf_msg(elf, "%s: %s=\"%s\"\n", __FUNCTION__, name, value); 76.354 76.355 - /* strings */ 76.356 - if (0 == strcmp(name, "LOADER")) 76.357 - safe_strcpy(parms->loader, value); 76.358 - if (0 == strcmp(name, "GUEST_OS")) 76.359 - safe_strcpy(parms->guest_os, value); 76.360 - if (0 == strcmp(name, "GUEST_VER")) 76.361 - safe_strcpy(parms->guest_ver, value); 76.362 - if (0 == strcmp(name, 
"XEN_VER")) 76.363 - safe_strcpy(parms->xen_ver, value); 76.364 - if (0 == strcmp(name, "PAE")) 76.365 - { 76.366 - if (0 == strcmp(value, "yes[extended-cr3]")) 76.367 - parms->pae = 2 /* extended_cr3 */; 76.368 - else if (0 == strncmp(value, "yes", 3)) 76.369 - parms->pae = 1 /* yes */; 76.370 - } 76.371 - if (0 == strcmp(name, "BSD_SYMTAB")) 76.372 - parms->bsd_symtab = 1; 76.373 + /* strings */ 76.374 + if ( !strcmp(name, "LOADER") ) 76.375 + safe_strcpy(parms->loader, value); 76.376 + if ( !strcmp(name, "GUEST_OS") ) 76.377 + safe_strcpy(parms->guest_os, value); 76.378 + if ( !strcmp(name, "GUEST_VER") ) 76.379 + safe_strcpy(parms->guest_ver, value); 76.380 + if ( !strcmp(name, "XEN_VER") ) 76.381 + safe_strcpy(parms->xen_ver, value); 76.382 + if ( !strcmp(name, "PAE") ) 76.383 + { 76.384 + if ( !strcmp(value, "yes[extended-cr3]") ) 76.385 + parms->pae = 2 /* extended_cr3 */; 76.386 + else if ( !strncmp(value, "yes", 3) ) 76.387 + parms->pae = 1 /* yes */; 76.388 + } 76.389 + if ( !strcmp(name, "BSD_SYMTAB") ) 76.390 + parms->bsd_symtab = 1; 76.391 76.392 - /* longs */ 76.393 - if (0 == strcmp(name, "VIRT_BASE")) 76.394 - parms->virt_base = strtoull(value, NULL, 0); 76.395 - if (0 == strcmp(name, "VIRT_ENTRY")) 76.396 - parms->virt_entry = strtoull(value, NULL, 0); 76.397 - if (0 == strcmp(name, "ELF_PADDR_OFFSET")) 76.398 - parms->elf_paddr_offset = strtoull(value, NULL, 0); 76.399 - if (0 == strcmp(name, "HYPERCALL_PAGE")) 76.400 - parms->virt_hypercall = (strtoull(value, NULL, 0) << 12) + 76.401 - parms->virt_base; 76.402 + /* longs */ 76.403 + if ( !strcmp(name, "VIRT_BASE") ) 76.404 + parms->virt_base = strtoull(value, NULL, 0); 76.405 + if ( !strcmp(name, "VIRT_ENTRY") ) 76.406 + parms->virt_entry = strtoull(value, NULL, 0); 76.407 + if ( !strcmp(name, "ELF_PADDR_OFFSET") ) 76.408 + parms->elf_paddr_offset = strtoull(value, NULL, 0); 76.409 + if ( !strcmp(name, "HYPERCALL_PAGE") ) 76.410 + parms->virt_hypercall = (strtoull(value, NULL, 0) << 12) + 76.411 + parms->virt_base; 76.412 76.413 - /* other */ 76.414 - if (0 == strcmp(name, "FEATURES")) 76.415 - if (0 != elf_xen_parse_features(value, parms->f_supported, 76.416 - parms->f_required)) 76.417 - return -1; 76.418 + /* other */ 76.419 + if ( !strcmp(name, "FEATURES") ) 76.420 + if ( elf_xen_parse_features(value, parms->f_supported, 76.421 + parms->f_required) ) 76.422 + return -1; 76.423 } 76.424 return 0; 76.425 } 76.426 @@ -265,54 +269,59 @@ int elf_xen_parse_guest_info(struct elf_ 76.427 /* sanity checks */ 76.428 76.429 static int elf_xen_note_check(struct elf_binary *elf, 76.430 - struct elf_dom_parms *parms) 76.431 + struct elf_dom_parms *parms) 76.432 { 76.433 - if (NULL == parms->elf_note_start && NULL == parms->guest_info) { 76.434 - int machine = elf_uval(elf, elf->ehdr, e_machine); 76.435 - if (EM_386 == machine || EM_X86_64 == machine) { 76.436 - elf_err(elf, "%s: ERROR: Not a Xen-ELF image: " 76.437 - "No ELF notes or '__xen_guest' section found.\n", 76.438 - __FUNCTION__); 76.439 - return -1; 76.440 - } 76.441 - return 0; 76.442 + if ( (parms->elf_note_start == NULL) && (parms->guest_info == NULL) ) 76.443 + { 76.444 + int machine = elf_uval(elf, elf->ehdr, e_machine); 76.445 + if ( (machine == EM_386) || (machine == EM_X86_64) ) 76.446 + { 76.447 + elf_err(elf, "%s: ERROR: Not a Xen-ELF image: " 76.448 + "No ELF notes or '__xen_guest' section found.\n", 76.449 + __FUNCTION__); 76.450 + return -1; 76.451 + } 76.452 + return 0; 76.453 } 76.454 76.455 /* Check the contents of the Xen notes or guest string. 
*/ 76.456 - if ( ( 0 == strlen(parms->loader) || strncmp(parms->loader, "generic", 7) ) && 76.457 - ( 0 == strlen(parms->guest_os) || strncmp(parms->guest_os, "linux", 5) ) ) 76.458 + if ( ((strlen(parms->loader) == 0) || 76.459 + strncmp(parms->loader, "generic", 7)) && 76.460 + ((strlen(parms->guest_os) == 0) || 76.461 + strncmp(parms->guest_os, "linux", 5)) ) 76.462 { 76.463 - elf_err(elf, "%s: ERROR: Will only load images built for the generic " 76.464 - "loader or Linux images", __FUNCTION__); 76.465 - return -1; 76.466 + elf_err(elf, "%s: ERROR: Will only load images built for the generic " 76.467 + "loader or Linux images", __FUNCTION__); 76.468 + return -1; 76.469 } 76.470 76.471 - if ( 0 == strlen(parms->xen_ver) || strncmp(parms->xen_ver, "xen-3.0", 7) ) 76.472 + if ( (strlen(parms->xen_ver) == 0) || 76.473 + strncmp(parms->xen_ver, "xen-3.0", 7) ) 76.474 { 76.475 - elf_err(elf, "%s: ERROR: Xen will only load images built for Xen v3.0\n", 76.476 - __FUNCTION__); 76.477 - return -1; 76.478 + elf_err(elf, "%s: ERROR: Xen will only load images built " 76.479 + "for Xen v3.0\n", __FUNCTION__); 76.480 + return -1; 76.481 } 76.482 return 0; 76.483 } 76.484 76.485 static int elf_xen_addr_calc_check(struct elf_binary *elf, 76.486 - struct elf_dom_parms *parms) 76.487 + struct elf_dom_parms *parms) 76.488 { 76.489 - if (UNSET_ADDR != parms->elf_paddr_offset && 76.490 - UNSET_ADDR == parms->virt_base ) 76.491 + if ( (parms->elf_paddr_offset != UNSET_ADDR) && 76.492 + (parms->virt_base == UNSET_ADDR) ) 76.493 { 76.494 - elf_err(elf, "%s: ERROR: ELF_PADDR_OFFSET set, VIRT_BASE unset\n", 76.495 - __FUNCTION__); 76.496 + elf_err(elf, "%s: ERROR: ELF_PADDR_OFFSET set, VIRT_BASE unset\n", 76.497 + __FUNCTION__); 76.498 return -1; 76.499 } 76.500 76.501 /* Initial guess for virt_base is 0 if it is not explicitly defined. */ 76.502 - if (UNSET_ADDR == parms->virt_base) 76.503 + if ( parms->virt_base == UNSET_ADDR ) 76.504 { 76.505 - parms->virt_base = 0; 76.506 - elf_msg(elf, "%s: VIRT_BASE unset, using 0x%" PRIx64 "\n", 76.507 - __FUNCTION__, parms->virt_base); 76.508 + parms->virt_base = 0; 76.509 + elf_msg(elf, "%s: VIRT_BASE unset, using 0x%" PRIx64 "\n", 76.510 + __FUNCTION__, parms->virt_base); 76.511 } 76.512 76.513 /* 76.514 @@ -324,22 +333,22 @@ static int elf_xen_addr_calc_check(struc 76.515 * If we are using the modern ELF notes interface then the default 76.516 * is 0. 
76.517 */ 76.518 - if (UNSET_ADDR == parms->elf_paddr_offset) 76.519 + if ( parms->elf_paddr_offset == UNSET_ADDR ) 76.520 { 76.521 - if (parms->elf_note_start) 76.522 - parms->elf_paddr_offset = 0; 76.523 - else 76.524 - parms->elf_paddr_offset = parms->virt_base; 76.525 - elf_msg(elf, "%s: ELF_PADDR_OFFSET unset, using 0x%" PRIx64 "\n", 76.526 - __FUNCTION__, parms->elf_paddr_offset); 76.527 + if ( parms->elf_note_start ) 76.528 + parms->elf_paddr_offset = 0; 76.529 + else 76.530 + parms->elf_paddr_offset = parms->virt_base; 76.531 + elf_msg(elf, "%s: ELF_PADDR_OFFSET unset, using 0x%" PRIx64 "\n", 76.532 + __FUNCTION__, parms->elf_paddr_offset); 76.533 } 76.534 76.535 parms->virt_offset = parms->virt_base - parms->elf_paddr_offset; 76.536 parms->virt_kstart = elf->pstart + parms->virt_offset; 76.537 parms->virt_kend = elf->pend + parms->virt_offset; 76.538 76.539 - if (UNSET_ADDR == parms->virt_entry) 76.540 - parms->virt_entry = elf_uval(elf, elf->ehdr, e_entry); 76.541 + if ( parms->virt_entry == UNSET_ADDR ) 76.542 + parms->virt_entry = elf_uval(elf, elf->ehdr, e_entry); 76.543 76.544 elf_msg(elf, "%s: addresses:\n", __FUNCTION__); 76.545 elf_msg(elf, " virt_base = 0x%" PRIx64 "\n", parms->virt_base); 76.546 @@ -355,7 +364,7 @@ static int elf_xen_addr_calc_check(struc 76.547 (parms->virt_base > parms->virt_kstart) ) 76.548 { 76.549 elf_err(elf, "%s: ERROR: ELF start or entries are out of bounds.\n", 76.550 - __FUNCTION__); 76.551 + __FUNCTION__); 76.552 return -1; 76.553 } 76.554 76.555 @@ -366,7 +375,7 @@ static int elf_xen_addr_calc_check(struc 76.556 /* glue it all together ... */ 76.557 76.558 int elf_xen_parse(struct elf_binary *elf, 76.559 - struct elf_dom_parms *parms) 76.560 + struct elf_dom_parms *parms) 76.561 { 76.562 const elf_note *note; 76.563 const elf_shdr *shdr; 76.564 @@ -382,39 +391,49 @@ int elf_xen_parse(struct elf_binary *elf 76.565 76.566 /* find and parse elf notes */ 76.567 count = elf_shdr_count(elf); 76.568 - for (i = 0; i < count; i++) 76.569 + for ( i = 0; i < count; i++ ) 76.570 { 76.571 - shdr = elf_shdr_by_index(elf, i); 76.572 - if (0 == strcmp(elf_section_name(elf, shdr), "__xen_guest")) 76.573 - parms->guest_info = elf_section_start(elf, shdr); 76.574 - if (elf_uval(elf, shdr, sh_type) != SHT_NOTE) 76.575 - continue; 76.576 - parms->elf_note_start = elf_section_start(elf, shdr); 76.577 - parms->elf_note_end = elf_section_end(elf, shdr); 76.578 - for (note = parms->elf_note_start; 76.579 - (void *)note < parms->elf_note_end; 76.580 - note = elf_note_next(elf, note)) 76.581 - { 76.582 - if (0 != strcmp(elf_note_name(elf, note), "Xen")) 76.583 - continue; 76.584 - if (0 != elf_xen_parse_note(elf, parms, note)) 76.585 - return -1; 76.586 - xen_elfnotes++; 76.587 - } 76.588 + shdr = elf_shdr_by_index(elf, i); 76.589 + if ( !strcmp(elf_section_name(elf, shdr), "__xen_guest") ) 76.590 + parms->guest_info = elf_section_start(elf, shdr); 76.591 + if ( elf_uval(elf, shdr, sh_type) != SHT_NOTE ) 76.592 + continue; 76.593 + parms->elf_note_start = elf_section_start(elf, shdr); 76.594 + parms->elf_note_end = elf_section_end(elf, shdr); 76.595 + for ( note = parms->elf_note_start; 76.596 + (void *)note < parms->elf_note_end; 76.597 + note = elf_note_next(elf, note) ) 76.598 + { 76.599 + if ( strcmp(elf_note_name(elf, note), "Xen") ) 76.600 + continue; 76.601 + if ( elf_xen_parse_note(elf, parms, note) ) 76.602 + return -1; 76.603 + xen_elfnotes++; 76.604 + } 76.605 } 76.606 76.607 - if (!xen_elfnotes && parms->guest_info) 76.608 + if ( !xen_elfnotes && 
parms->guest_info ) 76.609 { 76.610 - parms->elf_note_start = NULL; 76.611 - parms->elf_note_end = NULL; 76.612 - elf_msg(elf, "%s: __xen_guest: \"%s\"\n", __FUNCTION__, 76.613 - parms->guest_info); 76.614 - elf_xen_parse_guest_info(elf, parms); 76.615 + parms->elf_note_start = NULL; 76.616 + parms->elf_note_end = NULL; 76.617 + elf_msg(elf, "%s: __xen_guest: \"%s\"\n", __FUNCTION__, 76.618 + parms->guest_info); 76.619 + elf_xen_parse_guest_info(elf, parms); 76.620 } 76.621 76.622 - if (0 != elf_xen_note_check(elf, parms)) 76.623 - return -1; 76.624 - if (0 != elf_xen_addr_calc_check(elf, parms)) 76.625 - return -1; 76.626 + if ( elf_xen_note_check(elf, parms) != 0 ) 76.627 + return -1; 76.628 + if ( elf_xen_addr_calc_check(elf, parms) != 0 ) 76.629 + return -1; 76.630 return 0; 76.631 } 76.632 + 76.633 +/* 76.634 + * Local variables: 76.635 + * mode: C 76.636 + * c-set-style: "BSD" 76.637 + * c-basic-offset: 4 76.638 + * tab-width: 4 76.639 + * indent-tabs-mode: nil 76.640 + * End: 76.641 + */
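Note: elf_xen_parse_features() above walks a '|'-separated list of feature names; a leading '!' marks a feature the kernel requires (set in both bitmaps) rather than merely supports, and any unrecognised name makes the whole parse fail with -1. A hedged usage sketch: the include follows the hypervisor-side path seen in libelf-private.h, the feature string is only an example value, and the single-element bitmaps assume fewer than 32 feature bits.

#include <stdio.h>
#include <stdint.h>
#include <public/libelf.h>   /* assumed to declare elf_xen_parse_features() */

int check_features(void)
{
    uint32_t supported[1] = { 0 };
    uint32_t required[1]  = { 0 };

    /* "writable_page_tables" is offered; "auto_translated_physmap" is
     * demanded because of the '!' prefix. */
    if ( elf_xen_parse_features("writable_page_tables|!auto_translated_physmap",
                                supported, required) != 0 )
    {
        fprintf(stderr, "unknown feature name in note/string\n");
        return -1;
    }
    return 0;
}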
77.1 --- a/xen/common/libelf/libelf-loader.c Thu Feb 15 13:13:36 2007 -0700 77.2 +++ b/xen/common/libelf/libelf-loader.c Thu Feb 15 14:09:39 2007 -0700 77.3 @@ -11,10 +11,10 @@ int elf_init(struct elf_binary *elf, con 77.4 const elf_shdr *shdr; 77.5 uint64_t i, count, section, offset; 77.6 77.7 - if (!elf_is_elfbinary(image)) 77.8 + if ( !elf_is_elfbinary(image) ) 77.9 { 77.10 - elf_err(elf, "%s: not an ELF binary\n", __FUNCTION__); 77.11 - return -1; 77.12 + elf_err(elf, "%s: not an ELF binary\n", __FUNCTION__); 77.13 + return -1; 77.14 } 77.15 77.16 memset(elf, 0, sizeof(*elf)); 77.17 @@ -26,46 +26,46 @@ int elf_init(struct elf_binary *elf, con 77.18 77.19 /* sanity check phdr */ 77.20 offset = elf_uval(elf, elf->ehdr, e_phoff) + 77.21 - elf_uval(elf, elf->ehdr, e_phentsize) * elf_phdr_count(elf); 77.22 - if (offset > elf->size) 77.23 + elf_uval(elf, elf->ehdr, e_phentsize) * elf_phdr_count(elf); 77.24 + if ( offset > elf->size ) 77.25 { 77.26 - elf_err(elf, "%s: phdr overflow (off %" PRIx64 " > size %lx)\n", 77.27 - __FUNCTION__, offset, (unsigned long)elf->size); 77.28 - return -1; 77.29 + elf_err(elf, "%s: phdr overflow (off %" PRIx64 " > size %lx)\n", 77.30 + __FUNCTION__, offset, (unsigned long)elf->size); 77.31 + return -1; 77.32 } 77.33 77.34 /* sanity check shdr */ 77.35 offset = elf_uval(elf, elf->ehdr, e_shoff) + 77.36 - elf_uval(elf, elf->ehdr, e_shentsize) * elf_shdr_count(elf); 77.37 - if (offset > elf->size) 77.38 + elf_uval(elf, elf->ehdr, e_shentsize) * elf_shdr_count(elf); 77.39 + if ( offset > elf->size ) 77.40 { 77.41 - elf_err(elf, "%s: shdr overflow (off %" PRIx64 " > size %lx)\n", 77.42 - __FUNCTION__, offset, (unsigned long)elf->size); 77.43 - return -1; 77.44 + elf_err(elf, "%s: shdr overflow (off %" PRIx64 " > size %lx)\n", 77.45 + __FUNCTION__, offset, (unsigned long)elf->size); 77.46 + return -1; 77.47 } 77.48 77.49 /* find section string table */ 77.50 section = elf_uval(elf, elf->ehdr, e_shstrndx); 77.51 shdr = elf_shdr_by_index(elf, section); 77.52 - if (NULL != shdr) 77.53 - elf->sec_strtab = elf_section_start(elf, shdr); 77.54 + if ( shdr != NULL ) 77.55 + elf->sec_strtab = elf_section_start(elf, shdr); 77.56 77.57 /* find symbol table, symbol string table */ 77.58 count = elf_shdr_count(elf); 77.59 - for (i = 0; i < count; i++) 77.60 + for ( i = 0; i < count; i++ ) 77.61 { 77.62 - shdr = elf_shdr_by_index(elf, i); 77.63 - if (elf_uval(elf, shdr, sh_type) != SHT_SYMTAB) 77.64 - continue; 77.65 - elf->sym_tab = shdr; 77.66 - shdr = elf_shdr_by_index(elf, elf_uval(elf, shdr, sh_link)); 77.67 - if (NULL == shdr) 77.68 - { 77.69 - elf->sym_tab = NULL; 77.70 - continue; 77.71 - } 77.72 - elf->sym_strtab = elf_section_start(elf, shdr); 77.73 - break; 77.74 + shdr = elf_shdr_by_index(elf, i); 77.75 + if ( elf_uval(elf, shdr, sh_type) != SHT_SYMTAB ) 77.76 + continue; 77.77 + elf->sym_tab = shdr; 77.78 + shdr = elf_shdr_by_index(elf, elf_uval(elf, shdr, sh_link)); 77.79 + if ( shdr == NULL ) 77.80 + { 77.81 + elf->sym_tab = NULL; 77.82 + continue; 77.83 + } 77.84 + elf->sym_strtab = elf_section_start(elf, shdr); 77.85 + break; 77.86 } 77.87 return 0; 77.88 } 77.89 @@ -91,24 +91,24 @@ void elf_parse_binary(struct elf_binary 77.90 uint64_t i, count, paddr, memsz; 77.91 77.92 count = elf_uval(elf, elf->ehdr, e_phnum); 77.93 - for (i = 0; i < count; i++) 77.94 + for ( i = 0; i < count; i++ ) 77.95 { 77.96 - phdr = elf_phdr_by_index(elf, i); 77.97 - if (!elf_phdr_is_loadable(elf, phdr)) 77.98 - continue; 77.99 - paddr = elf_uval(elf, phdr, p_paddr); 77.100 - memsz = 
elf_uval(elf, phdr, p_memsz); 77.101 - elf_msg(elf, "%s: phdr: paddr=0x%" PRIx64 77.102 - " memsz=0x%" PRIx64 "\n", __FUNCTION__, paddr, memsz); 77.103 - if (low > paddr) 77.104 - low = paddr; 77.105 - if (high < paddr + memsz) 77.106 - high = paddr + memsz; 77.107 + phdr = elf_phdr_by_index(elf, i); 77.108 + if ( !elf_phdr_is_loadable(elf, phdr) ) 77.109 + continue; 77.110 + paddr = elf_uval(elf, phdr, p_paddr); 77.111 + memsz = elf_uval(elf, phdr, p_memsz); 77.112 + elf_msg(elf, "%s: phdr: paddr=0x%" PRIx64 77.113 + " memsz=0x%" PRIx64 "\n", __FUNCTION__, paddr, memsz); 77.114 + if ( low > paddr ) 77.115 + low = paddr; 77.116 + if ( high < paddr + memsz ) 77.117 + high = paddr + memsz; 77.118 } 77.119 elf->pstart = low; 77.120 elf->pend = high; 77.121 elf_msg(elf, "%s: memory: 0x%" PRIx64 " -> 0x%" PRIx64 "\n", 77.122 - __FUNCTION__, elf->pstart, elf->pend); 77.123 + __FUNCTION__, elf->pstart, elf->pend); 77.124 } 77.125 77.126 void elf_load_binary(struct elf_binary *elf) 77.127 @@ -118,18 +118,20 @@ void elf_load_binary(struct elf_binary * 77.128 char *dest; 77.129 77.130 count = elf_uval(elf, elf->ehdr, e_phnum); 77.131 - for (i = 0; i < count; i++) 77.132 + for ( i = 0; i < count; i++ ) 77.133 { 77.134 - phdr = elf_phdr_by_index(elf, i); 77.135 - if (!elf_phdr_is_loadable(elf, phdr)) 77.136 - continue; 77.137 - paddr = elf_uval(elf, phdr, p_paddr); 77.138 - offset = elf_uval(elf, phdr, p_offset); 77.139 - filesz = elf_uval(elf, phdr, p_filesz); 77.140 - memsz = elf_uval(elf, phdr, p_memsz); 77.141 - dest = elf_get_ptr(elf, paddr); 77.142 - memcpy(dest, elf->image + offset, filesz); 77.143 - memset(dest + filesz, 0, memsz - filesz); 77.144 + phdr = elf_phdr_by_index(elf, i); 77.145 + if ( !elf_phdr_is_loadable(elf, phdr) ) 77.146 + continue; 77.147 + paddr = elf_uval(elf, phdr, p_paddr); 77.148 + offset = elf_uval(elf, phdr, p_offset); 77.149 + filesz = elf_uval(elf, phdr, p_filesz); 77.150 + memsz = elf_uval(elf, phdr, p_memsz); 77.151 + dest = elf_get_ptr(elf, paddr); 77.152 + elf_msg(elf, "%s: phdr %" PRIu64 " at 0x%p -> 0x%p\n", 77.153 + __func__, i, dest, dest + filesz); 77.154 + memcpy(dest, elf->image + offset, filesz); 77.155 + memset(dest + filesz, 0, memsz - filesz); 77.156 } 77.157 } 77.158 77.159 @@ -144,13 +146,24 @@ uint64_t elf_lookup_addr(struct elf_bina 77.160 uint64_t value; 77.161 77.162 sym = elf_sym_by_name(elf, symbol); 77.163 - if (NULL == sym) 77.164 + if ( sym == NULL ) 77.165 { 77.166 - elf_err(elf, "%s: not found: %s\n", __FUNCTION__, symbol); 77.167 - return -1; 77.168 + elf_err(elf, "%s: not found: %s\n", __FUNCTION__, symbol); 77.169 + return -1; 77.170 } 77.171 + 77.172 value = elf_uval(elf, sym, st_value); 77.173 elf_msg(elf, "%s: symbol \"%s\" at 0x%" PRIx64 "\n", __FUNCTION__, 77.174 - symbol, value); 77.175 + symbol, value); 77.176 return value; 77.177 } 77.178 + 77.179 +/* 77.180 + * Local variables: 77.181 + * mode: C 77.182 + * c-set-style: "BSD" 77.183 + * c-basic-offset: 4 77.184 + * tab-width: 4 77.185 + * indent-tabs-mode: nil 77.186 + * End: 77.187 + */
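Note: the three loader entry points touched above are used in a fixed order: elf_init() performs the header sanity checks and locates the string/symbol tables, elf_parse_binary() computes the physical footprint into elf->pstart/elf->pend, and elf_load_binary() copies each loadable phdr and zero-fills the tail between p_filesz and p_memsz. An outline under stated assumptions; the elf_init() size argument and the header path are inferred, not shown in this hunk.

#include <stddef.h>
#include <public/libelf.h>   /* assumed hypervisor-side include, as above */

int load_guest_image(struct elf_binary *elf, const char *image, size_t size)
{
    if ( elf_init(elf, image, size) != 0 )  /* ELF magic, phdr/shdr bounds */
        return -1;

    elf_parse_binary(elf);                  /* fills elf->pstart / elf->pend */
    /* ... caller makes [pstart, pend) addressable before loading ... */
    elf_load_binary(elf);                   /* memcpy p_filesz, zero the rest */
    return 0;
}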
78.1 --- a/xen/common/libelf/libelf-private.h Thu Feb 15 13:13:36 2007 -0700 78.2 +++ b/xen/common/libelf/libelf-private.h Thu Feb 15 14:09:39 2007 -0700 78.3 @@ -12,9 +12,9 @@ 78.4 #include <public/libelf.h> 78.5 78.6 #define elf_msg(elf, fmt, args ... ) \ 78.7 - if (elf->verbose) printk(fmt, ## args ) 78.8 + if (elf->verbose) printk(fmt, ## args ) 78.9 #define elf_err(elf, fmt, args ... ) \ 78.10 - printk(fmt, ## args ) 78.11 + printk(fmt, ## args ) 78.12 78.13 #define strtoull(str, end, base) simple_strtoull(str, end, base) 78.14 #define bswap_16(x) swab16(x) 78.15 @@ -43,12 +43,12 @@ 78.16 #include "xc_private.h" 78.17 78.18 #define elf_msg(elf, fmt, args ... ) \ 78.19 - if (elf->log && elf->verbose) fprintf(elf->log, fmt , ## args ) 78.20 -#define elf_err(elf, fmt, args ... ) do { \ 78.21 - if (elf->log) \ 78.22 - fprintf(elf->log, fmt , ## args ); \ 78.23 - xc_set_error(XC_INVALID_KERNEL, fmt , ## args ); \ 78.24 - } while (0) 78.25 + if (elf->log && elf->verbose) fprintf(elf->log, fmt , ## args ) 78.26 +#define elf_err(elf, fmt, args ... ) do { \ 78.27 + if (elf->log) \ 78.28 + fprintf(elf->log, fmt , ## args ); \ 78.29 + xc_set_error(XC_INVALID_KERNEL, fmt , ## args ); \ 78.30 +} while (0) 78.31 78.32 #define safe_strcpy(d,s) \ 78.33 do { strncpy((d),(s),sizeof((d))-1); \ 78.34 @@ -58,3 +58,13 @@ do { strncpy((d),(s),sizeof((d))-1); 78.35 #endif 78.36 78.37 #endif /* __LIBELF_PRIVATE_H_ */ 78.38 + 78.39 +/* 78.40 + * Local variables: 78.41 + * mode: C 78.42 + * c-set-style: "BSD" 78.43 + * c-basic-offset: 4 78.44 + * tab-width: 4 78.45 + * indent-tabs-mode: nil 78.46 + * End: 78.47 + */
79.1 --- a/xen/common/libelf/libelf-relocate.c Thu Feb 15 13:13:36 2007 -0700 79.2 +++ b/xen/common/libelf/libelf-relocate.c Thu Feb 15 14:09:39 2007 -0700 79.3 @@ -46,22 +46,22 @@ static const char *rel_names_i386[] = { 79.4 }; 79.5 79.6 static int elf_reloc_i386(struct elf_binary *elf, int type, 79.7 - uint64_t addr, uint64_t value) 79.8 + uint64_t addr, uint64_t value) 79.9 { 79.10 void *ptr = elf_get_ptr(elf, addr); 79.11 uint32_t *u32; 79.12 79.13 - switch (type) 79.14 + switch ( type ) 79.15 { 79.16 case 1 /* R_386_32 */ : 79.17 - u32 = ptr; 79.18 - *u32 += elf->reloc_offset; 79.19 - break; 79.20 + u32 = ptr; 79.21 + *u32 += elf->reloc_offset; 79.22 + break; 79.23 case 2 /* R_386_PC32 */ : 79.24 - /* nothing */ 79.25 - break; 79.26 + /* nothing */ 79.27 + break; 79.28 default: 79.29 - return -1; 79.30 + return -1; 79.31 } 79.32 return 0; 79.33 } 79.34 @@ -96,54 +96,57 @@ static const char *rel_names_x86_64[] = 79.35 }; 79.36 79.37 static int elf_reloc_x86_64(struct elf_binary *elf, int type, 79.38 - uint64_t addr, uint64_t value) 79.39 + uint64_t addr, uint64_t value) 79.40 { 79.41 void *ptr = elf_get_ptr(elf, addr); 79.42 uint64_t *u64; 79.43 uint32_t *u32; 79.44 int32_t *s32; 79.45 79.46 - switch (type) 79.47 + switch ( type ) 79.48 { 79.49 case 1 /* R_X86_64_64 */ : 79.50 - u64 = ptr; 79.51 - value += elf->reloc_offset; 79.52 - *u64 = value; 79.53 - break; 79.54 + u64 = ptr; 79.55 + value += elf->reloc_offset; 79.56 + *u64 = value; 79.57 + break; 79.58 case 2 /* R_X86_64_PC32 */ : 79.59 - u32 = ptr; 79.60 - *u32 = value - addr; 79.61 - if (*u32 != (uint32_t) (value - addr)) 79.62 - { 79.63 - elf_err(elf, "R_X86_64_PC32 overflow: 0x%" PRIx32 " != 0x%" PRIx32 "\n", 79.64 - *u32, (uint32_t) (value - addr)); 79.65 - return -1; 79.66 - } 79.67 - break; 79.68 + u32 = ptr; 79.69 + *u32 = value - addr; 79.70 + if ( *u32 != (uint32_t)(value - addr) ) 79.71 + { 79.72 + elf_err(elf, "R_X86_64_PC32 overflow: 0x%" PRIx32 79.73 + " != 0x%" PRIx32 "\n", 79.74 + *u32, (uint32_t) (value - addr)); 79.75 + return -1; 79.76 + } 79.77 + break; 79.78 case 10 /* R_X86_64_32 */ : 79.79 - u32 = ptr; 79.80 - value += elf->reloc_offset; 79.81 - *u32 = value; 79.82 - if (*u32 != value) 79.83 - { 79.84 - elf_err(elf, "R_X86_64_32 overflow: 0x%" PRIx32 " != 0x%" PRIx64 "\n", 79.85 - *u32, value); 79.86 - return -1; 79.87 - } 79.88 - break; 79.89 + u32 = ptr; 79.90 + value += elf->reloc_offset; 79.91 + *u32 = value; 79.92 + if ( *u32 != value ) 79.93 + { 79.94 + elf_err(elf, "R_X86_64_32 overflow: 0x%" PRIx32 79.95 + " != 0x%" PRIx64 "\n", 79.96 + *u32, value); 79.97 + return -1; 79.98 + } 79.99 + break; 79.100 case 11 /* R_X86_64_32S */ : 79.101 - s32 = ptr; 79.102 - value += elf->reloc_offset; 79.103 - *s32 = value; 79.104 - if (*s32 != (int64_t) value) 79.105 - { 79.106 - elf_err(elf, "R_X86_64_32S overflow: 0x%" PRIx32 " != 0x%" PRIx64 "\n", 79.107 - *s32, (int64_t) value); 79.108 - return -1; 79.109 - } 79.110 - break; 79.111 + s32 = ptr; 79.112 + value += elf->reloc_offset; 79.113 + *s32 = value; 79.114 + if ( *s32 != (int64_t) value ) 79.115 + { 79.116 + elf_err(elf, "R_X86_64_32S overflow: 0x%" PRIx32 79.117 + " != 0x%" PRIx64 "\n", 79.118 + *s32, (int64_t) value); 79.119 + return -1; 79.120 + } 79.121 + break; 79.122 default: 79.123 - return -1; 79.124 + return -1; 79.125 } 79.126 return 0; 79.127 } 79.128 @@ -154,19 +157,19 @@ static struct relocs { 79.129 const char **names; 79.130 int count; 79.131 int (*func) (struct elf_binary * elf, int type, uint64_t addr, 79.132 - uint64_t value); 79.133 + 
uint64_t value); 79.134 } relocs[] = 79.135 /* *INDENT-OFF* */ 79.136 { 79.137 [EM_386] = { 79.138 - .names = rel_names_i386, 79.139 - .count = sizeof(rel_names_i386) / sizeof(rel_names_i386[0]), 79.140 - .func = elf_reloc_i386, 79.141 + .names = rel_names_i386, 79.142 + .count = sizeof(rel_names_i386) / sizeof(rel_names_i386[0]), 79.143 + .func = elf_reloc_i386, 79.144 }, 79.145 [EM_X86_64] = { 79.146 - .names = rel_names_x86_64, 79.147 - .count = sizeof(rel_names_x86_64) / sizeof(rel_names_x86_64[0]), 79.148 - .func = elf_reloc_x86_64, 79.149 + .names = rel_names_x86_64, 79.150 + .count = sizeof(rel_names_x86_64) / sizeof(rel_names_x86_64[0]), 79.151 + .func = elf_reloc_x86_64, 79.152 } 79.153 }; 79.154 /* *INDENT-ON* */ 79.155 @@ -175,18 +178,18 @@ static struct relocs { 79.156 79.157 static const char *rela_name(int machine, int type) 79.158 { 79.159 - if (machine > sizeof(relocs) / sizeof(relocs[0])) 79.160 - return "unknown mach"; 79.161 - if (!relocs[machine].names) 79.162 - return "unknown mach"; 79.163 - if (type > relocs[machine].count) 79.164 - return "unknown rela"; 79.165 + if ( machine > sizeof(relocs) / sizeof(relocs[0]) ) 79.166 + return "unknown mach"; 79.167 + if ( !relocs[machine].names ) 79.168 + return "unknown mach"; 79.169 + if ( type > relocs[machine].count ) 79.170 + return "unknown rela"; 79.171 return relocs[machine].names[type]; 79.172 } 79.173 79.174 static int elf_reloc_section(struct elf_binary *elf, 79.175 - const elf_shdr * rels, 79.176 - const elf_shdr * sect, const elf_shdr * syms) 79.177 + const elf_shdr * rels, 79.178 + const elf_shdr * sect, const elf_shdr * syms) 79.179 { 79.180 const void *ptr, *end; 79.181 const elf_shdr *shdr; 79.182 @@ -204,18 +207,18 @@ static int elf_reloc_section(struct elf_ 79.183 int machine; 79.184 79.185 machine = elf_uval(elf, elf->ehdr, e_machine); 79.186 - if (machine >= sizeof(relocs) / sizeof(relocs[0]) || 79.187 - NULL == relocs[machine].func) 79.188 + if ( (machine >= (sizeof(relocs) / sizeof(relocs[0]))) || 79.189 + (relocs[machine].func == NULL) ) 79.190 { 79.191 - elf_err(elf, "%s: can't handle machine %d\n", 79.192 - __FUNCTION__, machine); 79.193 - return -1; 79.194 + elf_err(elf, "%s: can't handle machine %d\n", 79.195 + __FUNCTION__, machine); 79.196 + return -1; 79.197 } 79.198 - if (elf_swap(elf)) 79.199 + if ( elf_swap(elf) ) 79.200 { 79.201 - elf_err(elf, "%s: non-native byte order, relocation not supported\n", 79.202 - __FUNCTION__); 79.203 - return -1; 79.204 + elf_err(elf, "%s: non-native byte order, relocation not supported\n", 79.205 + __FUNCTION__); 79.206 + return -1; 79.207 } 79.208 79.209 s_type = elf_uval(elf, rels, sh_type); 79.210 @@ -223,89 +226,89 @@ static int elf_reloc_section(struct elf_ 79.211 ptr = elf_section_start(elf, rels); 79.212 end = elf_section_end(elf, rels); 79.213 79.214 - for (; ptr < end; ptr += rsize) 79.215 + for ( ; ptr < end; ptr += rsize ) 79.216 { 79.217 - switch (s_type) 79.218 - { 79.219 - case SHT_REL: 79.220 - rel = ptr; 79.221 - r_offset = elf_uval(elf, rel, r_offset); 79.222 - r_info = elf_uval(elf, rel, r_info); 79.223 - r_addend = 0; 79.224 - break; 79.225 - case SHT_RELA: 79.226 - rela = ptr; 79.227 - r_offset = elf_uval(elf, rela, r_offset); 79.228 - r_info = elf_uval(elf, rela, r_info); 79.229 - r_addend = elf_uval(elf, rela, r_addend); 79.230 - break; 79.231 - default: 79.232 - /* can't happen */ 79.233 - return -1; 79.234 - } 79.235 - if (elf_64bit(elf)) 79.236 - { 79.237 - r_type = ELF64_R_TYPE(r_info); 79.238 - r_sym = ELF64_R_SYM(r_info); 79.239 - } 
79.240 - else 79.241 - { 79.242 - r_type = ELF32_R_TYPE(r_info); 79.243 - r_sym = ELF32_R_SYM(r_info); 79.244 - } 79.245 + switch ( s_type ) 79.246 + { 79.247 + case SHT_REL: 79.248 + rel = ptr; 79.249 + r_offset = elf_uval(elf, rel, r_offset); 79.250 + r_info = elf_uval(elf, rel, r_info); 79.251 + r_addend = 0; 79.252 + break; 79.253 + case SHT_RELA: 79.254 + rela = ptr; 79.255 + r_offset = elf_uval(elf, rela, r_offset); 79.256 + r_info = elf_uval(elf, rela, r_info); 79.257 + r_addend = elf_uval(elf, rela, r_addend); 79.258 + break; 79.259 + default: 79.260 + /* can't happen */ 79.261 + return -1; 79.262 + } 79.263 + if ( elf_64bit(elf) ) 79.264 + { 79.265 + r_type = ELF64_R_TYPE(r_info); 79.266 + r_sym = ELF64_R_SYM(r_info); 79.267 + } 79.268 + else 79.269 + { 79.270 + r_type = ELF32_R_TYPE(r_info); 79.271 + r_sym = ELF32_R_SYM(r_info); 79.272 + } 79.273 79.274 - sym = elf_sym_by_index(elf, r_sym); 79.275 - shndx = elf_uval(elf, sym, st_shndx); 79.276 - switch (shndx) 79.277 - { 79.278 - case SHN_UNDEF: 79.279 - sname = "*UNDEF*"; 79.280 - sbase = 0; 79.281 - break; 79.282 - case SHN_COMMON: 79.283 - elf_err(elf, "%s: invalid section: %" PRId64 "\n", 79.284 - __FUNCTION__, shndx); 79.285 - return -1; 79.286 - case SHN_ABS: 79.287 - sname = "*ABS*"; 79.288 - sbase = 0; 79.289 - break; 79.290 - default: 79.291 - shdr = elf_shdr_by_index(elf, shndx); 79.292 - if (NULL == shdr) 79.293 - { 79.294 - elf_err(elf, "%s: invalid section: %" PRId64 "\n", 79.295 - __FUNCTION__, shndx); 79.296 - return -1; 79.297 - } 79.298 - sname = elf_section_name(elf, shdr); 79.299 - sbase = elf_uval(elf, shdr, sh_addr); 79.300 - } 79.301 + sym = elf_sym_by_index(elf, r_sym); 79.302 + shndx = elf_uval(elf, sym, st_shndx); 79.303 + switch ( shndx ) 79.304 + { 79.305 + case SHN_UNDEF: 79.306 + sname = "*UNDEF*"; 79.307 + sbase = 0; 79.308 + break; 79.309 + case SHN_COMMON: 79.310 + elf_err(elf, "%s: invalid section: %" PRId64 "\n", 79.311 + __FUNCTION__, shndx); 79.312 + return -1; 79.313 + case SHN_ABS: 79.314 + sname = "*ABS*"; 79.315 + sbase = 0; 79.316 + break; 79.317 + default: 79.318 + shdr = elf_shdr_by_index(elf, shndx); 79.319 + if ( shdr == NULL ) 79.320 + { 79.321 + elf_err(elf, "%s: invalid section: %" PRId64 "\n", 79.322 + __FUNCTION__, shndx); 79.323 + return -1; 79.324 + } 79.325 + sname = elf_section_name(elf, shdr); 79.326 + sbase = elf_uval(elf, shdr, sh_addr); 79.327 + } 79.328 79.329 - addr = r_offset; 79.330 - value = elf_uval(elf, sym, st_value); 79.331 - value += r_addend; 79.332 + addr = r_offset; 79.333 + value = elf_uval(elf, sym, st_value); 79.334 + value += r_addend; 79.335 79.336 - if (elf->log && elf->verbose > 1) 79.337 - { 79.338 - uint64_t st_name = elf_uval(elf, sym, st_name); 79.339 - const char *name = st_name ? elf->sym_strtab + st_name : "*NONE*"; 79.340 + if ( elf->log && (elf->verbose > 1) ) 79.341 + { 79.342 + uint64_t st_name = elf_uval(elf, sym, st_name); 79.343 + const char *name = st_name ? 
elf->sym_strtab + st_name : "*NONE*"; 79.344 79.345 - elf_msg(elf, 79.346 - "%s: type %s [%d], off 0x%" PRIx64 ", add 0x%" PRIx64 "," 79.347 - " sym %s [0x%" PRIx64 "], sec %s [0x%" PRIx64 "]" 79.348 - " -> addr 0x%" PRIx64 " value 0x%" PRIx64 "\n", 79.349 - __FUNCTION__, rela_name(machine, r_type), r_type, r_offset, 79.350 - r_addend, name, elf_uval(elf, sym, st_value), sname, sbase, 79.351 - addr, value); 79.352 - } 79.353 + elf_msg(elf, 79.354 + "%s: type %s [%d], off 0x%" PRIx64 ", add 0x%" PRIx64 "," 79.355 + " sym %s [0x%" PRIx64 "], sec %s [0x%" PRIx64 "]" 79.356 + " -> addr 0x%" PRIx64 " value 0x%" PRIx64 "\n", 79.357 + __FUNCTION__, rela_name(machine, r_type), r_type, r_offset, 79.358 + r_addend, name, elf_uval(elf, sym, st_value), sname, sbase, 79.359 + addr, value); 79.360 + } 79.361 79.362 - if (-1 == relocs[machine].func(elf, r_type, addr, value)) 79.363 - { 79.364 - elf_err(elf, "%s: unknown/unsupported reloc type %s [%d]\n", 79.365 - __FUNCTION__, rela_name(machine, r_type), r_type); 79.366 - return -1; 79.367 - } 79.368 + if ( relocs[machine].func(elf, r_type, addr, value) == -1 ) 79.369 + { 79.370 + elf_err(elf, "%s: unknown/unsupported reloc type %s [%d]\n", 79.371 + __FUNCTION__, rela_name(machine, r_type), r_type); 79.372 + return -1; 79.373 + } 79.374 } 79.375 return 0; 79.376 } 79.377 @@ -316,30 +319,40 @@ int elf_reloc(struct elf_binary *elf) 79.378 uint64_t i, count, type; 79.379 79.380 count = elf_shdr_count(elf); 79.381 - for (i = 0; i < count; i++) 79.382 + for ( i = 0; i < count; i++ ) 79.383 { 79.384 - rels = elf_shdr_by_index(elf, i); 79.385 - type = elf_uval(elf, rels, sh_type); 79.386 - if (type != SHT_REL && type != SHT_RELA) 79.387 - continue; 79.388 + rels = elf_shdr_by_index(elf, i); 79.389 + type = elf_uval(elf, rels, sh_type); 79.390 + if ( (type != SHT_REL) && (type != SHT_RELA) ) 79.391 + continue; 79.392 79.393 - sect = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_info)); 79.394 - syms = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_link)); 79.395 - if (NULL == sect || NULL == syms) 79.396 - continue; 79.397 + sect = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_info)); 79.398 + syms = elf_shdr_by_index(elf, elf_uval(elf, rels, sh_link)); 79.399 + if ( NULL == sect || NULL == syms ) 79.400 + continue; 79.401 79.402 - if (!(elf_uval(elf, sect, sh_flags) & SHF_ALLOC)) 79.403 - { 79.404 - elf_msg(elf, "%s: relocations for %s, skipping\n", 79.405 - __FUNCTION__, elf_section_name(elf, sect)); 79.406 - continue; 79.407 - } 79.408 + if ( !(elf_uval(elf, sect, sh_flags) & SHF_ALLOC) ) 79.409 + { 79.410 + elf_msg(elf, "%s: relocations for %s, skipping\n", 79.411 + __FUNCTION__, elf_section_name(elf, sect)); 79.412 + continue; 79.413 + } 79.414 79.415 - elf_msg(elf, "%s: relocations for %s @ 0x%" PRIx64 "\n", 79.416 - __FUNCTION__, elf_section_name(elf, sect), 79.417 - elf_uval(elf, sect, sh_addr)); 79.418 - if (0 != elf_reloc_section(elf, rels, sect, syms)) 79.419 - return -1; 79.420 + elf_msg(elf, "%s: relocations for %s @ 0x%" PRIx64 "\n", 79.421 + __FUNCTION__, elf_section_name(elf, sect), 79.422 + elf_uval(elf, sect, sh_addr)); 79.423 + if ( elf_reloc_section(elf, rels, sect, syms) != 0 ) 79.424 + return -1; 79.425 } 79.426 return 0; 79.427 } 79.428 + 79.429 +/* 79.430 + * Local variables: 79.431 + * mode: C 79.432 + * c-set-style: "BSD" 79.433 + * c-basic-offset: 4 79.434 + * tab-width: 4 79.435 + * indent-tabs-mode: nil 79.436 + * End: 79.437 + */
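Note: the R_X86_64_PC32/R_X86_64_32/R_X86_64_32S cases above all store a 64-bit computed value into a 32-bit slot and then check that widening the stored value back reproduces the original, i.e. that the truncation lost nothing. The same check in isolation, as a stand-alone program:

#include <stdint.h>
#include <stdio.h>

/* Store the low 32 bits and report an overflow if the value did not fit,
 * mirroring the relocation overflow tests in libelf-relocate.c. */
static int write_u32_checked(uint32_t *slot, uint64_t value)
{
    *slot = (uint32_t)value;
    if ( *slot != value )
    {
        fprintf(stderr, "32-bit relocation overflow: %#llx\n",
                (unsigned long long)value);
        return -1;
    }
    return 0;
}

int main(void)
{
    uint32_t slot;
    int ok  = write_u32_checked(&slot, 0x1234) == 0;        /* fits */
    int ovf = write_u32_checked(&slot, 1ULL << 40) == -1;   /* does not */
    return (ok && ovf) ? 0 : 1;
}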
80.1 --- a/xen/common/libelf/libelf-tools.c Thu Feb 15 13:13:36 2007 -0700 80.2 +++ b/xen/common/libelf/libelf-tools.c Thu Feb 15 14:09:39 2007 -0700 80.3 @@ -7,7 +7,7 @@ 80.4 /* ------------------------------------------------------------------------ */ 80.5 80.6 uint64_t elf_access_unsigned(struct elf_binary * elf, const void *ptr, 80.7 - uint64_t offset, size_t size) 80.8 + uint64_t offset, size_t size) 80.9 { 80.10 int need_swap = elf_swap(elf); 80.11 const uint8_t *u8; 80.12 @@ -15,27 +15,27 @@ uint64_t elf_access_unsigned(struct elf_ 80.13 const uint32_t *u32; 80.14 const uint64_t *u64; 80.15 80.16 - switch (size) 80.17 + switch ( size ) 80.18 { 80.19 case 1: 80.20 - u8 = ptr + offset; 80.21 - return *u8; 80.22 + u8 = ptr + offset; 80.23 + return *u8; 80.24 case 2: 80.25 - u16 = ptr + offset; 80.26 - return need_swap ? bswap_16(*u16) : *u16; 80.27 + u16 = ptr + offset; 80.28 + return need_swap ? bswap_16(*u16) : *u16; 80.29 case 4: 80.30 - u32 = ptr + offset; 80.31 - return need_swap ? bswap_32(*u32) : *u32; 80.32 + u32 = ptr + offset; 80.33 + return need_swap ? bswap_32(*u32) : *u32; 80.34 case 8: 80.35 - u64 = ptr + offset; 80.36 - return need_swap ? bswap_64(*u64) : *u64; 80.37 + u64 = ptr + offset; 80.38 + return need_swap ? bswap_64(*u64) : *u64; 80.39 default: 80.40 - return 0; 80.41 + return 0; 80.42 } 80.43 } 80.44 80.45 int64_t elf_access_signed(struct elf_binary *elf, const void *ptr, 80.46 - uint64_t offset, size_t size) 80.47 + uint64_t offset, size_t size) 80.48 { 80.49 int need_swap = elf_swap(elf); 80.50 const int8_t *s8; 80.51 @@ -43,22 +43,22 @@ int64_t elf_access_signed(struct elf_bin 80.52 const int32_t *s32; 80.53 const int64_t *s64; 80.54 80.55 - switch (size) 80.56 + switch ( size ) 80.57 { 80.58 case 1: 80.59 - s8 = ptr + offset; 80.60 - return *s8; 80.61 + s8 = ptr + offset; 80.62 + return *s8; 80.63 case 2: 80.64 - s16 = ptr + offset; 80.65 - return need_swap ? bswap_16(*s16) : *s16; 80.66 + s16 = ptr + offset; 80.67 + return need_swap ? bswap_16(*s16) : *s16; 80.68 case 4: 80.69 - s32 = ptr + offset; 80.70 - return need_swap ? bswap_32(*s32) : *s32; 80.71 + s32 = ptr + offset; 80.72 + return need_swap ? bswap_32(*s32) : *s32; 80.73 case 8: 80.74 - s64 = ptr + offset; 80.75 - return need_swap ? bswap_64(*s64) : *s64; 80.76 + s64 = ptr + offset; 80.77 + return need_swap ? 
bswap_64(*s64) : *s64; 80.78 default: 80.79 - return 0; 80.80 + return 0; 80.81 } 80.82 } 80.83 80.84 @@ -88,11 +88,12 @@ const elf_shdr *elf_shdr_by_name(struct 80.85 const char *sname; 80.86 int i; 80.87 80.88 - for (i = 0; i < count; i++) { 80.89 - shdr = elf_shdr_by_index(elf, i); 80.90 - sname = elf_section_name(elf, shdr); 80.91 - if (sname && 0 == strcmp(sname, name)) 80.92 - return shdr; 80.93 + for ( i = 0; i < count; i++ ) 80.94 + { 80.95 + shdr = elf_shdr_by_index(elf, i); 80.96 + sname = elf_section_name(elf, shdr); 80.97 + if ( sname && !strcmp(sname, name) ) 80.98 + return shdr; 80.99 } 80.100 return NULL; 80.101 } 80.102 @@ -100,31 +101,35 @@ const elf_shdr *elf_shdr_by_name(struct 80.103 const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index) 80.104 { 80.105 uint64_t count = elf_shdr_count(elf); 80.106 - const void *ptr = NULL; 80.107 + const void *ptr; 80.108 80.109 - if (index < count) 80.110 - ptr = elf->image 80.111 - + elf_uval(elf, elf->ehdr, e_shoff) 80.112 - + elf_uval(elf, elf->ehdr, e_shentsize) * index; 80.113 + if ( index >= count ) 80.114 + return NULL; 80.115 + 80.116 + ptr = (elf->image 80.117 + + elf_uval(elf, elf->ehdr, e_shoff) 80.118 + + elf_uval(elf, elf->ehdr, e_shentsize) * index); 80.119 return ptr; 80.120 } 80.121 80.122 const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index) 80.123 { 80.124 uint64_t count = elf_uval(elf, elf->ehdr, e_phnum); 80.125 - const void *ptr = NULL; 80.126 + const void *ptr; 80.127 80.128 - if (index < count) 80.129 - ptr = elf->image 80.130 - + elf_uval(elf, elf->ehdr, e_phoff) 80.131 - + elf_uval(elf, elf->ehdr, e_phentsize) * index; 80.132 + if ( index >= count ) 80.133 + return NULL; 80.134 + 80.135 + ptr = (elf->image 80.136 + + elf_uval(elf, elf->ehdr, e_phoff) 80.137 + + elf_uval(elf, elf->ehdr, e_phentsize) * index); 80.138 return ptr; 80.139 } 80.140 80.141 const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr) 80.142 { 80.143 - if (NULL == elf->sec_strtab) 80.144 - return "unknown"; 80.145 + if ( elf->sec_strtab == NULL ) 80.146 + return "unknown"; 80.147 return elf->sec_strtab + elf_uval(elf, shdr, sh_name); 80.148 } 80.149 80.150 @@ -136,7 +141,7 @@ const void *elf_section_start(struct elf 80.151 const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr) 80.152 { 80.153 return elf->image 80.154 - + elf_uval(elf, shdr, sh_offset) + elf_uval(elf, shdr, sh_size); 80.155 + + elf_uval(elf, shdr, sh_offset) + elf_uval(elf, shdr, sh_size); 80.156 } 80.157 80.158 const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol) 80.159 @@ -146,16 +151,16 @@ const elf_sym *elf_sym_by_name(struct el 80.160 const elf_sym *sym; 80.161 uint64_t info, name; 80.162 80.163 - for (; ptr < end; ptr += elf_size(elf, sym)) 80.164 + for ( ; ptr < end; ptr += elf_size(elf, sym) ) 80.165 { 80.166 - sym = ptr; 80.167 - info = elf_uval(elf, sym, st_info); 80.168 - name = elf_uval(elf, sym, st_name); 80.169 - if (ELF32_ST_BIND(info) != STB_GLOBAL) 80.170 - continue; 80.171 - if (strcmp(elf->sym_strtab + name, symbol) != 0) 80.172 - continue; 80.173 - return sym; 80.174 + sym = ptr; 80.175 + info = elf_uval(elf, sym, st_info); 80.176 + name = elf_uval(elf, sym, st_name); 80.177 + if ( ELF32_ST_BIND(info) != STB_GLOBAL ) 80.178 + continue; 80.179 + if ( strcmp(elf->sym_strtab + name, symbol) ) 80.180 + continue; 80.181 + return sym; 80.182 } 80.183 return NULL; 80.184 } 80.185 @@ -192,9 +197,9 @@ uint64_t elf_note_numeric(struct elf_bin 80.186 case 2: 80.187 case 4: 
80.188 case 8: 80.189 - return elf_access_unsigned(elf, desc, 0, descsz); 80.190 + return elf_access_unsigned(elf, desc, 0, descsz); 80.191 default: 80.192 - return 0; 80.193 + return 0; 80.194 } 80.195 } 80.196 const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note) 80.197 @@ -211,9 +216,7 @@ int elf_is_elfbinary(const void *image) 80.198 { 80.199 const Elf32_Ehdr *ehdr = image; 80.200 80.201 - if (IS_ELF(*ehdr)) 80.202 - return 1; 80.203 - return 0; 80.204 + return IS_ELF(*ehdr); 80.205 } 80.206 80.207 int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr) 80.208 @@ -223,3 +226,13 @@ int elf_phdr_is_loadable(struct elf_bina 80.209 80.210 return ((p_type == PT_LOAD) && (p_flags & (PF_W | PF_X)) != 0); 80.211 } 80.212 + 80.213 +/* 80.214 + * Local variables: 80.215 + * mode: C 80.216 + * c-set-style: "BSD" 80.217 + * c-basic-offset: 4 80.218 + * tab-width: 4 80.219 + * indent-tabs-mode: nil 80.220 + * End: 80.221 + */
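Note: elf_access_unsigned()/elf_access_signed() above dispatch on field width and byte-swap when the image's endianness differs from the host's. A stand-alone illustration of the same pattern; the GCC __builtin_bswap* intrinsics stand in for the bswap_16/32/64 macros that libelf-private.h maps per platform, and memcpy is used here to sidestep alignment concerns.

#include <stdint.h>
#include <string.h>

static uint64_t access_unsigned(const void *base, uint64_t off,
                                size_t size, int need_swap)
{
    const uint8_t *p = (const uint8_t *)base + off;
    uint16_t u16; uint32_t u32; uint64_t u64;

    switch ( size )
    {
    case 1: return *p;
    case 2: memcpy(&u16, p, 2); return need_swap ? __builtin_bswap16(u16) : u16;
    case 4: memcpy(&u32, p, 4); return need_swap ? __builtin_bswap32(u32) : u32;
    case 8: memcpy(&u64, p, 8); return need_swap ? __builtin_bswap64(u64) : u64;
    default: return 0;                     /* unknown width reads as zero */
    }
}

int main(void)
{
    const uint8_t raw[4] = { 0x12, 0x34, 0x56, 0x78 };
    /* On a little-endian host the swapped 4-byte read yields 0x12345678. */
    return access_unsigned(raw, 0, 4, 1) == 0x12345678 ? 0 : 1;
}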
81.1 --- a/xen/include/asm-powerpc/config.h Thu Feb 15 13:13:36 2007 -0700 81.2 +++ b/xen/include/asm-powerpc/config.h Thu Feb 15 14:09:39 2007 -0700 81.3 @@ -13,7 +13,7 @@ 81.4 * along with this program; if not, write to the Free Software 81.5 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 81.6 * 81.7 - * Copyright (C) IBM Corp. 2005 81.8 + * Copyright IBM Corp. 2005, 2006, 2007 81.9 * 81.10 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 81.11 */ 81.12 @@ -34,9 +34,6 @@ 81.13 81.14 #define U(x) (x ## U) 81.15 #define UL(x) (x ## UL) 81.16 -extern char _start[]; 81.17 -extern char _end[]; 81.18 -extern char _etext[]; 81.19 extern char __bss_start[]; 81.20 #endif 81.21
82.1 --- a/xen/include/asm-powerpc/domain.h Thu Feb 15 13:13:36 2007 -0700 82.2 +++ b/xen/include/asm-powerpc/domain.h Thu Feb 15 14:09:39 2007 -0700 82.3 @@ -107,13 +107,6 @@ extern void load_segments(struct vcpu *) 82.4 extern void save_float(struct vcpu *); 82.5 extern void load_float(struct vcpu *); 82.6 82.7 -#define RMA_SHARED_INFO 1 82.8 -#define RMA_START_INFO 2 82.9 -#define RMA_LAST_DOM0 2 82.10 -/* these are not used for dom0 so they should be last */ 82.11 -#define RMA_CONSOLE 3 82.12 -#define RMA_LAST_DOMU 3 82.13 - 82.14 #define rma_size(rma_order) (1UL << ((rma_order) + PAGE_SHIFT)) 82.15 82.16 static inline ulong rma_addr(struct arch_domain *ad, int type)
83.1 --- a/xen/include/asm-powerpc/init.h Thu Feb 15 13:13:36 2007 -0700 83.2 +++ b/xen/include/asm-powerpc/init.h Thu Feb 15 14:09:39 2007 -0700 83.3 @@ -13,7 +13,7 @@ 83.4 * along with this program; if not, write to the Free Software 83.5 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 83.6 * 83.7 - * Copyright (C) IBM Corp. 2006 83.8 + * Copyright IBM Corp. 2006, 2007 83.9 * 83.10 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 83.11 */ 83.12 @@ -27,14 +27,14 @@ typedef void (*hcall_handler_t)(struct c 83.13 typedef struct { 83.14 unsigned long number; 83.15 hcall_handler_t handler; 83.16 -} inithcall_t; 83.17 -extern inithcall_t __inithcall_start, __inithcall_end; 83.18 +} init_hcall_t; 83.19 +extern init_hcall_t __init_hcall_start, __init_hcall_end; 83.20 83.21 #define __init_papr_hcall(nr, fn) \ 83.22 - static inithcall_t __inithcall_##fn __init_hcall \ 83.23 + static init_hcall_t __init_hcall_##fn __init_hcall \ 83.24 = { .number = nr, .handler = fn } 83.25 83.26 #define __init_hcall \ 83.27 - __attribute_used__ __attribute__ ((__section__ (".inithcall.text"))) 83.28 + __attribute_used__ __attribute__ ((__section__ (".init_hcall.init"))) 83.29 83.30 #endif /* _XEN_ASM_INIT_H */
84.1 --- a/xen/include/asm-powerpc/mm.h Thu Feb 15 13:13:36 2007 -0700 84.2 +++ b/xen/include/asm-powerpc/mm.h Thu Feb 15 14:09:39 2007 -0700 84.3 @@ -13,7 +13,7 @@ 84.4 * along with this program; if not, write to the Free Software 84.5 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 84.6 * 84.7 - * Copyright (C) IBM Corp. 2005, 2006 84.8 + * Copyright IBM Corp. 2005, 2006, 2007 84.9 * 84.10 * Authors: Hollis Blanchard <hollisb@us.ibm.com> 84.11 * Jimi Xenidis <jimix@watson.ibm.com> 84.12 @@ -35,6 +35,7 @@ 84.13 #define memguard_unguard_range(_p,_l) ((void)0) 84.14 84.15 extern unsigned long xenheap_phys_end; 84.16 +extern int boot_of_mem_avail(int pos, ulong *start, ulong *end); 84.17 84.18 /* 84.19 * Per-page-frame information.
85.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 85.2 +++ b/xen/include/asm-powerpc/nmi.h Thu Feb 15 14:09:39 2007 -0700 85.3 @@ -0,0 +1,6 @@ 85.4 +#ifndef ASM_NMI_H 85.5 +#define ASM_NMI_H 85.6 + 85.7 +#include <public/nmi.h> 85.8 + 85.9 +#endif /* ASM_NMI_H */
86.1 --- a/xen/include/asm-powerpc/processor.h Thu Feb 15 13:13:36 2007 -0700 86.2 +++ b/xen/include/asm-powerpc/processor.h Thu Feb 15 14:09:39 2007 -0700 86.3 @@ -125,6 +125,7 @@ extern uint cpu_large_page_orders(uint * 86.4 extern void cpu_initialize(int cpuid); 86.5 extern void cpu_init_vcpu(struct vcpu *); 86.6 extern int cpu_io_mfn(ulong mfn); 86.7 +extern int cpu_threads(int cpuid); 86.8 extern void save_cpu_sprs(struct vcpu *); 86.9 extern void load_cpu_sprs(struct vcpu *); 86.10 extern void flush_segments(void);
87.1 --- a/xen/include/asm-x86/domain.h Thu Feb 15 13:13:36 2007 -0700 87.2 +++ b/xen/include/asm-x86/domain.h Thu Feb 15 14:09:39 2007 -0700 87.3 @@ -58,19 +58,22 @@ extern void toggle_guest_mode(struct vcp 87.4 */ 87.5 extern void hypercall_page_initialise(struct domain *d, void *); 87.6 87.7 +/************************************************/ 87.8 +/* shadow paging extension */ 87.9 +/************************************************/ 87.10 struct shadow_domain { 87.11 - u32 mode; /* flags to control shadow operation */ 87.12 spinlock_t lock; /* shadow domain lock */ 87.13 int locker; /* processor which holds the lock */ 87.14 const char *locker_function; /* Func that took it */ 87.15 + unsigned int opt_flags; /* runtime tunable optimizations on/off */ 87.16 + struct list_head pinned_shadows; 87.17 + 87.18 + /* Memory allocation */ 87.19 struct list_head freelists[SHADOW_MAX_ORDER + 1]; 87.20 struct list_head p2m_freelist; 87.21 - struct list_head p2m_inuse; 87.22 - struct list_head pinned_shadows; 87.23 unsigned int total_pages; /* number of pages allocated */ 87.24 unsigned int free_pages; /* number of pages on freelists */ 87.25 - unsigned int p2m_pages; /* number of pages in p2m map */ 87.26 - unsigned int opt_flags; /* runtime tunable optimizations on/off */ 87.27 + unsigned int p2m_pages; /* number of pages allocates to p2m */ 87.28 87.29 /* Shadow hashtable */ 87.30 struct shadow_page_info **hash_table; 87.31 @@ -85,6 +88,65 @@ struct shadow_domain { 87.32 unsigned int dirty_count; 87.33 }; 87.34 87.35 +struct shadow_vcpu { 87.36 +#if CONFIG_PAGING_LEVELS >= 3 87.37 + /* PAE guests: per-vcpu shadow top-level table */ 87.38 + l3_pgentry_t l3table[4] __attribute__((__aligned__(32))); 87.39 + /* PAE guests: per-vcpu cache of the top-level *guest* entries */ 87.40 + l3_pgentry_t gl3e[4] __attribute__((__aligned__(32))); 87.41 +#endif 87.42 + /* Non-PAE guests: pointer to guest top-level pagetable */ 87.43 + void *guest_vtable; 87.44 + /* Last MFN that we emulated a write to. */ 87.45 + unsigned long last_emulated_mfn; 87.46 + /* MFN of the last shadow that we shot a writeable mapping in */ 87.47 + unsigned long last_writeable_pte_smfn; 87.48 +}; 87.49 + 87.50 +/************************************************/ 87.51 +/* p2m handling */ 87.52 +/************************************************/ 87.53 + 87.54 +struct p2m_domain { 87.55 + /* Lock that protects updates to the p2m */ 87.56 + spinlock_t lock; 87.57 + int locker; /* processor which holds the lock */ 87.58 + const char *locker_function; /* Func that took it */ 87.59 + 87.60 + /* Pages used to construct the p2m */ 87.61 + struct list_head pages; 87.62 + 87.63 + /* Functions to call to get or free pages for the p2m */ 87.64 + struct page_info * (*alloc_page )(struct domain *d); 87.65 + void (*free_page )(struct domain *d, 87.66 + struct page_info *pg); 87.67 + 87.68 + /* Highest guest frame that's ever been mapped in the p2m */ 87.69 + unsigned long max_mapped_pfn; 87.70 +}; 87.71 + 87.72 +/************************************************/ 87.73 +/* common paging data structure */ 87.74 +/************************************************/ 87.75 +struct paging_domain { 87.76 + u32 mode; /* flags to control paging operation */ 87.77 + 87.78 + /* extension for shadow paging support */ 87.79 + struct shadow_domain shadow; 87.80 + 87.81 + /* Other paging assistance code will have structs here */ 87.82 +}; 87.83 + 87.84 +struct paging_vcpu { 87.85 + /* Pointers to mode-specific entry points. 
*/ 87.86 + struct paging_mode *mode; 87.87 + /* HVM guest: paging enabled (CR0.PG)? */ 87.88 + unsigned int translate_enabled:1; 87.89 + 87.90 + /* paging support extension */ 87.91 + struct shadow_vcpu shadow; 87.92 +}; 87.93 + 87.94 struct arch_domain 87.95 { 87.96 l1_pgentry_t *mm_perdomain_pt; 87.97 @@ -108,12 +170,11 @@ struct arch_domain 87.98 87.99 struct hvm_domain hvm_domain; 87.100 87.101 - struct shadow_domain shadow; 87.102 + struct paging_domain paging; 87.103 + struct p2m_domain p2m ; 87.104 87.105 /* Shadow translated domain: P2M mapping */ 87.106 pagetable_t phys_table; 87.107 - /* Highest guest frame that's ever been mapped in the p2m */ 87.108 - unsigned long max_mapped_pfn; 87.109 87.110 /* Pseudophysical e820 map (XENMEM_memory_map). */ 87.111 struct e820entry e820[3]; 87.112 @@ -139,21 +200,6 @@ struct pae_l3_cache { }; 87.113 #define pae_l3_cache_init(c) ((void)0) 87.114 #endif 87.115 87.116 -struct shadow_vcpu { 87.117 -#if CONFIG_PAGING_LEVELS >= 3 87.118 - /* PAE guests: per-vcpu shadow top-level table */ 87.119 - l3_pgentry_t l3table[4] __attribute__((__aligned__(32))); 87.120 -#endif 87.121 - /* Pointers to mode-specific entry points. */ 87.122 - struct shadow_paging_mode *mode; 87.123 - /* Last MFN that we emulated a write to. */ 87.124 - unsigned long last_emulated_mfn; 87.125 - /* MFN of the last shadow that we shot a writeable mapping in */ 87.126 - unsigned long last_writeable_pte_smfn; 87.127 - /* HVM guest: paging enabled (CR0.PG)? */ 87.128 - unsigned int translate_enabled:1; 87.129 -}; 87.130 - 87.131 struct arch_vcpu 87.132 { 87.133 /* Needs 16-byte aligment for FXSAVE/FXRSTOR. */ 87.134 @@ -200,12 +246,10 @@ struct arch_vcpu 87.135 pagetable_t monitor_table; /* (MFN) hypervisor PT (for HVM) */ 87.136 unsigned long cr3; /* (MA) value to install in HW CR3 */ 87.137 87.138 - void *guest_vtable; /* virtual addr of pagetable */ 87.139 - 87.140 /* Current LDT details. */ 87.141 unsigned long shadow_ldt_mapcnt; 87.142 87.143 - struct shadow_vcpu shadow; 87.144 + struct paging_vcpu paging; 87.145 } __cacheline_aligned; 87.146 87.147 /* shorthands to improve code legibility */
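This hunk splits the old per-domain shadow state three ways: common state moves into struct paging_domain, p2m bookkeeping into struct p2m_domain, and only the shadow-specific pieces stay in struct shadow_domain / shadow_vcpu (now embedded inside the paging structures). A hedged sketch of how call sites change under the split; the domain and vcpu pointers are arbitrary examples:

    static unsigned long example_new_layout(struct domain *d, struct vcpu *v)
    {
        u32 mode = d->arch.paging.mode;                  /* was d->arch.shadow.mode */
        unsigned long max = d->arch.p2m.max_mapped_pfn;  /* was d->arch.max_mapped_pfn */
        unsigned int xlate = v->arch.paging.translate_enabled;
                                          /* was v->arch.shadow.translate_enabled */
        return mode + max + xlate;
    }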
88.1 --- a/xen/include/asm-x86/mm.h Thu Feb 15 13:13:36 2007 -0700 88.2 +++ b/xen/include/asm-x86/mm.h Thu Feb 15 14:09:39 2007 -0700 88.3 @@ -246,6 +246,64 @@ pae_copy_root(struct vcpu *v, l3_pgentry 88.4 88.5 int check_descriptor(const struct domain *, struct desc_struct *d); 88.6 88.7 + 88.8 +/****************************************************************************** 88.9 + * With shadow pagetables, the different kinds of address start 88.10 + * to get get confusing. 88.11 + * 88.12 + * Virtual addresses are what they usually are: the addresses that are used 88.13 + * to accessing memory while the guest is running. The MMU translates from 88.14 + * virtual addresses to machine addresses. 88.15 + * 88.16 + * (Pseudo-)physical addresses are the abstraction of physical memory the 88.17 + * guest uses for allocation and so forth. For the purposes of this code, 88.18 + * we can largely ignore them. 88.19 + * 88.20 + * Guest frame numbers (gfns) are the entries that the guest puts in its 88.21 + * pagetables. For normal paravirtual guests, they are actual frame numbers, 88.22 + * with the translation done by the guest. 88.23 + * 88.24 + * Machine frame numbers (mfns) are the entries that the hypervisor puts 88.25 + * in the shadow page tables. 88.26 + * 88.27 + * Elsewhere in the xen code base, the name "gmfn" is generally used to refer 88.28 + * to a "machine frame number, from the guest's perspective", or in other 88.29 + * words, pseudo-physical frame numbers. However, in the shadow code, the 88.30 + * term "gmfn" means "the mfn of a guest page"; this combines naturally with 88.31 + * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a 88.32 + * guest L2 page), etc... 88.33 + */ 88.34 + 88.35 +/* With this defined, we do some ugly things to force the compiler to 88.36 + * give us type safety between mfns and gfns and other integers. 88.37 + * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions 88.38 + * that translate beween int and foo_t. 88.39 + * 88.40 + * It does have some performance cost because the types now have 88.41 + * a different storage attribute, so may not want it on all the time. */ 88.42 + 88.43 +#ifndef NDEBUG 88.44 +#define TYPE_SAFETY 1 88.45 +#endif 88.46 + 88.47 +#ifdef TYPE_SAFETY 88.48 +#define TYPE_SAFE(_type,_name) \ 88.49 +typedef struct { _type _name; } _name##_t; \ 88.50 +static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \ 88.51 +static inline _type _name##_x(_name##_t n) { return n._name; } 88.52 +#else 88.53 +#define TYPE_SAFE(_type,_name) \ 88.54 +typedef _type _name##_t; \ 88.55 +static inline _name##_t _##_name(_type n) { return n; } \ 88.56 +static inline _type _name##_x(_name##_t n) { return n; } 88.57 +#endif 88.58 + 88.59 +TYPE_SAFE(unsigned long,mfn); 88.60 + 88.61 +/* Macro for printk formats: use as printk("%"PRI_mfn"\n", mfn_x(foo)); */ 88.62 +#define PRI_mfn "05lx" 88.63 + 88.64 + 88.65 /* 88.66 * The MPT (machine->physical mapping table) is an array of word-sized 88.67 * values, indexed on machine frame number. It is expected that guest OSes 88.68 @@ -269,13 +327,12 @@ int check_descriptor(const struct domain 88.69 #endif 88.70 #define get_gpfn_from_mfn(mfn) (machine_to_phys_mapping[(mfn)]) 88.71 88.72 - 88.73 #define mfn_to_gmfn(_d, mfn) \ 88.74 - ( (shadow_mode_translate(_d)) \ 88.75 + ( (paging_mode_translate(_d)) \ 88.76 ? 
get_gpfn_from_mfn(mfn) \ 88.77 : (mfn) ) 88.78 88.79 -#define gmfn_to_mfn(_d, gpfn) mfn_x(sh_gfn_to_mfn(_d, gpfn)) 88.80 +#define gmfn_to_mfn(_d, gpfn) mfn_x(gfn_to_mfn(_d, gpfn)) 88.81 88.82 #define INVALID_MFN (~0UL) 88.83
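The TYPE_SAFE machinery wraps an integer in a one-member struct in debug builds, so an mfn_t and a plain unsigned long can no longer be mixed up silently; _mfn()/mfn_x() convert in and out, and PRI_mfn is the matching printk format. A small usage sketch, using only the names defined in the hunk above:

    static void mfn_demo(void)
    {
        mfn_t m = _mfn(0x1234UL);              /* wrap a raw frame number */
        unsigned long raw = mfn_x(m);          /* unwrap it again */
        printk("mfn %"PRI_mfn" (raw %lx)\n", mfn_x(m), raw);
        /* With TYPE_SAFETY defined, assigning `m = raw;` no longer compiles,
         * which is exactly the mix-up the wrapper is meant to catch. */
    }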
89.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 89.2 +++ b/xen/include/asm-x86/p2m.h Thu Feb 15 14:09:39 2007 -0700 89.3 @@ -0,0 +1,142 @@ 89.4 +/****************************************************************************** 89.5 + * include/asm-x86/paging.h 89.6 + * 89.7 + * physical-to-machine mappings for automatically-translated domains. 89.8 + * 89.9 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 89.10 + * Parts of this code are Copyright (c) 2006 by XenSource Inc. 89.11 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman 89.12 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. 89.13 + * 89.14 + * This program is free software; you can redistribute it and/or modify 89.15 + * it under the terms of the GNU General Public License as published by 89.16 + * the Free Software Foundation; either version 2 of the License, or 89.17 + * (at your option) any later version. 89.18 + * 89.19 + * This program is distributed in the hope that it will be useful, 89.20 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 89.21 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 89.22 + * GNU General Public License for more details. 89.23 + * 89.24 + * You should have received a copy of the GNU General Public License 89.25 + * along with this program; if not, write to the Free Software 89.26 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 89.27 + */ 89.28 + 89.29 +#ifndef _XEN_P2M_H 89.30 +#define _XEN_P2M_H 89.31 + 89.32 + 89.33 +/* The phys_to_machine_mapping is the reversed mapping of MPT for full 89.34 + * virtualization. It is only used by shadow_mode_translate()==true 89.35 + * guests, so we steal the address space that would have normally 89.36 + * been used by the read-only MPT map. 89.37 + */ 89.38 +#define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START) 89.39 + 89.40 + 89.41 +/* Read the current domain's P2M table. 
*/ 89.42 +static inline mfn_t gfn_to_mfn_current(unsigned long gfn) 89.43 +{ 89.44 + l1_pgentry_t l1e = l1e_empty(); 89.45 + int ret; 89.46 + 89.47 + if ( gfn > current->domain->arch.p2m.max_mapped_pfn ) 89.48 + return _mfn(INVALID_MFN); 89.49 + 89.50 + /* Don't read off the end of the p2m table */ 89.51 + ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t)); 89.52 + 89.53 + ret = __copy_from_user(&l1e, 89.54 + &phys_to_machine_mapping[gfn], 89.55 + sizeof(l1e)); 89.56 + 89.57 + if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) ) 89.58 + return _mfn(l1e_get_pfn(l1e)); 89.59 + 89.60 + return _mfn(INVALID_MFN); 89.61 +} 89.62 + 89.63 +/* Read another domain's P2M table, mapping pages as we go */ 89.64 +mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn); 89.65 + 89.66 +/* General conversion function from gfn to mfn */ 89.67 +static inline mfn_t gfn_to_mfn(struct domain *d, unsigned long gfn) 89.68 +{ 89.69 + if ( !paging_mode_translate(d) ) 89.70 + return _mfn(gfn); 89.71 + if ( likely(current->domain == d) ) 89.72 + return gfn_to_mfn_current(gfn); 89.73 + else 89.74 + return gfn_to_mfn_foreign(d, gfn); 89.75 +} 89.76 + 89.77 +/* General conversion function from mfn to gfn */ 89.78 +static inline unsigned long mfn_to_gfn(struct domain *d, mfn_t mfn) 89.79 +{ 89.80 + if ( paging_mode_translate(d) ) 89.81 + return get_gpfn_from_mfn(mfn_x(mfn)); 89.82 + else 89.83 + return mfn_x(mfn); 89.84 +} 89.85 + 89.86 +/* Compatibility function for HVM code */ 89.87 +static inline unsigned long get_mfn_from_gpfn(unsigned long pfn) 89.88 +{ 89.89 + return mfn_x(gfn_to_mfn_current(pfn)); 89.90 +} 89.91 + 89.92 +/* Is this guest address an mmio one? (i.e. not defined in p2m map) */ 89.93 +static inline int mmio_space(paddr_t gpa) 89.94 +{ 89.95 + unsigned long gfn = gpa >> PAGE_SHIFT; 89.96 + return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn))); 89.97 +} 89.98 + 89.99 +/* Translate the frame number held in an l1e from guest to machine */ 89.100 +static inline l1_pgentry_t 89.101 +gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e) 89.102 +{ 89.103 + if ( unlikely(paging_mode_translate(d)) ) 89.104 + l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)), 89.105 + l1e_get_flags(l1e)); 89.106 + return l1e; 89.107 +} 89.108 + 89.109 + 89.110 + 89.111 +/* Init the datastructures for later use by the p2m code */ 89.112 +void p2m_init(struct domain *d); 89.113 + 89.114 +/* Allocate a new p2m table for a domain. 89.115 + * 89.116 + * The alloc_page and free_page functions will be used to get memory to 89.117 + * build the p2m, and to release it again at the end of day. 89.118 + * 89.119 + * Returns 0 for success or -errno. */ 89.120 +int p2m_alloc_table(struct domain *d, 89.121 + struct page_info * (*alloc_page)(struct domain *d), 89.122 + void (*free_page)(struct domain *d, struct page_info *pg)); 89.123 + 89.124 +/* Return all the p2m resources to Xen. */ 89.125 +void p2m_teardown(struct domain *d); 89.126 + 89.127 +/* Add a page to a domain's p2m table */ 89.128 +void guest_physmap_add_page(struct domain *d, unsigned long gfn, 89.129 + unsigned long mfn); 89.130 + 89.131 +/* Remove a page from a domain's p2m table */ 89.132 +void guest_physmap_remove_page(struct domain *d, unsigned long gfn, 89.133 + unsigned long mfn); 89.134 + 89.135 + 89.136 +#endif /* _XEN_P2M_H */ 89.137 + 89.138 +/* 89.139 + * Local variables: 89.140 + * mode: C 89.141 + * c-set-style: "BSD" 89.142 + * c-basic-offset: 4 89.143 + * indent-tabs-mode: nil 89.144 + * End: 89.145 + */
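gfn_to_mfn() above chooses between the fast current-domain lookup and the foreign-domain path, and unmapped frames come back as _mfn(INVALID_MFN), so callers are expected to validate the result. A hedged caller sketch using only functions declared in this header plus the existing mfn_valid() check (already used by mmio_space() above):

    static int example_lookup(struct domain *d, unsigned long gfn)
    {
        mfn_t mfn = gfn_to_mfn(d, gfn);   /* translated or identity, as needed */

        if ( !mfn_valid(mfn_x(mfn)) )
            return -EINVAL;               /* gfn not mapped in the p2m */

        /* ... map or take a reference on mfn here ... */
        return 0;
    }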
90.1 --- a/xen/include/asm-x86/page.h Thu Feb 15 13:13:36 2007 -0700 90.2 +++ b/xen/include/asm-x86/page.h Thu Feb 15 14:09:39 2007 -0700 90.3 @@ -208,8 +208,10 @@ typedef struct { u64 pfn; } pagetable_t; 90.4 #define pagetable_get_paddr(x) ((paddr_t)(x).pfn << PAGE_SHIFT) 90.5 #define pagetable_get_page(x) mfn_to_page((x).pfn) 90.6 #define pagetable_get_pfn(x) ((x).pfn) 90.7 +#define pagetable_get_mfn(x) _mfn(((x).pfn)) 90.8 #define pagetable_is_null(x) ((x).pfn == 0) 90.9 #define pagetable_from_pfn(pfn) ((pagetable_t) { (pfn) }) 90.10 +#define pagetable_from_mfn(mfn) ((pagetable_t) { mfn_x(mfn) }) 90.11 #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg)) 90.12 #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT) 90.13 #define pagetable_null() pagetable_from_pfn(0) 90.14 @@ -276,9 +278,6 @@ typedef struct { u64 pfn; } pagetable_t; 90.15 #define __linear_l4_table \ 90.16 ((l4_pgentry_t *)(__linear_l3_table + l3_linear_offset(LINEAR_PT_VIRT_START))) 90.17 90.18 -#define linear_l1_table __linear_l1_table 90.19 -#define linear_pg_table linear_l1_table 90.20 -#define linear_l2_table(v) ((l2_pgentry_t *)(v)->arch.guest_vtable) 90.21 90.22 #ifndef __ASSEMBLY__ 90.23 #if CONFIG_PAGING_LEVELS == 3
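The two new macros let a pagetable_t round-trip through the type-safe mfn_t instead of a raw pfn. A small sketch of the accessors; the vcpu's guest_table is used as an arbitrary example of a pagetable_t value:

    static void pagetable_mfn_demo(struct vcpu *v)
    {
        mfn_t m = pagetable_get_mfn(v->arch.guest_table);  /* pagetable -> mfn_t */
        pagetable_t pt = pagetable_from_mfn(m);            /* mfn_t -> pagetable */

        ASSERT(pagetable_get_pfn(pt) == mfn_x(m));         /* round trip is exact */
    }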
91.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 91.2 +++ b/xen/include/asm-x86/paging.h Thu Feb 15 14:09:39 2007 -0700 91.3 @@ -0,0 +1,376 @@ 91.4 +/****************************************************************************** 91.5 + * include/asm-x86/paging.h 91.6 + * 91.7 + * Common interface for paging support 91.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 91.9 + * Parts of this code are Copyright (c) 2006 by XenSource Inc. 91.10 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman 91.11 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. 91.12 + * 91.13 + * This program is free software; you can redistribute it and/or modify 91.14 + * it under the terms of the GNU General Public License as published by 91.15 + * the Free Software Foundation; either version 2 of the License, or 91.16 + * (at your option) any later version. 91.17 + * 91.18 + * This program is distributed in the hope that it will be useful, 91.19 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 91.20 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 91.21 + * GNU General Public License for more details. 91.22 + * 91.23 + * You should have received a copy of the GNU General Public License 91.24 + * along with this program; if not, write to the Free Software 91.25 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 91.26 + */ 91.27 + 91.28 +#ifndef _XEN_PAGING_H 91.29 +#define _XEN_PAGING_H 91.30 + 91.31 +#include <xen/mm.h> 91.32 +#include <public/domctl.h> 91.33 +#include <xen/sched.h> 91.34 +#include <xen/perfc.h> 91.35 +#include <xen/domain_page.h> 91.36 +#include <asm/flushtlb.h> 91.37 +#include <asm/domain.h> 91.38 + 91.39 +/***************************************************************************** 91.40 + * Macros to tell which paging mode a domain is in */ 91.41 + 91.42 +#define PG_SH_shift 20 91.43 +#define PG_HAP_shift 21 91.44 +/* We're in one of the shadow modes */ 91.45 +#define PG_SH_enable (1U << PG_SH_shift) 91.46 +#define PG_HAP_enable (1U << PG_HAP_shift) 91.47 + 91.48 +/* common paging mode bits */ 91.49 +#define PG_mode_shift 10 91.50 +/* Refcounts based on shadow tables instead of guest tables */ 91.51 +#define PG_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift) 91.52 +/* Enable log dirty mode */ 91.53 +#define PG_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift) 91.54 +/* Xen does p2m translation, not guest */ 91.55 +#define PG_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift) 91.56 +/* Xen does not steal address space from the domain for its own booking; 91.57 + * requires VT or similar mechanisms */