ia64/xen-unstable

changeset 17161:71a8366fb212

merge with xen-unstable.hg
author Alex Williamson <alex.williamson@hp.com>
date Fri Feb 29 09:19:58 2008 -0700 (2008-02-29)
parents 0b20ac6ec64a 9049b0b62e08
children 5ea409d66ee4
files xen/arch/ia64/xen/machine_kexec.c
line diff
     1.1 --- a/extras/mini-os/fbfront.c	Fri Feb 29 09:18:01 2008 -0700
     1.2 +++ b/extras/mini-os/fbfront.c	Fri Feb 29 09:19:58 2008 -0700
     1.3 @@ -31,13 +31,6 @@ struct kbdfront_dev {
     1.4      char *nodename;
     1.5      char *backend;
     1.6  
     1.7 -    char *data;
     1.8 -    int width;
     1.9 -    int height;
    1.10 -    int depth;
    1.11 -    int line_length;
    1.12 -    int mem_length;
    1.13 -
    1.14  #ifdef HAVE_LIBC
    1.15      int fd;
    1.16  #endif
    1.17 @@ -316,7 +309,10 @@ struct fbfront_dev *init_fbfront(char *n
    1.18      for (i = 0; mapped < mem_length && i < max_pd; i++) {
    1.19          unsigned long *pd = (unsigned long *) alloc_page();
    1.20          for (j = 0; mapped < mem_length && j < PAGE_SIZE / sizeof(unsigned long); j++) {
    1.21 -            pd[j] = virt_to_mfn((unsigned long) data + mapped);
    1.22 +            /* Trigger CoW */
    1.23 +            * ((char *)data + mapped) = 0;
    1.24 +            barrier();
    1.25 +            pd[j] = virt_to_mfn((unsigned long) data + mapped);
    1.26              mapped += PAGE_SIZE;
    1.27          }
    1.28          for ( ; j < PAGE_SIZE / sizeof(unsigned long); j++)
     2.1 --- a/extras/mini-os/include/fbfront.h	Fri Feb 29 09:18:01 2008 -0700
     2.2 +++ b/extras/mini-os/include/fbfront.h	Fri Feb 29 09:19:58 2008 -0700
     2.3 @@ -14,6 +14,9 @@
     2.4  #ifndef KEY_Q
     2.5  #define KEY_Q 16
     2.6  #endif
     2.7 +#ifndef KEY_MAX
     2.8 +#define KEY_MAX 0x1ff
     2.9 +#endif
    2.10  
    2.11  
    2.12  struct kbdfront_dev;
     3.1 --- a/extras/mini-os/kernel.c	Fri Feb 29 09:18:01 2008 -0700
     3.2 +++ b/extras/mini-os/kernel.c	Fri Feb 29 09:19:58 2008 -0700
     3.3 @@ -360,13 +360,13 @@ static void kbdfront_thread(void *p)
     3.4                  refresh_cursor(x, y);
     3.5                  break;
     3.6              case XENKBD_TYPE_POS:
     3.7 -                printk("pos x:%d y:%d z:%d\n",
     3.8 +                printk("pos x:%d y:%d dz:%d\n",
     3.9                          event.pos.abs_x,
    3.10                          event.pos.abs_y,
    3.11 -                        event.pos.abs_z);
    3.12 +                        event.pos.rel_z);
    3.13                  x = event.pos.abs_x;
    3.14                  y = event.pos.abs_y;
    3.15 -                z = event.pos.abs_z;
    3.16 +                z = event.pos.rel_z;
    3.17                  clip_cursor(&x, &y);
    3.18                  refresh_cursor(x, y);
    3.19                  break;
     4.1 --- a/extras/mini-os/xenbus/xenbus.c	Fri Feb 29 09:18:01 2008 -0700
     4.2 +++ b/extras/mini-os/xenbus/xenbus.c	Fri Feb 29 09:19:58 2008 -0700
     4.3 @@ -637,9 +637,7 @@ char* xenbus_printf(xenbus_transaction_t
     4.4      va_start(args, fmt);
     4.5      vsprintf(val, fmt, args);
     4.6      va_end(args);
     4.7 -    xenbus_write(xbt,fullpath,val);
     4.8 -
     4.9 -    return NULL;
    4.10 +    return xenbus_write(xbt,fullpath,val);
    4.11  }
    4.12  
    4.13  static void do_ls_test(const char *pre)
     5.1 --- a/stubdom/README	Fri Feb 29 09:18:01 2008 -0700
     5.2 +++ b/stubdom/README	Fri Feb 29 09:19:58 2008 -0700
     5.3 @@ -6,6 +6,73 @@ Then make install to install the result.
     5.4  
     5.5  Also, run make and make install in $XEN_ROOT/tools/fs-back
     5.6  
     5.7 +General Configuration
     5.8 +=====================
     5.9 +
    5.10 +In your HVM config "hvmconfig",
    5.11 +
    5.12 +- use /usr/lib/xen/bin/stubdom-dm as dm script
    5.13 +
    5.14 +device_model = '/usr/lib/xen/bin/stubdom-dm'
    5.15 +
    5.16 +- comment the disk statement:
    5.17 +
    5.18 +#disk = [  'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ]
    5.19 +
    5.20 +
    5.21 +Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is the name of your HVM
    5.22 +guest) with
    5.23 +
    5.24 +kernel = "/usr/lib/xen/boot/stubdom.gz"
    5.25 +vif = [ '', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
    5.26 +disk = [  'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ]
    5.27 +
    5.28 +where
    5.29 +- the first vif ('') is reserved for VNC (see below)
    5.30 +- 'ip=10.0.1.1,mac= etc...' is the same net configuration as in the hvmconfig
    5.31 +script,
    5.32 +- and disk = is the same block configuration as in the hvmconfig script.
    5.33 +
    5.34 +Display Configuration
    5.35 +=====================
    5.36 +
    5.37 +There are three possibilities
    5.38 +
    5.39 +* Using SDL
    5.40 +
    5.41 +In hvmconfig, disable vnc:
    5.42 +
    5.43 +vnc = 0
    5.44 +
    5.45 +In stubdom-hvmconfig, set a vfb:
    5.46 +
    5.47 +vfb = [ 'type=sdl' ]
    5.48 +
    5.49 +* Using a VNC server in the stub domain
    5.50 +
    5.51 +In hvmconfig, set vnclisten to "172.30.206.1" for instance.  Do not use a host
    5.52 +name as Mini-OS does not have a name resolver.  Do not use 127.0.0.1 since then
    5.53 +you will not be able to connect to it.
    5.54 +
    5.55 +vnc = 1
    5.56 +vnclisten = "172.30.206.1"
    5.57 +
    5.58 +In stubdom-hvmconfig, fill the reserved vif with the same IP, for instance:
    5.59 +
    5.60 +vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
    5.61 +
    5.62 +* Using a VNC server in dom0
    5.63 +
    5.64 +In hvmconfig, disable vnc:
    5.65 +
    5.66 +vnc = 0
    5.67 +
    5.68 +In stubdom-hvmconfig, set a vfb:
    5.69 +
    5.70 +vfb = [ 'type=vnc' ]
    5.71 +
    5.72 +and any other parameter as wished.
    5.73 +
    5.74  To run
    5.75  ======
    5.76  
    5.77 @@ -13,32 +80,4 @@ mkdir -p /exports/usr/share/qemu
    5.78  ln -s /usr/share/qemu/keymaps /exports/usr/share/qemu
    5.79  /usr/sbin/fs-backend &
    5.80  
    5.81 -
    5.82 -In your HVM config "hvmconfig",
    5.83 -
    5.84 -- use VNC, set vnclisten to "172.30.206.1" for instance.  Do not use a host name
    5.85 -as Mini-OS does not have a name resolver.  Do not use 127.0.0.1 since then you
    5.86 -will not be able to connect to it.
    5.87 -
    5.88 -vnc = 1
    5.89 -vnclisten = "172.30.206.1"
    5.90 -
    5.91 -- use /usr/lib/xen/bin/stubdom-dm as dm script
    5.92 -
    5.93 -device_model = '/usr/lib/xen/bin/stubdom-dm'
    5.94 -
    5.95 -- comment the disk statement:
    5.96 -#disk = [  'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ]
    5.97 -
    5.98 -Create /etc/xen/stubdom-hvmconfig (where "hvmconfig" is your HVM guest domain
    5.99 -name) with
   5.100 -
   5.101 -kernel = "/usr/lib/xen/boot/stubdom.gz"
   5.102 -vif = [ 'ip=172.30.206.1', 'ip=10.0.1.1,mac=aa:00:00:12:23:34']
   5.103 -disk = [  'file:/tmp/install.iso,hdc:cdrom,r', 'phy:/dev/sda6,hda,w', 'file:/tmp/test,hdb,r' ]
   5.104 -
   5.105 -where
   5.106 -- 172.30.206.1 is the IP for vnc,
   5.107 -- 'ip=10.0.1.1,mac= etc...' is the same net configuration as in the hvmconfig
   5.108 -script,
   5.109 -- and disk = is the same block configuration as in the hvmconfig script.
   5.110 +xm create hvmconfig
     6.1 --- a/stubdom/stubdom-dm	Fri Feb 29 09:18:01 2008 -0700
     6.2 +++ b/stubdom/stubdom-dm	Fri Feb 29 09:19:58 2008 -0700
     6.3 @@ -62,32 +62,23 @@ done
     6.4  
     6.5  creation="xm create -c stubdom-$domname target=$domid memory=32"
     6.6  
     6.7 -(while true ; do sleep 60 ; done) | $creation > /var/log/xen/qemu-dm-$domid.log &
     6.8 +(while true ; do sleep 60 ; done) | $creation &
     6.9  #xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" &
    6.10  consolepid=$!
    6.11  
    6.12  
    6.13 -# Wait for vnc server to appear
    6.14 -while ! vnc_port=`xenstore-read /local/domain/$domid/console/vnc-port`
    6.15 -do
    6.16 -        # Check that the stubdom job is still alive
    6.17 -        kill -0 $consolepid || term
    6.18 -	sleep 1
    6.19 -done
    6.20 -
    6.21 -################
    6.22 -# DEBUG: tcpdump
    6.23 -#while ! stubdomid=`xm domid stubdom-$domname`
    6.24 -#do
    6.25 -#        sleep 1
    6.26 -#done
    6.27 -#xterm -geometry 160x25+0+$height -e /bin/sh -c "tcpdump -n -i vif$stubdomid.0" &
    6.28 -#xterm -geometry 160x25+0+$((2 * $height)) -e /bin/sh -c "tcpdump -n -i vif$stubdomid.1" &
    6.29 -
    6.30  ###########
    6.31  # vncviewer
    6.32  if [ "$vncviewer" = 1 ]
    6.33  then
    6.34 +    # Wait for vnc server to appear
    6.35 +    while ! vnc_port=`xenstore-read /local/domain/$domid/console/vnc-port`
    6.36 +    do
    6.37 +        # Check that the stubdom job is still alive
    6.38 +        kill -0 $consolepid || term
    6.39 +	sleep 1
    6.40 +    done
    6.41 +
    6.42      vncviewer $ip:$vnc_port &
    6.43      vncpid=$!
    6.44  fi
     7.1 --- a/tools/blktap/drivers/block-qcow2.c	Fri Feb 29 09:18:01 2008 -0700
     7.2 +++ b/tools/blktap/drivers/block-qcow2.c	Fri Feb 29 09:19:58 2008 -0700
     7.3 @@ -1243,167 +1243,6 @@ static void create_refcount_update(QCowC
     7.4  	}
     7.5  }
     7.6  
     7.7 -static int qcow2_create(const char *filename, int64_t total_size,
     7.8 -		const char *backing_file, int flags)
     7.9 -{
    7.10 -	int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
    7.11 -	QCowHeader header;
    7.12 -	uint64_t tmp, offset;
    7.13 -	QCowCreateState s1, *s = &s1;
    7.14 -
    7.15 -	memset(s, 0, sizeof(*s));
    7.16 -
    7.17 -	fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
    7.18 -	if (fd < 0)
    7.19 -		return -1;
    7.20 -	memset(&header, 0, sizeof(header));
    7.21 -	header.magic = cpu_to_be32(QCOW_MAGIC);
    7.22 -	header.version = cpu_to_be32(QCOW_VERSION);
    7.23 -	header.size = cpu_to_be64(total_size * 512);
    7.24 -	header_size = sizeof(header);
    7.25 -	backing_filename_len = 0;
    7.26 -	if (backing_file) {
    7.27 -		header.backing_file_offset = cpu_to_be64(header_size);
    7.28 -		backing_filename_len = strlen(backing_file);
    7.29 -		header.backing_file_size = cpu_to_be32(backing_filename_len);
    7.30 -		header_size += backing_filename_len;
    7.31 -	}
    7.32 -	s->cluster_bits = 12;  /* 4 KB clusters */
    7.33 -	s->cluster_size = 1 << s->cluster_bits;
    7.34 -	header.cluster_bits = cpu_to_be32(s->cluster_bits);
    7.35 -	header_size = (header_size + 7) & ~7;
    7.36 -	if (flags & BLOCK_FLAG_ENCRYPT) {
    7.37 -		header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
    7.38 -	} else {
    7.39 -		header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
    7.40 -	}
    7.41 -	l2_bits = s->cluster_bits - 3;
    7.42 -	shift = s->cluster_bits + l2_bits;
    7.43 -	l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift);
    7.44 -	offset = align_offset(header_size, s->cluster_size);
    7.45 -	s->l1_table_offset = offset;
    7.46 -	header.l1_table_offset = cpu_to_be64(s->l1_table_offset);
    7.47 -	header.l1_size = cpu_to_be32(l1_size);
    7.48 -	offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
    7.49 -
    7.50 -	s->refcount_table = qemu_mallocz(s->cluster_size);
    7.51 -	if (!s->refcount_table)
    7.52 -		goto fail;
    7.53 -	s->refcount_block = qemu_mallocz(s->cluster_size);
    7.54 -	if (!s->refcount_block)
    7.55 -		goto fail;
    7.56 -
    7.57 -	s->refcount_table_offset = offset;
    7.58 -	header.refcount_table_offset = cpu_to_be64(offset);
    7.59 -	header.refcount_table_clusters = cpu_to_be32(1);
    7.60 -	offset += s->cluster_size;
    7.61 -
    7.62 -	s->refcount_table[0] = cpu_to_be64(offset);
    7.63 -	s->refcount_block_offset = offset;
    7.64 -	offset += s->cluster_size;
    7.65 -
    7.66 -	/* update refcounts */
    7.67 -	create_refcount_update(s, 0, header_size);
    7.68 -	create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t));
    7.69 -	create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
    7.70 -	create_refcount_update(s, s->refcount_block_offset, s->cluster_size);
    7.71 -
    7.72 -	/* write all the data */
    7.73 -	write(fd, &header, sizeof(header));
    7.74 -	if (backing_file) {
    7.75 -		write(fd, backing_file, backing_filename_len);
    7.76 -	}
    7.77 -	lseek(fd, s->l1_table_offset, SEEK_SET);
    7.78 -	tmp = 0;
    7.79 -	for(i = 0;i < l1_size; i++) {
    7.80 -		write(fd, &tmp, sizeof(tmp));
    7.81 -	}
    7.82 -	lseek(fd, s->refcount_table_offset, SEEK_SET);
    7.83 -	write(fd, s->refcount_table, s->cluster_size);
    7.84 -
    7.85 -	lseek(fd, s->refcount_block_offset, SEEK_SET);
    7.86 -	write(fd, s->refcount_block, s->cluster_size);
    7.87 -
    7.88 -	qemu_free(s->refcount_table);
    7.89 -	qemu_free(s->refcount_block);
    7.90 -	close(fd);
    7.91 -	return 0;
    7.92 -fail:
    7.93 -	qemu_free(s->refcount_table);
    7.94 -	qemu_free(s->refcount_block);
    7.95 -	close(fd);
    7.96 -	return -ENOMEM;
    7.97 -}
    7.98 -
    7.99 -/* XXX: put compressed sectors first, then all the cluster aligned
   7.100 -   tables to avoid losing bytes in alignment */
   7.101 -static int qcow_write_compressed(struct disk_driver *bs, int64_t sector_num,
   7.102 -		const uint8_t *buf, int nb_sectors)
   7.103 -{
   7.104 -	BDRVQcowState *s = bs->private;
   7.105 -	z_stream strm;
   7.106 -	int ret, out_len;
   7.107 -	uint8_t *out_buf;
   7.108 -	uint64_t cluster_offset;
   7.109 -
   7.110 -	if (nb_sectors == 0) {
   7.111 -		/* align end of file to a sector boundary to ease reading with
   7.112 -		   sector based I/Os */
   7.113 -		cluster_offset = 512 * s->total_sectors;
   7.114 -		cluster_offset = (cluster_offset + 511) & ~511;
   7.115 -		ftruncate(s->fd, cluster_offset);
   7.116 -		return 0;
   7.117 -	}
   7.118 -
   7.119 -	if (nb_sectors != s->cluster_sectors)
   7.120 -		return -EINVAL;
   7.121 -
   7.122 -	out_buf = qemu_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
   7.123 -	if (!out_buf)
   7.124 -		return -ENOMEM;
   7.125 -
   7.126 -	/* best compression, small window, no zlib header */
   7.127 -	memset(&strm, 0, sizeof(strm));
   7.128 -	ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
   7.129 -			Z_DEFLATED, -12,
   7.130 -			9, Z_DEFAULT_STRATEGY);
   7.131 -	if (ret != 0) {
   7.132 -		qemu_free(out_buf);
   7.133 -		return -1;
   7.134 -	}
   7.135 -
   7.136 -	strm.avail_in = s->cluster_size;
   7.137 -	strm.next_in = (uint8_t *)buf;
   7.138 -	strm.avail_out = s->cluster_size;
   7.139 -	strm.next_out = out_buf;
   7.140 -
   7.141 -	ret = deflate(&strm, Z_FINISH);
   7.142 -	if (ret != Z_STREAM_END && ret != Z_OK) {
   7.143 -		qemu_free(out_buf);
   7.144 -		deflateEnd(&strm);
   7.145 -		return -1;
   7.146 -	}
   7.147 -	out_len = strm.next_out - out_buf;
   7.148 -
   7.149 -	deflateEnd(&strm);
   7.150 -
   7.151 -	if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
   7.152 -		/* could not compress: write normal cluster */
   7.153 -		qcow_write(bs, sector_num, buf, s->cluster_sectors);
   7.154 -	} else {
   7.155 -		cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
   7.156 -											out_len, 0, 0);
   7.157 -		cluster_offset &= s->cluster_offset_mask;
   7.158 -		if (bdrv_pwrite(s->fd, cluster_offset, out_buf, out_len) != out_len) {
   7.159 -			qemu_free(out_buf);
   7.160 -			return -1;
   7.161 -		}
   7.162 -	}
   7.163 -
   7.164 -	qemu_free(out_buf);
   7.165 -	return 0;
   7.166 -}
   7.167 -
   7.168  static int qcow_submit(struct disk_driver *bs)
   7.169  {
   7.170  	struct BDRVQcowState *s = (struct BDRVQcowState*) bs->private;
     8.1 --- a/tools/ioemu/block-qcow.c	Fri Feb 29 09:18:01 2008 -0700
     8.2 +++ b/tools/ioemu/block-qcow.c	Fri Feb 29 09:19:58 2008 -0700
     8.3 @@ -95,7 +95,7 @@ static int qcow_open(BlockDriverState *b
     8.4      int len, i, shift, ret;
     8.5      QCowHeader header;
     8.6  
     8.7 -    ret = bdrv_file_open(&s->hd, filename, flags);
     8.8 +    ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE);
     8.9      if (ret < 0)
    8.10          return ret;
    8.11      if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
     9.1 --- a/tools/ioemu/block-qcow2.c	Fri Feb 29 09:18:01 2008 -0700
     9.2 +++ b/tools/ioemu/block-qcow2.c	Fri Feb 29 09:19:58 2008 -0700
     9.3 @@ -191,7 +191,7 @@ static int qcow_open(BlockDriverState *b
     9.4      int len, i, shift, ret;
     9.5      QCowHeader header;
     9.6  
     9.7 -    ret = bdrv_file_open(&s->hd, filename, flags);
     9.8 +    ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE);
     9.9      if (ret < 0)
    9.10          return ret;
    9.11      if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
    10.1 --- a/tools/ioemu/block-raw.c	Fri Feb 29 09:18:01 2008 -0700
    10.2 +++ b/tools/ioemu/block-raw.c	Fri Feb 29 09:19:58 2008 -0700
    10.3 @@ -1489,5 +1489,7 @@ BlockDriver bdrv_host_device = {
    10.4      .bdrv_pread = raw_pread,
    10.5      .bdrv_pwrite = raw_pwrite,
    10.6      .bdrv_getlength = raw_getlength,
    10.7 +
    10.8 +    .bdrv_flags = BLOCK_DRIVER_FLAG_EXTENDABLE
    10.9  };
   10.10  #endif /* _WIN32 */
    11.1 --- a/tools/ioemu/block-vmdk.c	Fri Feb 29 09:18:01 2008 -0700
    11.2 +++ b/tools/ioemu/block-vmdk.c	Fri Feb 29 09:19:58 2008 -0700
    11.3 @@ -352,7 +352,7 @@ static int vmdk_open(BlockDriverState *b
    11.4      uint32_t magic;
    11.5      int l1_size, i, ret;
    11.6  
    11.7 -    ret = bdrv_file_open(&s->hd, filename, flags);
    11.8 +    ret = bdrv_file_open(&s->hd, filename, flags | BDRV_O_EXTENDABLE);
    11.9      if (ret < 0)
   11.10          return ret;
   11.11      if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))
    12.1 --- a/tools/ioemu/block.c	Fri Feb 29 09:18:01 2008 -0700
    12.2 +++ b/tools/ioemu/block.c	Fri Feb 29 09:19:58 2008 -0700
    12.3 @@ -123,20 +123,23 @@ void path_combine(char *dest, int dest_s
    12.4  static int bdrv_rw_badreq_sectors(BlockDriverState *bs,
    12.5  				int64_t sector_num, int nb_sectors)
    12.6  {
    12.7 -    return
    12.8 +    return (
    12.9  	nb_sectors < 0 ||
   12.10  	nb_sectors > bs->total_sectors ||
   12.11 -	sector_num > bs->total_sectors - nb_sectors;
   12.12 +	sector_num > bs->total_sectors - nb_sectors
   12.13 +	) && !bs->extendable;
   12.14  }
   12.15  
   12.16  static int bdrv_rw_badreq_bytes(BlockDriverState *bs,
   12.17  				  int64_t offset, int count)
   12.18  {
   12.19      int64_t size = bs->total_sectors << SECTOR_BITS;
   12.20 -    return
   12.21 +    return (
   12.22  	count < 0 ||
   12.23  	count > size ||
   12.24 -	offset > size - count;
   12.25 +	offset > size - count
   12.26 +	) && !bs->extendable;
   12.27 +    
   12.28  }
   12.29  
   12.30  void bdrv_register(BlockDriver *bdrv)
   12.31 @@ -347,6 +350,12 @@ int bdrv_open2(BlockDriverState *bs, con
   12.32      bs->is_temporary = 0;
   12.33      bs->encrypted = 0;
   12.34  
   12.35 +    if (flags & BDRV_O_EXTENDABLE) {
   12.36 +	if (!(drv->bdrv_flags & BLOCK_DRIVER_FLAG_EXTENDABLE))
   12.37 +	    return -ENOSYS;
   12.38 +	bs->extendable = 1;
   12.39 +    }
   12.40 +
   12.41      if (flags & BDRV_O_SNAPSHOT) {
   12.42          BlockDriverState *bs1;
   12.43          int64_t total_size;
    13.1 --- a/tools/ioemu/block_int.h	Fri Feb 29 09:18:01 2008 -0700
    13.2 +++ b/tools/ioemu/block_int.h	Fri Feb 29 09:19:58 2008 -0700
    13.3 @@ -24,6 +24,8 @@
    13.4  #ifndef BLOCK_INT_H
    13.5  #define BLOCK_INT_H
    13.6  
    13.7 +#define BLOCK_DRIVER_FLAG_EXTENDABLE  0x0001u
    13.8 +
    13.9  struct BlockDriver {
   13.10      const char *format_name;
   13.11      int instance_size;
   13.12 @@ -76,6 +78,7 @@ struct BlockDriver {
   13.13      int (*bdrv_eject)(BlockDriverState *bs, int eject_flag);
   13.14      int (*bdrv_set_locked)(BlockDriverState *bs, int locked);
   13.15      
   13.16 +    unsigned bdrv_flags;
   13.17      BlockDriverAIOCB *free_aiocb;
   13.18      struct BlockDriver *next;
   13.19  };
   13.20 @@ -87,6 +90,7 @@ struct BlockDriverState {
   13.21      int removable; /* if true, the media can be removed */
   13.22      int locked;    /* if true, the media cannot temporarily be ejected */
   13.23      int encrypted; /* if true, the media is encrypted */
   13.24 +    int extendable;/* if true, we may write out of original range */
   13.25      /* event callback when inserting/removing */
   13.26      void (*change_cb)(void *opaque);
   13.27      void *change_opaque;
    14.1 --- a/tools/ioemu/hw/e1000.c	Fri Feb 29 09:18:01 2008 -0700
    14.2 +++ b/tools/ioemu/hw/e1000.c	Fri Feb 29 09:19:58 2008 -0700
    14.3 @@ -48,7 +48,7 @@ static int debugflags = DBGBIT(TXERR) | 
    14.4  #endif
    14.5  
    14.6  #define IOPORT_SIZE       0x40
    14.7 -#define PNPMMIO_SIZE      0x60000
    14.8 +#define PNPMMIO_SIZE      0x20000
    14.9  
   14.10  /*
   14.11   * HW models:
    15.1 --- a/tools/ioemu/hw/xenfb.c	Fri Feb 29 09:18:01 2008 -0700
    15.2 +++ b/tools/ioemu/hw/xenfb.c	Fri Feb 29 09:19:58 2008 -0700
    15.3 @@ -19,6 +19,12 @@
    15.4  
    15.5  #include "xenfb.h"
    15.6  
    15.7 +#ifdef CONFIG_STUBDOM
    15.8 +#include <semaphore.h>
    15.9 +#include <sched.h>
   15.10 +#include <fbfront.h>
   15.11 +#endif
   15.12 +
   15.13  #ifndef BTN_LEFT
   15.14  #define BTN_LEFT 0x110 /* from <linux/input.h> */
   15.15  #endif
   15.16 @@ -592,7 +598,8 @@ static int xenfb_send_key(struct xenfb *
   15.17  }
   15.18  
   15.19  /* Send a relative mouse movement event */
   15.20 -static int xenfb_send_motion(struct xenfb *xenfb, int rel_x, int rel_y, int rel_z)
   15.21 +static int xenfb_send_motion(struct xenfb *xenfb,
   15.22 +			     int rel_x, int rel_y, int rel_z)
   15.23  {
   15.24  	union xenkbd_in_event event;
   15.25  
   15.26 @@ -606,7 +613,8 @@ static int xenfb_send_motion(struct xenf
   15.27  }
   15.28  
   15.29  /* Send an absolute mouse movement event */
   15.30 -static int xenfb_send_position(struct xenfb *xenfb, int abs_x, int abs_y, int abs_z)
   15.31 +static int xenfb_send_position(struct xenfb *xenfb,
   15.32 +			       int abs_x, int abs_y, int rel_z)
   15.33  {
   15.34  	union xenkbd_in_event event;
   15.35  
   15.36 @@ -614,7 +622,7 @@ static int xenfb_send_position(struct xe
   15.37  	event.type = XENKBD_TYPE_POS;
   15.38  	event.pos.abs_x = abs_x;
   15.39  	event.pos.abs_y = abs_y;
   15.40 -	event.pos.abs_z = abs_z;
   15.41 +	event.pos.rel_z = rel_z;
   15.42  
   15.43  	return xenfb_kbd_event(xenfb, &event);
   15.44  }
   15.45 @@ -1124,12 +1132,10 @@ static void xenfb_guest_copy(struct xenf
   15.46      dpy_update(xenfb->ds, x, y, w, h);
   15.47  }
   15.48  
   15.49 -/* QEMU display state changed, so refresh the framebuffer copy */
   15.50 -/* XXX - can we optimize this, or the next func at all ? */ 
   15.51 +/* Periodic update of display, no need for any in our case */
   15.52  static void xenfb_update(void *opaque)
   15.53  {
   15.54      struct xenfb *xenfb = opaque;
   15.55 -    xenfb_guest_copy(xenfb, 0, 0, xenfb->width, xenfb->height);
   15.56  }
   15.57  
   15.58  /* QEMU display state changed, so refresh the framebuffer copy */
   15.59 @@ -1169,6 +1175,204 @@ static int xenfb_register_console(struct
   15.60          return 0;
   15.61  }
   15.62  
   15.63 +#ifdef CONFIG_STUBDOM
   15.64 +static struct semaphore kbd_sem = __SEMAPHORE_INITIALIZER(kbd_sem, 0);
   15.65 +static struct kbdfront_dev *kbd_dev;
   15.66 +static char *kbd_path, *fb_path;
   15.67 +
   15.68 +static unsigned char linux2scancode[KEY_MAX + 1];
   15.69 +
   15.70 +#define WIDTH 1024
   15.71 +#define HEIGHT 768
   15.72 +#define DEPTH 32
   15.73 +#define LINESIZE (1280 * (DEPTH / 8))
   15.74 +#define MEMSIZE (LINESIZE * HEIGHT)
   15.75 +
   15.76 +int xenfb_connect_vkbd(const char *path)
   15.77 +{
   15.78 +    kbd_path = strdup(path);
   15.79 +    return 0;
   15.80 +}
   15.81 +
   15.82 +int xenfb_connect_vfb(const char *path)
   15.83 +{
   15.84 +    fb_path = strdup(path);
   15.85 +    return 0;
   15.86 +}
   15.87 +
   15.88 +static void xenfb_pv_update(DisplayState *s, int x, int y, int w, int h)
   15.89 +{
   15.90 +    struct fbfront_dev *fb_dev = s->opaque;
   15.91 +    fbfront_update(fb_dev, x, y, w, h);
   15.92 +}
   15.93 +
   15.94 +static void xenfb_pv_resize(DisplayState *s, int w, int h)
   15.95 +{
   15.96 +    struct fbfront_dev *fb_dev = s->opaque;
   15.97 +    fprintf(stderr,"resize to %dx%d required\n", w, h);
   15.98 +    s->width = w;
   15.99 +    s->height = h;
  15.100 +    /* TODO: send resize event if supported */
  15.101 +    memset(s->data, 0, MEMSIZE);
  15.102 +    fbfront_update(fb_dev, 0, 0, WIDTH, HEIGHT);
  15.103 +}
  15.104 +
  15.105 +static void xenfb_pv_colourdepth(DisplayState *s, int depth)
  15.106 +{
  15.107 +    /* TODO: send redepth event if supported */
  15.108 +    fprintf(stderr,"redepth to %d required\n", depth);
  15.109 +}
  15.110 +
  15.111 +static void xenfb_kbd_handler(void *opaque)
  15.112 +{
  15.113 +#define KBD_NUM_BATCH 64
  15.114 +    union xenkbd_in_event buf[KBD_NUM_BATCH];
  15.115 +    int n, i;
  15.116 +    DisplayState *s = opaque;
  15.117 +    static int buttons;
  15.118 +    static int x, y;
  15.119 +
  15.120 +    n = kbdfront_receive(kbd_dev, buf, KBD_NUM_BATCH);
  15.121 +    for (i = 0; i < n; i++) {
  15.122 +        switch (buf[i].type) {
  15.123 +
  15.124 +            case XENKBD_TYPE_MOTION:
  15.125 +                fprintf(stderr, "FB backend sent us relative mouse motion event!\n");
  15.126 +                break;
  15.127 +
  15.128 +            case XENKBD_TYPE_POS:
  15.129 +            {
  15.130 +                int new_x = buf[i].pos.abs_x;
  15.131 +                int new_y = buf[i].pos.abs_y;
  15.132 +                if (new_x >= s->width)
  15.133 +                    new_x = s->width - 1;
  15.134 +                if (new_y >= s->height)
  15.135 +                    new_y = s->height - 1;
  15.136 +                if (kbd_mouse_is_absolute()) {
  15.137 +                    kbd_mouse_event(
  15.138 +                            new_x * 0x7FFF / (s->width - 1),
  15.139 +                            new_y * 0x7FFF / (s->height - 1),
  15.140 +                            buf[i].pos.rel_z,
  15.141 +                            buttons);
  15.142 +                } else {
  15.143 +                    kbd_mouse_event(
  15.144 +                            new_x - x,
  15.145 +                            new_y - y,
  15.146 +                            buf[i].pos.rel_z,
  15.147 +                            buttons);
  15.148 +                }
  15.149 +                x = new_x;
  15.150 +                y = new_y;
  15.151 +                break;
  15.152 +            }
  15.153 +
  15.154 +            case XENKBD_TYPE_KEY:
  15.155 +            {
  15.156 +                int keycode = buf[i].key.keycode;
  15.157 +                int button = 0;
  15.158 +
  15.159 +                if (keycode == BTN_LEFT)
  15.160 +                    button = MOUSE_EVENT_LBUTTON;
  15.161 +                else if (keycode == BTN_RIGHT)
  15.162 +                    button = MOUSE_EVENT_RBUTTON;
  15.163 +                else if (keycode == BTN_MIDDLE)
  15.164 +                    button = MOUSE_EVENT_MBUTTON;
  15.165 +
  15.166 +                if (button) {
  15.167 +                    if (buf[i].key.pressed)
  15.168 +                        buttons |=  button;
  15.169 +                    else
  15.170 +                        buttons &= ~button;
  15.171 +                    if (kbd_mouse_is_absolute())
  15.172 +                        kbd_mouse_event(
  15.173 +                                x * 0x7FFF / s->width,
  15.174 +                                y * 0x7FFF / s->height,
  15.175 +                                0,
  15.176 +                                buttons);
  15.177 +                    else
  15.178 +                        kbd_mouse_event(0, 0, 0, buttons);
  15.179 +                } else {
  15.180 +                    int scancode = linux2scancode[keycode];
  15.181 +                    if (!scancode) {
  15.182 +                        fprintf(stderr, "Can't convert keycode %x to scancode\n", keycode);
  15.183 +                        break;
  15.184 +                    }
  15.185 +                    if (scancode & 0x80) {
  15.186 +                        kbd_put_keycode(0xe0);
  15.187 +                        scancode &= 0x7f;
  15.188 +                    }
  15.189 +                    if (!buf[i].key.pressed)
  15.190 +                        scancode |= 0x80;
  15.191 +                    kbd_put_keycode(scancode);
  15.192 +                }
  15.193 +                break;
  15.194 +            }
  15.195 +        }
  15.196 +    }
  15.197 +}
  15.198 +
  15.199 +static void xenfb_pv_refresh(DisplayState *ds)
  15.200 +{
  15.201 +    vga_hw_update();
  15.202 +}
  15.203 +
  15.204 +static void kbdfront_thread(void *p)
  15.205 +{
  15.206 +    int scancode, keycode;
  15.207 +    kbd_dev = init_kbdfront(p, 1);
  15.208 +    if (!kbd_dev) {
  15.209 +        fprintf(stderr,"can't open keyboard\n");
  15.210 +        exit(1);
  15.211 +    }
  15.212 +    up(&kbd_sem);
  15.213 +    for (scancode = 0; scancode < 128; scancode++) {
  15.214 +        keycode = atkbd_set2_keycode[atkbd_unxlate_table[scancode]];
  15.215 +        linux2scancode[keycode] = scancode;
  15.216 +        keycode = atkbd_set2_keycode[atkbd_unxlate_table[scancode] | 0x80];
  15.217 +        linux2scancode[keycode] = scancode | 0x80;
  15.218 +    }
  15.219 +}
  15.220 +
  15.221 +int xenfb_pv_display_init(DisplayState *ds)
  15.222 +{
  15.223 +    void *data;
  15.224 +    struct fbfront_dev *fb_dev;
  15.225 +    int kbd_fd;
  15.226 +
  15.227 +    if (!fb_path || !kbd_path)
  15.228 +        return -1;
  15.229 +
  15.230 +    create_thread("kbdfront", kbdfront_thread, (void*) kbd_path);
  15.231 +
  15.232 +    data = qemu_memalign(PAGE_SIZE, VGA_RAM_SIZE);
  15.233 +    fb_dev = init_fbfront(fb_path, data, WIDTH, HEIGHT, DEPTH, LINESIZE, MEMSIZE);
  15.234 +    if (!fb_dev) {
  15.235 +        fprintf(stderr,"can't open frame buffer\n");
  15.236 +        exit(1);
  15.237 +    }
  15.238 +    free(fb_path);
  15.239 +
  15.240 +    down(&kbd_sem);
  15.241 +    free(kbd_path);
  15.242 +
  15.243 +    kbd_fd = kbdfront_open(kbd_dev);
  15.244 +    qemu_set_fd_handler(kbd_fd, xenfb_kbd_handler, NULL, ds);
  15.245 +
  15.246 +    ds->data = data;
  15.247 +    ds->linesize = LINESIZE;
  15.248 +    ds->depth = DEPTH;
  15.249 +    ds->bgr = 0;
  15.250 +    ds->width = WIDTH;
  15.251 +    ds->height = HEIGHT;
  15.252 +    ds->dpy_update = xenfb_pv_update;
  15.253 +    ds->dpy_resize = xenfb_pv_resize;
  15.254 +    ds->dpy_colourdepth = NULL; //xenfb_pv_colourdepth;
  15.255 +    ds->dpy_refresh = xenfb_pv_refresh;
  15.256 +    ds->opaque = fb_dev;
  15.257 +    return 0;
  15.258 +}
  15.259 +#endif
  15.260 +
  15.261  /*
  15.262   * Local variables:
  15.263   *  c-indent-level: 8
    16.1 --- a/tools/ioemu/keymaps.c	Fri Feb 29 09:18:01 2008 -0700
    16.2 +++ b/tools/ioemu/keymaps.c	Fri Feb 29 09:19:58 2008 -0700
    16.3 @@ -126,11 +126,11 @@ static kbd_layout_t *parse_keyboard_layo
    16.4  		    if (rest && strstr(rest, "numlock")) {
    16.5  			add_to_key_range(&k->keypad_range, keycode);
    16.6  			add_to_key_range(&k->numlock_range, keysym);
    16.7 -			fprintf(stderr, "keypad keysym %04x keycode %d\n", keysym, keycode);
    16.8 +			//fprintf(stderr, "keypad keysym %04x keycode %d\n", keysym, keycode);
    16.9  		    }
   16.10  		    if (rest && strstr(rest, "shift")) {
   16.11  			add_to_key_range(&k->shift_range, keysym);
   16.12 -			fprintf(stderr, "shift keysym %04x keycode %d\n", keysym, keycode);
   16.13 +			//fprintf(stderr, "shift keysym %04x keycode %d\n", keysym, keycode);
   16.14  		    }
   16.15  
   16.16  		    /* if(keycode&0x80)
    17.1 --- a/tools/ioemu/monitor.c	Fri Feb 29 09:18:01 2008 -0700
    17.2 +++ b/tools/ioemu/monitor.c	Fri Feb 29 09:19:58 2008 -0700
    17.3 @@ -2520,7 +2520,7 @@ static void monitor_handle_command1(void
    17.4  
    17.5  static void monitor_start_input(void)
    17.6  {
    17.7 -    readline_start("(HVMXen) ", 0, monitor_handle_command1, NULL);
    17.8 +    readline_start("(qemu) ", 0, monitor_handle_command1, NULL);
    17.9  }
   17.10  
   17.11  static void term_event(void *opaque, int event)
    18.1 --- a/tools/ioemu/vl.c	Fri Feb 29 09:18:01 2008 -0700
    18.2 +++ b/tools/ioemu/vl.c	Fri Feb 29 09:19:58 2008 -0700
    18.3 @@ -7611,9 +7611,7 @@ int main(int argc, char **argv)
    18.4          }
    18.5      }
    18.6  
    18.7 -    /* Now send logs to our named config */
    18.8 -    sprintf(qemu_dm_logfilename, "/var/log/xen/qemu-dm-%d.log", domid);
    18.9 -    cpu_set_log_filename(qemu_dm_logfilename);
   18.10 +    cpu_set_log(0);
   18.11  
   18.12  #ifndef NO_DAEMONIZE
   18.13      if (daemonize && !nographic && vnc_display == NULL && vncunused == 0) {
   18.14 @@ -7831,6 +7829,10 @@ int main(int argc, char **argv)
   18.15      init_ioports();
   18.16  
   18.17      /* terminal init */
   18.18 +#ifdef CONFIG_STUBDOM
   18.19 +    if (xenfb_pv_display_init(ds) == 0) {
   18.20 +    } else
   18.21 +#endif
   18.22      if (nographic) {
   18.23          dumb_display_init(ds);
   18.24      } else if (vnc_display != NULL || vncunused != 0) {
    19.1 --- a/tools/ioemu/vl.h	Fri Feb 29 09:18:01 2008 -0700
    19.2 +++ b/tools/ioemu/vl.h	Fri Feb 29 09:19:58 2008 -0700
    19.3 @@ -614,6 +614,8 @@ typedef struct QEMUSnapshotInfo {
    19.4                                       use a disk image format on top of
    19.5                                       it (default for
    19.6                                       bdrv_file_open()) */
    19.7 +#define BDRV_O_EXTENDABLE  0x0080 /* allow writes out of original size range;
    19.8 +				     only effective for some drivers */
    19.9  
   19.10  void bdrv_init(void);
   19.11  BlockDriver *bdrv_find_format(const char *format_name);
   19.12 @@ -1525,6 +1527,11 @@ int xenstore_unsubscribe_from_hotplug_st
   19.13  int xenstore_vm_write(int domid, char *key, char *val);
   19.14  char *xenstore_vm_read(int domid, char *key, unsigned int *len);
   19.15  
   19.16 +/* xenfb.c */
   19.17 +int xenfb_pv_display_init(DisplayState *ds);
   19.18 +int xenfb_connect_vkbd(const char *path);
   19.19 +int xenfb_connect_vfb(const char *path);
   19.20 +
   19.21  /* helper2.c */
   19.22  extern long time_offset;
   19.23  void timeoffset_get(void);
    20.1 --- a/tools/ioemu/xenstore.c	Fri Feb 29 09:18:01 2008 -0700
    20.2 +++ b/tools/ioemu/xenstore.c	Fri Feb 29 09:19:58 2008 -0700
    20.3 @@ -238,6 +238,37 @@ void xenstore_parse_domain_config(int do
    20.4          }
    20.5      }
    20.6  
    20.7 +#ifdef CONFIG_STUBDOM
    20.8 +    if (pasprintf(&buf, "%s/device/vkbd", path) == -1)
    20.9 +        goto out;
   20.10 +
   20.11 +    free(e);
   20.12 +    e = xs_directory(xsh, XBT_NULL, buf, &num);
   20.13 +
   20.14 +    if (e) {
   20.15 +        for (i = 0; i < num; i++) {
   20.16 +            if (pasprintf(&buf, "%s/device/vkbd/%s", path, e[i]) == -1)
   20.17 +                continue;
   20.18 +            xenfb_connect_vkbd(buf);
   20.19 +        }
   20.20 +    }
   20.21 +
   20.22 +    if (pasprintf(&buf, "%s/device/vfb", path) == -1)
   20.23 +        goto out;
   20.24 +
   20.25 +    free(e);
   20.26 +    e = xs_directory(xsh, XBT_NULL, buf, &num);
   20.27 +
   20.28 +    if (e) {
   20.29 +        for (i = 0; i < num; i++) {
   20.30 +            if (pasprintf(&buf, "%s/device/vfb/%s", path, e[i]) == -1)
   20.31 +                continue;
   20.32 +            xenfb_connect_vfb(buf);
   20.33 +        }
   20.34 +    }
   20.35 +#endif
   20.36 +
   20.37 +
   20.38      /* Set a watch for log-dirty requests from the migration tools */
   20.39      if (pasprintf(&buf, "/local/domain/0/device-model/%u/logdirty/next-active",
   20.40                    domid) != -1) {
    21.1 --- a/tools/python/xen/xend/XendAPI.py	Fri Feb 29 09:18:01 2008 -0700
    21.2 +++ b/tools/python/xen/xend/XendAPI.py	Fri Feb 29 09:19:58 2008 -0700
    21.3 @@ -1761,9 +1761,10 @@ class XendAPI(object):
    21.4  
    21.5          resource = other_config.get("resource", 0)
    21.6          port = other_config.get("port", 0)
    21.7 +        node = other_config.get("node", 0)
    21.8          
    21.9          xendom.domain_migrate(xeninfo.getDomid(), destination_url,
   21.10 -                              bool(live), resource, port)
   21.11 +                              bool(live), resource, port, node)
   21.12          return xen_api_success_void()
   21.13  
   21.14      def VM_save(self, _, vm_ref, dest, checkpoint):
    22.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Fri Feb 29 09:18:01 2008 -0700
    22.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Fri Feb 29 09:19:58 2008 -0700
    22.3 @@ -22,6 +22,7 @@ from xen.xend.XendError import XendError
    22.4  from xen.xend.XendLogging import log
    22.5  from xen.xend.XendConfig import XendConfig
    22.6  from xen.xend.XendConstants import *
    22.7 +from xen.xend import XendNode
    22.8  
    22.9  SIGNATURE = "LinuxGuestRecord"
   22.10  QEMU_SIGNATURE = "QemuDeviceModelRecord"
   22.11 @@ -56,10 +57,23 @@ def read_exact(fd, size, errmsg):
   22.12      return buf
   22.13  
   22.14  
   22.15 -def save(fd, dominfo, network, live, dst, checkpoint=False):
   22.16 +def insert_after(list, pred, value):
   22.17 +    for i,k in enumerate(list):
   22.18 +        if type(k) == type([]):
   22.19 +           if k[0] == pred:
   22.20 +              list.insert (i+1, value)
   22.21 +    return
   22.22 +
   22.23 +
   22.24 +def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1):
   22.25      write_exact(fd, SIGNATURE, "could not write guest state file: signature")
   22.26  
   22.27 -    config = sxp.to_string(dominfo.sxpr())
   22.28 +    sxprep = dominfo.sxpr()
   22.29 +
   22.30 +    if node > -1:
   22.31 +        insert_after(sxprep,'vcpus',['node', str(node)])
   22.32 +
   22.33 +    config = sxp.to_string(sxprep)
   22.34  
   22.35      domain_name = dominfo.getName()
   22.36      # Rename the domain temporarily, so that we don't get a name clash if this
   22.37 @@ -192,6 +206,21 @@ def restore(xd, fd, dominfo = None, paus
   22.38      else:
   22.39          dominfo = xd.restore_(vmconfig)
   22.40  
   22.41 +    # repin domain vcpus if a target node number was specified 
   22.42 +    # this is done prior to memory allocation to aide in memory
   22.43 +    # distribution for NUMA systems.
   22.44 +    nodenr = -1
   22.45 +    for i,l in enumerate(vmconfig):
   22.46 +        if type(l) == type([]):
   22.47 +            if l[0] == 'node':
   22.48 +                nodenr = int(l[1])
   22.49 +
   22.50 +    if nodenr >= 0:
   22.51 +        node_to_cpu = XendNode.instance().xc.physinfo()['node_to_cpu']
   22.52 +        if nodenr < len(node_to_cpu):
   22.53 +            for v in range(0, dominfo.info['VCPUs_max']):
   22.54 +                 xc.vcpu_setaffinity(dominfo.domid, v, node_to_cpu[nodenr])
   22.55 +
   22.56      store_port   = dominfo.getStorePort()
   22.57      console_port = dominfo.getConsolePort()
   22.58  
    23.1 --- a/tools/python/xen/xend/XendDomain.py	Fri Feb 29 09:18:01 2008 -0700
    23.2 +++ b/tools/python/xen/xend/XendDomain.py	Fri Feb 29 09:19:58 2008 -0700
    23.3 @@ -865,7 +865,7 @@ class XendDomain:
    23.4                  raise XendInvalidDomain(domname)
    23.5  
    23.6              if dominfo.getDomid() == DOM0_ID:
    23.7 -                raise XendError("Cannot save privileged domain %s" % domname)
    23.8 +                raise XendError("Cannot suspend privileged domain %s" % domname)
    23.9  
   23.10              if dominfo._stateGet() != DOM_STATE_RUNNING:
   23.11                  raise VMBadState("Domain is not running",
   23.12 @@ -910,7 +910,7 @@ class XendDomain:
   23.13                      raise XendInvalidDomain(domname)
   23.14  
   23.15                  if dominfo.getDomid() == DOM0_ID:
   23.16 -                    raise XendError("Cannot save privileged domain %s" % domname)
   23.17 +                    raise XendError("Cannot resume privileged domain %s" % domname)
   23.18  
   23.19                  if dominfo._stateGet() != XEN_API_VM_POWER_STATE_SUSPENDED:
   23.20                      raise XendError("Cannot resume domain that is not suspended.")
   23.21 @@ -1258,7 +1258,7 @@ class XendDomain:
   23.22  
   23.23          return val       
   23.24  
   23.25 -    def domain_migrate(self, domid, dst, live=False, resource=0, port=0):
   23.26 +    def domain_migrate(self, domid, dst, live=False, resource=0, port=0, node=-1):
   23.27          """Start domain migration.
   23.28          
   23.29          @param domid: Domain ID or Name
   23.30 @@ -1271,6 +1271,8 @@ class XendDomain:
   23.31          @type live: bool
   23.32          @keyword resource: not used??
   23.33          @rtype: None
   23.34 +        @keyword node: use node number for target
   23.35 +        @rtype: int 
   23.36          @raise XendError: Failed to migrate
   23.37          @raise XendInvalidDomain: Domain is not valid        
   23.38          """
   23.39 @@ -1299,7 +1301,7 @@ class XendDomain:
   23.40  
   23.41          sock.send("receive\n")
   23.42          sock.recv(80)
   23.43 -        XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst)
   23.44 +        XendCheckpoint.save(sock.fileno(), dominfo, True, live, dst, node=node)
   23.45          sock.close()
   23.46  
   23.47      def domain_save(self, domid, dst, checkpoint=False):
    24.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Fri Feb 29 09:18:01 2008 -0700
    24.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Fri Feb 29 09:19:58 2008 -0700
    24.3 @@ -1406,9 +1406,6 @@ class XendDomainInfo:
    24.4      def setWeight(self, cpu_weight):
    24.5          self.info['vcpus_params']['weight'] = cpu_weight
    24.6  
    24.7 -    def setResume(self, state):
    24.8 -        self._resume = state
    24.9 -
   24.10      def getRestartCount(self):
   24.11          return self._readVm('xend/restart_count')
   24.12  
   24.13 @@ -1963,6 +1960,39 @@ class XendDomainInfo:
   24.14              if self.info['cpus'] is not None and len(self.info['cpus']) > 0:
   24.15                  for v in range(0, self.info['VCPUs_max']):
   24.16                      xc.vcpu_setaffinity(self.domid, v, self.info['cpus'])
   24.17 +            else:
   24.18 +                info = xc.physinfo()
   24.19 +                if info['nr_nodes'] > 1:
   24.20 +                    node_memory_list = info['node_to_memory']
   24.21 +                    needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024
   24.22 +                    candidate_node_list = []
   24.23 +                    for i in range(0, info['nr_nodes']):
   24.24 +                        if node_memory_list[i] >= needmem:
   24.25 +                            candidate_node_list.append(i)
   24.26 +                    if candidate_node_list is None or len(candidate_node_list) == 1:
   24.27 +                        index = node_memory_list.index( max(node_memory_list) )
   24.28 +                        cpumask = info['node_to_cpu'][index]
   24.29 +                    else:
   24.30 +                        nodeload = [0]
   24.31 +                        nodeload = nodeload * info['nr_nodes']
   24.32 +                        from xen.xend import XendDomain
   24.33 +                        doms = XendDomain.instance().list('all')
   24.34 +                        for dom in doms:
   24.35 +                            cpuinfo = dom.getVCPUInfo()
   24.36 +                            for vcpu in sxp.children(cpuinfo, 'vcpu'):
   24.37 +                                def vinfo(n, t):
   24.38 +                                    return t(sxp.child_value(vcpu, n))
   24.39 +                                cpumap = vinfo('cpumap', list)
   24.40 +                                for i in candidate_node_list:
   24.41 +                                    node_cpumask = info['node_to_cpu'][i]
   24.42 +                                    for j in node_cpumask:
   24.43 +                                        if j in cpumap:
   24.44 +                                            nodeload[i] += 1
   24.45 +                                            break
   24.46 +                        index = nodeload.index( min(nodeload) )
   24.47 +                        cpumask = info['node_to_cpu'][index]
   24.48 +                    for v in range(0, self.info['VCPUs_max']):
   24.49 +                        xc.vcpu_setaffinity(self.domid, v, cpumask)
   24.50  
   24.51              # Use architecture- and image-specific calculations to determine
   24.52              # the various headrooms necessary, given the raw configured
    25.1 --- a/tools/python/xen/xend/image.py	Fri Feb 29 09:18:01 2008 -0700
    25.2 +++ b/tools/python/xen/xend/image.py	Fri Feb 29 09:19:58 2008 -0700
    25.3 @@ -296,7 +296,34 @@ class ImageHandler:
    25.4                          { 'dom': self.vm.getDomid(), 'read': True, 'write': True })
    25.5          log.info("spawning device models: %s %s", self.device_model, args)
    25.6          # keep track of pid and spawned options to kill it later
    25.7 -        self.pid = os.spawnve(os.P_NOWAIT, self.device_model, args, env)
    25.8 +
    25.9 +        logfile = "/var/log/xen/qemu-dm-%s.log" %  str(self.vm.info['name_label'])
   25.10 +        if os.path.exists(logfile):
   25.11 +            if os.path.exists(logfile + ".1"):
   25.12 +                os.unlink(logfile + ".1")
   25.13 +            os.rename(logfile, logfile + ".1")
   25.14 +
   25.15 +        null = os.open("/dev/null", os.O_RDONLY)
   25.16 +        logfd = os.open(logfile, os.O_WRONLY|os.O_CREAT|os.O_TRUNC)
   25.17 +        
   25.18 +        pid = os.fork()
   25.19 +        if pid == 0: #child
   25.20 +            try:
   25.21 +                os.dup2(null, 0)
   25.22 +                os.dup2(logfd, 1)
   25.23 +                os.dup2(logfd, 2)
   25.24 +                os.close(null)
   25.25 +                os.close(logfd)
   25.26 +                try:
   25.27 +                    os.execve(self.device_model, args, env)
   25.28 +                except:
   25.29 +                    os._exit(127)
   25.30 +            except:
   25.31 +                os._exit(127)
   25.32 +        else:
   25.33 +            self.pid = pid
   25.34 +            os.close(null)
   25.35 +            os.close(logfd)
   25.36          self.vm.storeDom("image/device-model-pid", self.pid)
   25.37          log.info("device model pid: %d", self.pid)
   25.38  
    26.1 --- a/tools/python/xen/xm/main.py	Fri Feb 29 09:18:01 2008 -0700
    26.2 +++ b/tools/python/xen/xm/main.py	Fri Feb 29 09:19:58 2008 -0700
    26.3 @@ -699,9 +699,6 @@ def xm_save(args):
    26.4          err(opterr)
    26.5          sys.exit(1)
    26.6  
    26.7 -    dom = params[0]
    26.8 -    savefile = params[1]
    26.9 -
   26.10      checkpoint = False
   26.11      for (k, v) in options:
   26.12          if k in ['-c', '--checkpoint']:
   26.13 @@ -710,9 +707,9 @@ def xm_save(args):
   26.14      if len(params) != 2:
   26.15          err("Wrong number of parameters")
   26.16          usage('save')
   26.17 -        sys.exit(1)
   26.18  
   26.19 -    savefile = os.path.abspath(savefile)
   26.20 +    dom = params[0]
   26.21 +    savefile = os.path.abspath(params[1])
   26.22  
   26.23      if not os.access(os.path.dirname(savefile), os.W_OK):
   26.24          err("xm save: Unable to create file %s" % savefile)
    27.1 --- a/tools/python/xen/xm/migrate.py	Fri Feb 29 09:18:01 2008 -0700
    27.2 +++ b/tools/python/xen/xm/migrate.py	Fri Feb 29 09:19:58 2008 -0700
    27.3 @@ -43,6 +43,10 @@ gopts.opt('port', short='p', val='portnu
    27.4            fn=set_int, default=0,
    27.5            use="Use specified port for migration.")
    27.6  
    27.7 +gopts.opt('node', short='n', val='nodenum',
    27.8 +          fn=set_int, default=-1,
    27.9 +          use="Use specified NUMA node on target.")
   27.10 +
   27.11  gopts.opt('resource', short='r', val='MBIT',
   27.12            fn=set_int, default=0,
   27.13            use="Set level of resource usage for migration.")
   27.14 @@ -65,11 +69,13 @@ def main(argv):
   27.15          vm_ref = get_single_vm(dom)
   27.16          other_config = {
   27.17              "port":     opts.vals.port,
   27.18 -            "resource": opts.vals.resource
   27.19 +            "resource": opts.vals.resource,
   27.20 +            "node":     opts.vals.node
   27.21              }
   27.22          server.xenapi.VM.migrate(vm_ref, dst, bool(opts.vals.live),
   27.23                                   other_config)
   27.24      else:
   27.25          server.xend.domain.migrate(dom, dst, opts.vals.live,
   27.26                                     opts.vals.resource,
   27.27 -                                   opts.vals.port)
   27.28 +                                   opts.vals.port,
   27.29 +                                   opts.vals.node)
    28.1 --- a/tools/xenstat/libxenstat/src/xenstat_solaris.c	Fri Feb 29 09:18:01 2008 -0700
    28.2 +++ b/tools/xenstat/libxenstat/src/xenstat_solaris.c	Fri Feb 29 09:19:58 2008 -0700
    28.3 @@ -113,49 +113,23 @@ static void xenstat_uninit_devs(xenstat_
    28.4  	priv->kc = NULL;
    28.5  }
    28.6  
    28.7 -static int parse_nic(const char *nic, char *module, int *instance)
    28.8 -{
    28.9 -	const char *c;
   28.10 -
   28.11 -	for (c = &nic[strlen(nic) - 1]; c != nic && isdigit(*c); c--)
   28.12 -		;
   28.13 -
   28.14 -	if (c == nic)
   28.15 -		return 0;
   28.16 -
   28.17 -	c++;
   28.18 -
   28.19 -	if (sscanf(c, "%d", instance) != 1)
   28.20 -		return 0;
   28.21 -
   28.22 -	strncpy(module, nic, c - nic);
   28.23 -	module[c - nic] = '\0';
   28.24 -	return 1;
   28.25 -}
   28.26 -
   28.27  static int update_dev_stats(priv_data_t *priv, stdevice_t *dev)
   28.28  {
   28.29 -	char mod[256];
   28.30 -	const char *name;
   28.31 -	int inst;
   28.32  	kstat_t *ksp;
   28.33  
   28.34 -	if (dev->type == DEVICE_NIC) {
   28.35 -		if (!parse_nic(dev->name, mod, &inst))
   28.36 -			return 0;
   28.37 -		name = "mac";
   28.38 -	} else {
   28.39 -		strcpy(mod, "xdb");
   28.40 -		inst = dev->instance;
   28.41 -		name = "req_statistics";
   28.42 -	}
   28.43 -
   28.44  	if (kstat_chain_update(priv->kc) == -1)
   28.45  		return 0;
   28.46  
   28.47 -	ksp = kstat_lookup(priv->kc, mod, inst, (char *)name);
   28.48 +	if (dev->type == DEVICE_NIC) {
   28.49 +		ksp = kstat_lookup(priv->kc, "link", 0, (char *)dev->name);
   28.50 +	} else {
   28.51 +		ksp = kstat_lookup(priv->kc, "xdb", dev->instance,
   28.52 +		    (char *)"req_statistics");
   28.53 +	}
   28.54 +
   28.55  	if (ksp == NULL)
   28.56  		return 0;
   28.57 +
   28.58  	if (kstat_read(priv->kc, ksp, NULL) == -1)
   28.59  		return 0;
   28.60  
    29.1 --- a/tools/xentrace/xentrace.c	Fri Feb 29 09:18:01 2008 -0700
    29.2 +++ b/tools/xentrace/xentrace.c	Fri Feb 29 09:19:58 2008 -0700
    29.3 @@ -15,7 +15,6 @@
    29.4  #include <sys/mman.h>
    29.5  #include <sys/stat.h>
    29.6  #include <sys/types.h>
    29.7 -#include <sys/vfs.h>
    29.8  #include <fcntl.h>
    29.9  #include <unistd.h>
   29.10  #include <errno.h>
   29.11 @@ -25,6 +24,7 @@
   29.12  #include <getopt.h>
   29.13  #include <assert.h>
   29.14  #include <sys/poll.h>
   29.15 +#include <sys/statvfs.h>
   29.16  
   29.17  #include <xen/xen.h>
   29.18  #include <xen/trace.h>
   29.19 @@ -87,7 +87,7 @@ void close_handler(int signal)
   29.20  void write_buffer(unsigned int cpu, unsigned char *start, int size,
   29.21                 int total_size, int outfd)
   29.22  {
   29.23 -    struct statfs stat;
   29.24 +    struct statvfs stat;
   29.25      size_t written = 0;
   29.26      
   29.27      if ( opts.disk_rsvd != 0 )
   29.28 @@ -95,13 +95,13 @@ void write_buffer(unsigned int cpu, unsi
   29.29          unsigned long long freespace;
   29.30  
   29.31          /* Check that filesystem has enough space. */
   29.32 -        if ( fstatfs (outfd, &stat) )
   29.33 +        if ( fstatvfs (outfd, &stat) )
   29.34          {
   29.35                  fprintf(stderr, "Statfs failed!\n");
   29.36                  goto fail;
   29.37          }
   29.38  
   29.39 -        freespace = stat.f_bsize * (unsigned long long)stat.f_bfree;
   29.40 +        freespace = stat.f_frsize * (unsigned long long)stat.f_bfree;
   29.41  
   29.42          if ( total_size )
   29.43              freespace -= total_size;
    30.1 --- a/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c	Fri Feb 29 09:18:01 2008 -0700
    30.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/machine_reboot.c	Fri Feb 29 09:19:58 2008 -0700
    30.3 @@ -71,7 +71,7 @@ static int bp_suspend(void)
    30.4  	return suspend_cancelled;
    30.5  }
    30.6  
    30.7 -int __xen_suspend(int fast_suspend)
    30.8 +int __xen_suspend(int fast_suspend, void (*resume_notifier)(void))
    30.9  {
   30.10  	int err, suspend_cancelled, nr_cpus;
   30.11  	struct ap_suspend_info info;
   30.12 @@ -101,6 +101,7 @@ int __xen_suspend(int fast_suspend)
   30.13  
   30.14  	local_irq_disable();
   30.15  	suspend_cancelled = bp_suspend();
   30.16 +	resume_notifier();
   30.17  	local_irq_enable();
   30.18  
   30.19  	smp_mb();
    31.1 --- a/xen/arch/ia64/xen/machine_kexec.c	Fri Feb 29 09:18:01 2008 -0700
    31.2 +++ b/xen/arch/ia64/xen/machine_kexec.c	Fri Feb 29 09:19:58 2008 -0700
    31.3 @@ -24,6 +24,7 @@
    31.4  #include <linux/cpu.h>
    31.5  #include <linux/notifier.h>
    31.6  #include <asm/dom_fw_dom0.h>
    31.7 +#include <asm-generic/sections.h>
    31.8  
    31.9  #define kexec_flush_icache_page(page)					\
   31.10  do {									\
   31.11 @@ -144,6 +145,54 @@ void machine_reboot_kexec(xen_kexec_imag
   31.12  	machine_kexec(image);
   31.13  }
   31.14  
   31.15 +static int machine_kexec_get_xen(xen_kexec_range_t *range)
   31.16 +{
   31.17 +	range->start = range->start = ia64_tpa(_text);
   31.18 +	range->size = (unsigned long)_end - (unsigned long)_text;
   31.19 +	return 0;
   31.20 +}
   31.21 +
   31.22 +#define ELF_PAGE_SHIFT 16
   31.23 +#define ELF_PAGE_SIZE  (__IA64_UL_CONST(1) << ELF_PAGE_SHIFT)
   31.24 +#define ELF_PAGE_MASK  (~(ELF_PAGE_SIZE - 1))
   31.25 +
   31.26 +static int machine_kexec_get_xenheap(xen_kexec_range_t *range)
   31.27 +{
   31.28 +	range->start = (ia64_tpa(_end) + (ELF_PAGE_SIZE - 1)) & ELF_PAGE_MASK;
   31.29 +	range->size = (unsigned long)xenheap_phys_end -
   31.30 +		      (unsigned long)range->start;
   31.31 +	return 0;
   31.32 +}
   31.33 +
   31.34 +static int machine_kexec_get_boot_param(xen_kexec_range_t *range)
   31.35 +{
   31.36 +	range->start = __pa(ia64_boot_param);
   31.37 +	range->size = sizeof(*ia64_boot_param);
   31.38 +	return 0;
   31.39 +}
   31.40 +
   31.41 +static int machine_kexec_get_efi_memmap(xen_kexec_range_t *range)
   31.42 +{
   31.43 +	range->start = ia64_boot_param->efi_memmap;
   31.44 +	range->size = ia64_boot_param->efi_memmap_size;
   31.45 +	return 0;
   31.46 +}
   31.47 +
   31.48 +int machine_kexec_get(xen_kexec_range_t *range)
   31.49 +{
   31.50 +	switch (range->range) {
   31.51 +	case KEXEC_RANGE_MA_XEN:
   31.52 +		return machine_kexec_get_xen(range);
   31.53 +	case KEXEC_RANGE_MA_XENHEAP:
   31.54 +		return machine_kexec_get_xenheap(range);
   31.55 +	case KEXEC_RANGE_MA_BOOT_PARAM:
   31.56 +		return machine_kexec_get_boot_param(range);
   31.57 +	case KEXEC_RANGE_MA_EFI_MEMMAP:
   31.58 +		return machine_kexec_get_efi_memmap(range);
   31.59 +	}
   31.60 +	return -EINVAL;
   31.61 +}
   31.62 +
   31.63  /*
   31.64   * Local variables:
   31.65   * mode: C
    32.1 --- a/xen/arch/powerpc/machine_kexec.c	Fri Feb 29 09:18:01 2008 -0700
    32.2 +++ b/xen/arch/powerpc/machine_kexec.c	Fri Feb 29 09:19:58 2008 -0700
    32.3 @@ -24,6 +24,12 @@ void machine_kexec(xen_kexec_image_t *im
    32.4      printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
    32.5  }
    32.6  
    32.7 +int machine_kexec_get(xen_kexec_image_t *image)
    32.8 +{
    32.9 +    printk("STUB: " __FILE__ ": %s: not implemented\n", __FUNCTION__);
   32.10 +    return -1;
   32.11 +}
   32.12 +
   32.13  /*
   32.14   * Local variables:
   32.15   * mode: C
    33.1 --- a/xen/arch/x86/machine_kexec.c	Fri Feb 29 09:18:01 2008 -0700
    33.2 +++ b/xen/arch/x86/machine_kexec.c	Fri Feb 29 09:19:58 2008 -0700
    33.3 @@ -24,6 +24,9 @@ typedef void (*relocate_new_kernel_t)(
    33.4                  unsigned long *page_list,
    33.5                  unsigned long start_address);
    33.6  
    33.7 +extern int machine_kexec_get_xen(xen_kexec_range_t *range);
    33.8 +
    33.9 +
   33.10  int machine_kexec_load(int type, int slot, xen_kexec_image_t *image)
   33.11  {
   33.12      unsigned long prev_ma = 0;
   33.13 @@ -135,6 +138,13 @@ void machine_kexec(xen_kexec_image_t *im
   33.14      }
   33.15  }
   33.16  
   33.17 +int machine_kexec_get(xen_kexec_range_t *range)
   33.18 +{
   33.19 +	if (range->range != KEXEC_RANGE_MA_XEN)
   33.20 +		return -EINVAL;
   33.21 +	return machine_kexec_get_xen(range);
   33.22 +}
   33.23 +
   33.24  /*
   33.25   * Local variables:
   33.26   * mode: C
    34.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Feb 29 09:18:01 2008 -0700
    34.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Feb 29 09:19:58 2008 -0700
    34.3 @@ -55,12 +55,6 @@
    34.4   * l3-and-l2h-only shadow mode for PAE PV guests that would allow them 
    34.5   * to share l2h pages again. 
    34.6   *
    34.7 - * GUEST_WALK_TABLES TLB FLUSH COALESCE
    34.8 - * guest_walk_tables can do up to three remote TLB flushes as it walks to
    34.9 - * the first l1 of a new pagetable.  Should coalesce the flushes to the end, 
   34.10 - * and if we do flush, re-do the walk.  If anything has changed, then 
   34.11 - * pause all the other vcpus and do the walk *again*.
   34.12 - *
   34.13   * PSE disabled / PSE36
   34.14   * We don't support any modes other than PSE enabled, PSE36 disabled.
   34.15   * Neither of those would be hard to change, but we'd need to be able to 
   34.16 @@ -246,10 +240,95 @@ static uint32_t set_ad_bits(void *guest_
   34.17      return 0;
   34.18  }
   34.19  
   34.20 +/* This validation is called with lock held, and after write permission
   34.21 + * removal. Then check is atomic and no more inconsistent content can
   34.22 + * be observed before lock is released
   34.23 + *
   34.24 + * Return 1 to indicate success and 0 for inconsistency
   34.25 + */
   34.26 +static inline uint32_t
   34.27 +shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw)
   34.28 +{
   34.29 +    struct domain *d = v->domain;
   34.30 +    guest_l1e_t *l1p;
   34.31 +    guest_l2e_t *l2p;
   34.32 +#if GUEST_PAGING_LEVELS >= 4
   34.33 +    guest_l3e_t *l3p;
   34.34 +    guest_l4e_t *l4p;
   34.35 +#endif
   34.36 +    int mismatch = 0;
   34.37 +
   34.38 +    ASSERT(shadow_locked_by_me(d));
   34.39 +
   34.40 +    if ( gw->version ==
   34.41 +         atomic_read(&d->arch.paging.shadow.gtable_dirty_version) )
   34.42 +        return 1;
   34.43 +
   34.44 +    /* We may consider caching guest page mapping from last
   34.45 +     * guest table walk. However considering this check happens
   34.46 +     * relatively less-frequent, and a bit burden here to
   34.47 +     * remap guest page is better than caching mapping in each
   34.48 +     * guest table walk.
   34.49 +     *
   34.50 +     * Also when inconsistency occurs, simply return to trigger
   34.51 +     * another fault instead of re-validate new path to make
   34.52 +     * logic simple.
   34.53 +     */
   34.54 +    perfc_incr(shadow_check_gwalk);
   34.55 +#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
   34.56 +#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
   34.57 +    l4p = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable;
   34.58 +    mismatch |= (gw->l4e.l4 != l4p[guest_l4_table_offset(va)].l4);
   34.59 +    l3p = sh_map_domain_page(gw->l3mfn);
   34.60 +    mismatch |= (gw->l3e.l3 != l3p[guest_l3_table_offset(va)].l3);
   34.61 +    sh_unmap_domain_page(l3p);
   34.62 +#else
   34.63 +    mismatch |= (gw->l3e.l3 !=
   34.64 +                 v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)].l3);
   34.65 +#endif
   34.66 +    l2p = sh_map_domain_page(gw->l2mfn);
   34.67 +    mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
   34.68 +    sh_unmap_domain_page(l2p);
   34.69 +#else
   34.70 +    l2p = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable;
   34.71 +    mismatch |= (gw->l2e.l2 != l2p[guest_l2_table_offset(va)].l2);
   34.72 +#endif
   34.73 +    if ( !(guest_supports_superpages(v) &&
   34.74 +           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
   34.75 +    {
   34.76 +        l1p = sh_map_domain_page(gw->l1mfn);
   34.77 +        mismatch |= (gw->l1e.l1 != l1p[guest_l1_table_offset(va)].l1);
   34.78 +        sh_unmap_domain_page(l1p);
   34.79 +    }
   34.80 +
   34.81 +    return !mismatch;
   34.82 +}
   34.83 +
   34.84 +/* Remove write access permissions from a gwalk_t in a batch, and
   34.85 + * return OR-ed result for TLB flush hint
   34.86 + */
   34.87 +static inline uint32_t
   34.88 +gw_remove_write_accesses(struct vcpu *v, unsigned long va, walk_t *gw)
   34.89 +{
   34.90 +    int rc = 0;
   34.91 +
   34.92 +#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
   34.93 +#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
   34.94 +    rc = sh_remove_write_access(v, gw->l3mfn, 3, va);
   34.95 +#endif
   34.96 +    rc |= sh_remove_write_access(v, gw->l2mfn, 2, va);
   34.97 +#endif
   34.98 +    if ( !(guest_supports_superpages(v) &&
   34.99 +           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
  34.100 +        rc |= sh_remove_write_access(v, gw->l1mfn, 1, va);
  34.101 +
  34.102 +    return rc;
  34.103 +}
  34.104 +
  34.105  /* Walk the guest pagetables, after the manner of a hardware walker. 
  34.106   *
  34.107   * Inputs: a vcpu, a virtual address, a walk_t to fill, a 
  34.108 - *         pointer to a pagefault code, and a flag "shadow_op".
  34.109 + *         pointer to a pagefault code
  34.110   * 
  34.111   * We walk the vcpu's guest pagetables, filling the walk_t with what we
  34.112   * see and adding any Accessed and Dirty bits that are needed in the
  34.113 @@ -257,10 +336,9 @@ static uint32_t set_ad_bits(void *guest_
  34.114   * we go.  For the purposes of reading pagetables we treat all non-RAM
  34.115   * memory as contining zeroes.
  34.116   * 
  34.117 - * If "shadow_op" is non-zero, we are serving a genuine guest memory access, 
  34.118 - * and must (a) be under the shadow lock, and (b) remove write access
  34.119 - * from any guest PT pages we see, as we will be shadowing them soon
  34.120 - * and will rely on the contents' not having changed.
  34.121 + * The walk is done in a lock-free style, with some sanity check postponed
  34.122 + * after grabbing shadow lock later. Those delayed checks will make sure
  34.123 + * no inconsistent mapping being translated into shadow page table.
  34.124   * 
  34.125   * Returns 0 for success, or the set of permission bits that we failed on 
  34.126   * if the walk did not complete.
  34.127 @@ -268,8 +346,7 @@ static uint32_t set_ad_bits(void *guest_
  34.128   * checked the old return code anyway.
  34.129   */
  34.130  static uint32_t
  34.131 -guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, 
  34.132 -                  uint32_t pfec, int shadow_op)
  34.133 +guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, uint32_t pfec)
  34.134  {
  34.135      struct domain *d = v->domain;
  34.136      p2m_type_t p2mt;
  34.137 @@ -282,12 +359,13 @@ guest_walk_tables(struct vcpu *v, unsign
  34.138      uint32_t gflags, mflags, rc = 0;
  34.139      int pse;
  34.140  
  34.141 -    ASSERT(!shadow_op || shadow_locked_by_me(d));
  34.142 -    
  34.143      perfc_incr(shadow_guest_walk);
  34.144      memset(gw, 0, sizeof(*gw));
  34.145      gw->va = va;
  34.146  
  34.147 +    gw->version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version);
  34.148 +    rmb();
  34.149 +
  34.150      /* Mandatory bits that must be set in every entry.  We invert NX, to
  34.151       * calculate as if there were an "X" bit that allowed access. 
  34.152       * We will accumulate, in rc, the set of flags that are missing. */
  34.153 @@ -312,9 +390,7 @@ guest_walk_tables(struct vcpu *v, unsign
  34.154          goto out;
  34.155      }
  34.156      ASSERT(mfn_valid(gw->l3mfn));
  34.157 -    /* This mfn is a pagetable: make sure the guest can't write to it. */
  34.158 -    if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
  34.159 -        flush_tlb_mask(d->domain_dirty_cpumask); 
  34.160 +
  34.161      /* Get the l3e and check its flags*/
  34.162      l3p = sh_map_domain_page(gw->l3mfn);
  34.163      gw->l3e = l3p[guest_l3_table_offset(va)];
  34.164 @@ -343,9 +419,7 @@ guest_walk_tables(struct vcpu *v, unsign
  34.165          goto out;
  34.166      }
  34.167      ASSERT(mfn_valid(gw->l2mfn));
  34.168 -    /* This mfn is a pagetable: make sure the guest can't write to it. */
  34.169 -    if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
  34.170 -        flush_tlb_mask(d->domain_dirty_cpumask); 
  34.171 +
  34.172      /* Get the l2e */
  34.173      l2p = sh_map_domain_page(gw->l2mfn);
  34.174      gw->l2e = l2p[guest_l2_table_offset(va)];
  34.175 @@ -403,10 +477,6 @@ guest_walk_tables(struct vcpu *v, unsign
  34.176              goto out;
  34.177          }
  34.178          ASSERT(mfn_valid(gw->l1mfn));
  34.179 -        /* This mfn is a pagetable: make sure the guest can't write to it. */
  34.180 -        if ( shadow_op 
  34.181 -             && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
  34.182 -            flush_tlb_mask(d->domain_dirty_cpumask); 
  34.183          l1p = sh_map_domain_page(gw->l1mfn);
  34.184          gw->l1e = l1p[guest_l1_table_offset(va)];
  34.185          gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
  34.186 @@ -548,8 +618,7 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
  34.187      // XXX -- this is expensive, but it's easy to cobble together...
  34.188      // FIXME!
  34.189  
  34.190 -    shadow_lock(v->domain);
  34.191 -    if ( guest_walk_tables(v, addr, &gw, PFEC_page_present, 1) == 0 
  34.192 +    if ( guest_walk_tables(v, addr, &gw, PFEC_page_present) == 0 
  34.193           && mfn_valid(gw.l1mfn) )
  34.194      {
  34.195          if ( gl1mfn )
  34.196 @@ -558,8 +627,6 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
  34.197              (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
  34.198      }
  34.199  
  34.200 -    shadow_unlock(v->domain);
  34.201 -
  34.202      return pl1e;
  34.203  }
  34.204  
  34.205 @@ -573,10 +640,8 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
  34.206      // XXX -- this is expensive, but it's easy to cobble together...
  34.207      // FIXME!
  34.208  
  34.209 -    shadow_lock(v->domain);
  34.210 -    (void) guest_walk_tables(v, addr, &gw, PFEC_page_present, 1);
  34.211 +    (void) guest_walk_tables(v, addr, &gw, PFEC_page_present);
  34.212      *(guest_l1e_t *)eff_l1e = gw.l1e;
  34.213 -    shadow_unlock(v->domain);
  34.214  }
  34.215  #endif /* CONFIG==SHADOW==GUEST */
  34.216  
  34.217 @@ -2842,14 +2907,12 @@ static int sh_page_fault(struct vcpu *v,
  34.218          return 0;
  34.219      }
  34.220  
  34.221 -    shadow_lock(d);
  34.222 -    
  34.223 -    shadow_audit_tables(v);
  34.224 -    
  34.225 -    if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 )
  34.226 +    if ( guest_walk_tables(v, va, &gw, regs->error_code) != 0 )
  34.227      {
  34.228          perfc_incr(shadow_fault_bail_real_fault);
  34.229 -        goto not_a_shadow_fault;
  34.230 +        SHADOW_PRINTK("not a shadow fault\n");
  34.231 +        reset_early_unshadow(v);
  34.232 +        return 0;
  34.233      }
  34.234  
  34.235      /* It's possible that the guest has put pagetables in memory that it has 
  34.236 @@ -2859,12 +2922,9 @@ static int sh_page_fault(struct vcpu *v,
  34.237      if ( unlikely(d->is_shutting_down) )
  34.238      {
  34.239          SHADOW_PRINTK("guest is shutting down\n");
  34.240 -        shadow_unlock(d);
  34.241          return 0;
  34.242      }
  34.243  
  34.244 -    sh_audit_gw(v, &gw);
  34.245 -
  34.246      /* What kind of access are we dealing with? */
  34.247      ft = ((regs->error_code & PFEC_write_access)
  34.248            ? ft_demand_write : ft_demand_read);
  34.249 @@ -2879,7 +2939,8 @@ static int sh_page_fault(struct vcpu *v,
  34.250          perfc_incr(shadow_fault_bail_bad_gfn);
  34.251          SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
  34.252                        gfn_x(gfn), mfn_x(gmfn));
  34.253 -        goto not_a_shadow_fault;
  34.254 +        reset_early_unshadow(v);
  34.255 +        return 0;
  34.256      }
  34.257  
  34.258  #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
  34.259 @@ -2888,6 +2949,28 @@ static int sh_page_fault(struct vcpu *v,
  34.260                  regs->error_code | PFEC_page_present);
  34.261  #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
  34.262  
  34.263 +    shadow_lock(d);
  34.264 +
  34.265 +    if ( gw_remove_write_accesses(v, va, &gw) )
  34.266 +    {
   34.267 +        /* Write-permission removal is also a hint that other gwalks
   34.268 +         * overlapping with this one may be inconsistent.
   34.269 +         */
  34.270 +        perfc_incr(shadow_rm_write_flush_tlb);
  34.271 +        atomic_inc(&d->arch.paging.shadow.gtable_dirty_version);
  34.272 +        flush_tlb_mask(d->domain_dirty_cpumask);
  34.273 +    }
  34.274 +
  34.275 +    if ( !shadow_check_gwalk(v, va, &gw) )
  34.276 +    {
  34.277 +        perfc_incr(shadow_inconsistent_gwalk);
  34.278 +        shadow_unlock(d);
  34.279 +        return EXCRET_fault_fixed;
  34.280 +    }
  34.281 +
  34.282 +    shadow_audit_tables(v);
  34.283 +    sh_audit_gw(v, &gw);
  34.284 +
  34.285      /* Make sure there is enough free shadow memory to build a chain of
  34.286       * shadow tables. (We never allocate a top-level shadow on this path,
  34.287       * only a 32b l1, pae l1, or 64b l3+2+1. Note that while
  34.288 @@ -3223,7 +3306,7 @@ sh_gva_to_gfn(struct vcpu *v, unsigned l
  34.289          return vtlb_gfn;
  34.290  #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
  34.291  
  34.292 -    if ( guest_walk_tables(v, va, &gw, pfec[0], 0) != 0 )
  34.293 +    if ( guest_walk_tables(v, va, &gw, pfec[0]) != 0 )
  34.294      {
  34.295          if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
  34.296              pfec[0] &= ~PFEC_page_present;
  34.297 @@ -4276,6 +4359,8 @@ static void emulate_unmap_dest(struct vc
  34.298      }
  34.299      else 
  34.300          sh_unmap_domain_page(addr);
  34.301 +
  34.302 +    atomic_inc(&v->domain->arch.paging.shadow.gtable_dirty_version);
  34.303  }
  34.304  
  34.305  int
  34.306 @@ -4430,29 +4515,13 @@ static char * sh_audit_flags(struct vcpu
  34.307      return NULL;
  34.308  }
  34.309  
  34.310 -static inline mfn_t
  34.311 -audit_gfn_to_mfn(struct vcpu *v, gfn_t gfn, mfn_t gmfn)
  34.312 -/* Convert this gfn to an mfn in the manner appropriate for the
  34.313 - * guest pagetable it's used in (gmfn) */ 
  34.314 -{
  34.315 -    p2m_type_t p2mt;
  34.316 -    if ( !shadow_mode_translate(v->domain) )
  34.317 -        return _mfn(gfn_x(gfn));
  34.318 -    
  34.319 -    if ( (mfn_to_page(gmfn)->u.inuse.type_info & PGT_type_mask)
  34.320 -         != PGT_writable_page ) 
  34.321 -        return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
  34.322 -    else 
  34.323 -        return gfn_to_mfn(v->domain, gfn, &p2mt);
  34.324 -} 
  34.325 -
  34.326 -
  34.327  int sh_audit_l1_table(struct vcpu *v, mfn_t sl1mfn, mfn_t x)
  34.328  {
  34.329      guest_l1e_t *gl1e, *gp;
  34.330      shadow_l1e_t *sl1e;
  34.331      mfn_t mfn, gmfn, gl1mfn;
  34.332      gfn_t gfn;
  34.333 +    p2m_type_t p2mt;
  34.334      char *s;
  34.335      int done = 0;
  34.336      
  34.337 @@ -4491,7 +4560,7 @@ int sh_audit_l1_table(struct vcpu *v, mf
  34.338              {
  34.339                  gfn = guest_l1e_get_gfn(*gl1e);
  34.340                  mfn = shadow_l1e_get_mfn(*sl1e);
  34.341 -                gmfn = audit_gfn_to_mfn(v, gfn, gl1mfn);
  34.342 +                gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
  34.343                  if ( mfn_x(gmfn) != mfn_x(mfn) )
  34.344                      AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
  34.345                                 " --> %" PRI_mfn " != mfn %" PRI_mfn,
  34.346 @@ -4532,6 +4601,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
  34.347      shadow_l2e_t *sl2e;
  34.348      mfn_t mfn, gmfn, gl2mfn;
  34.349      gfn_t gfn;
  34.350 +    p2m_type_t p2mt;
  34.351      char *s;
  34.352      int done = 0;
  34.353  
  34.354 @@ -4550,7 +4620,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
  34.355              mfn = shadow_l2e_get_mfn(*sl2e);
  34.356              gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)  
  34.357                  ? get_fl1_shadow_status(v, gfn)
  34.358 -                : get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl2mfn), 
  34.359 +                : get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt), 
  34.360                                      SH_type_l1_shadow);
  34.361              if ( mfn_x(gmfn) != mfn_x(mfn) )
  34.362                  AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
  34.363 @@ -4558,7 +4628,7 @@ int sh_audit_l2_table(struct vcpu *v, mf
  34.364                             " --> %" PRI_mfn " != mfn %" PRI_mfn,
  34.365                             gfn_x(gfn), 
  34.366                             (guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
  34.367 -                           : mfn_x(audit_gfn_to_mfn(v, gfn, gl2mfn)),
  34.368 +                           : mfn_x(gfn_to_mfn(v->domain, gfn, &p2mt)),
  34.369                             mfn_x(gmfn), mfn_x(mfn));
  34.370          }
  34.371      });
  34.372 @@ -4573,6 +4643,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
  34.373      shadow_l3e_t *sl3e;
  34.374      mfn_t mfn, gmfn, gl3mfn;
  34.375      gfn_t gfn;
  34.376 +    p2m_type_t p2mt;
  34.377      char *s;
  34.378      int done = 0;
  34.379  
  34.380 @@ -4589,7 +4660,7 @@ int sh_audit_l3_table(struct vcpu *v, mf
  34.381          {
  34.382              gfn = guest_l3e_get_gfn(*gl3e);
  34.383              mfn = shadow_l3e_get_mfn(*sl3e);
  34.384 -            gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl3mfn), 
  34.385 +            gmfn = get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt), 
  34.386                                       ((GUEST_PAGING_LEVELS == 3 ||
  34.387                                         is_pv_32on64_vcpu(v))
  34.388                                        && !shadow_mode_external(v->domain)
  34.389 @@ -4612,6 +4683,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
  34.390      shadow_l4e_t *sl4e;
  34.391      mfn_t mfn, gmfn, gl4mfn;
  34.392      gfn_t gfn;
  34.393 +    p2m_type_t p2mt;
  34.394      char *s;
  34.395      int done = 0;
  34.396  
  34.397 @@ -4628,7 +4700,7 @@ int sh_audit_l4_table(struct vcpu *v, mf
  34.398          {
  34.399              gfn = guest_l4e_get_gfn(*gl4e);
  34.400              mfn = shadow_l4e_get_mfn(*sl4e);
  34.401 -            gmfn = get_shadow_status(v, audit_gfn_to_mfn(v, gfn, gl4mfn), 
  34.402 +            gmfn = get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt), 
  34.403                                       SH_type_l3_shadow);
  34.404              if ( mfn_x(gmfn) != mfn_x(mfn) )
  34.405                  AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
    35.1 --- a/xen/arch/x86/mm/shadow/types.h	Fri Feb 29 09:18:01 2008 -0700
    35.2 +++ b/xen/arch/x86/mm/shadow/types.h	Fri Feb 29 09:19:58 2008 -0700
    35.3 @@ -435,6 +435,7 @@ struct shadow_walk_t
    35.4  #endif
    35.5      mfn_t l2mfn;                /* MFN that the level 2 entry was in */
    35.6      mfn_t l1mfn;                /* MFN that the level 1 entry was in */
    35.7 +    int version;                /* Saved guest dirty version */
    35.8  };
    35.9  
   35.10  /* macros for dealing with the naming of the internal function names of the
    36.1 --- a/xen/arch/x86/x86_32/Makefile	Fri Feb 29 09:18:01 2008 -0700
    36.2 +++ b/xen/arch/x86/x86_32/Makefile	Fri Feb 29 09:19:58 2008 -0700
    36.3 @@ -4,6 +4,7 @@ obj-y += gpr_switch.o
    36.4  obj-y += mm.o
    36.5  obj-y += seg_fixup.o
    36.6  obj-y += traps.o
    36.7 +obj-y += machine_kexec.o
    36.8  
    36.9  obj-$(crash_debug) += gdbstub.o
   36.10  
    37.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.2 +++ b/xen/arch/x86/x86_32/machine_kexec.c	Fri Feb 29 09:19:58 2008 -0700
    37.3 @@ -0,0 +1,33 @@
    37.4 +/******************************************************************************
    37.5 + * machine_kexec.c
    37.6 + *
    37.7 + * Xen port written by:
    37.8 + * - Simon 'Horms' Horman <horms@verge.net.au>
    37.9 + * - Magnus Damm <magnus@valinux.co.jp>
   37.10 + */
   37.11 +
   37.12 +#ifndef CONFIG_COMPAT
   37.13 +
   37.14 +#include <xen/types.h>
   37.15 +#include <xen/kernel.h>
   37.16 +#include <asm/page.h>
   37.17 +#include <public/kexec.h>
   37.18 +
   37.19 +int machine_kexec_get_xen(xen_kexec_range_t *range)
   37.20 +{
   37.21 +        range->start = virt_to_maddr(_start);
   37.22 +        range->size = (unsigned long)xenheap_phys_end -
   37.23 +                      (unsigned long)range->start;
   37.24 +        return 0;
   37.25 +}
   37.26 +#endif
   37.27 +
   37.28 +/*
   37.29 + * Local variables:
   37.30 + * mode: C
   37.31 + * c-set-style: "BSD"
   37.32 + * c-basic-offset: 4
   37.33 + * tab-width: 4
   37.34 + * indent-tabs-mode: nil
   37.35 + * End:
   37.36 + */
    38.1 --- a/xen/arch/x86/x86_64/Makefile	Fri Feb 29 09:18:01 2008 -0700
    38.2 +++ b/xen/arch/x86/x86_64/Makefile	Fri Feb 29 09:19:58 2008 -0700
    38.3 @@ -4,6 +4,7 @@ obj-y += entry.o
    38.4  obj-y += gpr_switch.o
    38.5  obj-y += mm.o
    38.6  obj-y += traps.o
    38.7 +obj-y += machine_kexec.o
    38.8  
    38.9  obj-$(crash_debug)   += gdbstub.o
   38.10  obj-$(CONFIG_COMPAT) += compat.o
    39.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.2 +++ b/xen/arch/x86/x86_64/machine_kexec.c	Fri Feb 29 09:19:58 2008 -0700
    39.3 @@ -0,0 +1,32 @@
    39.4 +/******************************************************************************
    39.5 + * machine_kexec.c
    39.6 + *
    39.7 + * Xen port written by:
    39.8 + * - Simon 'Horms' Horman <horms@verge.net.au>
    39.9 + * - Magnus Damm <magnus@valinux.co.jp>
   39.10 + */
   39.11 +
   39.12 +#ifndef CONFIG_COMPAT
   39.13 +
   39.14 +#include <xen/types.h>
   39.15 +#include <asm/page.h>
   39.16 +#include <public/kexec.h>
   39.17 +
   39.18 +int machine_kexec_get_xen(xen_kexec_range_t *range)
   39.19 +{
   39.20 +        range->start = xenheap_phys_start;
   39.21 +        range->size = (unsigned long)xenheap_phys_end -
   39.22 +                      (unsigned long)range->start;
   39.23 +        return 0;
   39.24 +}
   39.25 +#endif
   39.26 +
   39.27 +/*
   39.28 + * Local variables:
   39.29 + * mode: C
   39.30 + * c-set-style: "BSD"
   39.31 + * c-basic-offset: 4
   39.32 + * tab-width: 4
   39.33 + * indent-tabs-mode: nil
   39.34 + * End:
   39.35 + */
    40.1 --- a/xen/common/compat/kexec.c	Fri Feb 29 09:18:01 2008 -0700
    40.2 +++ b/xen/common/compat/kexec.c	Fri Feb 29 09:19:58 2008 -0700
    40.3 @@ -9,11 +9,6 @@
    40.4  
    40.5  #define do_kexec_op compat_kexec_op
    40.6  
    40.7 -#undef kexec_get
    40.8 -#define kexec_get(x)      compat_kexec_get_##x
    40.9 -#define xen_kexec_range   compat_kexec_range
   40.10 -#define xen_kexec_range_t compat_kexec_range_t
   40.11 -
   40.12  #define kexec_load_unload compat_kexec_load_unload
   40.13  #define xen_kexec_load    compat_kexec_load
   40.14  #define xen_kexec_load_t  compat_kexec_load_t
    41.1 --- a/xen/common/kexec.c	Fri Feb 29 09:18:01 2008 -0700
    41.2 +++ b/xen/common/kexec.c	Fri Feb 29 09:19:58 2008 -0700
    41.3 @@ -20,6 +20,7 @@
    41.4  #include <xen/spinlock.h>
    41.5  #include <xen/version.h>
    41.6  #include <xen/console.h>
    41.7 +#include <xen/kexec.h>
    41.8  #include <public/elfnote.h>
    41.9  #include <xsm/xsm.h>
   41.10  
   41.11 @@ -153,11 +154,7 @@ static int sizeof_note(const char *name,
   41.12              ELFNOTE_ALIGN(descsz));
   41.13  }
   41.14  
   41.15 -#define kexec_get(x)      kexec_get_##x
   41.16 -
   41.17 -#endif
   41.18 -
   41.19 -static int kexec_get(reserve)(xen_kexec_range_t *range)
   41.20 +static int kexec_get_reserve(xen_kexec_range_t *range)
   41.21  {
   41.22      if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) {
   41.23          range->start = kexec_crash_area.start;
   41.24 @@ -168,18 +165,7 @@ static int kexec_get(reserve)(xen_kexec_
   41.25      return 0;
   41.26  }
   41.27  
   41.28 -static int kexec_get(xen)(xen_kexec_range_t *range)
   41.29 -{
   41.30 -#ifdef CONFIG_X86_64
   41.31 -    range->start = xenheap_phys_start;
   41.32 -#else
   41.33 -    range->start = virt_to_maddr(_start);
   41.34 -#endif
   41.35 -    range->size = (unsigned long)xenheap_phys_end - (unsigned long)range->start;
   41.36 -    return 0;
   41.37 -}
   41.38 -
   41.39 -static int kexec_get(cpu)(xen_kexec_range_t *range)
   41.40 +static int kexec_get_cpu(xen_kexec_range_t *range)
   41.41  {
   41.42      int nr = range->nr;
   41.43      int nr_bytes = 0;
   41.44 @@ -223,7 +209,27 @@ static int kexec_get(cpu)(xen_kexec_rang
   41.45      return 0;
   41.46  }
   41.47  
   41.48 -static int kexec_get(range)(XEN_GUEST_HANDLE(void) uarg)
   41.49 +static int kexec_get_range_internal(xen_kexec_range_t *range)
   41.50 +{
   41.51 +    int ret = -EINVAL;
   41.52 +
   41.53 +    switch ( range->range )
   41.54 +    {
   41.55 +    case KEXEC_RANGE_MA_CRASH:
   41.56 +        ret = kexec_get_reserve(range);
   41.57 +        break;
   41.58 +    case KEXEC_RANGE_MA_CPU:
   41.59 +        ret = kexec_get_cpu(range);
   41.60 +        break;
   41.61 +    default:
   41.62 +        ret = machine_kexec_get(range);
   41.63 +        break;
   41.64 +    }
   41.65 +
   41.66 +    return ret;
   41.67 +}
   41.68 +
   41.69 +static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg)
   41.70  {
   41.71      xen_kexec_range_t range;
   41.72      int ret = -EINVAL;
   41.73 @@ -231,18 +237,7 @@ static int kexec_get(range)(XEN_GUEST_HA
   41.74      if ( unlikely(copy_from_guest(&range, uarg, 1)) )
   41.75          return -EFAULT;
   41.76  
   41.77 -    switch ( range.range )
   41.78 -    {
   41.79 -    case KEXEC_RANGE_MA_CRASH:
   41.80 -        ret = kexec_get(reserve)(&range);
   41.81 -        break;
   41.82 -    case KEXEC_RANGE_MA_XEN:
   41.83 -        ret = kexec_get(xen)(&range);
   41.84 -        break;
   41.85 -    case KEXEC_RANGE_MA_CPU:
   41.86 -        ret = kexec_get(cpu)(&range);
   41.87 -        break;
   41.88 -    }
   41.89 +    ret = kexec_get_range_internal(&range);
   41.90  
   41.91      if ( ret == 0 && unlikely(copy_to_guest(uarg, &range, 1)) )
   41.92          return -EFAULT;
   41.93 @@ -250,6 +245,42 @@ static int kexec_get(range)(XEN_GUEST_HA
   41.94      return ret;
   41.95  }
   41.96  
   41.97 +#else /* COMPAT */
   41.98 +
   41.99 +#ifdef CONFIG_COMPAT
  41.100 +static int kexec_get_range_compat(XEN_GUEST_HANDLE(void) uarg)
  41.101 +{
  41.102 +    xen_kexec_range_t range;
  41.103 +    compat_kexec_range_t compat_range;
  41.104 +    int ret = -EINVAL;
  41.105 +
  41.106 +    if ( unlikely(copy_from_guest(&compat_range, uarg, 1)) )
  41.107 +        return -EFAULT;
  41.108 +
  41.109 +    range.range = compat_range.range;
  41.110 +    range.nr = compat_range.nr;
  41.111 +    range.size = compat_range.size;
  41.112 +    range.start = compat_range.start;
  41.113 +
  41.114 +    ret = kexec_get_range_internal(&range);
  41.115 +
  41.116 +    if ( ret == 0 ) {
  41.117 +        range.range = compat_range.range;
  41.118 +        range.nr = compat_range.nr;
  41.119 +        range.size = compat_range.size;
  41.120 +        range.start = compat_range.start;
  41.121 +
  41.122 +        if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
  41.123 +             return -EFAULT;
  41.124 +    }
  41.125 +
  41.126 +    return ret;
  41.127 +}
  41.128 +#endif /* CONFIG_COMPAT */
  41.129 +
  41.130 +#endif /* COMPAT */
  41.131 +
  41.132 +
  41.133  #ifndef COMPAT
  41.134  
  41.135  static int kexec_load_get_bits(int type, int *base, int *bit)
  41.136 @@ -375,7 +406,11 @@ ret_t do_kexec_op(unsigned long op, XEN_
  41.137      switch ( op )
  41.138      {
  41.139      case KEXEC_CMD_kexec_get_range:
  41.140 -        ret = kexec_get(range)(uarg);
  41.141 +#ifndef COMPAT
  41.142 +        ret = kexec_get_range(uarg);
  41.143 +#else
  41.144 +        ret = kexec_get_range_compat(uarg);
  41.145 +#endif
  41.146          break;
  41.147      case KEXEC_CMD_kexec_load:
  41.148      case KEXEC_CMD_kexec_unload:
    42.1 --- a/xen/drivers/acpi/tables.c	Fri Feb 29 09:18:01 2008 -0700
    42.2 +++ b/xen/drivers/acpi/tables.c	Fri Feb 29 09:19:58 2008 -0700
    42.3 @@ -60,6 +60,7 @@ static char *acpi_table_signatures[ACPI_
    42.4  	[ACPI_HPET] = "HPET",
    42.5  	[ACPI_MCFG] = "MCFG",
    42.6  	[ACPI_DMAR] = "DMAR",
    42.7 +	[ACPI_IVRS] = "IVRS",
    42.8  };
    42.9  
   42.10  static char *mps_inti_flags_polarity[] = { "dfl", "high", "res", "low" };
    43.1 --- a/xen/drivers/passthrough/amd/Makefile	Fri Feb 29 09:18:01 2008 -0700
    43.2 +++ b/xen/drivers/passthrough/amd/Makefile	Fri Feb 29 09:19:58 2008 -0700
    43.3 @@ -2,3 +2,4 @@ obj-y += iommu_detect.o
    43.4  obj-y += iommu_init.o
    43.5  obj-y += iommu_map.o
    43.6  obj-y += pci_amd_iommu.o
    43.7 +obj-y += iommu_acpi.o
    44.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    44.2 +++ b/xen/drivers/passthrough/amd/iommu_acpi.c	Fri Feb 29 09:19:58 2008 -0700
    44.3 @@ -0,0 +1,874 @@
    44.4 +/*
    44.5 + * Copyright (C) 2007 Advanced Micro Devices, Inc.
    44.6 + * Author: Leo Duran <leo.duran@amd.com>
    44.7 + * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
    44.8 + *
    44.9 + * This program is free software; you can redistribute it and/or modify
   44.10 + * it under the terms of the GNU General Public License as published by
   44.11 + * the Free Software Foundation; either version 2 of the License, or
   44.12 + * (at your option) any later version.
   44.13 + *
   44.14 + * This program is distributed in the hope that it will be useful,
   44.15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   44.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   44.17 + * GNU General Public License for more details.
   44.18 + *
   44.19 + * You should have received a copy of the GNU General Public License
   44.20 + * along with this program; if not, write to the Free Software
   44.21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
   44.22 + */
   44.23 +
   44.24 +#include <xen/config.h>
   44.25 +#include <xen/errno.h>
   44.26 +#include <asm/amd-iommu.h>
   44.27 +#include <asm/hvm/svm/amd-iommu-proto.h>
   44.28 +#include <asm/hvm/svm/amd-iommu-acpi.h>
   44.29 +
   44.30 +extern unsigned long amd_iommu_page_entries;
   44.31 +extern unsigned short ivrs_bdf_entries;
   44.32 +extern struct ivrs_mappings *ivrs_mappings;
   44.33 +
   44.34 +static struct amd_iommu * __init find_iommu_from_bdf_cap(
   44.35 +           u16 bdf, u8 cap_offset)
   44.36 +{
   44.37 +    struct amd_iommu *iommu;
   44.38 +
   44.39 +    for_each_amd_iommu( iommu )
   44.40 +        if ( iommu->bdf == bdf && iommu->cap_offset == cap_offset )
   44.41 +            return iommu;
   44.42 +
   44.43 +    return NULL;
   44.44 +}
   44.45 +
   44.46 +static void __init reserve_iommu_exclusion_range(
   44.47 +    struct amd_iommu *iommu, uint64_t base, uint64_t limit)
   44.48 +{
   44.49 +    /* need to extend exclusion range? */
   44.50 +    if ( iommu->exclusion_enable )
   44.51 +    {
   44.52 +        if ( iommu->exclusion_base < base )
   44.53 +            base = iommu->exclusion_base;
   44.54 +        if ( iommu->exclusion_limit > limit )
   44.55 +            limit = iommu->exclusion_limit;
   44.56 +    }
   44.57 +
   44.58 +    iommu->exclusion_enable = IOMMU_CONTROL_ENABLED;
   44.59 +    iommu->exclusion_base = base;
   44.60 +    iommu->exclusion_limit = limit;
   44.61 +}
   44.62 +
   44.63 +static void __init reserve_iommu_exclusion_range_all(struct amd_iommu *iommu,
   44.64 +           unsigned long base, unsigned long limit)
   44.65 +{
   44.66 +    reserve_iommu_exclusion_range(iommu, base, limit);
   44.67 +    iommu->exclusion_allow_all = IOMMU_CONTROL_ENABLED;
   44.68 +}
   44.69 +
   44.70 +static void __init reserve_unity_map_for_device(u16 bdf, unsigned long base,
   44.71 +           unsigned long length, u8 iw, u8 ir)
   44.72 +{
   44.73 +    unsigned long old_top, new_top;
   44.74 +
   44.75 +    /* need to extend unity-mapped range? */
   44.76 +    if ( ivrs_mappings[bdf].unity_map_enable )
   44.77 +    {
   44.78 +        old_top = ivrs_mappings[bdf].addr_range_start +
   44.79 +            ivrs_mappings[bdf].addr_range_length;
   44.80 +        new_top = base + length;
   44.81 +        if ( old_top > new_top )
   44.82 +            new_top = old_top;
   44.83 +        if ( ivrs_mappings[bdf].addr_range_start < base )
   44.84 +            base = ivrs_mappings[bdf].addr_range_start;
   44.85 +        length = new_top - base;
   44.86 +   }
   44.87 +
    44.88 +    /* extend r/w permissions and keep aggregate */
   44.89 +    if ( iw )
   44.90 +        ivrs_mappings[bdf].write_permission = IOMMU_CONTROL_ENABLED;
   44.91 +    if ( ir )
   44.92 +        ivrs_mappings[bdf].read_permission = IOMMU_CONTROL_ENABLED;
   44.93 +    ivrs_mappings[bdf].unity_map_enable = IOMMU_CONTROL_ENABLED;
   44.94 +    ivrs_mappings[bdf].addr_range_start = base;
   44.95 +    ivrs_mappings[bdf].addr_range_length = length;
   44.96 +}
   44.97 +
   44.98 +static int __init register_exclusion_range_for_all_devices(
   44.99 +           unsigned long base, unsigned long limit, u8 iw, u8 ir)
  44.100 +{
  44.101 +    unsigned long range_top, iommu_top, length;
  44.102 +    struct amd_iommu *iommu;
  44.103 +    u16 bdf;
  44.104 +
  44.105 +    /* is part of exclusion range inside of IOMMU virtual address space? */
  44.106 +    /* note: 'limit' parameter is assumed to be page-aligned */
  44.107 +    range_top = limit + PAGE_SIZE;
  44.108 +    iommu_top = max_page * PAGE_SIZE;
  44.109 +    if ( base < iommu_top )
  44.110 +    {
  44.111 +        if (range_top > iommu_top)
  44.112 +            range_top = iommu_top;
  44.113 +        length = range_top - base;
  44.114 +        /* reserve r/w unity-mapped page entries for devices */
  44.115 +        /* note: these entries are part of the exclusion range */
  44.116 +        for (bdf = 0; bdf < ivrs_bdf_entries; ++bdf)
  44.117 +            reserve_unity_map_for_device(bdf, base, length, iw, ir);
  44.118 +        /* push 'base' just outside of virtual address space */
  44.119 +        base = iommu_top;
  44.120 +    }
  44.121 +    /* register IOMMU exclusion range settings */
  44.122 +    if (limit >= iommu_top)
  44.123 +    {
  44.124 +        for_each_amd_iommu( iommu )
  44.125 +            reserve_iommu_exclusion_range_all(iommu, base, limit);
  44.126 +    }
  44.127 +
  44.128 +    return 0;
  44.129 +}
  44.130 +
  44.131 +static int __init register_exclusion_range_for_device(u16 bdf,
  44.132 +           unsigned long base, unsigned long limit, u8 iw, u8 ir)
  44.133 +{
  44.134 +    unsigned long range_top, iommu_top, length;
  44.135 +    struct amd_iommu *iommu;
  44.136 +    u16 bus, devfn, req;
  44.137 +
  44.138 +    bus = bdf >> 8;
  44.139 +    devfn = bdf & 0xFF;
  44.140 +    iommu = find_iommu_for_device(bus, devfn);
  44.141 +    if ( !iommu )
  44.142 +    {
  44.143 +        dprintk(XENLOG_ERR, "IVMD Error: No IOMMU for Dev_Id 0x%x!\n", bdf);
  44.144 +        return -ENODEV;
  44.145 +    }
  44.146 +    req = ivrs_mappings[bdf].dte_requestor_id;
  44.147 +
  44.148 +    /* note: 'limit' parameter is assumed to be page-aligned */
  44.149 +    range_top = limit + PAGE_SIZE;
  44.150 +    iommu_top = max_page * PAGE_SIZE;
  44.151 +    if ( base < iommu_top )
  44.152 +    {
  44.153 +        if (range_top > iommu_top)
  44.154 +            range_top = iommu_top;
  44.155 +        length = range_top - base;
  44.156 +        /* reserve unity-mapped page entries for device */
  44.157 +        /* note: these entries are part of the exclusion range */
  44.158 +        reserve_unity_map_for_device(bdf, base, length, iw, ir);
  44.159 +        reserve_unity_map_for_device(req, base, length, iw, ir);
  44.160 +
  44.161 +        /* push 'base' just outside of virtual address space */
  44.162 +        base = iommu_top;
  44.163 +    }
  44.164 +
  44.165 +   /* register IOMMU exclusion range settings for device */
  44.166 +   if ( limit >= iommu_top  )
  44.167 +    {
  44.168 +        reserve_iommu_exclusion_range(iommu, base, limit);
  44.169 +        ivrs_mappings[bdf].dte_allow_exclusion = IOMMU_CONTROL_ENABLED;
  44.170 +        ivrs_mappings[req].dte_allow_exclusion = IOMMU_CONTROL_ENABLED;
  44.171 +    }
  44.172 +
  44.173 +    return 0;
  44.174 +}
  44.175 +
  44.176 +static int __init register_exclusion_range_for_iommu_devices(
  44.177 +           struct amd_iommu *iommu,
  44.178 +           unsigned long base, unsigned long limit, u8 iw, u8 ir)
  44.179 +{
  44.180 +    unsigned long range_top, iommu_top, length;
  44.181 +    u16 bus, devfn, bdf, req;
  44.182 +
  44.183 +    /* is part of exclusion range inside of IOMMU virtual address space? */
  44.184 +    /* note: 'limit' parameter is assumed to be page-aligned */
  44.185 +    range_top = limit + PAGE_SIZE;
  44.186 +    iommu_top = max_page * PAGE_SIZE;
  44.187 +    if ( base < iommu_top )
  44.188 +    {
  44.189 +        if (range_top > iommu_top)
  44.190 +            range_top = iommu_top;
  44.191 +        length = range_top - base;
  44.192 +        /* reserve r/w unity-mapped page entries for devices */
  44.193 +        /* note: these entries are part of the exclusion range */
  44.194 +        for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf )
  44.195 +        {
  44.196 +            bus = bdf >> 8;
  44.197 +            devfn = bdf & 0xFF;
  44.198 +            if ( iommu == find_iommu_for_device(bus, devfn) )
  44.199 +            {
  44.200 +                reserve_unity_map_for_device(bdf, base, length, iw, ir);
  44.201 +                req = ivrs_mappings[bdf].dte_requestor_id;
  44.202 +                reserve_unity_map_for_device(req, base, length, iw, ir);
  44.203 +            }
  44.204 +        }
  44.205 +
  44.206 +        /* push 'base' just outside of virtual address space */
  44.207 +        base = iommu_top;
  44.208 +    }
  44.209 +
  44.210 +    /* register IOMMU exclusion range settings */
  44.211 +    if (limit >= iommu_top)
  44.212 +        reserve_iommu_exclusion_range_all(iommu, base, limit);
  44.213 +    return 0;
  44.214 +}
  44.215 +
  44.216 +static int __init parse_ivmd_device_select(
  44.217 +           struct acpi_ivmd_block_header *ivmd_block,
  44.218 +           unsigned long base, unsigned long limit, u8 iw, u8 ir)
  44.219 +{
  44.220 +    u16 bdf;
  44.221 +
  44.222 +    bdf = ivmd_block->header.dev_id;
  44.223 +    if (bdf >= ivrs_bdf_entries)
  44.224 +    {
  44.225 +        dprintk(XENLOG_ERR, "IVMD Error: Invalid Dev_Id 0x%x\n", bdf);
  44.226 +        return -ENODEV;
  44.227 +    }
  44.228 +
  44.229 +    return register_exclusion_range_for_device(bdf, base, limit, iw, ir);
  44.230 +}
  44.231 +
  44.232 +static int __init parse_ivmd_device_range(
  44.233 +           struct acpi_ivmd_block_header *ivmd_block,
  44.234 +           unsigned long base, unsigned long limit, u8 iw, u8 ir)
  44.235 +{
  44.236 +    u16 first_bdf, last_bdf, bdf;
  44.237 +    int error;
  44.238 +
  44.239 +    first_bdf = ivmd_block->header.dev_id;
  44.240 +    if (first_bdf >= ivrs_bdf_entries)
  44.241 +    {
  44.242 +       dprintk(XENLOG_ERR, "IVMD Error: "
  44.243 +                    "Invalid Range_First Dev_Id 0x%x\n", first_bdf);
  44.244 +       return -ENODEV;
  44.245 +    }
  44.246 +
  44.247 +    last_bdf = ivmd_block->last_dev_id;
  44.248 +    if (last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf)
  44.249 +    {
  44.250 +        dprintk(XENLOG_ERR, "IVMD Error: "
  44.251 +                    "Invalid Range_Last Dev_Id 0x%x\n", last_bdf);
  44.252 +        return -ENODEV;
  44.253 +    }
  44.254 +
  44.255 +      dprintk(XENLOG_ERR, " Dev_Id Range: 0x%x -> 0x%x\n",
  44.256 +                    first_bdf, last_bdf);
  44.257 +
  44.258 +    for ( bdf = first_bdf, error = 0;
  44.259 +       bdf <= last_bdf && !error; ++bdf )
  44.260 +    {
  44.261 +       error = register_exclusion_range_for_device(
  44.262 +                     bdf, base, limit, iw, ir);
  44.263 +    }
  44.264 +
  44.265 +   return error;
  44.266 +}
  44.267 +
  44.268 +static int __init parse_ivmd_device_iommu(
  44.269 +           struct acpi_ivmd_block_header *ivmd_block,
  44.270 +           unsigned long base, unsigned long limit, u8 iw, u8 ir)
  44.271 +{
  44.272 +    struct amd_iommu *iommu;
  44.273 +
  44.274 +    /* find target IOMMU */
  44.275 +    iommu = find_iommu_from_bdf_cap(ivmd_block->header.dev_id,
  44.276 +                                    ivmd_block->cap_offset);
  44.277 +    if ( !iommu )
  44.278 +    {
  44.279 +       dprintk(XENLOG_ERR,
  44.280 +           "IVMD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
  44.281 +            ivmd_block->header.dev_id, ivmd_block->cap_offset);
  44.282 +       return -ENODEV;
  44.283 +    }
  44.284 +
  44.285 +    return register_exclusion_range_for_iommu_devices(
  44.286 +                 iommu, base, limit, iw, ir);
  44.287 +}
  44.288 +
  44.289 +static int __init parse_ivmd_block(struct acpi_ivmd_block_header *ivmd_block)
  44.290 +{
  44.291 +    unsigned long start_addr, mem_length, base, limit;
  44.292 +    u8 iw, ir;
  44.293 +
  44.294 +    if (ivmd_block->header.length <
  44.295 +       sizeof(struct acpi_ivmd_block_header))
  44.296 +    {
  44.297 +       dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Length!\n");
  44.298 +       return -ENODEV;
  44.299 +    }
  44.300 +
  44.301 +    start_addr = (unsigned long)ivmd_block->start_addr;
  44.302 +    mem_length = (unsigned long)ivmd_block->mem_length;
  44.303 +    base = start_addr & PAGE_MASK;
  44.304 +    limit = (start_addr + mem_length - 1) & PAGE_MASK;
  44.305 +
  44.306 +    dprintk(XENLOG_INFO, "IVMD Block: Type 0x%x\n",
  44.307 +                  ivmd_block->header.type);
  44.308 +    dprintk(XENLOG_INFO, " Start_Addr_Phys 0x%lx\n", start_addr);
  44.309 +    dprintk(XENLOG_INFO, " Mem_Length 0x%lx\n", mem_length);
  44.310 +
  44.311 +    if ( get_field_from_byte(ivmd_block->header.flags,
  44.312 +                             AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK,
  44.313 +                             AMD_IOMMU_ACPI_EXCLUSION_RANGE_SHIFT) )
  44.314 +        iw = ir = IOMMU_CONTROL_ENABLED;
  44.315 +    else if ( get_field_from_byte(ivmd_block->header.flags,
  44.316 +                                  AMD_IOMMU_ACPI_UNITY_MAPPING_MASK,
  44.317 +                                  AMD_IOMMU_ACPI_UNITY_MAPPING_SHIFT) )
  44.318 +    {
  44.319 +        iw = get_field_from_byte(ivmd_block->header.flags,
  44.320 +                                 AMD_IOMMU_ACPI_IW_PERMISSION_MASK,
  44.321 +                                 AMD_IOMMU_ACPI_IW_PERMISSION_SHIFT);
  44.322 +        ir = get_field_from_byte(ivmd_block->header.flags,
  44.323 +                                 AMD_IOMMU_ACPI_IR_PERMISSION_MASK,
  44.324 +                                 AMD_IOMMU_ACPI_IR_PERMISSION_SHIFT);
  44.325 +    }
  44.326 +    else
  44.327 +    {
  44.328 +       dprintk(KERN_ERR, "IVMD Error: Invalid Flag Field!\n");
  44.329 +       return -ENODEV;
  44.330 +    }
  44.331 +
  44.332 +    switch( ivmd_block->header.type )
  44.333 +    {
  44.334 +    case AMD_IOMMU_ACPI_IVMD_ALL_TYPE:
  44.335 +        return register_exclusion_range_for_all_devices(
  44.336 +           base, limit, iw, ir);
  44.337 +
  44.338 +    case AMD_IOMMU_ACPI_IVMD_ONE_TYPE:
  44.339 +        return parse_ivmd_device_select(ivmd_block,
  44.340 +           base, limit, iw, ir);
  44.341 +
  44.342 +    case AMD_IOMMU_ACPI_IVMD_RANGE_TYPE:
  44.343 +        return parse_ivmd_device_range(ivmd_block,
  44.344 +            base, limit, iw, ir);
  44.345 +
  44.346 +    case AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE:
  44.347 +        return parse_ivmd_device_iommu(ivmd_block,
  44.348 +           base, limit, iw, ir);
  44.349 +
  44.350 +    default:
  44.351 +        dprintk(XENLOG_ERR, "IVMD Error: Invalid Block Type!\n");
  44.352 +        return -ENODEV;
  44.353 +    }
  44.354 +}
  44.355 +
  44.356 +static u16 __init parse_ivhd_device_padding(u16 pad_length,
  44.357 +           u16 header_length, u16 block_length)
  44.358 +{
  44.359 +    if ( header_length < (block_length + pad_length) )
  44.360 +    {
  44.361 +        dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
  44.362 +        return 0;
  44.363 +    }
  44.364 +
  44.365 +    return pad_length;
  44.366 +}
  44.367 +
  44.368 +static u16 __init parse_ivhd_device_select(
  44.369 +           union acpi_ivhd_device *ivhd_device)
  44.370 +{
  44.371 +    u16 bdf;
  44.372 +
  44.373 +    bdf = ivhd_device->header.dev_id;
  44.374 +    if ( bdf >= ivrs_bdf_entries )
  44.375 +    {
  44.376 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.377 +                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  44.378 +        return 0;
  44.379 +    }
  44.380 +
  44.381 +    /* override flags for device */
  44.382 +    ivrs_mappings[bdf].dte_sys_mgt_enable =
  44.383 +        get_field_from_byte(ivhd_device->header.flags,
  44.384 +                            AMD_IOMMU_ACPI_SYS_MGT_MASK,
  44.385 +                            AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
  44.386 +
  44.387 +    return sizeof(struct acpi_ivhd_device_header);
  44.388 +}
  44.389 +
  44.390 +static u16 __init parse_ivhd_device_range(
  44.391 +           union acpi_ivhd_device *ivhd_device,
  44.392 +           u16 header_length, u16 block_length)
  44.393 +{
  44.394 +    u16 dev_length, first_bdf, last_bdf, bdf;
  44.395 +    u8 sys_mgt;
  44.396 +
  44.397 +    dev_length = sizeof(struct acpi_ivhd_device_range);
  44.398 +    if ( header_length < (block_length + dev_length) )
  44.399 +    {
  44.400 +        dprintk(XENLOG_ERR, "IVHD Error: Invalid Device_Entry Length!\n");
  44.401 +        return 0;
  44.402 +    }
  44.403 +
  44.404 +    if ( ivhd_device->range.trailer.type !=
  44.405 +        AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END) {
  44.406 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.407 +                "Invalid Range: End_Type 0x%x\n",
  44.408 +                ivhd_device->range.trailer.type);
  44.409 +        return 0;
  44.410 +    }
  44.411 +
  44.412 +    first_bdf = ivhd_device->header.dev_id;
  44.413 +    if ( first_bdf >= ivrs_bdf_entries )
  44.414 +    {
  44.415 +       dprintk(XENLOG_ERR, "IVHD Error: "
  44.416 +           "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  44.417 +       return 0;
  44.418 +    }
  44.419 +
  44.420 +    last_bdf = ivhd_device->range.trailer.dev_id;
  44.421 +    if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
  44.422 +    {
  44.423 +       dprintk(XENLOG_ERR, "IVHD Error: "
  44.424 +           "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  44.425 +       return 0;
  44.426 +    }
  44.427 +
  44.428 +    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
  44.429 +        first_bdf, last_bdf);
  44.430 +
  44.431 +    /* override flags for range of devices */
  44.432 +    sys_mgt = get_field_from_byte(ivhd_device->header.flags,
  44.433 +                                 AMD_IOMMU_ACPI_SYS_MGT_MASK,
  44.434 +                                 AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
  44.435 +    for ( bdf = first_bdf; bdf <= last_bdf; ++bdf )
  44.436 +        ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
  44.437 +
  44.438 +    return dev_length;
  44.439 +}
  44.440 +
  44.441 +static u16 __init parse_ivhd_device_alias(
  44.442 +           union acpi_ivhd_device *ivhd_device,
  44.443 +           u16 header_length, u16 block_length)
  44.444 +{
  44.445 +    u16 dev_length, alias_id, bdf;
  44.446 +
  44.447 +    dev_length = sizeof(struct acpi_ivhd_device_alias);
  44.448 +    if ( header_length < (block_length + dev_length) )
  44.449 +    {
  44.450 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.451 +            "Invalid Device_Entry Length!\n");
  44.452 +        return 0;
  44.453 +    }
  44.454 +
  44.455 +    bdf = ivhd_device->header.dev_id;
  44.456 +    if ( bdf >= ivrs_bdf_entries )
  44.457 +    {
  44.458 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.459 +                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  44.460 +        return 0;
  44.461 +    }
  44.462 +
  44.463 +    alias_id = ivhd_device->alias.dev_id;
  44.464 +    if ( alias_id >= ivrs_bdf_entries )
  44.465 +    {
  44.466 +       dprintk(XENLOG_ERR, "IVHD Error: "
  44.467 +               "Invalid Alias Dev_Id 0x%x\n", alias_id);
  44.468 +       return 0;
  44.469 +    }
  44.470 +
  44.471 +    dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
  44.472 +
  44.473 +    /* override requestor_id and flags for device */
  44.474 +    ivrs_mappings[bdf].dte_requestor_id = alias_id;
  44.475 +    ivrs_mappings[bdf].dte_sys_mgt_enable =
  44.476 +            get_field_from_byte(ivhd_device->header.flags,
  44.477 +                                AMD_IOMMU_ACPI_SYS_MGT_MASK,
  44.478 +                                AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
  44.479 +    ivrs_mappings[alias_id].dte_sys_mgt_enable =
  44.480 +            ivrs_mappings[bdf].dte_sys_mgt_enable;
  44.481 +
  44.482 +    return dev_length;
  44.483 +}
  44.484 +
  44.485 +static u16 __init parse_ivhd_device_alias_range(
  44.486 +           union acpi_ivhd_device *ivhd_device,
  44.487 +           u16 header_length, u16 block_length)
  44.488 +{
  44.489 +
  44.490 +    u16 dev_length, first_bdf, last_bdf, alias_id, bdf;
  44.491 +    u8 sys_mgt;
  44.492 +
  44.493 +    dev_length = sizeof(struct acpi_ivhd_device_alias_range);
  44.494 +    if ( header_length < (block_length + dev_length) )
  44.495 +    {
  44.496 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.497 +                "Invalid Device_Entry Length!\n");
  44.498 +        return 0;
  44.499 +    }
  44.500 +
  44.501 +    if ( ivhd_device->alias_range.trailer.type !=
  44.502 +       AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
  44.503 +    {
  44.504 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.505 +                "Invalid Range: End_Type 0x%x\n",
  44.506 +                ivhd_device->alias_range.trailer.type);
  44.507 +        return 0;
  44.508 +    }
  44.509 +
  44.510 +    first_bdf = ivhd_device->header.dev_id;
  44.511 +    if ( first_bdf >= ivrs_bdf_entries )
  44.512 +    {
  44.513 +        dprintk(XENLOG_ERR,"IVHD Error: "
  44.514 +                "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  44.515 +        return 0;
  44.516 +    }
  44.517 +
  44.518 +    last_bdf = ivhd_device->alias_range.trailer.dev_id;
  44.519 +    if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
  44.520 +    {
  44.521 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.522 +                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  44.523 +        return 0;
  44.524 +    }
  44.525 +
  44.526 +    alias_id = ivhd_device->alias_range.alias.dev_id;
  44.527 +    if ( alias_id >= ivrs_bdf_entries )
  44.528 +    {
  44.529 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.530 +                "Invalid Alias Dev_Id 0x%x\n", alias_id);
  44.531 +        return 0;
  44.532 +    }
  44.533 +
  44.534 +    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
  44.535 +            first_bdf, last_bdf);
  44.536 +    dprintk(XENLOG_INFO, " Dev_Id Alias: 0x%x\n", alias_id);
  44.537 +
  44.538 +    /* override requestor_id and flags for range of devices */
  44.539 +    sys_mgt = get_field_from_byte(ivhd_device->header.flags,
  44.540 +                                  AMD_IOMMU_ACPI_SYS_MGT_MASK,
  44.541 +                                  AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
  44.542 +    for ( bdf = first_bdf; bdf <= last_bdf; ++bdf )
  44.543 +    {
  44.544 +        ivrs_mappings[bdf].dte_requestor_id = alias_id;
  44.545 +        ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
  44.546 +    }
  44.547 +    ivrs_mappings[alias_id].dte_sys_mgt_enable = sys_mgt;
  44.548 +
  44.549 +    return dev_length;
  44.550 +}
  44.551 +
  44.552 +static u16 __init parse_ivhd_device_extended(
  44.553 +           union acpi_ivhd_device *ivhd_device,
  44.554 +           u16 header_length, u16 block_length)
  44.555 +{
  44.556 +    u16 dev_length, bdf;
  44.557 +
  44.558 +    dev_length = sizeof(struct acpi_ivhd_device_extended);
  44.559 +    if ( header_length < (block_length + dev_length) )
  44.560 +    {
  44.561 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.562 +                "Invalid Device_Entry Length!\n");
  44.563 +        return 0;
  44.564 +    }
  44.565 +
  44.566 +    bdf = ivhd_device->header.dev_id;
  44.567 +    if ( bdf >= ivrs_bdf_entries )
  44.568 +    {
  44.569 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.570 +                "Invalid Device_Entry Dev_Id 0x%x\n", bdf);
  44.571 +        return 0;
  44.572 +    }
  44.573 +
  44.574 +    /* override flags for device */
  44.575 +    ivrs_mappings[bdf].dte_sys_mgt_enable =
  44.576 +        get_field_from_byte(ivhd_device->header.flags,
  44.577 +                            AMD_IOMMU_ACPI_SYS_MGT_MASK,
  44.578 +                            AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
  44.579 +
  44.580 +    return dev_length;
  44.581 +}
  44.582 +
  44.583 +static u16 __init parse_ivhd_device_extended_range(
  44.584 +           union acpi_ivhd_device *ivhd_device,
  44.585 +           u16 header_length, u16 block_length)
  44.586 +{
  44.587 +    u16 dev_length, first_bdf, last_bdf, bdf;
  44.588 +    u8 sys_mgt;
  44.589 +
  44.590 +    dev_length = sizeof(struct acpi_ivhd_device_extended_range);
  44.591 +    if ( header_length < (block_length + dev_length) )
  44.592 +    {
  44.593 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.594 +                "Invalid Device_Entry Length!\n");
  44.595 +        return 0;
  44.596 +    }
  44.597 +
  44.598 +    if ( ivhd_device->extended_range.trailer.type !=
  44.599 +        AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END )
  44.600 +    {
  44.601 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.602 +                "Invalid Range: End_Type 0x%x\n",
  44.603 +                ivhd_device->extended_range.trailer.type);
  44.604 +        return 0;
  44.605 +    }
  44.606 +
  44.607 +    first_bdf = ivhd_device->header.dev_id;
  44.608 +    if ( first_bdf >= ivrs_bdf_entries )
  44.609 +    {
  44.610 +       dprintk(XENLOG_ERR, "IVHD Error: "
  44.611 +           "Invalid Range: First Dev_Id 0x%x\n", first_bdf);
  44.612 +       return 0;
  44.613 +    }
  44.614 +
  44.615 +    last_bdf = ivhd_device->extended_range.trailer.dev_id;
  44.616 +    if ( last_bdf >= ivrs_bdf_entries || last_bdf <= first_bdf )
  44.617 +    {
  44.618 +        dprintk(XENLOG_ERR, "IVHD Error: "
  44.619 +                "Invalid Range: Last Dev_Id 0x%x\n", last_bdf);
  44.620 +        return 0;
  44.621 +    }
  44.622 +
  44.623 +    dprintk(XENLOG_INFO, " Dev_Id Range: 0x%x -> 0x%x\n",
  44.624 +            first_bdf, last_bdf);
  44.625 +
  44.626 +    /* override flags for range of devices */
  44.627 +    sys_mgt = get_field_from_byte(ivhd_device->header.flags,
  44.628 +                                  AMD_IOMMU_ACPI_SYS_MGT_MASK,
  44.629 +                                  AMD_IOMMU_ACPI_SYS_MGT_SHIFT);
  44.630 +    for ( bdf = first_bdf; bdf <= last_bdf; ++bdf )
  44.631 +        ivrs_mappings[bdf].dte_sys_mgt_enable = sys_mgt;
  44.632 +
  44.633 +    return dev_length;
  44.634 +}
  44.635 +
  44.636 +static int __init parse_ivhd_block(struct acpi_ivhd_block_header *ivhd_block)
  44.637 +{
  44.638 +    union acpi_ivhd_device *ivhd_device;
  44.639 +    u16 block_length, dev_length;
  44.640 +    struct amd_iommu *iommu;
  44.641 +
  44.642 +    if ( ivhd_block->header.length <
  44.643 +        sizeof(struct acpi_ivhd_block_header) )
  44.644 +    {
  44.645 +        dprintk(XENLOG_ERR, "IVHD Error: Invalid Block Length!\n");
  44.646 +        return -ENODEV;
  44.647 +    }
  44.648 +
  44.649 +    iommu = find_iommu_from_bdf_cap(ivhd_block->header.dev_id,
  44.650 +            ivhd_block->cap_offset);
  44.651 +    if ( !iommu )
  44.652 +    {
  44.653 +        dprintk(XENLOG_ERR,
  44.654 +                "IVHD Error: No IOMMU for Dev_Id 0x%x  Cap 0x%x\n",
  44.655 +                ivhd_block->header.dev_id, ivhd_block->cap_offset);
  44.656 +       return -ENODEV;
  44.657 +    }
  44.658 +
  44.659 +    dprintk(XENLOG_INFO, "IVHD Block:\n");
  44.660 +    dprintk(XENLOG_INFO, " Cap_Offset 0x%x\n",
  44.661 +            ivhd_block->cap_offset);
  44.662 +    dprintk(XENLOG_INFO, " MMIO_BAR_Phys 0x%lx\n",
  44.663 +            (unsigned long)ivhd_block->mmio_base);
  44.664 +    dprintk(XENLOG_INFO, " PCI_Segment 0x%x\n",
  44.665 +            ivhd_block->pci_segment);
  44.666 +    dprintk(XENLOG_INFO, " IOMMU_Info 0x%x\n",
  44.667 +            ivhd_block->iommu_info);
  44.668 +
  44.669 +    /* override IOMMU support flags */
  44.670 +    iommu->coherent = get_field_from_byte(ivhd_block->header.flags,
  44.671 +                                          AMD_IOMMU_ACPI_COHERENT_MASK,
  44.672 +                                          AMD_IOMMU_ACPI_COHERENT_SHIFT);
  44.673 +    iommu->iotlb_support = get_field_from_byte(ivhd_block->header.flags,
  44.674 +                                          AMD_IOMMU_ACPI_IOTLB_SUP_MASK,
  44.675 +                                          AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT);
  44.676 +    iommu->isochronous = get_field_from_byte(ivhd_block->header.flags,
  44.677 +                                          AMD_IOMMU_ACPI_ISOC_MASK,
  44.678 +                                          AMD_IOMMU_ACPI_ISOC_SHIFT);
  44.679 +    iommu->res_pass_pw = get_field_from_byte(ivhd_block->header.flags,
  44.680 +                                          AMD_IOMMU_ACPI_RES_PASS_PW_MASK,
  44.681 +                                          AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT);
  44.682 +    iommu->pass_pw = get_field_from_byte(ivhd_block->header.flags,
  44.683 +                                          AMD_IOMMU_ACPI_PASS_PW_MASK,
  44.684 +                                          AMD_IOMMU_ACPI_PASS_PW_SHIFT);
  44.685 +    iommu->ht_tunnel_enable = get_field_from_byte(
  44.686 +                                          ivhd_block->header.flags,
  44.687 +                                          AMD_IOMMU_ACPI_HT_TUN_ENB_MASK,
  44.688 +                                          AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT);
  44.689 +
  44.690 +    /* parse Device Entries */
  44.691 +    block_length = sizeof(struct acpi_ivhd_block_header);
  44.692 +    while( ivhd_block->header.length >=
  44.693 +       (block_length + sizeof(struct acpi_ivhd_device_header)) )
  44.694 +    {
  44.695 +        ivhd_device = (union acpi_ivhd_device *)
  44.696 +                ((u8 *)ivhd_block + block_length);
  44.697 +
  44.698 +        dprintk(XENLOG_INFO, "IVHD Device Entry:\n");
  44.699 +        dprintk(XENLOG_INFO, " Type 0x%x\n",
  44.700 +                ivhd_device->header.type);
  44.701 +        dprintk(XENLOG_INFO, " Dev_Id 0x%x\n",
  44.702 +                ivhd_device->header.dev_id);
  44.703 +        dprintk(XENLOG_INFO, " Flags 0x%x\n",
  44.704 +                ivhd_device->header.flags);
  44.705 +
  44.706 +        switch( ivhd_device->header.type )
  44.707 +        {
  44.708 +        case AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD:
  44.709 +            dev_length = parse_ivhd_device_padding(
  44.710 +                sizeof(u32),
  44.711 +                ivhd_block->header.length, block_length);
  44.712 +            break;
  44.713 +        case AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD:
  44.714 +            dev_length = parse_ivhd_device_padding(
  44.715 +                sizeof(u64),
  44.716 +                ivhd_block->header.length, block_length);
  44.717 +            break;
  44.718 +        case AMD_IOMMU_ACPI_IVHD_DEV_SELECT:
  44.719 +            dev_length = parse_ivhd_device_select(ivhd_device);
  44.720 +            break;
  44.721 +        case AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START:
  44.722 +            dev_length = parse_ivhd_device_range(ivhd_device,
  44.723 +                ivhd_block->header.length, block_length);
  44.724 +            break;
  44.725 +        case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT:
  44.726 +            dev_length = parse_ivhd_device_alias(
  44.727 +                ivhd_device,
  44.728 +                ivhd_block->header.length, block_length);
  44.729 +            break;
  44.730 +        case AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE:
  44.731 +            dev_length = parse_ivhd_device_alias_range(
  44.732 +                ivhd_device,
  44.733 +                ivhd_block->header.length, block_length);
  44.734 +            break;
  44.735 +        case AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT:
  44.736 +            dev_length = parse_ivhd_device_extended(
  44.737 +                ivhd_device,
  44.738 +                ivhd_block->header.length, block_length);
  44.739 +            break;
  44.740 +        case AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE:
  44.741 +            dev_length = parse_ivhd_device_extended_range(
  44.742 +                ivhd_device,
  44.743 +                ivhd_block->header.length, block_length);
  44.744 +            break;
  44.745 +        default:
  44.746 +            dprintk(XENLOG_ERR, "IVHD Error: "
  44.747 +                "Invalid Device Type!\n");
  44.748 +            dev_length = 0;
  44.749 +            break;
  44.750 +        }
  44.751 +
  44.752 +        block_length += dev_length;
  44.753 +        if ( !dev_length )
  44.754 +            return -ENODEV;
  44.755 +    }
  44.756 +
  44.757 +    return 0;
  44.758 +}
  44.759 +
  44.760 +static int __init parse_ivrs_block(struct acpi_ivrs_block_header *ivrs_block)
  44.761 +{
  44.762 +    struct acpi_ivhd_block_header *ivhd_block;
  44.763 +    struct acpi_ivmd_block_header *ivmd_block;
  44.764 +
  44.765 +    switch(ivrs_block->type)
  44.766 +    {
  44.767 +    case AMD_IOMMU_ACPI_IVHD_TYPE:
  44.768 +        ivhd_block = (struct acpi_ivhd_block_header *)ivrs_block;
  44.769 +        return parse_ivhd_block(ivhd_block);
  44.770 +
  44.771 +    case AMD_IOMMU_ACPI_IVMD_ALL_TYPE:
  44.772 +    case AMD_IOMMU_ACPI_IVMD_ONE_TYPE:
  44.773 +    case AMD_IOMMU_ACPI_IVMD_RANGE_TYPE:
  44.774 +    case AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE:
  44.775 +        ivmd_block = (struct acpi_ivmd_block_header *)ivrs_block;
  44.776 +        return parse_ivmd_block(ivmd_block);
  44.777 +
  44.778 +    default:
  44.779 +        dprintk(XENLOG_ERR, "IVRS Error: Invalid Block Type!\n");
  44.780 +        return -ENODEV;
  44.781 +    }
  44.782 +
  44.783 +    return 0;
  44.784 +}
  44.785 +
  44.786 +void __init dump_acpi_table_header(struct acpi_table_header *table)
  44.787 +{
  44.788 +    int i;
  44.789 +
  44.790 +    printk(XENLOG_INFO "AMD IOMMU: ACPI Table:\n");
  44.791 +    printk(XENLOG_INFO " Signature ");
  44.792 +    for ( i = 0; i < ACPI_NAME_SIZE; ++i )
  44.793 +        printk("%c", table->signature[i]);
  44.794 +    printk("\n");
  44.795 +
  44.796 +    printk(" Length 0x%x\n", table->length);
  44.797 +    printk(" Revision 0x%x\n", table->revision);
  44.798 +    printk(" CheckSum 0x%x\n", table->checksum);
  44.799 +
  44.800 +    printk(" OEM_Id ");
  44.801 +    for ( i = 0; i < ACPI_OEM_ID_SIZE; ++i )
  44.802 +        printk("%c", table->oem_id[i]);
  44.803 +    printk("\n");
  44.804 +
  44.805 +    printk(" OEM_Table_Id ");
  44.806 +    for ( i = 0; i < ACPI_OEM_TABLE_ID_SIZE; ++i )
  44.807 +        printk("%c", table->oem_table_id[i]);
  44.808 +    printk("\n");
  44.809 +
  44.810 +    printk(" OEM_Revision 0x%x\n", table->oem_revision);
  44.811 +
  44.812 +    printk(" Creator_Id ");
  44.813 +    for ( i = 0; i < ACPI_NAME_SIZE; ++i )
  44.814 +        printk("%c", table->asl_compiler_id[i]);
  44.815 +    printk("\n");
  44.816 +
  44.817 +    printk(" Creator_Revision 0x%x\n",
  44.818 +       table->asl_compiler_revision);
  44.819 +}
  44.820 +
  44.821 +int __init parse_ivrs_table(unsigned long phys_addr,
  44.822 +                                  unsigned long size)
  44.823 +{
  44.824 +    struct acpi_ivrs_block_header *ivrs_block;
  44.825 +    unsigned long length, i;
  44.826 +    u8 checksum, *raw_table;
  44.827 +    int error = 0;
  44.828 +    struct acpi_table_header  *table =
  44.829 +        (struct acpi_table_header *) __acpi_map_table(phys_addr, size);
  44.830 +
  44.831 +    BUG_ON(!table);
  44.832 +
  44.833 +#if 0
  44.834 +    dump_acpi_table_header(table);
  44.835 +#endif
  44.836 +
  44.837 +    /* validate checksum: sum of entire table == 0 */
  44.838 +    checksum = 0;
  44.839 +    raw_table = (u8 *)table;
  44.840 +    for ( i = 0; i < table->length; ++i )
  44.841 +        checksum += raw_table[i];
  44.842 +    if ( checksum )
  44.843 +    {
  44.844 +        dprintk(XENLOG_ERR, "IVRS Error: "
  44.845 +                "Invalid Checksum 0x%x\n", checksum);
  44.846 +        return -ENODEV;
  44.847 +    }
  44.848 +
  44.849 +    /* parse IVRS blocks */
  44.850 +    length = sizeof(struct acpi_ivrs_table_header);
  44.851 +    while( error == 0 && table->length >
  44.852 +       (length + sizeof(struct acpi_ivrs_block_header)) )
  44.853 +    {
  44.854 +        ivrs_block = (struct acpi_ivrs_block_header *)
  44.855 +                ((u8 *)table + length);
  44.856 +
  44.857 +        dprintk(XENLOG_INFO, "IVRS Block:\n");
  44.858 +        dprintk(XENLOG_INFO, " Type 0x%x\n", ivrs_block->type);
  44.859 +        dprintk(XENLOG_INFO, " Flags 0x%x\n", ivrs_block->flags);
  44.860 +        dprintk(XENLOG_INFO, " Length 0x%x\n", ivrs_block->length);
  44.861 +        dprintk(XENLOG_INFO, " Dev_Id 0x%x\n", ivrs_block->dev_id);
  44.862 +
  44.863 +        if (table->length >= (length + ivrs_block->length))
  44.864 +           error = parse_ivrs_block(ivrs_block);
  44.865 +        else
  44.866 +        {
  44.867 +           dprintk(XENLOG_ERR, "IVRS Error: "
  44.868 +               "Table Length Exceeded: 0x%x -> 0x%lx\n",
  44.869 +               table->length,
  44.870 +               (length + ivrs_block->length));
  44.871 +           return -ENODEV;
  44.872 +        }
  44.873 +        length += ivrs_block->length;
  44.874 +    }
  44.875 +
  44.876 +    return error;
  44.877 +}
    45.1 --- a/xen/drivers/passthrough/amd/iommu_detect.c	Fri Feb 29 09:18:01 2008 -0700
    45.2 +++ b/xen/drivers/passthrough/amd/iommu_detect.c	Fri Feb 29 09:19:58 2008 -0700
    45.3 @@ -86,31 +86,25 @@ int __init get_iommu_last_downstream_bus
    45.4  int __init get_iommu_capabilities(u8 bus, u8 dev, u8 func, u8 cap_ptr,
    45.5              struct amd_iommu *iommu)
    45.6  {
    45.7 -    u32 cap_header, cap_range;
    45.8 +    u32 cap_header, cap_range, misc_info;
    45.9      u64 mmio_bar;
   45.10  
   45.11 -#if HACK_BIOS_SETTINGS
   45.12 -    /* remove it when BIOS available */
   45.13 -    write_pci_config(bus, dev, func,
   45.14 -        cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET, 0x00000000);
   45.15 -    write_pci_config(bus, dev, func,
   45.16 -        cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET, 0x40000001);
   45.17 -    /* remove it when BIOS available */
   45.18 -#endif
   45.19 +    mmio_bar = (u64)read_pci_config(bus, dev, func,
   45.20 +            cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32;
   45.21 +    mmio_bar |= read_pci_config(bus, dev, func,
   45.22 +            cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET); 
   45.23 +    iommu->mmio_base_phys = mmio_bar & (u64)~0x3FFF;
   45.24  
   45.25 -    mmio_bar = (u64)read_pci_config(bus, dev, func,
   45.26 -             cap_ptr + PCI_CAP_MMIO_BAR_HIGH_OFFSET) << 32;
   45.27 -    mmio_bar |= read_pci_config(bus, dev, func,
   45.28 -            cap_ptr + PCI_CAP_MMIO_BAR_LOW_OFFSET) &
   45.29 -            PCI_CAP_MMIO_BAR_LOW_MASK;
   45.30 -    iommu->mmio_base_phys = (unsigned long)mmio_bar;
   45.31 -
   45.32 -    if ( (mmio_bar == 0) || ( (mmio_bar & 0x3FFF) != 0 ) ) {
   45.33 +    if ( (mmio_bar & 0x1) == 0 || iommu->mmio_base_phys == 0 )
   45.34 +    {
   45.35          dprintk(XENLOG_ERR ,
   45.36                  "AMD IOMMU: Invalid MMIO_BAR = 0x%"PRIx64"\n", mmio_bar);
   45.37          return -ENODEV;
   45.38      }
   45.39  
   45.40 +    iommu->bdf = (bus << 8) | PCI_DEVFN(dev, func);
   45.41 +    iommu->cap_offset = cap_ptr;
   45.42 +
   45.43      cap_header = read_pci_config(bus, dev, func, cap_ptr);
   45.44      iommu->revision = get_field_from_reg_u32(cap_header,
   45.45                    PCI_CAP_REV_MASK, PCI_CAP_REV_SHIFT);
   45.46 @@ -119,12 +113,15 @@ int __init get_iommu_capabilities(u8 bus
   45.47      iommu->ht_tunnel_support = get_field_from_reg_u32(cap_header,
   45.48                      PCI_CAP_HT_TUNNEL_MASK,
   45.49                      PCI_CAP_HT_TUNNEL_SHIFT);
   45.50 -    iommu->not_present_cached = get_field_from_reg_u32(cap_header,
   45.51 +    iommu->pte_not_present_cached = get_field_from_reg_u32(cap_header,
   45.52                      PCI_CAP_NP_CACHE_MASK,
   45.53                      PCI_CAP_NP_CACHE_SHIFT);
   45.54  
   45.55      cap_range = read_pci_config(bus, dev, func,
   45.56              cap_ptr + PCI_CAP_RANGE_OFFSET);
   45.57 +    iommu->unit_id = get_field_from_reg_u32(cap_range,
   45.58 +                PCI_CAP_UNIT_ID_MASK,
   45.59 +                PCI_CAP_UNIT_ID_SHIFT);
   45.60      iommu->root_bus = get_field_from_reg_u32(cap_range,
   45.61                  PCI_CAP_BUS_NUMBER_MASK,
   45.62                  PCI_CAP_BUS_NUMBER_SHIFT);
   45.63 @@ -135,6 +132,11 @@ int __init get_iommu_capabilities(u8 bus
   45.64                  PCI_CAP_LAST_DEVICE_MASK,
   45.65                  PCI_CAP_LAST_DEVICE_SHIFT);
   45.66  
   45.67 +    misc_info = read_pci_config(bus, dev, func,
   45.68 +            cap_ptr + PCI_MISC_INFO_OFFSET);
   45.69 +    iommu->msi_number = get_field_from_reg_u32(misc_info,
   45.70 +                PCI_CAP_MSI_NUMBER_MASK,
   45.71 +                PCI_CAP_MSI_NUMBER_SHIFT);
   45.72      return 0;
   45.73  }
   45.74  
    46.1 --- a/xen/drivers/passthrough/amd/iommu_init.c	Fri Feb 29 09:18:01 2008 -0700
    46.2 +++ b/xen/drivers/passthrough/amd/iommu_init.c	Fri Feb 29 09:19:58 2008 -0700
    46.3 @@ -137,8 +137,49 @@ static void __init set_iommu_command_buf
    46.4      writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
    46.5  }
    46.6  
    46.7 +static void __init register_iommu_exclusion_range(struct amd_iommu *iommu)
    46.8 +{
    46.9 +    u64 addr_lo, addr_hi;
   46.10 +    u32 entry;
   46.11 +
   46.12 +    addr_lo = iommu->exclusion_limit & DMA_32BIT_MASK;
   46.13 +    addr_hi = iommu->exclusion_limit >> 32;
   46.14 +
   46.15 +    set_field_in_reg_u32((u32)addr_hi, 0,
   46.16 +        IOMMU_EXCLUSION_LIMIT_HIGH_MASK,
   46.17 +        IOMMU_EXCLUSION_LIMIT_HIGH_SHIFT, &entry);
   46.18 +    writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_LIMIT_HIGH_OFFSET);
   46.19 +
   46.20 +    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
   46.21 +        IOMMU_EXCLUSION_LIMIT_LOW_MASK,
   46.22 +        IOMMU_EXCLUSION_LIMIT_LOW_SHIFT, &entry);
   46.23 +    writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_LIMIT_LOW_OFFSET);
   46.24 +
   46.25 +    addr_lo = iommu->exclusion_base & DMA_32BIT_MASK;
   46.26 +    addr_hi = iommu->exclusion_base >> 32;
   46.27 +
   46.28 +    set_field_in_reg_u32((u32)addr_hi, 0,
   46.29 +        IOMMU_EXCLUSION_BASE_HIGH_MASK,
   46.30 +        IOMMU_EXCLUSION_BASE_HIGH_SHIFT, &entry);
   46.31 +    writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_HIGH_OFFSET);
   46.32 +
   46.33 +    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
   46.34 +        IOMMU_EXCLUSION_BASE_LOW_MASK,
   46.35 +        IOMMU_EXCLUSION_BASE_LOW_SHIFT, &entry);
   46.36 +
   46.37 +    set_field_in_reg_u32(iommu->exclusion_allow_all, entry,
   46.38 +        IOMMU_EXCLUSION_ALLOW_ALL_MASK,
   46.39 +        IOMMU_EXCLUSION_ALLOW_ALL_SHIFT, &entry);
   46.40 +
   46.41 +    set_field_in_reg_u32(iommu->exclusion_enable, entry,
   46.42 +        IOMMU_EXCLUSION_RANGE_ENABLE_MASK,
   46.43 +        IOMMU_EXCLUSION_RANGE_ENABLE_SHIFT, &entry);
   46.44 +    writel(entry, iommu->mmio_base+IOMMU_EXCLUSION_BASE_LOW_OFFSET);
   46.45 +}
   46.46 +
   46.47  void __init enable_iommu(struct amd_iommu *iommu)
   46.48  {
   46.49 +    register_iommu_exclusion_range(iommu);
   46.50      set_iommu_command_buffer_control(iommu, IOMMU_CONTROL_ENABLED);
   46.51      set_iommu_translation_control(iommu, IOMMU_CONTROL_ENABLED);
   46.52      printk("AMD IOMMU %d: Enabled\n", nr_amd_iommus);
    47.1 --- a/xen/drivers/passthrough/amd/iommu_map.c	Fri Feb 29 09:18:01 2008 -0700
    47.2 +++ b/xen/drivers/passthrough/amd/iommu_map.c	Fri Feb 29 09:19:58 2008 -0700
    47.3 @@ -234,16 +234,19 @@ static void amd_iommu_set_page_directory
    47.4  }
    47.5  
    47.6  void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, u16 domain_id,
    47.7 -                                   u8 paging_mode)
    47.8 +           u8 sys_mgt, u8 dev_ex, u8 paging_mode)
    47.9  {
   47.10      u64 addr_hi, addr_lo;
   47.11      u32 entry;
   47.12  
   47.13 -    dte[6] = dte[5] = dte[4] = 0;
   47.14 +    dte[7] = dte[6] = dte[5] = dte[4] = 0;
   47.15  
   47.16 -    set_field_in_reg_u32(IOMMU_DEV_TABLE_SYS_MGT_MSG_FORWARDED, 0,
   47.17 +    set_field_in_reg_u32(sys_mgt, 0,
   47.18                           IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_MASK,
   47.19                           IOMMU_DEV_TABLE_SYS_MGT_MSG_ENABLE_SHIFT, &entry);
   47.20 +    set_field_in_reg_u32(dev_ex, entry,
   47.21 +                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_MASK,
   47.22 +                         IOMMU_DEV_TABLE_ALLOW_EXCLUSION_SHIFT, &entry);
   47.23      dte[3] = entry;
   47.24  
   47.25      set_field_in_reg_u32(domain_id, 0,
   47.26 @@ -448,3 +451,34 @@ int amd_iommu_unmap_page(struct domain *
   47.27  
   47.28      return 0;
   47.29  }
   47.30 +
   47.31 +int amd_iommu_reserve_domain_unity_map(
   47.32 +           struct domain *domain,
   47.33 +           unsigned long phys_addr,
   47.34 +           unsigned long size, int iw, int ir)
   47.35 +{
   47.36 +    unsigned long flags, npages, i;
   47.37 +    void *pte;
   47.38 +    struct hvm_iommu *hd = domain_hvm_iommu(domain);
   47.39 +
   47.40 +    npages = region_to_pages(phys_addr, size);
   47.41 +
   47.42 +    spin_lock_irqsave(&hd->mapping_lock, flags);
   47.43 +    for ( i = 0; i < npages; ++i )
   47.44 +    {
   47.45 +        pte = get_pte_from_page_tables(hd->root_table,
   47.46 +           hd->paging_mode, phys_addr>>PAGE_SHIFT);
   47.47 +        if ( pte == 0 )
   47.48 +        {
   47.49 +            dprintk(XENLOG_ERR,
   47.50 +                    "AMD IOMMU: Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
   47.51 +            spin_unlock_irqrestore(&hd->mapping_lock, flags);
   47.52 +            return -EFAULT;
   47.53 +        }
   47.54 +        set_page_table_entry_present((u32 *)pte,
   47.55 +           phys_addr, iw, ir);
   47.56 +        phys_addr += PAGE_SIZE;
   47.57 +    }
   47.58 +    spin_unlock_irqrestore(&hd->mapping_lock, flags);
   47.59 +    return 0;
   47.60 +}
    48.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Feb 29 09:18:01 2008 -0700
    48.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Feb 29 09:19:58 2008 -0700
    48.3 @@ -20,6 +20,7 @@
    48.4  
    48.5  #include <asm/amd-iommu.h>
    48.6  #include <asm/hvm/svm/amd-iommu-proto.h>
    48.7 +#include <asm/hvm/svm/amd-iommu-acpi.h>
    48.8  #include <xen/sched.h>
    48.9  #include <asm/mm.h>
   48.10  #include "../pci-direct.h"
   48.11 @@ -30,6 +31,9 @@ long amd_iommu_poll_comp_wait = COMPLETI
   48.12  static long amd_iommu_cmd_buffer_entries = IOMMU_CMD_BUFFER_DEFAULT_ENTRIES;
   48.13  int nr_amd_iommus = 0;
   48.14  
   48.15 +unsigned short ivrs_bdf_entries = 0;
   48.16 +struct ivrs_mappings *ivrs_mappings = NULL;
   48.17 +
   48.18  /* will set if amd-iommu HW is found */
   48.19  int amd_iommu_enabled = 0;
   48.20  
   48.21 @@ -82,13 +86,12 @@ static void __init detect_cleanup(void)
   48.22          deallocate_iommu_resources(iommu);
   48.23          xfree(iommu);
   48.24      }
   48.25 -}
   48.26  
   48.27 -static int requestor_id_from_bdf(int bdf)
   48.28 -{
   48.29 -    /* HACK - HACK */
   48.30 -    /* account for possible 'aliasing' by parent device */
   48.31 -    return bdf;
   48.32 +    if ( ivrs_mappings )
   48.33 +    {
   48.34 +        xfree(ivrs_mappings);
   48.35 +        ivrs_mappings = NULL;
   48.36 +    }
   48.37  }
   48.38  
   48.39  static int __init allocate_iommu_table_struct(struct table_struct *table,
   48.40 @@ -179,21 +182,52 @@ static int __init amd_iommu_init(void)
   48.41  {
   48.42      struct amd_iommu *iommu;
   48.43      unsigned long flags;
   48.44 +    u16 bdf;
   48.45  
   48.46      for_each_amd_iommu ( iommu )
   48.47      {
   48.48          spin_lock_irqsave(&iommu->lock, flags);
   48.49  
   48.50 +        /* assign default IOMMU values */
   48.51 +        iommu->coherent = IOMMU_CONTROL_ENABLED;
   48.52 +        iommu->isochronous = IOMMU_CONTROL_ENABLED;
   48.53 +        iommu->res_pass_pw = IOMMU_CONTROL_ENABLED;
   48.54 +        iommu->pass_pw = IOMMU_CONTROL_ENABLED;
   48.55 +        iommu->ht_tunnel_enable = iommu->ht_tunnel_support ?
   48.56 +            IOMMU_CONTROL_ENABLED : IOMMU_CONTROL_DISABLED;
   48.57 +        iommu->exclusion_enable = IOMMU_CONTROL_DISABLED;
   48.58 +        iommu->exclusion_allow_all = IOMMU_CONTROL_DISABLED;
   48.59 +
   48.60          /* register IOMMU data strucures in MMIO space */
   48.61          if ( map_iommu_mmio_region(iommu) != 0 )
   48.62              goto error_out;
   48.63          register_iommu_dev_table_in_mmio_space(iommu);
   48.64          register_iommu_cmd_buffer_in_mmio_space(iommu);
   48.65  
   48.66 +        spin_unlock_irqrestore(&iommu->lock, flags);
   48.67 +    }
   48.68 +
   48.69 +    /* assign default values for device entries */
   48.70 +    for ( bdf = 0; bdf < ivrs_bdf_entries; ++bdf )
   48.71 +    {
   48.72 +        ivrs_mappings[bdf].dte_requestor_id = bdf;
   48.73 +        ivrs_mappings[bdf].dte_sys_mgt_enable =
   48.74 +            IOMMU_DEV_TABLE_SYS_MGT_MSG_FORWARDED;
   48.75 +        ivrs_mappings[bdf].dte_allow_exclusion =
   48.76 +            IOMMU_CONTROL_DISABLED;
   48.77 +        ivrs_mappings[bdf].unity_map_enable =
   48.78 +            IOMMU_CONTROL_DISABLED;
   48.79 +    }
   48.80 +
   48.81 +    if ( acpi_table_parse(ACPI_IVRS, parse_ivrs_table) != 0 )
   48.82 +        dprintk(XENLOG_INFO, "AMD IOMMU: Did not find IVRS table!\n");
   48.83 +
   48.84 +    for_each_amd_iommu ( iommu )
   48.85 +    {
   48.86 +        spin_lock_irqsave(&iommu->lock, flags);
   48.87          /* enable IOMMU translation services */
   48.88          enable_iommu(iommu);
   48.89          nr_amd_iommus++;
   48.90 -
   48.91          spin_unlock_irqrestore(&iommu->lock, flags);
   48.92      }
   48.93  
   48.94 @@ -229,31 +263,38 @@ struct amd_iommu *find_iommu_for_device(
   48.95  }
   48.96  
   48.97  void amd_iommu_setup_domain_device(
   48.98 -    struct domain *domain, struct amd_iommu *iommu, int requestor_id)
   48.99 +    struct domain *domain, struct amd_iommu *iommu, int bdf)
  48.100  {
  48.101      void *dte;
  48.102      u64 root_ptr;
  48.103      unsigned long flags;
  48.104 +    int req_id;
  48.105 +    u8 sys_mgt, dev_ex;
  48.106      struct hvm_iommu *hd = domain_hvm_iommu(domain);
  48.107  
  48.108 -    BUG_ON( !hd->root_table||!hd->paging_mode );
  48.109 +    BUG_ON( !hd->root_table || !hd->paging_mode );
  48.110  
  48.111      root_ptr = (u64)virt_to_maddr(hd->root_table);
  48.112 +    /* get device-table entry */
  48.113 +    req_id = ivrs_mappings[bdf].dte_requestor_id;
  48.114      dte = iommu->dev_table.buffer +
  48.115 -        (requestor_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
  48.116 +        (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
  48.117  
  48.118      if ( !amd_iommu_is_dte_page_translation_valid((u32 *)dte) )
  48.119      {
  48.120          spin_lock_irqsave(&iommu->lock, flags); 
  48.121  
  48.122 -        amd_iommu_set_dev_table_entry(
  48.123 -            (u32 *)dte,
  48.124 -            root_ptr, hd->domain_id, hd->paging_mode);
  48.125 -        invalidate_dev_table_entry(iommu, requestor_id);
  48.126 +        /* bind DTE to domain page-tables */
  48.127 +        sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable;
  48.128 +        dev_ex = ivrs_mappings[req_id].dte_allow_exclusion;
  48.129 +        amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr,
  48.130 +            req_id, sys_mgt, dev_ex, hd->paging_mode);
  48.131 +
  48.132 +        invalidate_dev_table_entry(iommu, req_id);
  48.133          flush_command_buffer(iommu);
  48.134          dprintk(XENLOG_INFO, "AMD IOMMU: Set DTE req_id:%x, "
  48.135                  "root_ptr:%"PRIx64", domain_id:%d, paging_mode:%d\n",
  48.136 -                requestor_id, root_ptr, hd->domain_id, hd->paging_mode);
  48.137 +                req_id, root_ptr, hd->domain_id, hd->paging_mode);
  48.138  
  48.139          spin_unlock_irqrestore(&iommu->lock, flags);
  48.140      }
  48.141 @@ -266,7 +307,7 @@ void __init amd_iommu_setup_dom0_devices
  48.142      struct pci_dev *pdev;
  48.143      int bus, dev, func;
  48.144      u32 l;
  48.145 -    int req_id, bdf;
  48.146 +    int bdf;
  48.147  
  48.148      for ( bus = 0; bus < 256; bus++ )
  48.149      {
  48.150 @@ -286,11 +327,12 @@ void __init amd_iommu_setup_dom0_devices
  48.151                  list_add_tail(&pdev->list, &hd->pdev_list);
  48.152  
  48.153                  bdf = (bus << 8) | pdev->devfn;
  48.154 -                req_id = requestor_id_from_bdf(bdf);
  48.155 -                iommu = find_iommu_for_device(bus, pdev->devfn);
  48.156 +                /* supported device? */
  48.157 +                iommu = (bdf < ivrs_bdf_entries) ?
  48.158 +                    find_iommu_for_device(bus, pdev->devfn) : NULL;
  48.159  
  48.160                  if ( iommu )
  48.161 -                    amd_iommu_setup_domain_device(dom0, iommu, req_id);
  48.162 +                    amd_iommu_setup_domain_device(dom0, iommu, bdf);
  48.163              }
  48.164          }
  48.165      }
  48.166 @@ -299,6 +341,8 @@ void __init amd_iommu_setup_dom0_devices
  48.167  int amd_iommu_detect(void)
  48.168  {
  48.169      unsigned long i;
  48.170 +    int last_bus;
  48.171 +    struct amd_iommu *iommu;
  48.172  
  48.173      if ( !enable_amd_iommu )
  48.174      {
  48.175 @@ -319,6 +363,28 @@ int amd_iommu_detect(void)
  48.176          printk("AMD IOMMU: Not found!\n");
  48.177          return 0;
  48.178      }
  48.179 +    else
  48.180 +    {
  48.181 +        /* allocate 'ivrs mappings' table */
  48.182 +        /* note: the table has entries to accomodate all IOMMUs */
  48.183 +        last_bus = 0;
  48.184 +        for_each_amd_iommu (iommu)
  48.185 +           if (iommu->last_downstream_bus > last_bus)
  48.186 +               last_bus = iommu->last_downstream_bus;
  48.187 +
  48.188 +        ivrs_bdf_entries = (last_bus + 1) *
  48.189 +                IOMMU_DEV_TABLE_ENTRIES_PER_BUS;
  48.190 +        ivrs_mappings = xmalloc_array( struct ivrs_mappings, ivrs_bdf_entries);
  48.191 +
  48.192 +        if ( !ivrs_mappings )
  48.193 +        {
  48.194 +            dprintk(XENLOG_ERR, "AMD IOMMU:"
  48.195 +                        " Error allocating IVRS DevMappings table\n");
  48.196 +            goto error_out;
  48.197 +        }
  48.198 +        memset(ivrs_mappings, 0,
  48.199 +            ivrs_bdf_entries * sizeof(struct ivrs_mappings));
  48.200 +    }
  48.201  
  48.202      if ( amd_iommu_init() != 0 )
  48.203      {
  48.204 @@ -407,23 +473,25 @@ int amd_iommu_domain_init(struct domain 
  48.205  }
  48.206  
  48.207  static void amd_iommu_disable_domain_device(
  48.208 -    struct domain *domain, struct amd_iommu *iommu, u16 requestor_id)
  48.209 +    struct domain *domain, struct amd_iommu *iommu, int bdf)
  48.210  {
  48.211      void *dte;
  48.212      unsigned long flags;
  48.213 +    int req_id;
  48.214  
  48.215 +    req_id = ivrs_mappings[bdf].dte_requestor_id;
  48.216      dte = iommu->dev_table.buffer +
  48.217 -        (requestor_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
  48.218 +        (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
  48.219  
  48.220      if ( amd_iommu_is_dte_page_translation_valid((u32 *)dte) )
  48.221      {
  48.222          spin_lock_irqsave(&iommu->lock, flags); 
  48.223          memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE);
  48.224 -        invalidate_dev_table_entry(iommu, requestor_id);
  48.225 +        invalidate_dev_table_entry(iommu, req_id);
  48.226          flush_command_buffer(iommu);
  48.227          dprintk(XENLOG_INFO , "AMD IOMMU: disable DTE 0x%x,"
  48.228                  " domain_id:%d, paging_mode:%d\n",
  48.229 -                requestor_id,  domain_hvm_iommu(domain)->domain_id,
  48.230 +                req_id,  domain_hvm_iommu(domain)->domain_id,
  48.231                  domain_hvm_iommu(domain)->paging_mode);
  48.232          spin_unlock_irqrestore(&iommu->lock, flags);
  48.233      }
  48.234 @@ -438,7 +506,7 @@ static int reassign_device( struct domai
  48.235      struct hvm_iommu *target_hd = domain_hvm_iommu(target);
  48.236      struct pci_dev *pdev;
  48.237      struct amd_iommu *iommu;
  48.238 -    int req_id, bdf;
  48.239 +    int bdf;
  48.240      unsigned long flags;
  48.241  
  48.242      for_each_pdev( source, pdev )
  48.243 @@ -450,12 +518,13 @@ static int reassign_device( struct domai
  48.244          pdev->devfn = devfn;
  48.245  
  48.246          bdf = (bus << 8) | devfn;
  48.247 -        req_id = requestor_id_from_bdf(bdf);
  48.248 -        iommu = find_iommu_for_device(bus, devfn);
  48.249 +        /* supported device? */
  48.250 +        iommu = (bdf < ivrs_bdf_entries) ?
  48.251 +            find_iommu_for_device(bus, pdev->devfn) : NULL;
  48.252  
  48.253          if ( iommu )
  48.254          {
  48.255 -            amd_iommu_disable_domain_device(source, iommu, req_id);
  48.256 +            amd_iommu_disable_domain_device(source, iommu, bdf);
  48.257              /* Move pci device from the source domain to target domain. */
  48.258              spin_lock_irqsave(&source_hd->iommu_list_lock, flags);
  48.259              spin_lock_irqsave(&target_hd->iommu_list_lock, flags);
  48.260 @@ -463,7 +532,7 @@ static int reassign_device( struct domai
  48.261              spin_unlock_irqrestore(&target_hd->iommu_list_lock, flags);
  48.262              spin_unlock_irqrestore(&source_hd->iommu_list_lock, flags);
  48.263  
  48.264 -            amd_iommu_setup_domain_device(target, iommu, req_id);
  48.265 +            amd_iommu_setup_domain_device(target, iommu, bdf);
  48.266              gdprintk(XENLOG_INFO ,
  48.267                       "AMD IOMMU: reassign %x:%x.%x domain %d -> domain %d\n",
  48.268                       bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
  48.269 @@ -484,6 +553,19 @@ static int reassign_device( struct domai
  48.270  
  48.271  int amd_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
  48.272  {
  48.273 +    int bdf = (bus << 8) | devfn;
  48.274 +    int req_id;
  48.275 +    req_id = ivrs_mappings[bdf].dte_requestor_id;
  48.276 +
  48.277 +    if (ivrs_mappings[req_id].unity_map_enable)
  48.278 +    {
  48.279 +        amd_iommu_reserve_domain_unity_map(d,
  48.280 +            ivrs_mappings[req_id].addr_range_start,
  48.281 +            ivrs_mappings[req_id].addr_range_length,
  48.282 +            ivrs_mappings[req_id].write_permission,
  48.283 +            ivrs_mappings[req_id].read_permission);
  48.284 +    }
  48.285 +
  48.286      pdev_flr(bus, devfn);
  48.287      return reassign_device(dom0, d, bus, devfn);
  48.288  }
    49.1 --- a/xen/include/asm-x86/amd-iommu.h	Fri Feb 29 09:18:01 2008 -0700
    49.2 +++ b/xen/include/asm-x86/amd-iommu.h	Fri Feb 29 09:19:58 2008 -0700
    49.3 @@ -43,15 +43,26 @@ struct amd_iommu {
    49.4      struct list_head list;
    49.5      spinlock_t lock; /* protect iommu */
    49.6  
    49.7 -    int iotlb_support;
    49.8 -    int ht_tunnel_support;
    49.9 -    int not_present_cached;
   49.10 +    u16 bdf;
   49.11 +    u8  cap_offset;
   49.12      u8  revision;
   49.13 +    u8  unit_id;
   49.14 +    u8  msi_number;
   49.15  
   49.16      u8  root_bus;
   49.17      u8  first_devfn;
   49.18      u8  last_devfn;
   49.19  
   49.20 +    u8 pte_not_present_cached;
   49.21 +    u8 ht_tunnel_support;
   49.22 +    u8 iotlb_support;
   49.23 +
   49.24 +    u8 isochronous;
   49.25 +    u8 coherent;
   49.26 +    u8 res_pass_pw;
   49.27 +    u8 pass_pw;
   49.28 +    u8 ht_tunnel_enable;
   49.29 +
   49.30      int last_downstream_bus;
   49.31      int downstream_bus_present[PCI_MAX_BUS_COUNT];
   49.32  
   49.33 @@ -61,10 +72,23 @@ struct amd_iommu {
   49.34      struct table_struct dev_table;
   49.35      struct table_struct cmd_buffer;
   49.36      u32 cmd_buffer_tail;
   49.37 +    struct table_struct event_log;
   49.38 +    u32 event_log_head;
   49.39  
   49.40 -    int exclusion_enabled;
   49.41 -    unsigned long exclusion_base;
   49.42 -    unsigned long exclusion_limit;
   49.43 +    int exclusion_enable;
   49.44 +    int exclusion_allow_all;
   49.45 +    uint64_t exclusion_base;
   49.46 +    uint64_t exclusion_limit;
   49.47  };
   49.48  
   49.49 +struct ivrs_mappings {
   49.50 +    u16 dte_requestor_id;
   49.51 +    u8 dte_sys_mgt_enable;
   49.52 +    u8 dte_allow_exclusion;
   49.53 +    u8 unity_map_enable;
   49.54 +    u8 write_permission;
   49.55 +    u8 read_permission;
   49.56 +    unsigned long addr_range_start;
   49.57 +    unsigned long addr_range_length;
   49.58 +};
   49.59  #endif /* _ASM_X86_64_AMD_IOMMU_H */
    50.1 --- a/xen/include/asm-x86/domain.h	Fri Feb 29 09:18:01 2008 -0700
    50.2 +++ b/xen/include/asm-x86/domain.h	Fri Feb 29 09:19:58 2008 -0700
    50.3 @@ -97,6 +97,11 @@ struct shadow_domain {
    50.4  
    50.5      /* Fast MMIO path heuristic */
    50.6      int has_fast_mmio_entries;
    50.7 +
    50.8 +    /* reflect guest table dirty status, incremented by write
    50.9 +     * emulation and remove write permission
   50.10 +     */
   50.11 +    atomic_t          gtable_dirty_version;
   50.12  };
   50.13  
   50.14  struct shadow_vcpu {
    51.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    51.2 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-acpi.h	Fri Feb 29 09:19:58 2008 -0700
    51.3 @@ -0,0 +1,176 @@
    51.4 +/*
    51.5 + * Copyright (C) 2007 Advanced Micro Devices, Inc.
    51.6 + * Author: Leo Duran <leo.duran@amd.com>
    51.7 + * Author: Wei Wang <wei.wang2@amd.com> - adapted to xen
    51.8 + *
    51.9 + * This program is free software; you can redistribute it and/or modify
   51.10 + * it under the terms of the GNU General Public License as published by
   51.11 + * the Free Software Foundation; either version 2 of the License, or
   51.12 + * (at your option) any later version.
   51.13 + *
   51.14 + * This program is distributed in the hope that it will be useful,
   51.15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   51.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   51.17 + * GNU General Public License for more details.
   51.18 + *
   51.19 + * You should have received a copy of the GNU General Public License
   51.20 + * along with this program; if not, write to the Free Software
   51.21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
   51.22 + */
   51.23 +
   51.24 +#ifndef _ASM_X86_64_AMD_IOMMU_ACPI_H
   51.25 +#define _ASM_X86_64_AMD_IOMMU_ACPI_H
   51.26 +
   51.27 +#include <xen/acpi.h>
   51.28 +
   51.29 +/* I/O Virtualization Reporting Structure */
   51.30 +#define AMD_IOMMU_ACPI_IVRS_SIG            "IVRS"
   51.31 +#define AMD_IOMMU_ACPI_IVHD_TYPE       0x10
   51.32 +#define AMD_IOMMU_ACPI_IVMD_ALL_TYPE       0x20
   51.33 +#define AMD_IOMMU_ACPI_IVMD_ONE_TYPE       0x21
   51.34 +#define AMD_IOMMU_ACPI_IVMD_RANGE_TYPE     0x22
   51.35 +#define AMD_IOMMU_ACPI_IVMD_IOMMU_TYPE     0x23
   51.36 +
   51.37 +/* 4-byte Device Entries */
   51.38 +#define AMD_IOMMU_ACPI_IVHD_DEV_U32_PAD        0
   51.39 +#define AMD_IOMMU_ACPI_IVHD_DEV_SELECT     2
   51.40 +#define AMD_IOMMU_ACPI_IVHD_DEV_RANGE_START    3
   51.41 +#define AMD_IOMMU_ACPI_IVHD_DEV_RANGE_END  4
   51.42 +
   51.43 +/* 8-byte Device Entries */
   51.44 +#define AMD_IOMMU_ACPI_IVHD_DEV_U64_PAD        64
   51.45 +#define AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_SELECT   66
   51.46 +#define AMD_IOMMU_ACPI_IVHD_DEV_ALIAS_RANGE    67
   51.47 +#define AMD_IOMMU_ACPI_IVHD_DEV_EXT_SELECT 70
   51.48 +#define AMD_IOMMU_ACPI_IVHD_DEV_EXT_RANGE  71
   51.49 +
   51.50 +/* IVHD IOMMU Flags */
   51.51 +#define AMD_IOMMU_ACPI_COHERENT_MASK       0x20
   51.52 +#define AMD_IOMMU_ACPI_COHERENT_SHIFT      5
   51.53 +#define AMD_IOMMU_ACPI_IOTLB_SUP_MASK      0x10
   51.54 +#define AMD_IOMMU_ACPI_IOTLB_SUP_SHIFT     4
   51.55 +#define AMD_IOMMU_ACPI_ISOC_MASK       0x08
   51.56 +#define AMD_IOMMU_ACPI_ISOC_SHIFT      3
   51.57 +#define AMD_IOMMU_ACPI_RES_PASS_PW_MASK        0x04
   51.58 +#define AMD_IOMMU_ACPI_RES_PASS_PW_SHIFT   2
   51.59 +#define AMD_IOMMU_ACPI_PASS_PW_MASK        0x02
   51.60 +#define AMD_IOMMU_ACPI_PASS_PW_SHIFT       1
   51.61 +#define AMD_IOMMU_ACPI_HT_TUN_ENB_MASK     0x01
   51.62 +#define AMD_IOMMU_ACPI_HT_TUN_ENB_SHIFT        0
   51.63 +
   51.64 +/* IVHD Device Flags */
   51.65 +#define AMD_IOMMU_ACPI_LINT1_PASS_MASK     0x80
   51.66 +#define AMD_IOMMU_ACPI_LINT1_PASS_SHIFT        7
   51.67 +#define AMD_IOMMU_ACPI_LINT0_PASS_MASK     0x40
   51.68 +#define AMD_IOMMU_ACPI_LINT0_PASS_SHIFT        6
   51.69 +#define AMD_IOMMU_ACPI_SYS_MGT_MASK        0x30
   51.70 +#define AMD_IOMMU_ACPI_SYS_MGT_SHIFT       4
   51.71 +#define AMD_IOMMU_ACPI_NMI_PASS_MASK       0x04
   51.72 +#define AMD_IOMMU_ACPI_NMI_PASS_SHIFT      2
   51.73 +#define AMD_IOMMU_ACPI_EINT_PASS_MASK      0x02
   51.74 +#define AMD_IOMMU_ACPI_EINT_PASS_SHIFT     1
   51.75 +#define AMD_IOMMU_ACPI_INIT_PASS_MASK      0x01
   51.76 +#define AMD_IOMMU_ACPI_INIT_PASS_SHIFT     0
   51.77 +
   51.78 +/* IVHD Device Extended Flags */
   51.79 +#define AMD_IOMMU_ACPI_ATS_DISABLED_MASK   0x80000000
   51.80 +#define AMD_IOMMU_ACPI_ATS_DISABLED_SHIFT  31
   51.81 +
   51.82 +/* IVMD Device Flags */
   51.83 +#define AMD_IOMMU_ACPI_EXCLUSION_RANGE_MASK    0x08
   51.84 +#define AMD_IOMMU_ACPI_EXCLUSION_RANGE_SHIFT   3
   51.85 +#define AMD_IOMMU_ACPI_IW_PERMISSION_MASK  0x04
   51.86 +#define AMD_IOMMU_ACPI_IW_PERMISSION_SHIFT 2
   51.87 +#define AMD_IOMMU_ACPI_IR_PERMISSION_MASK  0x02
   51.88 +#define AMD_IOMMU_ACPI_IR_PERMISSION_SHIFT 1
   51.89 +#define AMD_IOMMU_ACPI_UNITY_MAPPING_MASK  0x01
   51.90 +#define AMD_IOMMU_ACPI_UNITY_MAPPING_SHIFT 0
   51.91 +
   51.92 +#define ACPI_OEM_ID_SIZE                6
   51.93 +#define ACPI_OEM_TABLE_ID_SIZE          8
   51.94 +
   51.95 +#pragma pack(1)
   51.96 +struct acpi_ivrs_table_header {
   51.97 +   struct acpi_table_header acpi_header;
   51.98 +   u32 io_info;
   51.99 +   u8  reserved[8];
  51.100 +};
  51.101 +
  51.102 +struct acpi_ivrs_block_header {
  51.103 +   u8  type;
  51.104 +   u8  flags;
  51.105 +   u16 length;
  51.106 +   u16 dev_id;
  51.107 +};
  51.108 +
  51.109 +struct acpi_ivhd_block_header {
  51.110 +   struct acpi_ivrs_block_header header;
  51.111 +   u16 cap_offset;
  51.112 +   u64 mmio_base;
  51.113 +   u16 pci_segment;
  51.114 +   u16 iommu_info;
  51.115 +   u8 reserved[4];
  51.116 +};
  51.117 +
  51.118 +struct acpi_ivhd_device_header {
  51.119 +   u8  type;
  51.120 +   u16 dev_id;
  51.121 +   u8  flags;
  51.122 +};
  51.123 +
  51.124 +struct acpi_ivhd_device_trailer {
  51.125 +   u8  type;
  51.126 +   u16 dev_id;
  51.127 +   u8  reserved;
  51.128 +};
  51.129 +
  51.130 +struct acpi_ivhd_device_range {
  51.131 +   struct acpi_ivhd_device_header header;
  51.132 +   struct acpi_ivhd_device_trailer trailer;
  51.133 +};
  51.134 +
  51.135 +struct acpi_ivhd_device_alias {
  51.136 +   struct acpi_ivhd_device_header header;
  51.137 +   u8  reserved1;
  51.138 +   u16 dev_id;
  51.139 +   u8  reserved2;
  51.140 +};
  51.141 +
  51.142 +struct acpi_ivhd_device_alias_range {
  51.143 +   struct acpi_ivhd_device_alias alias;
  51.144 +   struct acpi_ivhd_device_trailer trailer;
  51.145 +};
  51.146 +
  51.147 +struct acpi_ivhd_device_extended {
  51.148 +   struct acpi_ivhd_device_header header;
  51.149 +   u32 ext_flags;
  51.150 +};
  51.151 +
  51.152 +struct acpi_ivhd_device_extended_range {
  51.153 +   struct acpi_ivhd_device_extended extended;
  51.154 +   struct acpi_ivhd_device_trailer trailer;
  51.155 +};
  51.156 +
  51.157 +union acpi_ivhd_device {
  51.158 +   struct acpi_ivhd_device_header header;
  51.159 +   struct acpi_ivhd_device_range range;
  51.160 +   struct acpi_ivhd_device_alias alias;
  51.161 +   struct acpi_ivhd_device_alias_range alias_range;
  51.162 +   struct acpi_ivhd_device_extended extended;
  51.163 +   struct acpi_ivhd_device_extended_range extended_range;
  51.164 +};
  51.165 +
  51.166 +struct acpi_ivmd_block_header {
  51.167 +   struct acpi_ivrs_block_header header;
  51.168 +   union {
  51.169 +       u16 last_dev_id;
  51.170 +       u16 cap_offset;
  51.171 +       u16 reserved1;
  51.172 +   };
  51.173 +   u64 reserved2;
  51.174 +   u64 start_addr;
  51.175 +   u64 mem_length;
  51.176 +};
  51.177 +#pragma pack()
  51.178 +
  51.179 +#endif /* _ASM_X86_64_AMD_IOMMU_ACPI_H */
    52.1 --- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Fri Feb 29 09:18:01 2008 -0700
    52.2 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h	Fri Feb 29 09:19:58 2008 -0700
    52.3 @@ -118,6 +118,12 @@
    52.4  #define PCI_CAP_LAST_DEVICE_MASK	0xFF000000
    52.5  #define PCI_CAP_LAST_DEVICE_SHIFT	24
    52.6  
    52.7 +#define PCI_CAP_UNIT_ID_MASK    0x0000001F
    52.8 +#define PCI_CAP_UNIT_ID_SHIFT   0
    52.9 +#define PCI_MISC_INFO_OFFSET    0x10
   52.10 +#define PCI_CAP_MSI_NUMBER_MASK     0x0000001F
   52.11 +#define PCI_CAP_MSI_NUMBER_SHIFT    0
   52.12 +
   52.13  /* Device Table */
   52.14  #define IOMMU_DEV_TABLE_BASE_LOW_OFFSET		0x00
   52.15  #define IOMMU_DEV_TABLE_BASE_HIGH_OFFSET	0x04
    53.1 --- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	Fri Feb 29 09:18:01 2008 -0700
    53.2 +++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	Fri Feb 29 09:19:58 2008 -0700
    53.3 @@ -21,6 +21,7 @@
    53.4  #ifndef _ASM_X86_64_AMD_IOMMU_PROTO_H
    53.5  #define _ASM_X86_64_AMD_IOMMU_PROTO_H
    53.6  
    53.7 +#include <xen/sched.h>
    53.8  #include <asm/amd-iommu.h>
    53.9  
   53.10  #define for_each_amd_iommu(amd_iommu) \
   53.11 @@ -54,10 +55,12 @@ void __init enable_iommu(struct amd_iomm
   53.12  int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
   53.13  int amd_iommu_unmap_page(struct domain *d, unsigned long gfn);
   53.14  void *amd_iommu_get_vptr_from_page_table_entry(u32 *entry);
   53.15 +int amd_iommu_reserve_domain_unity_map(struct domain *domain,
   53.16 +        unsigned long phys_addr, unsigned long size, int iw, int ir);
   53.17  
   53.18  /* device table functions */
   53.19 -void amd_iommu_set_dev_table_entry(u32 *dte,
   53.20 -        u64 root_ptr, u16 domain_id, u8 paging_mode);
   53.21 +void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr,
   53.22 +        u16 domain_id, u8 sys_mgt, u8 dev_ex, u8 paging_mode);
   53.23  int amd_iommu_is_dte_page_translation_valid(u32 *entry);
   53.24  void invalidate_dev_table_entry(struct amd_iommu *iommu,
   53.25              u16 devic_id);
   53.26 @@ -69,11 +72,14 @@ void flush_command_buffer(struct amd_iom
   53.27  /* iommu domain funtions */
   53.28  int amd_iommu_domain_init(struct domain *domain);
   53.29  void amd_iommu_setup_domain_device(struct domain *domain,
   53.30 -    struct amd_iommu *iommu, int requestor_id);
   53.31 +    struct amd_iommu *iommu, int bdf);
   53.32  
   53.33  /* find iommu for bdf */
   53.34  struct amd_iommu *find_iommu_for_device(int bus, int devfn);
   53.35  
   53.36 +/* amd-iommu-acpi functions */
   53.37 +int __init parse_ivrs_table(unsigned long phys_addr, unsigned long size);
   53.38 +
   53.39  static inline u32 get_field_from_reg_u32(u32 reg_value, u32 mask, u32 shift)
   53.40  {
   53.41      u32 field;
   53.42 @@ -91,4 +97,16 @@ static inline u32 set_field_in_reg_u32(u
   53.43      return reg_value;
   53.44  }
   53.45  
   53.46 +static inline u8 get_field_from_byte(u8 value, u8 mask, u8 shift)
   53.47 +{
   53.48 +    u8 field;
   53.49 +    field = (value & mask) >> shift;
   53.50 +    return field;
   53.51 +}
   53.52 +
   53.53 +static inline unsigned long region_to_pages(unsigned long addr, unsigned long size)
   53.54 +{
   53.55 +    return (PAGE_ALIGN(addr + size) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
   53.56 +}
   53.57 +
   53.58  #endif /* _ASM_X86_64_AMD_IOMMU_PROTO_H */
    54.1 --- a/xen/include/asm-x86/perfc_defn.h	Fri Feb 29 09:18:01 2008 -0700
    54.2 +++ b/xen/include/asm-x86/perfc_defn.h	Fri Feb 29 09:19:58 2008 -0700
    54.3 @@ -88,6 +88,11 @@ PERFCOUNTER(shadow_up_pointer,     "shad
    54.4  PERFCOUNTER(shadow_unshadow_bf,    "shadow unshadow brute-force")
    54.5  PERFCOUNTER(shadow_get_page_fail,  "shadow_get_page_from_l1e failed")
    54.6  PERFCOUNTER(shadow_guest_walk,     "shadow walks guest tables")
    54.7 +PERFCOUNTER(shadow_check_gwalk,    "shadow checks gwalk")
    54.8 +PERFCOUNTER(shadow_inconsistent_gwalk, "shadow check inconsistent gwalk")
    54.9 +PERFCOUNTER(shadow_rm_write_flush_tlb,
   54.10 +                                   "shadow flush tlb by removing write perm")
   54.11 +
   54.12  PERFCOUNTER(shadow_invlpg,         "shadow emulates invlpg")
   54.13  PERFCOUNTER(shadow_invlpg_fault,   "shadow invlpg faults")
   54.14  
    55.1 --- a/xen/include/public/io/kbdif.h	Fri Feb 29 09:18:01 2008 -0700
    55.2 +++ b/xen/include/public/io/kbdif.h	Fri Feb 29 09:19:58 2008 -0700
    55.3 @@ -65,7 +65,7 @@ struct xenkbd_position
    55.4      uint8_t type;        /* XENKBD_TYPE_POS */
    55.5      int32_t abs_x;       /* absolute X position (in FB pixels) */
    55.6      int32_t abs_y;       /* absolute Y position (in FB pixels) */
    55.7 -    int32_t abs_z;       /* absolute Z position (wheel) */
    55.8 +    int32_t rel_z;       /* relative Z motion (wheel) */
    55.9  };
   55.10  
   55.11  #define XENKBD_IN_EVENT_SIZE 40
    56.1 --- a/xen/include/public/kexec.h	Fri Feb 29 09:18:01 2008 -0700
    56.2 +++ b/xen/include/public/kexec.h	Fri Feb 29 09:19:58 2008 -0700
    56.3 @@ -126,9 +126,18 @@ typedef struct xen_kexec_load {
    56.4      xen_kexec_image_t image;
    56.5  } xen_kexec_load_t;
    56.6  
    56.7 -#define KEXEC_RANGE_MA_CRASH 0   /* machine address and size of crash area */
    56.8 -#define KEXEC_RANGE_MA_XEN   1   /* machine address and size of Xen itself */
    56.9 -#define KEXEC_RANGE_MA_CPU   2   /* machine address and size of a CPU note */
   56.10 +#define KEXEC_RANGE_MA_CRASH      0 /* machine address and size of crash area */
   56.11 +#define KEXEC_RANGE_MA_XEN        1 /* machine address and size of Xen itself */
   56.12 +#define KEXEC_RANGE_MA_CPU        2 /* machine address and size of a CPU note */
   56.13 +#define KEXEC_RANGE_MA_XENHEAP    3 /* machine address and size of xenheap
   56.14 +                                     * Note that although this is adjacent
   56.15 +                                     * to Xen it exists in a separate EFI
   56.16 +                                     * region on ia64, and thus needs to be
   56.17 +                                     * inserted into iomem_machine separately */
   56.18 +#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of
   56.19 +                                     * the ia64_boot_param */
    56.20 +#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size
    56.21 +                                     * of the EFI Memory Map */
   56.22  
   56.23  /*
   56.24   * Find the address and size of certain memory areas
    57.1 --- a/xen/include/xen/acpi.h	Fri Feb 29 09:18:01 2008 -0700
    57.2 +++ b/xen/include/xen/acpi.h	Fri Feb 29 09:19:58 2008 -0700
    57.3 @@ -368,6 +368,7 @@ enum acpi_table_id {
    57.4  	ACPI_HPET,
    57.5  	ACPI_MCFG,
    57.6  	ACPI_DMAR,
    57.7 +	ACPI_IVRS,
    57.8  	ACPI_TABLE_COUNT
    57.9  };
   57.10  
    58.1 --- a/xen/include/xen/kexec.h	Fri Feb 29 09:18:01 2008 -0700
    58.2 +++ b/xen/include/xen/kexec.h	Fri Feb 29 09:19:58 2008 -0700
    58.3 @@ -31,6 +31,7 @@ void kexec_disable_iosapic(void);
    58.4  void kexec_crash_save_cpu(void);
    58.5  crash_xen_info_t *kexec_crash_save_info(void);
    58.6  void machine_crash_shutdown(void);
    58.7 +int machine_kexec_get(xen_kexec_range_t *range);
    58.8  
    58.9  #endif /* __XEN_KEXEC_H__ */
   58.10