ia64/xen-unstable

changeset 4055:a13b9052d91d

bitkeeper revision 1.1236.1.68 (422f5a4b3BG7Wh49f5-3defzeghSBw)

Merge bk://xen.bkbits.net/xeno-unstable.bk
into bkbits.net:/repos/x/xen-ia64/xeno-unstable-ia64.bk
author xen-ia64.adm@bkbits.net
date Wed Mar 09 20:19:23 2005 +0000 (2005-03-09)
parents f8026d38aa87 a38777f9f099
children 557e8cf002c3
files .rootkeys BitKeeper/etc/ignore BitKeeper/etc/logging_ok buildconfigs/Rules.mk buildconfigs/mk.netbsd-2.0-xenU linux-2.6.10-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6.10-xen-sparse/arch/xen/kernel/ctrl_if.c linux-2.6.10-xen-sparse/arch/xen/kernel/devmem.c linux-2.6.10-xen-sparse/arch/xen/kernel/gnttab.c linux-2.6.10-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.10-xen-sparse/drivers/xen/blkback/interface.c linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h linux-2.6.10-xen-sparse/include/asm-xen/gnttab.h linux-2.6.10-xen-sparse/include/asm-xen/hypervisor.h netbsd-2.0-xen-sparse/Makefile netbsd-2.0-xen-sparse/mkbuildtree netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c netbsd-2.0-xen-sparse/sys/arch/xen/xen/hypervisor.c netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs.h netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c tools/blktap/blktaplib.c tools/libxc/Makefile tools/libxc/xc.h tools/libxc/xc_gnttab.c tools/libxc/xc_vmx_build.c tools/tests/Makefile tools/tests/test_x86_emulator.c tools/x2d2/Makefile tools/x2d2/cntrl_con.c tools/x2d2/minixend.c tools/x2d2/minixend.h tools/x2d2/util.c tools/xcs/ctrl_interface.c xen/Makefile xen/arch/ia64/domain.c xen/arch/ia64/patch/linux-2.6.7/time.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/time.c xen/arch/x86/traps.c xen/arch/x86/x86_32/seg_fixup.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/traps.c 
xen/arch/x86/x86_emulate.c xen/common/domain.c xen/common/grant_table.c xen/common/page_alloc.c xen/common/physdev.c xen/common/schedule.c xen/include/asm-x86/mm.h xen/include/asm-x86/shadow.h xen/include/asm-x86/x86_64/string.h xen/include/asm-x86/x86_emulate.h xen/include/public/arch-x86_64.h xen/include/public/grant_table.h xen/include/public/io/blkif.h xen/include/public/io/domain_controller.h xen/include/public/io/ring.h xen/include/public/io/usbif.h xen/include/xen/grant_table.h xen/include/xen/sched.h xen/include/xen/time.h
line diff
     1.1 --- a/.rootkeys	Tue Mar 08 22:57:09 2005 +0000
     1.2 +++ b/.rootkeys	Wed Mar 09 20:19:23 2005 +0000
     1.3 @@ -311,6 +311,8 @@ 413cb3b46JnvK1UurZAubeQoFg1W-w netbsd-2.
     1.4  413cb3b5rIKB3TbyhK3pbNyVkYysqA netbsd-2.0-xen-sparse/sys/arch/xen/xen/xen_debug.c
     1.5  413cb3b5eKxnzoodEqaWn2wrPnHWnA netbsd-2.0-xen-sparse/sys/arch/xen/xen/xencons.c
     1.6  413cb3b5F56TvQWAmO5TsuzhtzLFPQ netbsd-2.0-xen-sparse/sys/arch/xen/xen/xenkbc.c
     1.7 +422e4430vKaHLOOGS7X-SUUe3EBCgw netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs.h
     1.8 +422e4430-gOD358H8nGGnNWes08Nng netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c
     1.9  413cb3b53nyOv1OIeDSsCXhBFDXvJA netbsd-2.0-xen-sparse/sys/nfs/files.nfs
    1.10  413aa1d0oNP8HXLvfPuMe6cSroUfSA patches/linux-2.6.10/agpgart.patch
    1.11  3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
    1.12 @@ -558,6 +560,7 @@ 3fbba6dbEVkVMX0JuDFzap9jeaucGA tools/lib
    1.13  3fbba6dbasJQV-MVElDC0DGSHMiL5w tools/libxc/xc_domain.c
    1.14  40278d99BLsfUv3qxv0I8C1sClZ0ow tools/libxc/xc_elf.h
    1.15  403e0977Bjsm_e82pwvl9VvaJxh8Gg tools/libxc/xc_evtchn.c
    1.16 +4227c129ZKjJPNYooHVzBCyinf7Y6Q tools/libxc/xc_gnttab.c
    1.17  40e03333Eegw8czSWvHsbKxrRZJjRA tools/libxc/xc_io.c
    1.18  40e03333vrWGbLAhyJjXlqCHaJt7eA tools/libxc/xc_io.h
    1.19  3fbba6dbNCU7U6nsMYiXzKkp3ztaJg tools/libxc/xc_linux_build.c
    1.20 @@ -785,6 +788,8 @@ 4104ffca-jPHLVOrW0n0VghEXXtKxg tools/sv/
    1.21  40fcefb3yMSrZvApO9ToIi-iQwnchA tools/sv/images/xen.png
    1.22  41013a83z27rKvWIxAfUBMVZ1eDCDg tools/sv/inc/script.js
    1.23  40fcefb3zGC9XNBkSwTEobCoq8YClA tools/sv/inc/style.css
    1.24 +422f27c8MDeRoOWZNdcRC5VDTcj3TQ tools/tests/Makefile
    1.25 +422f27c81CCtXt4Lthf7JF3Ajr0fUA tools/tests/test_x86_emulator.c
    1.26  420b963dK3yGNtqxRM8npGZtrCQd1g tools/vnet/00INSTALL
    1.27  41a21888_WlknVWjSxb32Fo13_ujsw tools/vnet/00README
    1.28  420a9b706I-bN_uPdiy0m3rmDifNNg tools/vnet/INSTALL
    1.29 @@ -845,11 +850,6 @@ 41a2188aIf3Xk6uvk7KzjdpOsflAEw tools/vne
    1.30  41a2188ar6_vOO3_tEJQjmFVU3409A tools/vnet/vnetd/vcache.h
    1.31  41a2188aETrGU60X9WtGhYVfU7z0Pw tools/vnet/vnetd/vnetd.c
    1.32  41a2188ahYjemudGyB7078AWMFR-0w tools/vnet/vnetd/vnetd.h
    1.33 -4194e861IgTabTt8HOuh143QIJFD1Q tools/x2d2/Makefile
    1.34 -4194e861M2gcBz4i94cQYpqzi8n6UA tools/x2d2/cntrl_con.c
    1.35 -4194e8612TrrMvC8ZlA4h2ZYCPWz4g tools/x2d2/minixend.c
    1.36 -4194e861x2eqNCD61RYPCUEBVdMYuw tools/x2d2/minixend.h
    1.37 -4194e861A4V9VbD_FYmgXpYEj5YwVg tools/x2d2/util.c
    1.38  41d58ba63w1WfBmd6Cr_18nhLNv7PA tools/xcs/Makefile
    1.39  41d58ba6NxgkfzD_rmsGjgd_zJ3H_w tools/xcs/bindings.c
    1.40  41d58ba6I2umi60mShq4Pl0RDg7lzQ tools/xcs/connection.c
    1.41 @@ -1014,6 +1014,7 @@ 41bf1717XhPz_dNT5OKSjgmbFuWBuA xen/arch/
    1.42  42000d3cMb8o1WuFBXC07c8i3lPZBw xen/arch/x86/x86_64/traps.c
    1.43  40e96d3ahBTZqbTViInnq0lM03vs7A xen/arch/x86/x86_64/usercopy.c
    1.44  40e96d3akN3Hu_J5Bk-WXD8OGscrYQ xen/arch/x86/x86_64/xen.lds
    1.45 +422f27c8J9DQfCpegccMid59XhSmGA xen/arch/x86/x86_emulate.c
    1.46  3ddb79bdff-gj-jFGKjOejeHLqL8Lg xen/common/Makefile
    1.47  3e397e66AyyD5fYraAySWuwi9uqSXg xen/common/ac_timer.c
    1.48  3ddb79bdLX_P6iB7ILiblRLWvebapg xen/common/dom0_ops.c
    1.49 @@ -1177,6 +1178,7 @@ 4208e2a3Fktw4ZttKdDxbhvTQ6brfQ xen/inclu
    1.50  404f1bb86rAXB3aLS1vYdcqpJiEcyg xen/include/asm-x86/x86_64/regs.h
    1.51  40e1966azOJZfNI6Ilthe6Q-T3Hewg xen/include/asm-x86/x86_64/string.h
    1.52  404f1bc4tWkB9Qr8RkKtZGW5eMQzhw xen/include/asm-x86/x86_64/uaccess.h
    1.53 +422f27c8RHFkePhD34VIEpMMqofZcA xen/include/asm-x86/x86_emulate.h
    1.54  400304fcmRQmDdFYEzDh0wcBba9alg xen/include/public/COPYING
    1.55  421098b7OKb9YH_EUA_UpCxBjaqtgA xen/include/public/arch-ia64.h
    1.56  404f1bc68SXxmv0zQpXBWGrCzSyp8w xen/include/public/arch-x86_32.h
     2.1 --- a/BitKeeper/etc/ignore	Tue Mar 08 22:57:09 2005 +0000
     2.2 +++ b/BitKeeper/etc/ignore	Wed Mar 09 20:19:23 2005 +0000
     2.3 @@ -121,32 +121,6 @@ tools/blktap/vdi_snap
     2.4  tools/blktap/vdi_snap_list
     2.5  tools/blktap/vdi_tree
     2.6  tools/blktap/vdi_validate
     2.7 -tools/blktap/xen/arch-ia64.h
     2.8 -tools/blktap/xen/arch-x86_32.h
     2.9 -tools/blktap/xen/arch-x86_64.h
    2.10 -tools/blktap/xen/dom0_ops.h
    2.11 -tools/blktap/xen/event_channel.h
    2.12 -tools/blktap/xen/grant_table.h
    2.13 -tools/blktap/xen/io/blkif.h
    2.14 -tools/blktap/xen/io/domain_controller.h
    2.15 -tools/blktap/xen/io/ioreq.h
    2.16 -tools/blktap/xen/io/netif.h
    2.17 -tools/blktap/xen/io/ring.h
    2.18 -tools/blktap/xen/io/usbif.h
    2.19 -tools/blktap/xen/linux/privcmd.h
    2.20 -tools/blktap/xen/linux/suspend.h
    2.21 -tools/blktap/xen/physdev.h
    2.22 -tools/blktap/xen/sched_ctl.h
    2.23 -tools/blktap/xen/trace.h
    2.24 -tools/blktap/xen/xen.h
    2.25 -tools/cmdline/Makefile
    2.26 -tools/cmdline/ringbuster
    2.27 -tools/cmdline/ringbuster.c
    2.28 -tools/cmdline/xdestroy
    2.29 -tools/cmdline/xdestroy.c
    2.30 -tools/cmdline/xevtchn
    2.31 -tools/cmdline/xevtchn.c
    2.32 -tools/cmdline/xls
    2.33 -tools/cmdline/xls.c
    2.34 -tools/cmdline/xphysinfo
    2.35 -tools/cmdline/xphysinfo.c
    2.36 +tools/blktap/xen/*
    2.37 +tools/cmdline/*
    2.38 +tools/tests/test_x86_emulator
     3.1 --- a/BitKeeper/etc/logging_ok	Tue Mar 08 22:57:09 2005 +0000
     3.2 +++ b/BitKeeper/etc/logging_ok	Wed Mar 09 20:19:23 2005 +0000
     3.3 @@ -19,6 +19,7 @@ cl349@firebug.cl.cam.ac.uk
     3.4  cl349@freefall.cl.cam.ac.uk
     3.5  cl349@labyrinth.cl.cam.ac.uk
     3.6  cwc22@centipede.cl.cam.ac.uk
     3.7 +cwc22@donkeykong.cl.cam.ac.uk
     3.8  djm@kirby.fc.hp.com
     3.9  gm281@boulderdash.cl.cam.ac.uk
    3.10  gm281@tetrapod.cl.cam.ac.uk
    3.11 @@ -51,6 +52,7 @@ mafetter@fleming.research
    3.12  mark@maw48.kings.cam.ac.uk
    3.13  maw48@labyrinth.cl.cam.ac.uk
    3.14  mjw@wray-m-3.hpl.hp.com
    3.15 +mulix@mulix.org
    3.16  mwilli2@anvil.research
    3.17  mwilli2@equilibrium.research
    3.18  mwilli2@equilibrium.research.intel-research.net
     4.1 --- a/buildconfigs/Rules.mk	Tue Mar 08 22:57:09 2005 +0000
     4.2 +++ b/buildconfigs/Rules.mk	Wed Mar 09 20:19:23 2005 +0000
     4.3 @@ -27,7 +27,7 @@ linux-%.tar.bz2:
     4.4  # Expand NetBSD release to NetBSD version
     4.5  NETBSD_RELEASE  ?= 2.0
     4.6  NETBSD_VER      ?= $(patsubst netbsd-%-xen-sparse,%,$(wildcard netbsd-$(NETBSD_RELEASE)*-xen-sparse))
     4.7 -NETBSD_CVSSNAP  ?= 20041103
     4.8 +NETBSD_CVSSNAP  ?= 20050309
     4.9  
    4.10  # Setup NetBSD search path
    4.11  NETBSD_SRC_PATH	?= .:..
     5.1 --- a/buildconfigs/mk.netbsd-2.0-xenU	Tue Mar 08 22:57:09 2005 +0000
     5.2 +++ b/buildconfigs/mk.netbsd-2.0-xenU	Wed Mar 09 20:19:23 2005 +0000
     5.3 @@ -1,6 +1,6 @@
     5.4  
     5.5  NETBSD_RELEASE   ?= 2.0
     5.6 -NETBSD_CVSSNAP   ?= 20041103
     5.7 +NETBSD_CVSSNAP   ?= 20050309
     5.8  
     5.9  EXTRAVERSION = xenU
    5.10  
     6.1 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/ioremap.c	Tue Mar 08 22:57:09 2005 +0000
     6.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/i386/mm/ioremap.c	Wed Mar 09 20:19:23 2005 +0000
     6.3 @@ -362,7 +362,9 @@ int __direct_remap_area_pages(struct mm_
     6.4  	spin_lock(&mm->page_table_lock);
     6.5  	do {
     6.6  		error = -ENOMEM;
     6.7 -		pmd_t *pmd = pmd_alloc(mm, dir, address);
     6.8 +		pmd_t *pmd;
     6.9 +
    6.10 +		pmd = pmd_alloc(mm, dir, address);
    6.11  		if (!pmd)
    6.12  			break;
    6.13  		error = 0;
     7.1 --- a/linux-2.6.10-xen-sparse/arch/xen/kernel/ctrl_if.c	Tue Mar 08 22:57:09 2005 +0000
     7.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/kernel/ctrl_if.c	Wed Mar 09 20:19:23 2005 +0000
     7.3 @@ -50,11 +50,11 @@
     7.4   * Extra ring macros to sync a consumer index up to the public producer index. 
     7.5   * Generally UNSAFE, but we use it for recovery and shutdown in some cases.
     7.6   */
     7.7 -#define RING_DROP_PENDING_REQUESTS(_p, _r)                              \
     7.8 +#define RING_DROP_PENDING_REQUESTS(_r)                                  \
     7.9      do {                                                                \
    7.10          (_r)->req_cons = (_r)->sring->req_prod;                         \
    7.11      } while (0)
    7.12 -#define RING_DROP_PENDING_RESPONSES(_p, _r)                             \
    7.13 +#define RING_DROP_PENDING_RESPONSES(_r)                                 \
    7.14      do {                                                                \
    7.15          (_r)->rsp_cons = (_r)->sring->rsp_prod;                         \
    7.16      } while (0)
    7.17 @@ -125,7 +125,7 @@ static void ctrl_if_rxmsg_default_handle
    7.18  static void __ctrl_if_tx_tasklet(unsigned long data)
    7.19  {
    7.20      ctrl_msg_t *msg;
    7.21 -    int         was_full = RING_FULL(CTRL_RING, &ctrl_if_tx_ring);
    7.22 +    int         was_full = RING_FULL(&ctrl_if_tx_ring);
    7.23      RING_IDX    i, rp;
    7.24  
    7.25      i  = ctrl_if_tx_ring.rsp_cons;
    7.26 @@ -134,7 +134,7 @@ static void __ctrl_if_tx_tasklet(unsigne
    7.27  
    7.28      for ( ; i != rp; i++ )
    7.29      {
    7.30 -        msg = RING_GET_RESPONSE(CTRL_RING, &ctrl_if_tx_ring, i);
    7.31 +        msg = RING_GET_RESPONSE(&ctrl_if_tx_ring, i);
    7.32          
    7.33          DPRINTK("Rx-Rsp %u/%u :: %d/%d\n", i-1,
    7.34                  ctrl_if_tx_ring.sring->rsp_prod,
    7.35 @@ -157,7 +157,7 @@ static void __ctrl_if_tx_tasklet(unsigne
    7.36      smp_mb();
    7.37      ctrl_if_tx_ring.rsp_cons = i;
    7.38              
    7.39 -    if ( was_full && !RING_FULL(CTRL_RING, &ctrl_if_tx_ring) )
    7.40 +    if ( was_full && !RING_FULL(&ctrl_if_tx_ring) )
    7.41      {
    7.42          wake_up(&ctrl_if_tx_wait);
    7.43          run_task_queue(&ctrl_if_tx_tq);
    7.44 @@ -193,7 +193,7 @@ static void __ctrl_if_rx_tasklet(unsigne
    7.45   
    7.46      for ( ; i != rp; i++) 
    7.47      {
    7.48 -        pmsg = RING_GET_REQUEST(CTRL_RING, &ctrl_if_rx_ring, i);
    7.49 +        pmsg = RING_GET_REQUEST(&ctrl_if_rx_ring, i);
    7.50          memcpy(&msg, pmsg, offsetof(ctrl_msg_t, msg));
    7.51  
    7.52          DPRINTK("Rx-Req %u/%u :: %d/%d\n", i-1,
    7.53 @@ -227,10 +227,10 @@ static void __ctrl_if_rx_tasklet(unsigne
    7.54  static irqreturn_t ctrl_if_interrupt(int irq, void *dev_id,
    7.55                                       struct pt_regs *regs)
    7.56  {
    7.57 -    if ( RING_HAS_UNCONSUMED_RESPONSES(CTRL_RING, &ctrl_if_tx_ring) )
    7.58 +    if ( RING_HAS_UNCONSUMED_RESPONSES(&ctrl_if_tx_ring) )
    7.59          tasklet_schedule(&ctrl_if_tx_tasklet);
    7.60  
    7.61 -    if ( RING_HAS_UNCONSUMED_REQUESTS(CTRL_RING, &ctrl_if_rx_ring) )
    7.62 +    if ( RING_HAS_UNCONSUMED_REQUESTS(&ctrl_if_rx_ring) )
    7.63          tasklet_schedule(&ctrl_if_rx_tasklet);
    7.64  
    7.65      return IRQ_HANDLED;
    7.66 @@ -248,7 +248,7 @@ ctrl_if_send_message_noblock(
    7.67  
    7.68      spin_lock_irqsave(&ctrl_if_lock, flags);
    7.69  
    7.70 -    if ( RING_FULL(CTRL_RING, &ctrl_if_tx_ring) )
    7.71 +    if ( RING_FULL(&ctrl_if_tx_ring) )
    7.72      {
    7.73          spin_unlock_irqrestore(&ctrl_if_lock, flags);
    7.74          return -EAGAIN;
    7.75 @@ -269,11 +269,11 @@ ctrl_if_send_message_noblock(
    7.76              ctrl_if_tx_ring.rsp_cons,
    7.77              msg->type, msg->subtype);
    7.78  
    7.79 -    dmsg = RING_GET_REQUEST(CTRL_RING, &ctrl_if_tx_ring, 
    7.80 +    dmsg = RING_GET_REQUEST(&ctrl_if_tx_ring, 
    7.81              ctrl_if_tx_ring.req_prod_pvt);
    7.82      memcpy(dmsg, msg, sizeof(*msg));
    7.83      ctrl_if_tx_ring.req_prod_pvt++;
    7.84 -    RING_PUSH_REQUESTS(CTRL_RING, &ctrl_if_tx_ring);
    7.85 +    RING_PUSH_REQUESTS(&ctrl_if_tx_ring);
    7.86  
    7.87      spin_unlock_irqrestore(&ctrl_if_lock, flags);
    7.88  
    7.89 @@ -373,7 +373,7 @@ ctrl_if_enqueue_space_callback(
    7.90      struct tq_struct *task)
    7.91  {
    7.92      /* Fast path. */
    7.93 -    if ( !RING_FULL(CTRL_RING, &ctrl_if_tx_ring) )
    7.94 +    if ( !RING_FULL(&ctrl_if_tx_ring) )
    7.95          return 0;
    7.96  
    7.97      (void)queue_task(task, &ctrl_if_tx_tq);
    7.98 @@ -384,7 +384,7 @@ ctrl_if_enqueue_space_callback(
    7.99       * certainly return 'not full'.
   7.100       */
   7.101      smp_mb();
   7.102 -    return RING_FULL(CTRL_RING, &ctrl_if_tx_ring);
   7.103 +    return RING_FULL(&ctrl_if_tx_ring);
   7.104  }
   7.105  
   7.106  void
   7.107 @@ -404,13 +404,13 @@ ctrl_if_send_response(
   7.108              ctrl_if_rx_ring.rsp_prod_pvt, 
   7.109              msg->type, msg->subtype);
   7.110  
   7.111 -    dmsg = RING_GET_RESPONSE(CTRL_RING, &ctrl_if_rx_ring, 
   7.112 +    dmsg = RING_GET_RESPONSE(&ctrl_if_rx_ring, 
   7.113              ctrl_if_rx_ring.rsp_prod_pvt);
   7.114      if ( dmsg != msg )
   7.115          memcpy(dmsg, msg, sizeof(*msg));
   7.116  
   7.117      ctrl_if_rx_ring.rsp_prod_pvt++;
   7.118 -    RING_PUSH_RESPONSES(CTRL_RING, &ctrl_if_rx_ring);
   7.119 +    RING_PUSH_RESPONSES(&ctrl_if_rx_ring);
   7.120  
   7.121      spin_unlock_irqrestore(&ctrl_if_lock, flags);
   7.122  
   7.123 @@ -481,6 +481,8 @@ void ctrl_if_suspend(void)
   7.124  
   7.125  void ctrl_if_resume(void)
   7.126  {
   7.127 +    control_if_t *ctrl_if = get_ctrl_if();
   7.128 +
   7.129      if ( xen_start_info.flags & SIF_INITDOMAIN )
   7.130      {
   7.131          /*
   7.132 @@ -501,8 +503,8 @@ void ctrl_if_resume(void)
   7.133      }
   7.134  
   7.135      /* Sync up with shared indexes. */
   7.136 -    RING_DROP_PENDING_RESPONSES(CTRL_RING, &ctrl_if_tx_ring);
   7.137 -    RING_DROP_PENDING_REQUESTS(CTRL_RING, &ctrl_if_rx_ring);
   7.138 +    FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring);
   7.139 +    BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring);
   7.140  
   7.141      ctrl_if_evtchn = xen_start_info.domain_controller_evtchn;
   7.142      ctrl_if_irq    = bind_evtchn_to_irq(ctrl_if_evtchn);
   7.143 @@ -521,8 +523,8 @@ void __init ctrl_if_init(void)
   7.144      for ( i = 0; i < 256; i++ )
   7.145          ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
   7.146  
   7.147 -    FRONT_RING_ATTACH(CTRL_RING, &ctrl_if_tx_ring, &ctrl_if->tx_ring);
   7.148 -    BACK_RING_ATTACH(CTRL_RING, &ctrl_if_rx_ring, &ctrl_if->rx_ring);
   7.149 +    FRONT_RING_ATTACH(&ctrl_if_tx_ring, &ctrl_if->tx_ring);
   7.150 +    BACK_RING_ATTACH(&ctrl_if_rx_ring, &ctrl_if->rx_ring);
   7.151      
   7.152      spin_lock_init(&ctrl_if_lock);
   7.153  
   7.154 @@ -552,7 +554,7 @@ int ctrl_if_transmitter_empty(void)
   7.155  
   7.156  void ctrl_if_discard_responses(void)
   7.157  {
   7.158 -    RING_DROP_PENDING_RESPONSES(CTRL_RING, &ctrl_if_tx_ring);
   7.159 +    RING_DROP_PENDING_RESPONSES(&ctrl_if_tx_ring);
   7.160  }
   7.161  
   7.162  EXPORT_SYMBOL(ctrl_if_send_message_noblock);
     8.1 --- a/linux-2.6.10-xen-sparse/arch/xen/kernel/devmem.c	Tue Mar 08 22:57:09 2005 +0000
     8.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/kernel/devmem.c	Wed Mar 09 20:19:23 2005 +0000
     8.3 @@ -43,7 +43,7 @@ static ssize_t read_mem(struct file * fi
     8.4  			size_t count, loff_t *ppos)
     8.5  {
     8.6  	unsigned long i, p = *ppos;
     8.7 -	ssize_t read = 0;
     8.8 +	ssize_t read = -EFAULT;
     8.9  	void *v;
    8.10  
    8.11  	if ((v = ioremap(p, count)) == NULL) {
    8.12 @@ -60,12 +60,12 @@ static ssize_t read_mem(struct file * fi
    8.13  		return count;
    8.14  	}
    8.15  	if (copy_to_user(buf, v, count))
    8.16 -		return -EFAULT;
    8.17 -	iounmap(v);
    8.18 +		goto out;
    8.19  
    8.20 -	read += count;
    8.21 +	read = count;
    8.22  	*ppos += read;
    8.23 -
    8.24 +out:
    8.25 +	iounmap(v);
    8.26  	return read;
    8.27  }
    8.28  
    8.29 @@ -73,18 +73,18 @@ static ssize_t write_mem(struct file * f
    8.30  			 size_t count, loff_t *ppos)
    8.31  {
    8.32  	unsigned long p = *ppos;
    8.33 -	ssize_t written = 0;
    8.34 +	ssize_t written = -EFAULT;
    8.35  	void *v;
    8.36  
    8.37  	if ((v = ioremap(p, count)) == NULL)
    8.38  		return -EFAULT;
    8.39  	if (copy_to_user(v, buf, count))
    8.40 -		return -EFAULT;
    8.41 -	iounmap(v);
    8.42 +		goto out;
    8.43  
    8.44 -	written += count;
    8.45 +	written = count;
    8.46  	*ppos += written;
    8.47 -
    8.48 +out:
    8.49 +	iounmap(v);
    8.50  	return written;
    8.51  }
    8.52  
     9.1 --- a/linux-2.6.10-xen-sparse/arch/xen/kernel/gnttab.c	Tue Mar 08 22:57:09 2005 +0000
     9.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/kernel/gnttab.c	Wed Mar 09 20:19:23 2005 +0000
     9.3 @@ -6,6 +6,7 @@
     9.4   * 2. Accessing others' memory reservations via grant references.
     9.5   * (i.e., mechanisms for both sender and recipient of grant references)
     9.6   * 
     9.7 + * Copyright (c) 2005, Christopher Clark
     9.8   * Copyright (c) 2004, K A Fraser
     9.9   */
    9.10  
    9.11 @@ -14,6 +15,9 @@
    9.12  #include <linux/sched.h>
    9.13  #include <asm/pgtable.h>
    9.14  #include <asm/fixmap.h>
    9.15 +#include <asm/uaccess.h>
    9.16 +#include <asm-xen/xen_proc.h>
    9.17 +#include <asm-xen/linux-public/privcmd.h>
    9.18  #include <asm-xen/gnttab.h>
    9.19  
    9.20  #ifndef set_fixmap_ma
    9.21 @@ -28,8 +32,13 @@
    9.22  #define ASSERT(_p) ((void)0)
    9.23  #endif
    9.24  
    9.25 +#define WPRINTK(fmt, args...) \
    9.26 +    printk(KERN_WARNING "xen_grant: " fmt, ##args)
    9.27 +
    9.28 +
    9.29  EXPORT_SYMBOL(gnttab_grant_foreign_access);
    9.30  EXPORT_SYMBOL(gnttab_end_foreign_access);
    9.31 +EXPORT_SYMBOL(gnttab_query_foreign_access);
    9.32  EXPORT_SYMBOL(gnttab_grant_foreign_transfer);
    9.33  EXPORT_SYMBOL(gnttab_end_foreign_transfer);
    9.34  
    9.35 @@ -39,6 +48,10 @@ static grant_ref_t gnttab_free_head;
    9.36  
    9.37  static grant_entry_t *shared;
    9.38  
    9.39 +/* /proc/xen/grant */
    9.40 +static struct proc_dir_entry *grant_pde;
    9.41 +
    9.42 +
    9.43  /*
    9.44   * Lock-free grant-entry allocator
    9.45   */
    9.46 @@ -84,9 +97,18 @@ gnttab_grant_foreign_access(
    9.47      return ref;
    9.48  }
    9.49  
    9.50 +int
    9.51 +gnttab_query_foreign_access( grant_ref_t ref )
    9.52 +{
    9.53 +    u16 nflags;
    9.54 +
    9.55 +    nflags = shared[ref].flags;
    9.56 +
    9.57 +    return ( nflags & (GTF_reading|GTF_writing) );
    9.58 +}
    9.59 +
    9.60  void
    9.61 -gnttab_end_foreign_access(
    9.62 -    grant_ref_t ref, int readonly)
    9.63 +gnttab_end_foreign_access( grant_ref_t ref, int readonly )
    9.64  {
    9.65      u16 flags, nflags;
    9.66  
    9.67 @@ -141,7 +163,76 @@ gnttab_end_foreign_transfer(
    9.68      return frame;
    9.69  }
    9.70  
    9.71 -void __init gnttab_init(void)
    9.72 +static int grant_ioctl(struct inode *inode, struct file *file,
    9.73 +                       unsigned int cmd, unsigned long data)
    9.74 +{
    9.75 +    int                     ret;
    9.76 +    privcmd_hypercall_t     hypercall;
    9.77 +
    9.78 +    /* XXX Need safety checks here if using for anything other
    9.79 +     *     than debugging */
    9.80 +    return -ENOSYS;
    9.81 +
    9.82 +    if ( cmd != IOCTL_PRIVCMD_HYPERCALL )
    9.83 +        return -ENOSYS;
    9.84 +
    9.85 +    if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
    9.86 +        return -EFAULT;
    9.87 +
    9.88 +    if ( hypercall.op != __HYPERVISOR_grant_table_op )
    9.89 +        return -ENOSYS;
    9.90 +
    9.91 +    /* hypercall-invoking asm taken from privcmd.c */
    9.92 +    __asm__ __volatile__ (
    9.93 +        "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
    9.94 +        "movl  4(%%eax),%%ebx ;"
    9.95 +        "movl  8(%%eax),%%ecx ;"
    9.96 +        "movl 12(%%eax),%%edx ;"
    9.97 +        "movl 16(%%eax),%%esi ;"
    9.98 +        "movl 20(%%eax),%%edi ;"
    9.99 +        "movl   (%%eax),%%eax ;"
   9.100 +        TRAP_INSTR "; "
   9.101 +        "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
   9.102 +        : "=a" (ret) : "0" (&hypercall) : "memory" );
   9.103 +                                                                                    
   9.104 +    return ret;
   9.105 +}
   9.106 +
   9.107 +static struct file_operations grant_file_ops = {
   9.108 +    ioctl:  grant_ioctl,
   9.109 +};
   9.110 +
   9.111 +static int grant_read(char *page, char **start, off_t off,
   9.112 +                      int count, int *eof, void *data)
   9.113 +{
   9.114 +    int             len;
   9.115 +    unsigned int    i;
   9.116 +    grant_entry_t  *gt;
   9.117 +
   9.118 +    gt = (grant_entry_t *)shared;
   9.119 +    len = 0;
   9.120 +
   9.121 +    for ( i = 0; i < NR_GRANT_REFS; i++ )
   9.122 +        if ( gt[i].flags )
   9.123 +            len += sprintf( page + len,
   9.124 +                    "Grant: ref (0x%x) flags (0x%hx) dom (0x%hx) frame (0x%x)\n", 
   9.125 +                    i,
   9.126 +                    gt[i].flags,
   9.127 +                    gt[i].domid,
   9.128 +                    gt[i].frame );
   9.129 +
   9.130 +    *eof = 1;
   9.131 +    return len;
   9.132 +}
   9.133 +
   9.134 +static int grant_write(struct file *file, const char __user *buffer,
   9.135 +                       unsigned long count, void *data)
   9.136 +{
   9.137 +    /* TODO: implement this */
   9.138 +    return -ENOSYS;
   9.139 +}
   9.140 +
   9.141 +static int __init gnttab_init(void)
   9.142  {
   9.143      gnttab_setup_table_t setup;
   9.144      unsigned long        frame;
   9.145 @@ -160,4 +251,25 @@ void __init gnttab_init(void)
   9.146  
   9.147      set_fixmap_ma(FIX_GNTTAB, frame << PAGE_SHIFT);
   9.148      shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB);
   9.149 +
   9.150 +    /*
   9.151 +     *  /proc/xen/grant : used by libxc to access grant tables
   9.152 +     */
   9.153 +    if ( (grant_pde = create_xen_proc_entry("grant", 0600)) == NULL )
   9.154 +    {
   9.155 +        WPRINTK("Unable to create grant xen proc entry\n");
   9.156 +        return -1;
   9.157 +    }
   9.158 +
   9.159 +    grant_file_ops.read   = grant_pde->proc_fops->read;
   9.160 +    grant_file_ops.write  = grant_pde->proc_fops->write;
   9.161 +
   9.162 +    grant_pde->proc_fops  = &grant_file_ops;
   9.163 +
   9.164 +    grant_pde->read_proc  = &grant_read;
   9.165 +    grant_pde->write_proc = &grant_write;
   9.166 +
   9.167 +    return 0;
   9.168  }
   9.169 +
   9.170 +__initcall(gnttab_init);
    10.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blkback/blkback.c	Tue Mar 08 22:57:09 2005 +0000
    10.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blkback/blkback.c	Wed Mar 09 20:19:23 2005 +0000
    10.3 @@ -287,7 +287,7 @@ static int do_block_io_op(blkif_t *blkif
    10.4      rmb(); /* Ensure we see queued requests up to 'rp'. */
    10.5  
    10.6      for ( i = blk_ring->req_cons; 
    10.7 -         (i != rp) && !RING_REQUEST_CONS_OVERFLOW(BLKIF_RING, blk_ring, i);
    10.8 +         (i != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, i);
    10.9            i++ )
   10.10      {
   10.11          if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
   10.12 @@ -296,7 +296,7 @@ static int do_block_io_op(blkif_t *blkif
   10.13              break;
   10.14          }
   10.15          
   10.16 -        req = RING_GET_REQUEST(BLKIF_RING, blk_ring, i);
   10.17 +        req = RING_GET_REQUEST(blk_ring, i);
   10.18          switch ( req->operation )
   10.19          {
   10.20          case BLKIF_OP_READ:
   10.21 @@ -561,13 +561,13 @@ static void make_response(blkif_t *blkif
   10.22  
   10.23      /* Place on the response ring for the relevant domain. */ 
   10.24      spin_lock_irqsave(&blkif->blk_ring_lock, flags);
   10.25 -    resp = RING_GET_RESPONSE(BLKIF_RING, blk_ring, blk_ring->rsp_prod_pvt);
   10.26 +    resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
   10.27      resp->id        = id;
   10.28      resp->operation = op;
   10.29      resp->status    = st;
   10.30      wmb(); /* Ensure other side can see the response fields. */
   10.31      blk_ring->rsp_prod_pvt++;
   10.32 -    RING_PUSH_RESPONSES(BLKIF_RING, blk_ring);
   10.33 +    RING_PUSH_RESPONSES(blk_ring);
   10.34      spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
   10.35  
   10.36      /* Kick the relevant domain. */
    11.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blkback/interface.c	Tue Mar 08 22:57:09 2005 +0000
    11.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blkback/interface.c	Wed Mar 09 20:19:23 2005 +0000
    11.3 @@ -197,8 +197,8 @@ void blkif_connect(blkif_be_connect_t *c
    11.4          return;
    11.5      }
    11.6      sring = (blkif_sring_t *)vma->addr;
    11.7 -    SHARED_RING_INIT(BLKIF_RING, sring);
    11.8 -    BACK_RING_INIT(BLKIF_RING, &blkif->blk_ring, sring);
    11.9 +    SHARED_RING_INIT(sring);
   11.10 +    BACK_RING_INIT(&blkif->blk_ring, sring);
   11.11      
   11.12      blkif->evtchn        = evtchn;
   11.13      blkif->irq           = bind_evtchn_to_irq(evtchn);
    12.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c	Tue Mar 08 22:57:09 2005 +0000
    12.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c	Wed Mar 09 20:19:23 2005 +0000
    12.3 @@ -75,7 +75,7 @@ static blkif_response_t blkif_control_rs
    12.4  static blkif_front_ring_t blk_ring;
    12.5  
    12.6  unsigned long rec_ring_free;
    12.7 -blkif_request_t rec_ring[RING_SIZE(BLKIF_RING, &blk_ring)];
    12.8 +blkif_request_t rec_ring[RING_SIZE(&blk_ring)];
    12.9  
   12.10  static int recovery = 0;           /* "Recovery in progress" flag.  Protected
   12.11                                      * by the blkif_io_lock */
   12.12 @@ -90,7 +90,7 @@ static inline int GET_ID_FROM_FREELIST( 
   12.13  {
   12.14      unsigned long free = rec_ring_free;
   12.15  
   12.16 -    if ( free > RING_SIZE(BLKIF_RING, &blk_ring) )
   12.17 +    if ( free > RING_SIZE(&blk_ring) )
   12.18          BUG();
   12.19  
   12.20      rec_ring_free = rec_ring[free].id;
   12.21 @@ -151,7 +151,7 @@ static inline void translate_req_to_mfn(
   12.22  static inline void flush_requests(void)
   12.23  {
   12.24      DISABLE_SCATTERGATHER();
   12.25 -    RING_PUSH_REQUESTS(BLKIF_RING, &blk_ring);
   12.26 +    RING_PUSH_REQUESTS(&blk_ring);
   12.27      notify_via_evtchn(blkif_evtchn);
   12.28  }
   12.29  
   12.30 @@ -331,7 +331,7 @@ static int blkif_queue_request(struct re
   12.31          return 1;
   12.32  
   12.33      /* Fill out a communications ring structure. */
   12.34 -    ring_req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, blk_ring.req_prod_pvt);
   12.35 +    ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
   12.36      id = GET_ID_FROM_FREELIST();
   12.37      rec_ring[id].id = (unsigned long) req;
   12.38  
   12.39 @@ -384,7 +384,7 @@ void do_blkif_request(request_queue_t *r
   12.40              continue;
   12.41          }
   12.42  
   12.43 -        if ( RING_FULL(BLKIF_RING, &blk_ring) )
   12.44 +        if ( RING_FULL(&blk_ring) )
   12.45          {
   12.46              blk_stop_queue(rq);
   12.47              break;
   12.48 @@ -429,7 +429,7 @@ static irqreturn_t blkif_int(int irq, vo
   12.49      {
   12.50          unsigned long id;
   12.51  
   12.52 -        bret = RING_GET_RESPONSE(BLKIF_RING, &blk_ring, i);
   12.53 +        bret = RING_GET_RESPONSE(&blk_ring, i);
   12.54          id = bret->id;
   12.55          req = (struct request *)rec_ring[id].id;
   12.56          blkif_completion( &rec_ring[id] );
   12.57 @@ -515,11 +515,11 @@ static void kick_pending_request_queues(
   12.58  {
   12.59      /* We kick pending request queues if the ring is reasonably empty. */
   12.60      if ( (nr_pending != 0) && 
   12.61 -         (RING_PENDING_REQUESTS(BLKIF_RING, &blk_ring) < 
   12.62 -          (RING_SIZE(BLKIF_RING, &blk_ring) >> 1)) )
   12.63 +         (RING_PENDING_REQUESTS(&blk_ring) < 
   12.64 +          (RING_SIZE(&blk_ring) >> 1)) )
   12.65      {
   12.66          /* Attempt to drain the queue, but bail if the ring becomes full. */
   12.67 -        while ( (nr_pending != 0) && !RING_FULL(BLKIF_RING, &blk_ring) )
   12.68 +        while ( (nr_pending != 0) && !RING_FULL(&blk_ring) )
   12.69              do_blkif_request(pending_queues[--nr_pending]);
   12.70      }
   12.71  }
   12.72 @@ -813,7 +813,7 @@ static int blkif_queue_request(unsigned 
   12.73               (sg_dev == device) &&
   12.74               (sg_next_sect == sector_number) )
   12.75          {
   12.76 -            req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, 
   12.77 +            req = RING_GET_REQUEST(&blk_ring, 
   12.78                                     blk_ring.req_prod_pvt - 1);
   12.79              bh = (struct buffer_head *)id;
   12.80       
   12.81 @@ -834,7 +834,7 @@ static int blkif_queue_request(unsigned 
   12.82  
   12.83              return 0;
   12.84          }
   12.85 -        else if ( RING_FULL(BLKIF_RING, &blk_ring) )
   12.86 +        else if ( RING_FULL(&blk_ring) )
   12.87          {
   12.88              return 1;
   12.89          }
   12.90 @@ -851,7 +851,7 @@ static int blkif_queue_request(unsigned 
   12.91      }
   12.92  
   12.93      /* Fill out a communications ring structure. */
   12.94 -    req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, blk_ring.req_prod_pvt);
   12.95 +    req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
   12.96  
   12.97      xid = GET_ID_FROM_FREELIST();
   12.98      rec_ring[xid].id = id;
   12.99 @@ -976,7 +976,7 @@ static void blkif_int(int irq, void *dev
  12.100          unsigned long id;
  12.101          blkif_response_t *bret;
  12.102          
  12.103 -        bret = RING_GET_RESPONSE(BLKIF_RING, &blk_ring, i);
  12.104 +        bret = RING_GET_RESPONSE(&blk_ring, i);
  12.105          id = bret->id;
  12.106          bh = (struct buffer_head *)rec_ring[id].id; 
  12.107  
  12.108 @@ -1026,21 +1026,21 @@ void blkif_control_send(blkif_request_t 
  12.109      blkif_request_t *req_d;
  12.110  
  12.111   retry:
  12.112 -    while ( RING_FULL(BLKIF_RING, &blk_ring) )
  12.113 +    while ( RING_FULL(&blk_ring) )
  12.114      {
  12.115          set_current_state(TASK_INTERRUPTIBLE);
  12.116          schedule_timeout(1);
  12.117      }
  12.118  
  12.119      spin_lock_irqsave(&blkif_io_lock, flags);
  12.120 -    if ( RING_FULL(BLKIF_RING, &blk_ring) )
  12.121 +    if ( RING_FULL(&blk_ring) )
  12.122      {
  12.123          spin_unlock_irqrestore(&blkif_io_lock, flags);
  12.124          goto retry;
  12.125      }
  12.126  
  12.127      DISABLE_SCATTERGATHER();
  12.128 -    req_d = RING_GET_REQUEST(BLKIF_RING, &blk_ring, blk_ring.req_prod_pvt);
  12.129 +    req_d = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
  12.130      *req_d = *req;    
  12.131  
  12.132      id = GET_ID_FROM_FREELIST();
  12.133 @@ -1130,8 +1130,8 @@ static void blkif_disconnect(void)
  12.134          free_page((unsigned long)blk_ring.sring);
  12.135      
  12.136      sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
  12.137 -    SHARED_RING_INIT(BLKIF_RING, sring);
  12.138 -    FRONT_RING_INIT(BLKIF_RING, &blk_ring, sring);
  12.139 +    SHARED_RING_INIT(sring);
  12.140 +    FRONT_RING_INIT(&blk_ring, sring);
  12.141      blkif_state  = BLKIF_STATE_DISCONNECTED;
  12.142      blkif_send_interface_connect();
  12.143  }
  12.144 @@ -1151,11 +1151,11 @@ static void blkif_recover(void)
  12.145       * This will need to be fixed once we have barriers */
  12.146  
  12.147      /* Stage 1 : Find active and move to safety. */
  12.148 -    for ( i = 0; i < RING_SIZE(BLKIF_RING, &blk_ring); i++ )
  12.149 +    for ( i = 0; i < RING_SIZE(&blk_ring); i++ )
  12.150      {
  12.151          if ( rec_ring[i].id >= PAGE_OFFSET )
  12.152          {
  12.153 -            req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, 
  12.154 +            req = RING_GET_REQUEST(&blk_ring, 
  12.155                                     blk_ring.req_prod_pvt);
  12.156              translate_req_to_mfn(req, &rec_ring[i]);
  12.157              blk_ring.req_prod_pvt++;
  12.158 @@ -1165,17 +1165,17 @@ static void blkif_recover(void)
  12.159      /* Stage 2 : Set up shadow list. */
  12.160      for ( i = 0; i < blk_ring.req_prod_pvt; i++ ) 
  12.161      {
  12.162 -        req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, i);
  12.163 +        req = RING_GET_REQUEST(&blk_ring, i);
  12.164          rec_ring[i].id = req->id;  
  12.165          req->id = i;
  12.166          translate_req_to_pfn(&rec_ring[i], req);
  12.167      }
  12.168  
  12.169      /* Stage 3 : Set up free list. */
  12.170 -    for ( ; i < RING_SIZE(BLKIF_RING, &blk_ring); i++ )
  12.171 +    for ( ; i < RING_SIZE(&blk_ring); i++ )
  12.172          rec_ring[i].id = i+1;
  12.173      rec_ring_free = blk_ring.req_prod_pvt;
  12.174 -    rec_ring[RING_SIZE(BLKIF_RING, &blk_ring)-1].id = 0x0fffffff;
  12.175 +    rec_ring[RING_SIZE(&blk_ring)-1].id = 0x0fffffff;
  12.176  
  12.177      /* blk_ring->req_prod will be set when we flush_requests().*/
  12.178      wmb();
  12.179 @@ -1368,9 +1368,9 @@ int __init xlblk_init(void)
  12.180      printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n");
  12.181  
  12.182      rec_ring_free = 0;
  12.183 -    for ( i = 0; i < RING_SIZE(BLKIF_RING, &blk_ring); i++ )
  12.184 +    for ( i = 0; i < RING_SIZE(&blk_ring); i++ )
  12.185          rec_ring[i].id = i+1;
  12.186 -    rec_ring[RING_SIZE(BLKIF_RING, &blk_ring)-1].id = 0x0fffffff;
  12.187 +    rec_ring[RING_SIZE(&blk_ring)-1].id = 0x0fffffff;
  12.188  
  12.189      (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
  12.190                                      CALLBACK_IN_BLOCKING_CONTEXT);
    13.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c	Tue Mar 08 22:57:09 2005 +0000
    13.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c	Wed Mar 09 20:19:23 2005 +0000
    13.3 @@ -228,8 +228,8 @@ void blkif_ptfe_connect(blkif_be_connect
    13.4      }
    13.5  
    13.6      sring = (blkif_sring_t *)vma->addr;
    13.7 -    SHARED_RING_INIT(BLKIF_RING, sring);
    13.8 -    BACK_RING_INIT(BLKIF_RING, &blkif->blk_ring, sring);
    13.9 +    SHARED_RING_INIT(sring);
   13.10 +    BACK_RING_INIT(&blkif->blk_ring, sring);
   13.11      
   13.12      blkif->evtchn        = evtchn;
   13.13      blkif->irq           = bind_evtchn_to_irq(evtchn);
   13.14 @@ -301,8 +301,8 @@ static void blkif_ptbe_disconnect(void)
   13.15      blkif_sring_t *sring;
   13.16      
   13.17      sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
   13.18 -    SHARED_RING_INIT(BLKIF_RING, sring);
   13.19 -    FRONT_RING_INIT(BLKIF_RING, &blktap_be_ring, sring);
   13.20 +    SHARED_RING_INIT(sring);
   13.21 +    FRONT_RING_INIT(&blktap_be_ring, sring);
   13.22      blktap_be_state  = BLKIF_STATE_DISCONNECTED;
   13.23      DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n");
   13.24      blkif_ptbe_send_interface_connect();
    14.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c	Tue Mar 08 22:57:09 2005 +0000
    14.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c	Wed Mar 09 20:19:23 2005 +0000
    14.3 @@ -97,7 +97,7 @@ inline int write_resp_to_fe_ring(blkif_t
    14.4      ar = &active_reqs[ID_TO_IDX(rsp->id)];
    14.5      rsp->id = ar->id;
    14.6              
    14.7 -    resp_d = RING_GET_RESPONSE(BLKIF_RING, &blkif->blk_ring,
    14.8 +    resp_d = RING_GET_RESPONSE(&blkif->blk_ring,
    14.9              blkif->blk_ring.rsp_prod_pvt);
   14.10      memcpy(resp_d, rsp, sizeof(blkif_response_t));
   14.11      wmb();
   14.12 @@ -118,7 +118,7 @@ inline int write_req_to_be_ring(blkif_re
   14.13          return 0;
   14.14      }
   14.15      
   14.16 -    req_d = RING_GET_REQUEST(BLKIF_RING, &blktap_be_ring,
   14.17 +    req_d = RING_GET_REQUEST(&blktap_be_ring,
   14.18              blktap_be_ring.req_prod_pvt);
   14.19      memcpy(req_d, req, sizeof(blkif_request_t));
   14.20      wmb();
   14.21 @@ -129,7 +129,7 @@ inline int write_req_to_be_ring(blkif_re
   14.22  
   14.23  inline void kick_fe_domain(blkif_t *blkif) 
   14.24  {
   14.25 -    RING_PUSH_RESPONSES(BLKIF_RING, &blkif->blk_ring);
   14.26 +    RING_PUSH_RESPONSES(&blkif->blk_ring);
   14.27      notify_via_evtchn(blkif->evtchn);
   14.28      DPRINTK("notified FE(dom %u)\n", blkif->domid);
   14.29      
   14.30 @@ -141,7 +141,7 @@ inline void kick_be_domain(void)
   14.31          return;
   14.32      
   14.33      wmb(); /* Ensure that the frontend can see the requests. */
   14.34 -    RING_PUSH_REQUESTS(BLKIF_RING, &blktap_be_ring);
   14.35 +    RING_PUSH_REQUESTS(&blktap_be_ring);
   14.36      notify_via_evtchn(blktap_be_evtchn);
   14.37      DPRINTK("notified BE\n");
   14.38  }
   14.39 @@ -299,7 +299,7 @@ static int do_block_io_op(blkif_t *blkif
   14.40      
   14.41      for ( i = blkif->blk_ring.req_cons; 
   14.42           (i != rp) && 
   14.43 -            !RING_REQUEST_CONS_OVERFLOW(BLKIF_RING, &blkif->blk_ring, i);
   14.44 +            !RING_REQUEST_CONS_OVERFLOW(&blkif->blk_ring, i);
   14.45            i++ )
   14.46      {
   14.47          
   14.48 @@ -309,12 +309,13 @@ static int do_block_io_op(blkif_t *blkif
   14.49              break;
   14.50          }
   14.51          
   14.52 -        req_s = RING_GET_REQUEST(BLKIF_RING, &blkif->blk_ring, i);
   14.53 +        req_s = RING_GET_REQUEST(&blkif->blk_ring, i);
   14.54          /* This is a new request:  
   14.55           * Assign an active request record, and remap the id. 
   14.56           */
   14.57          ar = get_active_req();
   14.58          ar->id = req_s->id;
   14.59 +        ar->nr_pages = req_s->nr_segments; 
   14.60          blkif_get(blkif);
   14.61          ar->blkif = blkif;
   14.62          req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar));
   14.63 @@ -354,10 +355,9 @@ static int do_block_io_op(blkif_t *blkif
   14.64              /* copy the request message to the BERing */
   14.65  
   14.66              DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", 
   14.67 -                    (unsigned)__SHARED_RING_MASK(BLKIF_RING, 
   14.68 -                        blktap_be_ring.sring, i), 
   14.69 -                    (unsigned)__SHARED_RING_MASK(BLKIF_RING, 
   14.70 -                        blktap_be_ring.sring, blktap_be_ring.req_prod_pvt));
   14.71 +                    (unsigned)i & (RING_SIZE(&blktap_be_ring)-1),
   14.72 +                    (unsigned)blktap_be_ring.req_prod_pvt & 
   14.73 +                    (RING_SIZE((&blktap_be_ring)-1)));
   14.74              
   14.75              write_req_to_be_ring(req_s);
   14.76              notify_be = 1;
   14.77 @@ -398,7 +398,7 @@ irqreturn_t blkif_ptbe_int(int irq, void
   14.78        
   14.79      for ( i = blktap_be_ring.rsp_cons; i != rp; i++)
   14.80      {
   14.81 -        resp_s = RING_GET_RESPONSE(BLKIF_RING, &blktap_be_ring, i);
   14.82 +        resp_s = RING_GET_RESPONSE(&blktap_be_ring, i);
   14.83          
   14.84          /* BE -> FE interposition point is here. */
   14.85      
   14.86 @@ -426,11 +426,9 @@ irqreturn_t blkif_ptbe_int(int irq, void
   14.87              /* Copy the response message to FERing */
   14.88           
   14.89              DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", 
   14.90 -                    (unsigned)__SHARED_RING_MASK(BLKIF_RING, 
   14.91 -                        blkif->blk_ring.sring, i), 
   14.92 -                    (unsigned)__SHARED_RING_MASK(BLKIF_RING, 
   14.93 -                        blkif->blk_ring.sring, 
   14.94 -                        blkif->blk_ring.rsp_prod_pvt));
   14.95 +                    (unsigned)i & (RING_SIZE(&blkif->blk_ring)-1),
   14.96 +                    (unsigned)blkif->blk_ring.rsp_prod_pvt & 
   14.97 +                    (RING_SIZE((&blkif->blk_ring)-1)));
   14.98  
   14.99              write_resp_to_fe_ring(blkif, resp_s);
  14.100              kick_fe_domain(blkif);
    15.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Tue Mar 08 22:57:09 2005 +0000
    15.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c	Wed Mar 09 20:19:23 2005 +0000
    15.3 @@ -86,8 +86,8 @@ static int blktap_open(struct inode *ino
    15.4  
    15.5      SetPageReserved(virt_to_page(csring));
    15.6      
    15.7 -    SHARED_RING_INIT(CTRL_RING, csring);
    15.8 -    FRONT_RING_INIT(CTRL_RING, &blktap_uctrl_ring, csring);
    15.9 +    SHARED_RING_INIT(csring);
   15.10 +    FRONT_RING_INIT(&blktap_uctrl_ring, csring);
   15.11  
   15.12  
   15.13      /* Allocate the fe ring. */
   15.14 @@ -97,8 +97,8 @@ static int blktap_open(struct inode *ino
   15.15  
   15.16      SetPageReserved(virt_to_page(sring));
   15.17      
   15.18 -    SHARED_RING_INIT(BLKIF_RING, sring);
   15.19 -    FRONT_RING_INIT(BLKIF_RING, &blktap_ufe_ring, sring);
   15.20 +    SHARED_RING_INIT(sring);
   15.21 +    FRONT_RING_INIT(&blktap_ufe_ring, sring);
   15.22  
   15.23      /* Allocate the be ring. */
   15.24      sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
   15.25 @@ -107,8 +107,8 @@ static int blktap_open(struct inode *ino
   15.26  
   15.27      SetPageReserved(virt_to_page(sring));
   15.28      
   15.29 -    SHARED_RING_INIT(BLKIF_RING, sring);
   15.30 -    BACK_RING_INIT(BLKIF_RING, &blktap_ube_ring, sring);
   15.31 +    SHARED_RING_INIT(sring);
   15.32 +    BACK_RING_INIT(&blktap_ube_ring, sring);
   15.33  
   15.34      DPRINTK(KERN_ALERT "blktap open.\n");
   15.35  
   15.36 @@ -252,13 +252,13 @@ static unsigned int blktap_poll(struct f
   15.37  {
   15.38          poll_wait(file, &blktap_wait, wait);
   15.39  
   15.40 -        if ( RING_HAS_UNPUSHED_REQUESTS(BLKIF_RING, &blktap_uctrl_ring) ||
   15.41 -             RING_HAS_UNPUSHED_REQUESTS(BLKIF_RING, &blktap_ufe_ring)   ||
   15.42 -             RING_HAS_UNPUSHED_RESPONSES(BLKIF_RING, &blktap_ube_ring) ) {
   15.43 +        if ( RING_HAS_UNPUSHED_REQUESTS(&blktap_uctrl_ring) ||
   15.44 +             RING_HAS_UNPUSHED_REQUESTS(&blktap_ufe_ring)   ||
   15.45 +             RING_HAS_UNPUSHED_RESPONSES(&blktap_ube_ring) ) {
   15.46  
   15.47 -            RING_PUSH_REQUESTS(BLKIF_RING, &blktap_uctrl_ring);
   15.48 -            RING_PUSH_REQUESTS(BLKIF_RING, &blktap_ufe_ring);
   15.49 -            RING_PUSH_RESPONSES(BLKIF_RING, &blktap_ube_ring);
   15.50 +            RING_PUSH_REQUESTS(&blktap_uctrl_ring);
   15.51 +            RING_PUSH_REQUESTS(&blktap_ufe_ring);
   15.52 +            RING_PUSH_RESPONSES(&blktap_ube_ring);
   15.53              return POLLIN | POLLRDNORM;
   15.54          }
   15.55  
   15.56 @@ -298,12 +298,12 @@ int blktap_write_fe_ring(blkif_request_t
   15.57          return 0;
   15.58      }
   15.59  
   15.60 -    if ( RING_FULL(BLKIF_RING, &blktap_ufe_ring) ) {
   15.61 +    if ( RING_FULL(&blktap_ufe_ring) ) {
   15.62          DPRINTK("blktap: fe_ring is full, can't add.\n");
   15.63          return 0;
   15.64      }
   15.65  
   15.66 -    target = RING_GET_REQUEST(BLKIF_RING, &blktap_ufe_ring,
   15.67 +    target = RING_GET_REQUEST(&blktap_ufe_ring,
   15.68              blktap_ufe_ring.req_prod_pvt);
   15.69      memcpy(target, req, sizeof(*req));
   15.70  
   15.71 @@ -344,7 +344,7 @@ int blktap_write_be_ring(blkif_response_
   15.72  
   15.73      /* No test for fullness in the response direction. */
   15.74  
   15.75 -    target = RING_GET_RESPONSE(BLKIF_RING, &blktap_ube_ring,
   15.76 +    target = RING_GET_RESPONSE(&blktap_ube_ring,
   15.77              blktap_ube_ring.rsp_prod_pvt);
   15.78      memcpy(target, rsp, sizeof(*rsp));
   15.79  
   15.80 @@ -355,6 +355,24 @@ int blktap_write_be_ring(blkif_response_
   15.81      return 0;
   15.82  }
   15.83  
   15.84 +static void blktap_fast_flush_area(int idx, int nr_pages)
   15.85 +{
   15.86 +    multicall_entry_t mcl[MMAP_PAGES_PER_REQUEST];
   15.87 +    int               i;
   15.88 +
   15.89 +    for ( i = 0; i < nr_pages; i++ )
   15.90 +    {
   15.91 +        mcl[i].op = __HYPERVISOR_update_va_mapping;
   15.92 +        mcl[i].args[0] = MMAP_VADDR(idx, i);
   15.93 +        mcl[i].args[1] = 0;
   15.94 +        mcl[i].args[2] = 0;
   15.95 +    }
   15.96 +
   15.97 +    mcl[nr_pages-1].args[2] = UVMF_FLUSH_TLB;
   15.98 +    if ( unlikely(HYPERVISOR_multicall(mcl, nr_pages) != 0) )
   15.99 +        BUG();
  15.100 +}
  15.101 +
  15.102  static int blktap_read_fe_ring(void)
  15.103  {
  15.104      /* This is called to read responses from the UFE ring. */
  15.105 @@ -375,11 +393,12 @@ static int blktap_read_fe_ring(void)
  15.106          
  15.107          for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ )
  15.108          {
  15.109 -            resp_s = RING_GET_RESPONSE(BLKIF_RING, &blktap_ufe_ring, i);
  15.110 +            resp_s = RING_GET_RESPONSE(&blktap_ufe_ring, i);
  15.111              
  15.112              DPRINTK("resp->fe_ring\n");
  15.113              ar = lookup_active_req(ID_TO_IDX(resp_s->id));
  15.114              blkif = ar->blkif;
  15.115 +            blktap_fast_flush_area(ID_TO_IDX(resp_s->id), ar->nr_pages);
  15.116              write_resp_to_fe_ring(blkif, resp_s);
  15.117              kick_fe_domain(blkif);
  15.118          }
  15.119 @@ -406,7 +425,7 @@ static int blktap_read_be_ring(void)
  15.120          rmb();
  15.121          for ( i = blktap_ube_ring.req_cons; i != rp; i++ )
  15.122          {
  15.123 -            req_s = RING_GET_REQUEST(BLKIF_RING, &blktap_ube_ring, i);
  15.124 +            req_s = RING_GET_REQUEST(&blktap_ube_ring, i);
  15.125  
  15.126              DPRINTK("req->be_ring\n");
  15.127              write_req_to_be_ring(req_s);
  15.128 @@ -430,7 +449,7 @@ int blktap_write_ctrl_ring(ctrl_msg_t *m
  15.129  
  15.130      /* No test for fullness in the response direction. */
  15.131  
  15.132 -    target = RING_GET_REQUEST(CTRL_RING, &blktap_uctrl_ring,
  15.133 +    target = RING_GET_REQUEST(&blktap_uctrl_ring,
  15.134              blktap_uctrl_ring.req_prod_pvt);
  15.135      memcpy(target, msg, sizeof(*msg));
  15.136  
    16.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c	Tue Mar 08 22:57:09 2005 +0000
    16.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c	Wed Mar 09 20:19:23 2005 +0000
    16.3 @@ -177,7 +177,7 @@ static int privcmd_ioctl(struct inode *i
    16.4          unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
    16.5          pgd_t *pgd = pgd_offset_k(m2pv);
    16.6          pmd_t *pmd = pmd_offset(pgd, m2pv);
    16.7 -        unsigned long m2p_start_mfn = pmd_val(*pmd) >> PAGE_SHIFT;
    16.8 +        unsigned long m2p_start_mfn = pmd_val_ma(*pmd) >> PAGE_SHIFT;
    16.9          ret = put_user(m2p_start_mfn, (unsigned long *)data) ? -EFAULT: 0;
   16.10      }
   16.11      break;
    17.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c	Tue Mar 08 22:57:09 2005 +0000
    17.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/usbback/interface.c	Wed Mar 09 20:19:23 2005 +0000
    17.3 @@ -194,8 +194,8 @@ void usbif_connect(usbif_be_connect_t *c
    17.4      }
    17.5  
    17.6      sring = (usbif_sring_t *)vma->addr;
    17.7 -    SHARED_RING_INIT(USBIF_RING, sring);
    17.8 -    BACK_RING_INIT(USBIF_RING, &up->usb_ring, sring);
    17.9 +    SHARED_RING_INIT(sring);
   17.10 +    BACK_RING_INIT(&up->usb_ring, sring);
   17.11  
   17.12      up->evtchn        = evtchn;
   17.13      up->irq           = bind_evtchn_to_irq(evtchn);
    18.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c	Tue Mar 08 22:57:09 2005 +0000
    18.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/usbback/usbback.c	Wed Mar 09 20:19:23 2005 +0000
    18.3 @@ -398,7 +398,7 @@ static int do_usb_io_op(usbif_priv_t *up
    18.4      
    18.5      /* Take items off the comms ring, taking care not to overflow. */
    18.6      for ( i = usb_ring->req_cons; 
    18.7 -          (i != rp) && !RING_REQUEST_CONS_OVERFLOW(USBIF_RING, usb_ring, i);
    18.8 +          (i != rp) && !RING_REQUEST_CONS_OVERFLOW(usb_ring, i);
    18.9            i++ )
   18.10      {
   18.11          if ( (max_to_do-- == 0) || (NR_PENDING_REQS == MAX_PENDING_REQS) )
   18.12 @@ -407,7 +407,7 @@ static int do_usb_io_op(usbif_priv_t *up
   18.13              break;
   18.14          }
   18.15  
   18.16 -        req = RING_GET_REQUEST(USBIF_RING, usb_ring, i);
   18.17 +        req = RING_GET_REQUEST(usb_ring, i);
   18.18          
   18.19          switch ( req->operation )
   18.20          {
   18.21 @@ -808,7 +808,7 @@ static void make_response(usbif_priv_t *
   18.22  
   18.23      /* Place on the response ring for the relevant domain. */ 
   18.24      spin_lock_irqsave(&up->usb_ring_lock, flags);
   18.25 -    resp = RING_GET_RESPONSE(USBIF_RING, usb_ring, usb_ring->rsp_prod_pvt);
   18.26 +    resp = RING_GET_RESPONSE(usb_ring, usb_ring->rsp_prod_pvt);
   18.27      resp->id        = id;
   18.28      resp->operation = op;
   18.29      resp->status    = st;
   18.30 @@ -819,7 +819,7 @@ static void make_response(usbif_priv_t *
   18.31      dump_response(resp);
   18.32  
   18.33      usb_ring->rsp_prod_pvt++;
   18.34 -    RING_PUSH_RESPONSES(USBIF_RING, usb_ring);
   18.35 +    RING_PUSH_RESPONSES(usb_ring);
   18.36      spin_unlock_irqrestore(&up->usb_ring_lock, flags);
   18.37  
   18.38      /* Kick the relevant domain. */
    19.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c	Tue Mar 08 22:57:09 2005 +0000
    19.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/usbfront/usbfront.c	Wed Mar 09 20:19:23 2005 +0000
    19.3 @@ -214,7 +214,7 @@ static int xhci_queue_req(struct urb *ur
    19.4  #endif
    19.5          
    19.6  
    19.7 -        if ( RING_FULL(USBIF_RING, usb_ring) )
    19.8 +        if ( RING_FULL(usb_ring) )
    19.9          {
   19.10                  printk(KERN_WARNING
   19.11                         "xhci_queue_req(): USB ring full, not queuing request\n");
   19.12 @@ -222,7 +222,7 @@ static int xhci_queue_req(struct urb *ur
   19.13          }
   19.14  
   19.15          /* Stick something in the shared communications ring. */
   19.16 -	req = RING_GET_REQUEST(USBIF_RING, usb_ring, usb_ring->req_prod_pvt);
   19.17 +	req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
   19.18  
   19.19          req->operation       = USBIF_OP_IO;
   19.20          req->port            = 0; /* We don't care what the port is. */
   19.21 @@ -251,7 +251,7 @@ static int xhci_queue_req(struct urb *ur
   19.22                  memset(req->setup, 0, 8);
   19.23          
   19.24          usb_ring->req_prod_pvt++;
   19.25 -        RING_PUSH_REQUESTS(USBIF_RING, usb_ring);
   19.26 +        RING_PUSH_REQUESTS(usb_ring);
   19.27  
   19.28  	notify_via_evtchn(xhci->evtchn);
   19.29  
   19.30 @@ -277,7 +277,7 @@ static inline usbif_request_t *xhci_queu
   19.31  	       usbif->resp_prod, xhci->usb_resp_cons);
   19.32  #endif
   19.33          
   19.34 -        if ( RING_FULL(USBIF_RING, usb_ring) )
   19.35 +        if ( RING_FULL(usb_ring) )
   19.36          {
   19.37                  printk(KERN_WARNING
   19.38                         "xhci_queue_probe(): ring full, not queuing request\n");
   19.39 @@ -285,7 +285,7 @@ static inline usbif_request_t *xhci_queu
   19.40          }
   19.41  
   19.42          /* Stick something in the shared communications ring. */
   19.43 -        req = RING_GET_REQUEST(USBIF_RING, usb_ring, usb_ring->req_prod_pvt);
   19.44 +        req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
   19.45  
   19.46          memset(req, sizeof(*req), 0);
   19.47  
   19.48 @@ -293,7 +293,7 @@ static inline usbif_request_t *xhci_queu
   19.49          req->port            = port;
   19.50  
   19.51          usb_ring->req_prod_pvt++;
   19.52 -        RING_PUSH_REQUESTS(USBIF_RING, usb_ring);
   19.53 +        RING_PUSH_REQUESTS(usb_ring);
   19.54  
   19.55  	notify_via_evtchn(xhci->evtchn);
   19.56  
   19.57 @@ -313,7 +313,7 @@ static int xhci_port_reset(usbif_vdev_t 
   19.58          xhci->awaiting_reset = 1;
   19.59          
   19.60          /* Stick something in the shared communications ring. */
   19.61 -	req = RING_GET_REQUEST(USBIF_RING, usb_ring, usb_ring->req_prod_pvt);
   19.62 +	req = RING_GET_REQUEST(usb_ring, usb_ring->req_prod_pvt);
   19.63  
   19.64          memset(req, sizeof(*req), 0);
   19.65  
   19.66 @@ -321,7 +321,7 @@ static int xhci_port_reset(usbif_vdev_t 
   19.67          req->port            = port;
   19.68          
   19.69          usb_ring->req_prod_pvt++;
   19.70 -	RING_PUSH_REQUESTS(USBIF_RING, usb_ring);
   19.71 +	RING_PUSH_REQUESTS(usb_ring);
   19.72  
   19.73  	notify_via_evtchn(xhci->evtchn);
   19.74  
   19.75 @@ -427,7 +427,7 @@ static void xhci_drain_ring(void)
   19.76          /* Take items off the comms ring, taking care not to overflow. */
   19.77          for ( i = usb_ring->rsp_cons; i != rp; i++ )
   19.78          {
   19.79 -            resp = RING_GET_RESPONSE(USBIF_RING, usb_ring, i);
   19.80 +            resp = RING_GET_RESPONSE(usb_ring, i);
   19.81              
   19.82              /* May need to deal with batching and with putting a ceiling on
   19.83                 the number dispatched for performance and anti-dos reasons */
   19.84 @@ -1497,8 +1497,8 @@ static void usbif_status_change(usbif_fe
   19.85  
   19.86          /* Move from CLOSED to DISCONNECTED state. */
   19.87          sring = (usbif_sring_t *)__get_free_page(GFP_KERNEL);
   19.88 -        SHARED_RING_INIT(USBIF_RING, sring);
   19.89 -        FRONT_RING_INIT(USBIF_RING, &xhci->usb_ring, sring);
   19.90 +        SHARED_RING_INIT(sring);
   19.91 +        FRONT_RING_INIT(&xhci->usb_ring, sring);
   19.92          xhci->state  = USBIF_STATE_DISCONNECTED;
   19.93  
   19.94          /* Construct an interface-CONNECT message for the domain controller. */
    20.1 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h	Tue Mar 08 22:57:09 2005 +0000
    20.2 +++ b/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h	Wed Mar 09 20:19:23 2005 +0000
    20.3 @@ -110,6 +110,7 @@ static inline unsigned long pmd_val(pmd_
    20.4  	if (ret) ret = machine_to_phys(ret);
    20.5  	return ret;
    20.6  }
    20.7 +#define pmd_val_ma(x)   ((x).pmd)
    20.8  #define pgd_val(x)	({ BUG(); (unsigned long)0; })
    20.9  #define pgprot_val(x)	((x).pgprot)
   20.10  
    21.1 --- a/linux-2.6.10-xen-sparse/include/asm-xen/gnttab.h	Tue Mar 08 22:57:09 2005 +0000
    21.2 +++ b/linux-2.6.10-xen-sparse/include/asm-xen/gnttab.h	Wed Mar 09 20:19:23 2005 +0000
    21.3 @@ -32,4 +32,8 @@ unsigned long
    21.4  gnttab_end_foreign_transfer(
    21.5      grant_ref_t ref);
    21.6  
    21.7 +int
    21.8 +gnttab_query_foreign_access( 
    21.9 +    grant_ref_t ref );
   21.10 +
   21.11  #endif /* __ASM_GNTTAB_H__ */
    22.1 --- a/linux-2.6.10-xen-sparse/include/asm-xen/hypervisor.h	Tue Mar 08 22:57:09 2005 +0000
    22.2 +++ b/linux-2.6.10-xen-sparse/include/asm-xen/hypervisor.h	Wed Mar 09 20:19:23 2005 +0000
    22.3 @@ -536,7 +536,7 @@ HYPERVISOR_grant_table_op(
    22.4      __asm__ __volatile__ (
    22.5          TRAP_INSTR
    22.6          : "=a" (ret), "=b" (ign1), "=c" (ign2), "=d" (ign3)
    22.7 -	: "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (count), "3" (uop)
    22.8 +	: "0" (__HYPERVISOR_grant_table_op), "1" (cmd), "2" (uop), "3" (count)
    22.9  	: "memory" );
   22.10  
   22.11      return ret;
    23.1 --- a/netbsd-2.0-xen-sparse/Makefile	Tue Mar 08 22:57:09 2005 +0000
    23.2 +++ b/netbsd-2.0-xen-sparse/Makefile	Wed Mar 09 20:19:23 2005 +0000
    23.3 @@ -7,18 +7,19 @@
    23.4  TOPDIR		?= $(shell pwd)
    23.5  NETBSD_RELEASE	?= $(patsubst netbsd-%-xen%,%,$(notdir $(TOPDIR)))
    23.6  NETBSD_VER	?= $(patsubst netbsd-%-xen%,%,$(notdir $(TOPDIR)))
    23.7 +NETBSD_KERNEL	?= XEN
    23.8  
    23.9  clean:
   23.10 -	@mkdir -p compile/XEN
   23.11 -	cd compile/XEN && TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbmake-xen cleandir
   23.12 +	@mkdir -p compile/$(NETBSD_KERNEL)
   23.13 +	cd compile/$(NETBSD_KERNEL) && TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbmake-xen cleandir
   23.14  
   23.15  config:
   23.16 -	@mkdir -p compile/XEN
   23.17 -	cd compile/XEN && TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbconfig-xen XEN
   23.18 +	@mkdir -p compile/$(NETBSD_KERNEL)
   23.19 +	cd compile/$(NETBSD_KERNEL) && TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbconfig-xen $(NETBSD_KERNEL)
   23.20  
   23.21  netbsd:
   23.22 -	cd compile/XEN && TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbmake-xen dependall
   23.23 +	cd compile/$(NETBSD_KERNEL) && TOPDIR=$(TOPDIR) NETBSD_VER=$(NETBSD_VER) ../../nbmake-xen dependall
   23.24  
   23.25  install:
   23.26  	@mkdir -p $(dir $(INSTALL_PATH)/$(INSTALL_NAME))
   23.27 -	install -c compile/XEN/netbsd $(INSTALL_PATH)/$(INSTALL_NAME)
   23.28 +	install -c compile/$(NETBSD_KERNEL)/netbsd $(INSTALL_PATH)/$(INSTALL_NAME)
    24.1 --- a/netbsd-2.0-xen-sparse/mkbuildtree	Tue Mar 08 22:57:09 2005 +0000
    24.2 +++ b/netbsd-2.0-xen-sparse/mkbuildtree	Wed Mar 09 20:19:23 2005 +0000
    24.3 @@ -85,23 +85,37 @@ popd >/dev/null
    24.4  # Get absolute path to the source directory
    24.5  AS=`pwd`
    24.6  
    24.7 +# Get name of sparse directory
    24.8 +SDN=$(basename $AS)
    24.9 +
   24.10  # Get path to source, relative to destination
   24.11  abs_to_rel ${AD} ${AS}
   24.12  RS=$DESTPATH
   24.13  
   24.14  # Remove old copies of files and directories at the destination
   24.15 -for i in `find . -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done
   24.16 +for i in `find sys -type f -o -type l` ; do rm -f ${AD}/${i#./} ; done
   24.17  
   24.18  # We now work from the destination directory
   24.19  cd ${AD}
   24.20  
   24.21  # Remove old symlinks
   24.22 -for i in `find . -type l`; do rm -f $i; done
   24.23 +find sys -type l | while read f
   24.24 +do
   24.25 +  case $(readlink $f) in
   24.26 +  */$SDN/*)
   24.27 +    rm -f $f
   24.28 +    ;;
   24.29 +  esac
   24.30 +done
   24.31  
   24.32 -# Create symlinks of files and directories which exist in the sparse source
   24.33 -relative_lndir ${RS}
   24.34 -rm -f mkbuildtree
   24.35 -
   24.36 +if [ -f ${AD}/BUILDING ]; then
   24.37 +  # Create symlinks of files and directories which exist in the sparse source
   24.38 +  (cd sys && relative_lndir ../${RS}/sys)
   24.39 +else
   24.40 +  # Create symlinks of files and directories which exist in the sparse source
   24.41 +  relative_lndir ${RS}
   24.42 +  rm -f mkbuildtree
   24.43 +fi
   24.44  
   24.45  # Create links to the shared definitions of the Xen interface
   24.46  rm -rf ${AD}/sys/arch/xen/include/xen-public
    25.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN	Tue Mar 08 22:57:09 2005 +0000
    25.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN	Wed Mar 09 20:19:23 2005 +0000
    25.3 @@ -153,7 +153,7 @@ include	"arch/xen/conf/GENERIC.local"
    25.4  pseudo-device	ccd		4	# concatenated/striped disk devices
    25.5  #pseudo-device	cgd		4	# cryptographic disk devices
    25.6  #pseudo-device	md		1	# memory disk device (ramdisk)
    25.7 -#pseudo-device	vnd		4	# disk-like interface to files
    25.8 +pseudo-device	vnd		4	# disk-like interface to files
    25.9  
   25.10  pseudo-device	bpfilter	8	# Berkeley packet filter
   25.11  pseudo-device	ipfilter		# IP filter (firewall) and NAT
    26.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S	Tue Mar 08 22:57:09 2005 +0000
    26.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S	Wed Mar 09 20:19:23 2005 +0000
    26.3 @@ -249,6 +249,7 @@
    26.4  
    26.5  #include <machine/xen.h>
    26.6  #define __HYPERVISOR_yield		   8
    26.7 +#define __SCHEDOP_yield			   0
    26.8  
    26.9  	.space 512
   26.10  tmpstk:
   26.11 @@ -1592,6 +1593,7 @@ idle_loop:
   26.12  	jmp	idle_start
   26.13  4:
   26.14  	movl	$__HYPERVISOR_yield,%eax
   26.15 +	movl	$__SCHEDOP_yield,%ebx
   26.16  	TRAP_INSTR
   26.17  NENTRY(mpidle)
   26.18  idle_start:	
    27.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c	Tue Mar 08 22:57:09 2005 +0000
    27.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/machdep.c	Wed Mar 09 20:19:23 2005 +0000
    27.3 @@ -854,6 +854,10 @@ haltsys:
    27.4  	if (howto & RB_HALT) {
    27.5  		printf("\n");
    27.6  		printf("The operating system has halted.\n");
    27.7 +
    27.8 +		/* XXX cngetc() below doesn't work, shutdown machine for now */
    27.9 +		HYPERVISOR_shutdown();
   27.10 +
   27.11  		printf("Please press any key to reboot.\n\n");
   27.12  
   27.13  #ifdef BEEP_ONHALT
    28.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/hypervisor.c	Tue Mar 08 22:57:09 2005 +0000
    28.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/hypervisor.c	Wed Mar 09 20:19:23 2005 +0000
    28.3 @@ -52,7 +52,6 @@
    28.4  #include <machine/hypervisor.h>
    28.5  #include <machine/evtchn.h>
    28.6  
    28.7 -#ifdef DOM0OPS
    28.8  #include <sys/dirent.h>
    28.9  #include <sys/stat.h>
   28.10  #include <sys/tree.h>
   28.11 @@ -60,7 +59,6 @@
   28.12  #include <miscfs/specfs/specdev.h>
   28.13  #include <miscfs/kernfs/kernfs.h>
   28.14  #include <machine/kernfs_machdep.h>
   28.15 -#endif
   28.16  
   28.17  #if NXENNET > 0
   28.18  #include <net/if.h>
   28.19 @@ -187,9 +185,9 @@ hypervisor_attach(parent, self, aux)
   28.20  	hac.hac_xennpx.xa_device = "npx";
   28.21  	config_found(self, &hac.hac_xennpx, hypervisor_print);
   28.22  #endif
   28.23 +	xenkernfs_init();
   28.24  #ifdef DOM0OPS
   28.25  	if (xen_start_info.flags & SIF_PRIVILEGED) {
   28.26 -		xenkernfs_init();
   28.27  		xenprivcmd_init();
   28.28  		xenmachmem_init();
   28.29  		xenvfr_init();
   28.30 @@ -222,8 +220,7 @@ hypervisor_notify_via_evtchn(unsigned in
   28.31  	(void)HYPERVISOR_event_channel_op(&op);
   28.32  }
   28.33  
   28.34 -#ifdef DOM0OPS
   28.35 -
   28.36 +#define	READ_MODE	(S_IRUSR|S_IRGRP|S_IROTH)
   28.37  #define DIR_MODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
   28.38  
   28.39  kernfs_parentdir_t *kernxen_pkt;
   28.40 @@ -237,5 +234,9 @@ xenkernfs_init()
   28.41  	KERNFS_INITENTRY(dkt, DT_DIR, "xen", NULL, KFSsubdir, VDIR, DIR_MODE);
   28.42  	kernfs_addentry(NULL, dkt);
   28.43  	kernxen_pkt = KERNFS_ENTOPARENTDIR(dkt);
   28.44 +
   28.45 +	KERNFS_ALLOCENTRY(dkt, M_TEMP, M_WAITOK);
   28.46 +	KERNFS_INITENTRY(dkt, DT_REG, "cmdline", xen_start_info.cmd_line,
   28.47 +	    KFSstring, VREG, READ_MODE);
   28.48 +	kernfs_addentry(kernxen_pkt, dkt);
   28.49  }
   28.50 -#endif
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs.h	Wed Mar 09 20:19:23 2005 +0000
    29.3 @@ -0,0 +1,205 @@
    29.4 +/*	$NetBSD: kernfs.h,v 1.20.2.3 2004/05/23 10:46:05 tron Exp $	*/
    29.5 +
    29.6 +/*
    29.7 + * Copyright (c) 1992, 1993
    29.8 + *	The Regents of the University of California.  All rights reserved.
    29.9 + *
   29.10 + * This code is derived from software donated to Berkeley by
   29.11 + * Jan-Simon Pendry.
   29.12 + *
   29.13 + * Redistribution and use in source and binary forms, with or without
   29.14 + * modification, are permitted provided that the following conditions
   29.15 + * are met:
   29.16 + * 1. Redistributions of source code must retain the above copyright
   29.17 + *    notice, this list of conditions and the following disclaimer.
   29.18 + * 2. Redistributions in binary form must reproduce the above copyright
   29.19 + *    notice, this list of conditions and the following disclaimer in the
   29.20 + *    documentation and/or other materials provided with the distribution.
   29.21 + * 3. Neither the name of the University nor the names of its contributors
   29.22 + *    may be used to endorse or promote products derived from this software
   29.23 + *    without specific prior written permission.
   29.24 + *
   29.25 + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   29.26 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29.27 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29.28 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29.29 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29.30 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29.31 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29.32 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   29.33 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29.34 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   29.35 + * SUCH DAMAGE.
   29.36 + *
   29.37 + *	@(#)kernfs.h	8.6 (Berkeley) 3/29/95
   29.38 + */
   29.39 +
   29.40 +#define	_PATH_KERNFS	"/kern"		/* Default mountpoint */
   29.41 +
   29.42 +#ifdef _KERNEL
   29.43 +#include <sys/queue.h>
   29.44 +
   29.45 +/*
   29.46 + * The different types of node in a kernfs filesystem
   29.47 + */
   29.48 +typedef enum {
   29.49 +	KFSkern,		/* the filesystem itself (.) */
   29.50 +	KFSroot,		/* the filesystem root (..) */
   29.51 +	KFSnull,		/* none aplicable */
   29.52 +	KFStime,		/* boottime */
   29.53 +	KFSint,			/* integer */
   29.54 +	KFSstring,		/* string */
   29.55 +	KFShostname,	/* hostname */
   29.56 +	KFSavenrun,		/* loadavg */
   29.57 +	KFSdevice,		/* device file (rootdev/rrootdev) */
   29.58 +	KFSmsgbuf,		/* msgbuf */
   29.59 +	KFSipsecsadir,	/* ipsec security association (top dir) */
   29.60 +	KFSipsecspdir,	/* ipsec security policy (top dir) */
   29.61 +	KFSipsecsa,		/* ipsec security association entry */
   29.62 +	KFSipsecsp,		/* ipsec security policy entry */
   29.63 +	KFSsubdir,		/* directory */
   29.64 +	KFSlasttype,		/* last used type */
   29.65 +	KFSmaxtype = (1<<6) - 1	/* last possible type */
   29.66 +} kfstype;
   29.67 +
   29.68 +/*
   29.69 + * Control data for the kern file system.
   29.70 + */
   29.71 +struct kern_target {
   29.72 +	u_char		kt_type;
   29.73 +	u_char		kt_namlen;
   29.74 +	const char	*kt_name;
   29.75 +	void		*kt_data;
   29.76 +	kfstype		kt_tag;
   29.77 +	u_char		kt_vtype;
   29.78 +	mode_t		kt_mode;
   29.79 +};
   29.80 +
   29.81 +struct dyn_kern_target {
   29.82 +	struct kern_target		dkt_kt;
   29.83 +	SIMPLEQ_ENTRY(dyn_kern_target)	dkt_queue;
   29.84 +};
   29.85 +
   29.86 +struct kernfs_subdir {
   29.87 +	SIMPLEQ_HEAD(,dyn_kern_target)	ks_entries;
   29.88 +	unsigned int			ks_nentries;
   29.89 +	unsigned int			ks_dirs;
   29.90 +	const struct kern_target	*ks_parent;
   29.91 +};
   29.92 +
   29.93 +struct kernfs_node {
   29.94 +	LIST_ENTRY(kernfs_node) kfs_hash; /* hash chain */
   29.95 +	TAILQ_ENTRY(kernfs_node) kfs_list; /* flat list */
   29.96 +	struct vnode	*kfs_vnode;	/* vnode associated with this pfsnode */
   29.97 +	kfstype		kfs_type;	/* type of procfs node */
   29.98 +	mode_t		kfs_mode;	/* mode bits for stat() */
   29.99 +	long		kfs_fileno;	/* unique file id */
  29.100 +	u_int32_t	kfs_value;	/* SA id or SP id (KFSint) */
  29.101 +	const struct kern_target *kfs_kt;
  29.102 +	void		*kfs_v;		/* pointer to secasvar/secpolicy/mbuf */
  29.103 +	long		kfs_cookie;	/* fileno cookie */
  29.104 +};
  29.105 +
  29.106 +struct kernfs_mount {
  29.107 +	TAILQ_HEAD(, kernfs_node) nodelist;
  29.108 +	long fileno_cookie;
  29.109 +};
  29.110 +
  29.111 +#define UIO_MX	32
  29.112 +
  29.113 +#define KERNFS_FILENO(kt, typ, cookie) \
  29.114 +	((kt >= &kern_targets[0] && kt < &kern_targets[static_nkern_targets]) \
  29.115 +	    ? 2 + ((kt) - &kern_targets[0]) \
  29.116 +	      : (((cookie + 1) << 6) | (typ)))
  29.117 +#define KERNFS_TYPE_FILENO(typ, cookie) \
  29.118 +	(((cookie + 1) << 6) | (typ))
  29.119 +
  29.120 +#define VFSTOKERNFS(mp)	((struct kernfs_mount *)((mp)->mnt_data))
  29.121 +#define	VTOKERN(vp)	((struct kernfs_node *)(vp)->v_data)
  29.122 +#define KERNFSTOV(kfs)	((kfs)->kfs_vnode)
  29.123 +
  29.124 +extern const struct kern_target kern_targets[];
  29.125 +extern int nkern_targets;
  29.126 +extern const int static_nkern_targets;
  29.127 +extern int (**kernfs_vnodeop_p) __P((void *));
  29.128 +extern struct vfsops kernfs_vfsops;
  29.129 +extern dev_t rrootdev;
  29.130 +
  29.131 +struct secasvar;
  29.132 +struct secpolicy;
  29.133 +
  29.134 +int kernfs_root __P((struct mount *, struct vnode **));
  29.135 +
  29.136 +void kernfs_hashinit __P((void));
  29.137 +void kernfs_hashreinit __P((void));
  29.138 +void kernfs_hashdone __P((void));
  29.139 +int kernfs_freevp __P((struct vnode *));
  29.140 +int kernfs_allocvp __P((struct mount *, struct vnode **, kfstype,
  29.141 +	const struct kern_target *, u_int32_t));
  29.142 +
  29.143 +void kernfs_revoke_sa __P((struct secasvar *));
  29.144 +void kernfs_revoke_sp __P((struct secpolicy *));
  29.145 +
  29.146 +/*
  29.147 + * Data types for the kernfs file operations.
  29.148 + */
  29.149 +typedef enum {
  29.150 +	KERNFS_XREAD,
  29.151 +	KERNFS_XWRITE,
  29.152 +	KERNFS_FILEOP_CLOSE,
  29.153 +	KERNFS_FILEOP_GETATTR,
  29.154 +	KERNFS_FILEOP_IOCTL,
  29.155 +	KERNFS_FILEOP_MMAP,
  29.156 +	KERNFS_FILEOP_OPEN,
  29.157 +	KERNFS_FILEOP_READ,
  29.158 +	KERNFS_FILEOP_WRITE,
  29.159 +} kfsfileop;
  29.160 +
  29.161 +struct kernfs_fileop {
  29.162 +	kfstype				kf_type;
  29.163 +	kfsfileop			kf_fileop;
  29.164 +	union {
  29.165 +		void			*_kf_genop;
  29.166 +		int			(*_kf_vop)(void *);
  29.167 +		int			(*_kf_xread)
  29.168 +			(const struct kernfs_node *, int, char **, size_t,
  29.169 +			 size_t *);
  29.170 +		int			(*_kf_xwrite)
  29.171 +			(const struct kernfs_node *, char *, size_t);
  29.172 +	} _kf_opfn;
  29.173 +	SPLAY_ENTRY(kernfs_fileop)	kf_node;
  29.174 +};
  29.175 +#define	kf_genop	_kf_opfn
  29.176 +#define	kf_vop		_kf_opfn._kf_vop
  29.177 +#define	kf_xwrite	_kf_opfn._kf_xwrite
  29.178 +#define	kf_xread	_kf_opfn._kf_xread
  29.179 +
  29.180 +typedef struct kern_target kernfs_parentdir_t;
  29.181 +typedef struct dyn_kern_target kernfs_entry_t;
  29.182 +
  29.183 +/*
  29.184 + * Functions for adding kernfs datatypes and nodes.
  29.185 + */
  29.186 +kfstype kernfs_alloctype(int, const struct kernfs_fileop *);
  29.187 +#define	KERNFS_ALLOCTYPE(kf) kernfs_alloctype(sizeof((kf)) / \
  29.188 +	sizeof((kf)[0]), (kf))
  29.189 +#define	KERNFS_ALLOCENTRY(dkt, m_type, m_flags)				\
  29.190 +	dkt = (struct dyn_kern_target *)malloc(				\
  29.191 +		sizeof(struct dyn_kern_target), (m_type), (m_flags))
  29.192 +#define	KERNFS_INITENTRY(dkt, type, name, data, tag, vtype, mode) do {	\
  29.193 +	(dkt)->dkt_kt.kt_type = (type);					\
  29.194 +	(dkt)->dkt_kt.kt_namlen = strlen((name));			\
  29.195 +	(dkt)->dkt_kt.kt_name = (name);					\
  29.196 +	(dkt)->dkt_kt.kt_data = (data);					\
  29.197 +	(dkt)->dkt_kt.kt_tag = (tag);					\
  29.198 +	(dkt)->dkt_kt.kt_vtype = (vtype);				\
  29.199 +	(dkt)->dkt_kt.kt_mode = (mode);					\
  29.200 +} while (/*CONSTCOND*/0)
  29.201 +#define	KERNFS_ENTOPARENTDIR(dkt) &(dkt)->dkt_kt
  29.202 +int kernfs_addentry __P((kernfs_parentdir_t *, kernfs_entry_t *));
  29.203 +
  29.204 +#ifdef SYSCTL_SETUP_PROTO
  29.205 +SYSCTL_SETUP_PROTO(sysctl_vfs_kernfs_setup);
  29.206 +#endif /* SYSCTL_SETUP_PROTO */
  29.207 +
  29.208 +#endif /* _KERNEL */
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/netbsd-2.0-xen-sparse/sys/miscfs/kernfs/kernfs_vnops.c	Wed Mar 09 20:19:23 2005 +0000
    30.3 @@ -0,0 +1,1583 @@
    30.4 +/*	$NetBSD: kernfs_vnops.c,v 1.98.2.3 2004/05/15 13:35:27 tron Exp $	*/
    30.5 +
    30.6 +/*
    30.7 + * Copyright (c) 1992, 1993
    30.8 + *	The Regents of the University of California.  All rights reserved.
    30.9 + *
   30.10 + * This code is derived from software donated to Berkeley by
   30.11 + * Jan-Simon Pendry.
   30.12 + *
   30.13 + * Redistribution and use in source and binary forms, with or without
   30.14 + * modification, are permitted provided that the following conditions
   30.15 + * are met:
   30.16 + * 1. Redistributions of source code must retain the above copyright
   30.17 + *    notice, this list of conditions and the following disclaimer.
   30.18 + * 2. Redistributions in binary form must reproduce the above copyright
   30.19 + *    notice, this list of conditions and the following disclaimer in the
   30.20 + *    documentation and/or other materials provided with the distribution.
   30.21 + * 3. Neither the name of the University nor the names of its contributors
   30.22 + *    may be used to endorse or promote products derived from this software
   30.23 + *    without specific prior written permission.
   30.24 + *
   30.25 + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30.26 + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   30.27 + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30.28 + * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30.29 + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30.30 + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30.31 + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30.32 + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30.33 + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   30.34 + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30.35 + * SUCH DAMAGE.
   30.36 + *
   30.37 + *	@(#)kernfs_vnops.c	8.15 (Berkeley) 5/21/95
   30.38 + */
   30.39 +
   30.40 +/*
   30.41 + * Kernel parameter filesystem (/kern)
   30.42 + */
   30.43 +
   30.44 +#include <sys/cdefs.h>
   30.45 +__KERNEL_RCSID(0, "$NetBSD: kernfs_vnops.c,v 1.98.2.3 2004/05/15 13:35:27 tron Exp $");
   30.46 +
   30.47 +#ifdef _KERNEL_OPT
   30.48 +#include "opt_ipsec.h"
   30.49 +#endif
   30.50 +
   30.51 +#include <sys/param.h>
   30.52 +#include <sys/systm.h>
   30.53 +#include <sys/kernel.h>
   30.54 +#include <sys/vmmeter.h>
   30.55 +#include <sys/time.h>
   30.56 +#include <sys/proc.h>
   30.57 +#include <sys/vnode.h>
   30.58 +#include <sys/malloc.h>
   30.59 +#include <sys/file.h>
   30.60 +#include <sys/stat.h>
   30.61 +#include <sys/mount.h>
   30.62 +#include <sys/namei.h>
   30.63 +#include <sys/buf.h>
   30.64 +#include <sys/dirent.h>
   30.65 +#include <sys/msgbuf.h>
   30.66 +
   30.67 +#include <miscfs/genfs/genfs.h>
   30.68 +#include <miscfs/kernfs/kernfs.h>
   30.69 +
   30.70 +#ifdef IPSEC
   30.71 +#include <sys/mbuf.h>
   30.72 +#include <net/route.h>
   30.73 +#include <netinet/in.h>
   30.74 +#include <netinet6/ipsec.h>
   30.75 +#include <netkey/key.h>
   30.76 +#endif
   30.77 +
   30.78 +#include <uvm/uvm_extern.h>
   30.79 +
   30.80 +#define KSTRING	256		/* Largest I/O available via this filesystem */
   30.81 +#define	UIO_MX 32
   30.82 +
   30.83 +#define	READ_MODE	(S_IRUSR|S_IRGRP|S_IROTH)
   30.84 +#define	WRITE_MODE	(S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH)
   30.85 +#define	UREAD_MODE	(S_IRUSR)
   30.86 +#define	DIR_MODE	(S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
   30.87 +#define	UDIR_MODE	(S_IRUSR|S_IXUSR)
   30.88 +
   30.89 +#define N(s) sizeof(s)-1, s
   30.90 +const struct kern_target kern_targets[] = {
   30.91 +/* NOTE: The name must be less than UIO_MX-16 chars in length */
   30.92 +     /*        name            data          tag           type  ro/rw */
   30.93 +     { DT_DIR, N("."),         0,            KFSkern,        VDIR, DIR_MODE   },
   30.94 +     { DT_DIR, N(".."),        0,            KFSroot,        VDIR, DIR_MODE   },
   30.95 +     { DT_REG, N("boottime"),  &boottime.tv_sec, KFSint,     VREG, READ_MODE  },
   30.96 +			/* XXX cast away const */
   30.97 +     { DT_REG, N("copyright"), (void *)copyright,
   30.98 +     					     KFSstring,      VREG, READ_MODE  },
   30.99 +     { DT_REG, N("hostname"),  0,            KFShostname,    VREG, WRITE_MODE },
  30.100 +     { DT_REG, N("hz"),        &hz,          KFSint,         VREG, READ_MODE  },
  30.101 +#ifdef IPSEC
  30.102 +     { DT_DIR, N("ipsecsa"),   0,	     KFSipsecsadir,  VDIR, UDIR_MODE  },
  30.103 +     { DT_DIR, N("ipsecsp"),   0,	     KFSipsecspdir,  VDIR, UDIR_MODE  },
  30.104 +#endif
  30.105 +     { DT_REG, N("loadavg"),   0,            KFSavenrun,     VREG, READ_MODE  },
  30.106 +     { DT_REG, N("msgbuf"),    0,	     KFSmsgbuf,      VREG, READ_MODE  },
  30.107 +     { DT_REG, N("pagesize"),  &uvmexp.pagesize, KFSint,     VREG, READ_MODE  },
  30.108 +     { DT_REG, N("physmem"),   &physmem,     KFSint,         VREG, READ_MODE  },
  30.109 +#if 0
  30.110 +     { DT_DIR, N("root"),      0,            KFSnull,        VDIR, DIR_MODE   },
  30.111 +#endif
  30.112 +     { DT_BLK, N("rootdev"),   &rootdev,     KFSdevice,      VBLK, READ_MODE  },
  30.113 +     { DT_CHR, N("rrootdev"),  &rrootdev,    KFSdevice,      VCHR, READ_MODE  },
  30.114 +     { DT_REG, N("time"),      0,            KFStime,        VREG, READ_MODE  },
  30.115 +			/* XXX cast away const */
  30.116 +     { DT_REG, N("version"),   (void *)version,
  30.117 +     					     KFSstring,      VREG, READ_MODE  },
  30.118 +};
  30.119 +const struct kern_target subdir_targets[] = {
  30.120 +/* NOTE: The name must be less than UIO_MX-16 chars in length */
  30.121 +     /*        name            data          tag           type  ro/rw */
  30.122 +     { DT_DIR, N("."),         0,            KFSsubdir,      VDIR, DIR_MODE   },
  30.123 +     { DT_DIR, N(".."),        0,            KFSkern,        VDIR, DIR_MODE   },
  30.124 +};
  30.125 +#ifdef IPSEC
  30.126 +const struct kern_target ipsecsa_targets[] = {
  30.127 +/* NOTE: The name must be less than UIO_MX-16 chars in length */
  30.128 +     /*        name            data          tag           type  ro/rw */
  30.129 +     { DT_DIR, N("."),         0,            KFSipsecsadir,  VDIR, DIR_MODE   },
  30.130 +     { DT_DIR, N(".."),        0,            KFSkern,        VDIR, DIR_MODE   },
  30.131 +};
  30.132 +const struct kern_target ipsecsp_targets[] = {
  30.133 +/* NOTE: The name must be less than UIO_MX-16 chars in length */
  30.134 +     /*        name            data          tag           type  ro/rw */
  30.135 +     { DT_DIR, N("."),         0,            KFSipsecspdir,  VDIR, DIR_MODE   },
  30.136 +     { DT_DIR, N(".."),        0,            KFSkern,        VDIR, DIR_MODE   },
  30.137 +};
  30.138 +const struct kern_target ipsecsa_kt =
  30.139 +     { DT_DIR, N(""),          0,            KFSipsecsa,     VREG, UREAD_MODE };
  30.140 +const struct kern_target ipsecsp_kt =
  30.141 +     { DT_DIR, N(""),          0,            KFSipsecsp,     VREG, UREAD_MODE };
  30.142 +#endif
  30.143 +#undef N
  30.144 +SIMPLEQ_HEAD(,dyn_kern_target) dyn_kern_targets =
  30.145 +	SIMPLEQ_HEAD_INITIALIZER(dyn_kern_targets);
  30.146 +int nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]);
  30.147 +const int static_nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]);
  30.148 +#ifdef IPSEC
  30.149 +int nipsecsa_targets = sizeof(ipsecsa_targets) / sizeof(ipsecsa_targets[0]);
  30.150 +int nipsecsp_targets = sizeof(ipsecsp_targets) / sizeof(ipsecsp_targets[0]);
  30.151 +int nkern_dirs = 4; /* 2 extra subdirs */
  30.152 +#else
  30.153 +int nkern_dirs = 2;
  30.154 +#endif
  30.155 +
  30.156 +int kernfs_try_fileop(kfstype, kfsfileop, void *, int);
  30.157 +int kernfs_try_xread(kfstype, const struct kernfs_node *, int, char **,
  30.158 +    size_t, size_t *, int);
  30.159 +int kernfs_try_xwrite(kfstype, const struct kernfs_node *, char *,
  30.160 +    size_t, int);
  30.161 +
  30.162 +static int kernfs_default_xread(void *v);
  30.163 +static int kernfs_default_xwrite(void *v);
  30.164 +static int kernfs_default_fileop_getattr(void *);
  30.165 +
  30.166 +/* must include all fileop's */
  30.167 +const struct kernfs_fileop kernfs_default_fileops[] = {
  30.168 +  { .kf_fileop = KERNFS_XREAD },
  30.169 +  { .kf_fileop = KERNFS_XWRITE },
  30.170 +  { .kf_fileop = KERNFS_FILEOP_OPEN },
  30.171 +  { .kf_fileop = KERNFS_FILEOP_GETATTR,
  30.172 +    .kf_genop = {kernfs_default_fileop_getattr} },
  30.173 +  { .kf_fileop = KERNFS_FILEOP_IOCTL },
  30.174 +  { .kf_fileop = KERNFS_FILEOP_MMAP },
  30.175 +  { .kf_fileop = KERNFS_FILEOP_CLOSE },
  30.176 +  { .kf_fileop = KERNFS_FILEOP_READ, .kf_genop = {kernfs_default_xread} },
  30.177 +  { .kf_fileop = KERNFS_FILEOP_WRITE, .kf_genop = {kernfs_default_xwrite} },
  30.178 +};
  30.179 +
  30.180 +int	kernfs_lookup	__P((void *));
  30.181 +#define	kernfs_create	genfs_eopnotsupp
  30.182 +#define	kernfs_mknod	genfs_eopnotsupp
  30.183 +int	kernfs_open	__P((void *));
  30.184 +int	kernfs_close	__P((void *));
  30.185 +int	kernfs_access	__P((void *));
  30.186 +int	kernfs_getattr	__P((void *));
  30.187 +int	kernfs_setattr	__P((void *));
  30.188 +int	kernfs_read	__P((void *));
  30.189 +int	kernfs_write	__P((void *));
  30.190 +#define	kernfs_fcntl	genfs_fcntl
  30.191 +int	kernfs_ioctl	__P((void *));
  30.192 +#define	kernfs_poll	genfs_poll
  30.193 +#define kernfs_revoke	genfs_revoke
  30.194 +int	kernfs_mmap	__P((void *));
  30.195 +#define	kernfs_fsync	genfs_nullop
  30.196 +#define	kernfs_seek	genfs_nullop
  30.197 +#define	kernfs_remove	genfs_eopnotsupp
  30.198 +int	kernfs_link	__P((void *));
  30.199 +#define	kernfs_rename	genfs_eopnotsupp
  30.200 +#define	kernfs_mkdir	genfs_eopnotsupp
  30.201 +#define	kernfs_rmdir	genfs_eopnotsupp
  30.202 +int	kernfs_symlink	__P((void *));
  30.203 +int	kernfs_readdir	__P((void *));
  30.204 +#define	kernfs_readlink	genfs_eopnotsupp
  30.205 +#define	kernfs_abortop	genfs_abortop
  30.206 +int	kernfs_inactive	__P((void *));
  30.207 +int	kernfs_reclaim	__P((void *));
  30.208 +#define	kernfs_lock	genfs_lock
  30.209 +#define	kernfs_unlock	genfs_unlock
  30.210 +#define	kernfs_bmap	genfs_badop
  30.211 +#define	kernfs_strategy	genfs_badop
  30.212 +int	kernfs_print	__P((void *));
  30.213 +#define	kernfs_islocked	genfs_islocked
  30.214 +int	kernfs_pathconf	__P((void *));
  30.215 +#define	kernfs_advlock	genfs_einval
  30.216 +#define	kernfs_blkatoff	genfs_eopnotsupp
  30.217 +#define	kernfs_valloc	genfs_eopnotsupp
  30.218 +#define	kernfs_vfree	genfs_nullop
  30.219 +#define	kernfs_truncate	genfs_eopnotsupp
  30.220 +#define	kernfs_update	genfs_nullop
  30.221 +#define	kernfs_bwrite	genfs_eopnotsupp
  30.222 +#define	kernfs_putpages	genfs_putpages
  30.223 +
  30.224 +static int	kernfs_xread __P((struct kernfs_node *, int, char **, size_t, size_t *));
  30.225 +static int	kernfs_xwrite __P((const struct kernfs_node *, char *, size_t));
  30.226 +
  30.227 +int (**kernfs_vnodeop_p) __P((void *));
  30.228 +const struct vnodeopv_entry_desc kernfs_vnodeop_entries[] = {
  30.229 +	{ &vop_default_desc, vn_default_error },
  30.230 +	{ &vop_lookup_desc, kernfs_lookup },		/* lookup */
  30.231 +	{ &vop_create_desc, kernfs_create },		/* create */
  30.232 +	{ &vop_mknod_desc, kernfs_mknod },		/* mknod */
  30.233 +	{ &vop_open_desc, kernfs_open },		/* open */
  30.234 +	{ &vop_close_desc, kernfs_close },		/* close */
  30.235 +	{ &vop_access_desc, kernfs_access },		/* access */
  30.236 +	{ &vop_getattr_desc, kernfs_getattr },		/* getattr */
  30.237 +	{ &vop_setattr_desc, kernfs_setattr },		/* setattr */
  30.238 +	{ &vop_read_desc, kernfs_read },		/* read */
  30.239 +	{ &vop_write_desc, kernfs_write },		/* write */
  30.240 +	{ &vop_fcntl_desc, kernfs_fcntl },		/* fcntl */
  30.241 +	{ &vop_ioctl_desc, kernfs_ioctl },		/* ioctl */
  30.242 +	{ &vop_poll_desc, kernfs_poll },		/* poll */
  30.243 +	{ &vop_revoke_desc, kernfs_revoke },		/* revoke */
  30.244 +	{ &vop_mmap_desc, kernfs_mmap },		/* mmap */
  30.245 +	{ &vop_fsync_desc, kernfs_fsync },		/* fsync */
  30.246 +	{ &vop_seek_desc, kernfs_seek },		/* seek */
  30.247 +	{ &vop_remove_desc, kernfs_remove },		/* remove */
  30.248 +	{ &vop_link_desc, kernfs_link },		/* link */
  30.249 +	{ &vop_rename_desc, kernfs_rename },		/* rename */
  30.250 +	{ &vop_mkdir_desc, kernfs_mkdir },		/* mkdir */
  30.251 +	{ &vop_rmdir_desc, kernfs_rmdir },		/* rmdir */
  30.252 +	{ &vop_symlink_desc, kernfs_symlink },		/* symlink */
  30.253 +	{ &vop_readdir_desc, kernfs_readdir },		/* readdir */
  30.254 +	{ &vop_readlink_desc, kernfs_readlink },	/* readlink */
  30.255 +	{ &vop_abortop_desc, kernfs_abortop },		/* abortop */
  30.256 +	{ &vop_inactive_desc, kernfs_inactive },	/* inactive */
  30.257 +	{ &vop_reclaim_desc, kernfs_reclaim },		/* reclaim */
  30.258 +	{ &vop_lock_desc, kernfs_lock },		/* lock */
  30.259 +	{ &vop_unlock_desc, kernfs_unlock },		/* unlock */
  30.260 +	{ &vop_bmap_desc, kernfs_bmap },		/* bmap */
  30.261 +	{ &vop_strategy_desc, kernfs_strategy },	/* strategy */
  30.262 +	{ &vop_print_desc, kernfs_print },		/* print */
  30.263 +	{ &vop_islocked_desc, kernfs_islocked },	/* islocked */
  30.264 +	{ &vop_pathconf_desc, kernfs_pathconf },	/* pathconf */
  30.265 +	{ &vop_advlock_desc, kernfs_advlock },		/* advlock */
  30.266 +	{ &vop_blkatoff_desc, kernfs_blkatoff },	/* blkatoff */
  30.267 +	{ &vop_valloc_desc, kernfs_valloc },		/* valloc */
  30.268 +	{ &vop_vfree_desc, kernfs_vfree },		/* vfree */
  30.269 +	{ &vop_truncate_desc, kernfs_truncate },	/* truncate */
  30.270 +	{ &vop_update_desc, kernfs_update },		/* update */
  30.271 +	{ &vop_bwrite_desc, kernfs_bwrite },		/* bwrite */
  30.272 +	{ &vop_putpages_desc, kernfs_putpages },	/* putpages */
  30.273 +	{ NULL, NULL }
  30.274 +};
  30.275 +const struct vnodeopv_desc kernfs_vnodeop_opv_desc =
  30.276 +	{ &kernfs_vnodeop_p, kernfs_vnodeop_entries };
  30.277 +
  30.278 +static __inline int
  30.279 +kernfs_fileop_compare(struct kernfs_fileop *a, struct kernfs_fileop *b)
  30.280 +{
  30.281 +	if (a->kf_type < b->kf_type)
  30.282 +		return -1;
  30.283 +	if (a->kf_type > b->kf_type)
  30.284 +		return 1;
  30.285 +	if (a->kf_fileop < b->kf_fileop)
  30.286 +		return -1;
  30.287 +	if (a->kf_fileop > b->kf_fileop)
  30.288 +		return 1;
  30.289 +	return (0);
  30.290 +}
  30.291 +
  30.292 +SPLAY_HEAD(kfsfileoptree, kernfs_fileop) kfsfileoptree =
  30.293 +	SPLAY_INITIALIZER(kfsfileoptree);
  30.294 +SPLAY_PROTOTYPE(kfsfileoptree, kernfs_fileop, kf_node, kernfs_fileop_compare);
  30.295 +SPLAY_GENERATE(kfsfileoptree, kernfs_fileop, kf_node, kernfs_fileop_compare);
  30.296 +
  30.297 +kfstype
  30.298 +kernfs_alloctype(int nkf, const struct kernfs_fileop *kf)
  30.299 +{
  30.300 +	static u_char nextfreetype = KFSlasttype;
  30.301 +	struct kernfs_fileop *dkf, *fkf, skf;
  30.302 +	int i;
  30.303 +
  30.304 +	/* XXX need to keep track of dkf's memory if we support
  30.305 +           deallocating types */
  30.306 +	dkf = malloc(sizeof(kernfs_default_fileops), M_TEMP, M_WAITOK);
  30.307 +	memcpy(dkf, kernfs_default_fileops, sizeof(kernfs_default_fileops));
  30.308 +
  30.309 +	for (i = 0; i < sizeof(kernfs_default_fileops) /
  30.310 +		     sizeof(kernfs_default_fileops[0]); i++) {
  30.311 +		dkf[i].kf_type = nextfreetype;
  30.312 +		SPLAY_INSERT(kfsfileoptree, &kfsfileoptree, &dkf[i]);
  30.313 +	}
  30.314 +
  30.315 +	for (i = 0; i < nkf; i++) {
  30.316 +		skf.kf_type = nextfreetype;
  30.317 +		skf.kf_fileop = kf[i].kf_fileop;
  30.318 +		if ((fkf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf)))
  30.319 +			fkf->kf_genop = kf[i].kf_genop;
  30.320 +	}
  30.321 +
  30.322 +	return nextfreetype++;
  30.323 +}
  30.324 +
  30.325 +int
  30.326 +kernfs_try_fileop(kfstype type, kfsfileop fileop, void *v, int error)
  30.327 +{
  30.328 +	const struct kernfs_fileop *kf;
  30.329 +	struct kernfs_fileop skf;
  30.330 +
  30.331 +	skf.kf_type = type;
  30.332 +	skf.kf_fileop = fileop;
  30.333 +	kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf);
  30.334 +	if (kf == NULL)
  30.335 +		kf = &kernfs_default_fileops[fileop];
  30.336 +	if (kf->kf_vop)
  30.337 +		return kf->kf_vop(v);
  30.338 +	return error;
  30.339 +}
  30.340 +
  30.341 +int
  30.342 +kernfs_try_xread(kfstype type, const struct kernfs_node *kfs, int off,
  30.343 +    char **bufp, size_t len, size_t *wrlen, int error)
  30.344 +{
  30.345 +	const struct kernfs_fileop *kf;
  30.346 +	struct kernfs_fileop skf;
  30.347 +
  30.348 +	skf.kf_type = type;
  30.349 +	skf.kf_fileop = KERNFS_XREAD;
  30.350 +	kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf);
  30.351 +	if (kf == NULL)
  30.352 +		kf = &kernfs_default_fileops[KERNFS_XREAD];
  30.353 +	if (kf->kf_xread)
  30.354 +		return kf->kf_xread(kfs, off, bufp, len, wrlen);
  30.355 +	*wrlen = 0;
  30.356 +	return error;
  30.357 +}
  30.358 +
  30.359 +int
  30.360 +kernfs_try_xwrite(kfstype type, const struct kernfs_node *kfs, char *buf,
  30.361 +    size_t len, int error)
  30.362 +{
  30.363 +	const struct kernfs_fileop *kf;
  30.364 +	struct kernfs_fileop skf;
  30.365 +
  30.366 +	skf.kf_type = type;
  30.367 +	skf.kf_fileop = KERNFS_XWRITE;
  30.368 +	kf = SPLAY_FIND(kfsfileoptree, &kfsfileoptree, &skf);
  30.369 +	if (kf == NULL)
  30.370 +		kf = &kernfs_default_fileops[KERNFS_XWRITE];
  30.371 +	if (kf->kf_xwrite)
  30.372 +		return kf->kf_xwrite(kfs, buf, len);
  30.373 +	return error;
  30.374 +}
  30.375 +
  30.376 +int
  30.377 +kernfs_addentry(kernfs_parentdir_t *pkt, kernfs_entry_t *dkt)
  30.378 +{
  30.379 +	struct kernfs_subdir *ks, *parent;
  30.380 +
  30.381 +	if (pkt == NULL) {
  30.382 +		SIMPLEQ_INSERT_TAIL(&dyn_kern_targets, dkt, dkt_queue);
  30.383 +		nkern_targets++;
  30.384 +		if (dkt->dkt_kt.kt_vtype == VDIR)
  30.385 +			nkern_dirs++;
  30.386 +	} else {
  30.387 +		parent = (struct kernfs_subdir *)pkt->kt_data;
  30.388 +		SIMPLEQ_INSERT_TAIL(&parent->ks_entries, dkt, dkt_queue);
  30.389 +		parent->ks_nentries++;
  30.390 +		if (dkt->dkt_kt.kt_vtype == VDIR)
  30.391 +			parent->ks_dirs++;
  30.392 +	}
  30.393 +	if (dkt->dkt_kt.kt_vtype == VDIR && dkt->dkt_kt.kt_data == NULL) {
  30.394 +		ks = malloc(sizeof(struct kernfs_subdir),
  30.395 +		    M_TEMP, M_WAITOK);
  30.396 +		SIMPLEQ_INIT(&ks->ks_entries);
  30.397 +		ks->ks_nentries = 2; /* . and .. */
  30.398 +		ks->ks_dirs = 2;
  30.399 +		ks->ks_parent = pkt ? pkt : &kern_targets[0];
  30.400 +		dkt->dkt_kt.kt_data = ks;
  30.401 +	}
  30.402 +	return 0;
  30.403 +}
  30.404 +
  30.405 +static int
  30.406 +kernfs_xread(kfs, off, bufp, len, wrlen)
  30.407 +	struct kernfs_node *kfs;
  30.408 +	int off;
  30.409 +	char **bufp;
  30.410 +	size_t len;
  30.411 +	size_t *wrlen;
  30.412 +{
  30.413 +	const struct kern_target *kt;
  30.414 +#ifdef IPSEC
  30.415 +	struct mbuf *m;
  30.416 +#endif
  30.417 +
  30.418 +	kt = kfs->kfs_kt;
  30.419 +
  30.420 +	switch (kfs->kfs_type) {
  30.421 +	case KFStime: {
  30.422 +		struct timeval tv;
  30.423 +
  30.424 +		microtime(&tv);
  30.425 +		snprintf(*bufp, len, "%ld %ld\n", tv.tv_sec, tv.tv_usec);
  30.426 +		break;
  30.427 +	}
  30.428 +
  30.429 +	case KFSint: {
  30.430 +		int *ip = kt->kt_data;
  30.431 +
  30.432 +		snprintf(*bufp, len, "%d\n", *ip);
  30.433 +		break;
  30.434 +	}
  30.435 +
  30.436 +	case KFSstring: {
  30.437 +		char *cp = kt->kt_data;
  30.438 +
  30.439 +		*bufp = cp;
  30.440 +		break;
  30.441 +	}
  30.442 +
  30.443 +	case KFSmsgbuf: {
  30.444 +		long n;
  30.445 +
  30.446 +		/*
  30.447 +		 * deal with cases where the message buffer has
  30.448 +		 * become corrupted.
  30.449 +		 */
  30.450 +		if (!msgbufenabled || msgbufp->msg_magic != MSG_MAGIC) {
  30.451 +			msgbufenabled = 0;
  30.452 +			return (ENXIO);
  30.453 +		}
  30.454 +
  30.455 +		/*
  30.456 +		 * Note that reads of /kern/msgbuf won't necessarily yield
  30.457 +		 * consistent results, if the message buffer is modified
  30.458 +		 * while the read is in progress.  The worst that can happen
  30.459 +		 * is that incorrect data will be read.  There's no way
  30.460 +		 * that this can crash the system unless the values in the
  30.461 +		 * message buffer header are corrupted, but that'll cause
  30.462 +		 * the system to die anyway.
  30.463 +		 */
  30.464 +		if (off >= msgbufp->msg_bufs) {
  30.465 +			*wrlen = 0;
  30.466 +			return (0);
  30.467 +		}
  30.468 +		n = msgbufp->msg_bufx + off;
  30.469 +		if (n >= msgbufp->msg_bufs)
  30.470 +			n -= msgbufp->msg_bufs;
  30.471 +		len = min(msgbufp->msg_bufs - n, msgbufp->msg_bufs - off);
  30.472 +		*bufp = msgbufp->msg_bufc + n;
  30.473 +		*wrlen = len;
  30.474 +		return (0);
  30.475 +	}
  30.476 +
  30.477 +	case KFShostname: {
  30.478 +		char *cp = hostname;
  30.479 +		int xlen = hostnamelen;
  30.480 +
  30.481 +		if (xlen >= (len - 2))
  30.482 +			return (EINVAL);
  30.483 +
  30.484 +		memcpy(*bufp, cp, xlen);
  30.485 +		(*bufp)[xlen] = '\n';
  30.486 +		(*bufp)[xlen+1] = '\0';
  30.487 +		len = strlen(*bufp);
  30.488 +		break;
  30.489 +	}
  30.490 +
  30.491 +	case KFSavenrun:
  30.492 +		averunnable.fscale = FSCALE;
  30.493 +		snprintf(*bufp, len, "%d %d %d %ld\n",
  30.494 +		    averunnable.ldavg[0], averunnable.ldavg[1],
  30.495 +		    averunnable.ldavg[2], averunnable.fscale);
  30.496 +		break;
  30.497 +
  30.498 +#ifdef IPSEC
  30.499 +	case KFSipsecsa:
  30.500 +		/*
  30.501 +		 * Note that SA configuration could be changed during the
  30.502 +		 * read operation, resulting in garbled output.
  30.503 +		 */
  30.504 +		m = key_setdumpsa_spi(htonl(kfs->kfs_value));
  30.505 +		if (!m)
  30.506 +			return (ENOBUFS);
  30.507 +		if (off >= m->m_pkthdr.len) {
  30.508 +			*wrlen = 0;
  30.509 +			m_freem(m);
  30.510 +			return (0);
  30.511 +		}
  30.512 +		if (len > m->m_pkthdr.len - off)
  30.513 +			len = m->m_pkthdr.len - off;
  30.514 +		m_copydata(m, off, len, *bufp);
  30.515 +		*wrlen = len;
  30.516 +		m_freem(m);
  30.517 +		return (0);
  30.518 +
  30.519 +	case KFSipsecsp:
  30.520 +		/*
  30.521 +		 * Note that SP configuration could be changed during the
  30.522 +		 * read operation, resulting in garbled output.
  30.523 +		 */
  30.524 +		if (!kfs->kfs_v) {
  30.525 +			struct secpolicy *sp;
  30.526 +
  30.527 +			sp = key_getspbyid(kfs->kfs_value);
  30.528 +			if (sp)
  30.529 +				kfs->kfs_v = sp;
  30.530 +			else
  30.531 +				return (ENOENT);
  30.532 +		}
  30.533 +		m = key_setdumpsp((struct secpolicy *)kfs->kfs_v,
  30.534 +		    SADB_X_SPDGET, 0, 0);
  30.535 +		if (!m)
  30.536 +			return (ENOBUFS);
  30.537 +		if (off >= m->m_pkthdr.len) {
  30.538 +			*wrlen = 0;
  30.539 +			m_freem(m);
  30.540 +			return (0);
  30.541 +		}
  30.542 +		if (len > m->m_pkthdr.len - off)
  30.543 +			len = m->m_pkthdr.len - off;
  30.544 +		m_copydata(m, off, len, *bufp);
  30.545 +		*wrlen = len;
  30.546 +		m_freem(m);
  30.547 +		return (0);
  30.548 +#endif
  30.549 +
  30.550 +	default:
  30.551 +		return kernfs_try_xread(kfs->kfs_type, kfs, off, bufp, len,
  30.552 +		    wrlen, 0);
  30.553 +	}
  30.554 +
  30.555 +	len = strlen(*bufp);
  30.556 +	if (len <= off)
  30.557 +		*wrlen = 0;
  30.558 +	else {
  30.559 +		*bufp += off;
  30.560 +		*wrlen = len - off;
  30.561 +	}
  30.562 +	return (0);
  30.563 +}
  30.564 +
  30.565 +static int
  30.566 +kernfs_xwrite(kfs, buf, len)
  30.567 +	const struct kernfs_node *kfs;
  30.568 +	char *buf;
  30.569 +	size_t len;
  30.570 +{
  30.571 +
  30.572 +	switch (kfs->kfs_type) {
  30.573 +	case KFShostname:
  30.574 +		if (buf[len-1] == '\n')
  30.575 +			--len;
  30.576 +		memcpy(hostname, buf, len);
  30.577 +		hostname[len] = '\0';
  30.578 +		hostnamelen = (size_t) len;
  30.579 +		return (0);
  30.580 +
  30.581 +	default:
  30.582 +		return kernfs_try_xwrite(kfs->kfs_type, kfs, buf, len, EIO);
  30.583 +	}
  30.584 +}
  30.585 +
  30.586 +
  30.587 +/*
  30.588 + * vp is the current namei directory
  30.589 + * ndp is the name to locate in that directory...
  30.590 + */
/*
 * Look up a component name (cnp) in the kernfs directory dvp and
 * return the resulting vnode in *vpp.  kernfs is read-only, so
 * DELETE/RENAME lookups fail with EROFS.  On success the parent may
 * be unlocked (PDIRUNLOCK set) when the caller did not ask to keep
 * it locked for the last component.
 */
int
kernfs_lookup(v)
	void *v;
{
	struct vop_lookup_args /* {
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap = v;
	struct componentname *cnp = ap->a_cnp;
	struct vnode **vpp = ap->a_vpp;
	struct vnode *dvp = ap->a_dvp;
	const char *pname = cnp->cn_nameptr;
	const struct kernfs_node *kfs;
	const struct kern_target *kt;
	const struct dyn_kern_target *dkt;
	const struct kernfs_subdir *ks;
	int error, i, wantpunlock;
#ifdef IPSEC
	char *ep;
	u_int32_t id;
#endif

	*vpp = NULLVP;
	cnp->cn_flags &= ~PDIRUNLOCK;

	/* Read-only filesystem: refuse destructive name operations. */
	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
		return (EROFS);

	/* "." — return the directory itself with an extra reference. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		*vpp = dvp;
		VREF(dvp);
		return (0);
	}

	/*
	 * Unlock the parent after a successful lookup unless the caller
	 * wants it locked for the final path component.
	 */
	wantpunlock = (~cnp->cn_flags & (LOCKPARENT | ISLASTCN));
	kfs = VTOKERN(dvp);
	switch (kfs->kfs_type) {
	case KFSkern:
		/*
		 * Shouldn't get here with .. in the root node.
		 */
		if (cnp->cn_flags & ISDOTDOT)
			return (EIO);

		/* Static targets first, then dynamically registered ones. */
		for (i = 0; i < static_nkern_targets; i++) {
			kt = &kern_targets[i];
			if (cnp->cn_namelen == kt->kt_namlen &&
			    memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
				goto found;
		}
		SIMPLEQ_FOREACH(dkt, &dyn_kern_targets, dkt_queue) {
			if (cnp->cn_namelen == dkt->dkt_kt.kt_namlen &&
			    memcmp(dkt->dkt_kt.kt_name, pname, cnp->cn_namelen) == 0) {
				kt = &dkt->dkt_kt;
				goto found;
			}
		}
		break;

	found:
		/* Shared tail for every case that resolved a kern_target. */
		error = kernfs_allocvp(dvp->v_mount, vpp, kt->kt_tag, kt, 0);
		if ((error == 0) && wantpunlock) {
			VOP_UNLOCK(dvp, 0);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (error);

	case KFSsubdir:
		ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data;
		if (cnp->cn_flags & ISDOTDOT) {
			kt = ks->ks_parent;
			goto found;
		}

		SIMPLEQ_FOREACH(dkt, &ks->ks_entries, dkt_queue) {
			if (cnp->cn_namelen == dkt->dkt_kt.kt_namlen &&
			    memcmp(dkt->dkt_kt.kt_name, pname, cnp->cn_namelen) == 0) {
				kt = &dkt->dkt_kt;
				goto found;
			}
		}
		break;

#ifdef IPSEC
	case KFSipsecsadir:
		if (cnp->cn_flags & ISDOTDOT) {
			kt = &kern_targets[0];
			goto found;
		}

		/* i starts at 2: skip the "." and ".." entries. */
		for (i = 2; i < nipsecsa_targets; i++) {
			kt = &ipsecsa_targets[i];
			if (cnp->cn_namelen == kt->kt_namlen &&
			    memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
				goto found;
		}

		/* Otherwise the name must be a decimal SPI. */
		ep = NULL;
		id = strtoul(pname, &ep, 10);
		if (!ep || *ep || ep == pname)
			break;

		error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsa, &ipsecsa_kt, id);
		if ((error == 0) && wantpunlock) {
			VOP_UNLOCK(dvp, 0);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (error);

	case KFSipsecspdir:
		if (cnp->cn_flags & ISDOTDOT) {
			kt = &kern_targets[0];
			goto found;
		}

		for (i = 2; i < nipsecsp_targets; i++) {
			kt = &ipsecsp_targets[i];
			if (cnp->cn_namelen == kt->kt_namlen &&
			    memcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
				goto found;
		}

		/* Otherwise the name must be a decimal policy id. */
		ep = NULL;
		id = strtoul(pname, &ep, 10);
		if (!ep || *ep || ep == pname)
			break;

		error = kernfs_allocvp(dvp->v_mount, vpp, KFSipsecsp, &ipsecsp_kt, id);
		if ((error == 0) && wantpunlock) {
			VOP_UNLOCK(dvp, 0);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (error);
#endif

	default:
		return (ENOTDIR);
	}

	/* Name not found: ENOENT for plain lookups, EROFS for create etc. */
	return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
}
  30.733 +
  30.734 +int
  30.735 +kernfs_open(v)
  30.736 +	void *v;
  30.737 +{
  30.738 +	struct vop_open_args /* {
  30.739 +		struct vnode *a_vp;
  30.740 +		int a_mode;
  30.741 +		struct ucred *a_cred;
  30.742 +		struct proc *a_p;
  30.743 +	} */ *ap = v;
  30.744 +	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
  30.745 +#ifdef IPSEC
  30.746 +	struct mbuf *m;
  30.747 +	struct secpolicy *sp;
  30.748 +#endif
  30.749 +
  30.750 +	switch (kfs->kfs_type) {
  30.751 +#ifdef IPSEC
  30.752 +	case KFSipsecsa:
  30.753 +		m = key_setdumpsa_spi(htonl(kfs->kfs_value));
  30.754 +		if (m) {
  30.755 +			m_freem(m);
  30.756 +			return (0);
  30.757 +		} else
  30.758 +			return (ENOENT);
  30.759 +
  30.760 +	case KFSipsecsp:
  30.761 +		sp = key_getspbyid(kfs->kfs_value);
  30.762 +		if (sp) {
  30.763 +			kfs->kfs_v = sp;
  30.764 +			return (0);
  30.765 +		} else
  30.766 +			return (ENOENT);
  30.767 +#endif
  30.768 +
  30.769 +	default:
  30.770 +		return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_OPEN,
  30.771 +		    v, 0);
  30.772 +	}
  30.773 +}
  30.774 +
  30.775 +int
  30.776 +kernfs_close(v)
  30.777 +	void *v;
  30.778 +{
  30.779 +	struct vop_close_args /* {
  30.780 +		struct vnode *a_vp;
  30.781 +		int a_fflag;
  30.782 +		struct ucred *a_cred;
  30.783 +		struct proc *a_p;
  30.784 +	} */ *ap = v;
  30.785 +	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
  30.786 +
  30.787 +	switch (kfs->kfs_type) {
  30.788 +#ifdef IPSEC
  30.789 +	case KFSipsecsp:
  30.790 +		key_freesp((struct secpolicy *)kfs->kfs_v);
  30.791 +		break;
  30.792 +#endif
  30.793 +
  30.794 +	default:
  30.795 +		return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_CLOSE,
  30.796 +		    v, 0);
  30.797 +	}
  30.798 +
  30.799 +	return (0);
  30.800 +}
  30.801 +
  30.802 +int
  30.803 +kernfs_access(v)
  30.804 +	void *v;
  30.805 +{
  30.806 +	struct vop_access_args /* {
  30.807 +		struct vnode *a_vp;
  30.808 +		int a_mode;
  30.809 +		struct ucred *a_cred;
  30.810 +		struct proc *a_p;
  30.811 +	} */ *ap = v;
  30.812 +	struct vattr va;
  30.813 +	int error;
  30.814 +
  30.815 +	if ((error = VOP_GETATTR(ap->a_vp, &va, ap->a_cred, ap->a_p)) != 0)
  30.816 +		return (error);
  30.817 +
  30.818 +	return (vaccess(va.va_type, va.va_mode, va.va_uid, va.va_gid,
  30.819 +	    ap->a_mode, ap->a_cred));
  30.820 +}
  30.821 +
  30.822 +static int
  30.823 +kernfs_default_fileop_getattr(v)
  30.824 +	void *v;
  30.825 +{
  30.826 +	struct vop_getattr_args /* {
  30.827 +		struct vnode *a_vp;
  30.828 +		struct vattr *a_vap;
  30.829 +		struct ucred *a_cred;
  30.830 +		struct proc *a_p;
  30.831 +	} */ *ap = v;
  30.832 +	struct vattr *vap = ap->a_vap;
  30.833 +
  30.834 +	vap->va_nlink = 1;
  30.835 +	vap->va_bytes = vap->va_size = 0;
  30.836 +
  30.837 +	return 0;
  30.838 +}
  30.839 +
/*
 * Return the attributes of a kernfs node.  Common fields are filled
 * first (owner root, per-node mode and fileno, current timestamps);
 * link count and size are then set per node type.  For readable leaf
 * nodes the size is computed by repeatedly calling kernfs_xread()
 * until it reports no more data.
 */
int
kernfs_getattr(v)
	void *v;
{
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;
	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
	struct kernfs_subdir *ks;
	struct vattr *vap = ap->a_vap;
	int error = 0;
	char strbuf[KSTRING], *buf;
	size_t nread, total;

	VATTR_NULL(vap);
	vap->va_type = ap->a_vp->v_type;
	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_mode = kfs->kfs_mode;
	vap->va_fileid = kfs->kfs_fileno;
	vap->va_flags = 0;
	vap->va_size = 0;
	vap->va_blocksize = DEV_BSIZE;
	/*
	 * Make all times be current TOD, except for the "boottime" node.
	 * Avoid microtime(9), it's slow.
	 * We don't guard the read from time(9) with splclock(9) since we
	 * don't actually need to be THAT sure the access is atomic. 
	 */
	if (kfs->kfs_kt && kfs->kfs_kt->kt_namlen == 8 && 
	    !memcmp(kfs->kfs_kt->kt_name, "boottime", 8)) {
		TIMEVAL_TO_TIMESPEC(&boottime, &vap->va_ctime);
	} else {
		TIMEVAL_TO_TIMESPEC(&time, &vap->va_ctime);
	}
	vap->va_atime = vap->va_mtime = vap->va_ctime;
	vap->va_gen = 0;
	vap->va_flags = 0;
	vap->va_rdev = 0;
	vap->va_bytes = 0;

	switch (kfs->kfs_type) {
	case KFSkern:
		/* Root of /kern: one link per subdirectory. */
		vap->va_nlink = nkern_dirs;
		vap->va_bytes = vap->va_size = DEV_BSIZE;
		break;

	case KFSroot:
		vap->va_nlink = 1;
		vap->va_bytes = vap->va_size = DEV_BSIZE;
		break;

	case KFSsubdir:
		ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data;
		vap->va_nlink = ks->ks_dirs;
		vap->va_bytes = vap->va_size = DEV_BSIZE;
		break;

	case KFSnull:
	case KFStime:
	case KFSint:
	case KFSstring:
	case KFShostname:
	case KFSavenrun:
	case KFSdevice:
	case KFSmsgbuf:
#ifdef IPSEC
	case KFSipsecsa:
	case KFSipsecsp:
#endif
		vap->va_nlink = 1;
		/*
		 * Compute the file size by reading the node in
		 * KSTRING-sized chunks until xread returns 0 bytes.
		 */
		total = 0;
		do {
			buf = strbuf;
			error = kernfs_xread(kfs, total, &buf,
			    sizeof(strbuf), &nread);
			total += nread;
		} while (error == 0 && nread != 0);
		vap->va_bytes = vap->va_size = total;
		break;

#ifdef IPSEC
	case KFSipsecsadir:
	case KFSipsecspdir:
		vap->va_nlink = 2;
		vap->va_bytes = vap->va_size = DEV_BSIZE;
		break;
#endif

	default:
		/* Unknown type: let a registered getattr hook handle it. */
		error = kernfs_try_fileop(kfs->kfs_type,
		    KERNFS_FILEOP_GETATTR, v, EINVAL);
		break;
	}

	return (error);
}
  30.940 +
  30.941 +/*ARGSUSED*/
  30.942 +int
  30.943 +kernfs_setattr(v)
  30.944 +	void *v;
  30.945 +{
  30.946 +
  30.947 +	/*
  30.948 +	 * Silently ignore attribute changes.
  30.949 +	 * This allows for open with truncate to have no
  30.950 +	 * effect until some data is written.  I want to
  30.951 +	 * do it this way because all writes are atomic.
  30.952 +	 */
  30.953 +	return (0);
  30.954 +}
  30.955 +
  30.956 +static int
  30.957 +kernfs_default_xread(v)
  30.958 +	void *v;
  30.959 +{
  30.960 +	struct vop_read_args /* {
  30.961 +		struct vnode *a_vp;
  30.962 +		struct uio *a_uio;
  30.963 +		int  a_ioflag;
  30.964 +		struct ucred *a_cred;
  30.965 +	} */ *ap = v;
  30.966 +	struct uio *uio = ap->a_uio;
  30.967 +	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
  30.968 +	char strbuf[KSTRING], *buf;
  30.969 +	off_t off;
  30.970 +	size_t len;
  30.971 +	int error;
  30.972 +
  30.973 +	if (ap->a_vp->v_type == VDIR)
  30.974 +		return (EOPNOTSUPP);
  30.975 +
  30.976 +	off = uio->uio_offset;
  30.977 +	buf = strbuf;
  30.978 +	if ((error = kernfs_xread(kfs, off, &buf, sizeof(strbuf), &len)) == 0)
  30.979 +		error = uiomove(buf, len, uio);
  30.980 +	return (error);
  30.981 +}
  30.982 +
  30.983 +int
  30.984 +kernfs_read(v)
  30.985 +	void *v;
  30.986 +{
  30.987 +	struct vop_read_args /* {
  30.988 +		struct vnode *a_vp;
  30.989 +		struct uio *a_uio;
  30.990 +		int  a_ioflag;
  30.991 +		struct ucred *a_cred;
  30.992 +	} */ *ap = v;
  30.993 +	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
  30.994 +
  30.995 +	return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_READ, v, 0);
  30.996 +}
  30.997 +
  30.998 +static int
  30.999 +kernfs_default_xwrite(v)
 30.1000 +	void *v;
 30.1001 +{
 30.1002 +	struct vop_write_args /* {
 30.1003 +		struct vnode *a_vp;
 30.1004 +		struct uio *a_uio;
 30.1005 +		int  a_ioflag;
 30.1006 +		struct ucred *a_cred;
 30.1007 +	} */ *ap = v;
 30.1008 +	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
 30.1009 +	struct uio *uio = ap->a_uio;
 30.1010 +	int error, xlen;
 30.1011 +	char strbuf[KSTRING];
 30.1012 +
 30.1013 +	if (uio->uio_offset != 0)
 30.1014 +		return (EINVAL);
 30.1015 +
 30.1016 +	xlen = min(uio->uio_resid, KSTRING-1);
 30.1017 +	if ((error = uiomove(strbuf, xlen, uio)) != 0)
 30.1018 +		return (error);
 30.1019 +
 30.1020 +	if (uio->uio_resid != 0)
 30.1021 +		return (EIO);
 30.1022 +
 30.1023 +	strbuf[xlen] = '\0';
 30.1024 +	xlen = strlen(strbuf);
 30.1025 +	return (kernfs_xwrite(kfs, strbuf, xlen));
 30.1026 +}
 30.1027 +
 30.1028 +int
 30.1029 +kernfs_write(v)
 30.1030 +	void *v;
 30.1031 +{
 30.1032 +	struct vop_write_args /* {
 30.1033 +		struct vnode *a_vp;
 30.1034 +		struct uio *a_uio;
 30.1035 +		int  a_ioflag;
 30.1036 +		struct ucred *a_cred;
 30.1037 +	} */ *ap = v;
 30.1038 +	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
 30.1039 +
 30.1040 +	return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_WRITE, v, 0);
 30.1041 +}
 30.1042 +
 30.1043 +int
 30.1044 +kernfs_ioctl(v)
 30.1045 +	void *v;
 30.1046 +{
 30.1047 +	struct vop_ioctl_args /* {
 30.1048 +		const struct vnodeop_desc *a_desc;
 30.1049 +		struct vnode *a_vp;
 30.1050 +		u_long a_command;
 30.1051 +		void *a_data;
 30.1052 +		int a_fflag;
 30.1053 +		struct ucred *a_cred;
 30.1054 +		struct proc *a_p;
 30.1055 +	} */ *ap = v;
 30.1056 +	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
 30.1057 +
 30.1058 +	return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_IOCTL, v,
 30.1059 +	    EPASSTHROUGH);
 30.1060 +}
 30.1061 +
 30.1062 +int
 30.1063 +kernfs_mmap(v)
 30.1064 +	void *v;
 30.1065 +{
 30.1066 +	struct vop_mmap_args /* {
 30.1067 +		const struct vnodeop_desc *a_desc;
 30.1068 +		struct vnode *a_vp;
 30.1069 +		int a_fflags;
 30.1070 +		struct ucred *a_cred;
 30.1071 +		struct proc *a_p;
 30.1072 +	} */ *ap = v;
 30.1073 +	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
 30.1074 +
 30.1075 +	return kernfs_try_fileop(kfs->kfs_type, KERNFS_FILEOP_MMAP, v, 0);
 30.1076 +}
 30.1077 +
 30.1078 +static int
 30.1079 +kernfs_setdirentfileno_kt(struct dirent *d, const struct kern_target *kt,
 30.1080 +    u_int32_t value, struct vop_readdir_args *ap)
 30.1081 +{
 30.1082 +	struct kernfs_node *kfs;
 30.1083 +	struct vnode *vp;
 30.1084 +	int error;
 30.1085 +
 30.1086 +	if ((error = kernfs_allocvp(ap->a_vp->v_mount, &vp, kt->kt_tag, kt,
 30.1087 +	    value)) != 0)
 30.1088 +		return error;
 30.1089 +	if (kt->kt_tag == KFSdevice) {
 30.1090 +		struct vattr va;
 30.1091 +		if ((error = VOP_GETATTR(vp, &va, ap->a_cred,
 30.1092 +		    ap->a_uio->uio_segflg == UIO_USERSPACE ?
 30.1093 +		    ap->a_uio->uio_procp : &proc0)) != 0)
 30.1094 +			return (error);
 30.1095 +		d->d_fileno = va.va_fileid;
 30.1096 +	} else {
 30.1097 +		kfs = VTOKERN(vp);
 30.1098 +		d->d_fileno = kfs->kfs_fileno;
 30.1099 +	}
 30.1100 +	vput(vp);
 30.1101 +	return 0;
 30.1102 +}
 30.1103 +
 30.1104 +static int
 30.1105 +kernfs_setdirentfileno(struct dirent *d, off_t entry,
 30.1106 +    struct kernfs_node *thisdir_kfs, const struct kern_target *parent_kt,
 30.1107 +    const struct kern_target *kt, struct vop_readdir_args *ap)
 30.1108 +{
 30.1109 +	const struct kern_target *ikt;
 30.1110 +	int error;
 30.1111 +
 30.1112 +	switch (entry) {
 30.1113 +	case 0:
 30.1114 +		d->d_fileno = thisdir_kfs->kfs_fileno;
 30.1115 +		return 0;
 30.1116 +	case 1:
 30.1117 +		ikt = parent_kt;
 30.1118 +		break;
 30.1119 +	default:
 30.1120 +		ikt = kt;
 30.1121 +		break;
 30.1122 +	}
 30.1123 +	if (ikt != thisdir_kfs->kfs_kt) {
 30.1124 +		if ((error = kernfs_setdirentfileno_kt(d, ikt, 0, ap)) != 0)
 30.1125 +			return error;
 30.1126 +	} else
 30.1127 +		d->d_fileno = thisdir_kfs->kfs_fileno;
 30.1128 +	return 0;
 30.1129 +}
 30.1130 +
/*
 * Read directory entries from a kernfs directory into the caller's
 * uio.  The directory "offset" is an entry index, not a byte offset;
 * each emitted entry is a fixed UIO_MX bytes.  When the caller asks
 * for cookies (NFS), one cookie (next index) is produced per entry.
 */
int
kernfs_readdir(v)
	void *v;
{
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		off_t **a_cookies;
		int *a_ncookies;
	} */ *ap = v;
	struct uio *uio = ap->a_uio;
	struct dirent d;
	struct kernfs_node *kfs = VTOKERN(ap->a_vp);
	const struct kern_target *kt;
	const struct dyn_kern_target *dkt = NULL;
	const struct kernfs_subdir *ks;
	off_t i, j;
	int error;
	off_t *cookies = NULL;
	int ncookies = 0, n;
#ifdef IPSEC
	struct secasvar *sav, *sav2;
	struct secpolicy *sp;
#endif

	if (uio->uio_resid < UIO_MX)
		return (EINVAL);
	if (uio->uio_offset < 0)
		return (EINVAL);

	error = 0;
	i = uio->uio_offset;	/* index of the first entry to emit */
	memset(&d, 0, sizeof(d));
	d.d_reclen = UIO_MX;
	/* Upper bound on entries (and cookies) the caller can take. */
	ncookies = uio->uio_resid / UIO_MX;

	switch (kfs->kfs_type) {
	case KFSkern:
		/* /kern root: static targets followed by dynamic ones. */
		if (i >= nkern_targets)
			return (0);

		if (ap->a_ncookies) {
			ncookies = min(ncookies, (nkern_targets - i));
			cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
			    M_WAITOK);
			*ap->a_cookies = cookies;
		}

		n = 0;
		for (; i < nkern_targets && uio->uio_resid >= UIO_MX; i++) {
			if (i < static_nkern_targets)
				kt = &kern_targets[i];
			else {
				/*
				 * Walk the dynamic-target list to index i
				 * the first time, then just advance it.
				 */
				if (dkt == NULL) {
					dkt = SIMPLEQ_FIRST(&dyn_kern_targets);
					for (j = static_nkern_targets; j < i &&
						     dkt != NULL; j++)
						dkt = SIMPLEQ_NEXT(dkt, dkt_queue);
					if (j != i)
						break;
				} else {
					dkt = SIMPLEQ_NEXT(dkt, dkt_queue);
					if (dkt == NULL)
						break;
				}
				kt = &dkt->dkt_kt;
			}
			if (kt->kt_tag == KFSdevice) {
				/* Skip device entries with no backing device. */
				dev_t *dp = kt->kt_data;
				struct vnode *fvp;

				if (*dp == NODEV ||
				    !vfinddev(*dp, kt->kt_vtype, &fvp))
					continue;
			}
			d.d_namlen = kt->kt_namlen;
			if ((error = kernfs_setdirentfileno(&d, i, kfs,
			    &kern_targets[0], kt, ap)) != 0)
				break;
			memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
			d.d_type = kt->kt_type;
			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
				break;
			if (cookies)
				*cookies++ = i + 1;
			n++;
		}
		ncookies = n;
		break;

	case KFSroot:
		/* Mount root has exactly "." and "..". */
		if (i >= 2)
			return 0;

		if (ap->a_ncookies) {
			ncookies = min(ncookies, (2 - i));
			cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
			    M_WAITOK);
			*ap->a_cookies = cookies;
		}

		n = 0;
		for (; i < 2 && uio->uio_resid >= UIO_MX; i++) {
			kt = &kern_targets[i];
			d.d_namlen = kt->kt_namlen;
			d.d_fileno = KERNFS_FILENO(kt, kt->kt_tag, 0);
			memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
			d.d_type = kt->kt_type;
			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
				break;
			if (cookies)
				*cookies++ = i + 1;
			n++;
		}
		ncookies = n;
		break;

	case KFSsubdir:
		ks = (struct kernfs_subdir *)kfs->kfs_kt->kt_data;
		if (i >= ks->ks_nentries)
			return (0);

		if (ap->a_ncookies) {
			ncookies = min(ncookies, (ks->ks_nentries - i));
			cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
			    M_WAITOK);
			*ap->a_cookies = cookies;
		}

		/* Position the dynamic-entry cursor at index i. */
		dkt = SIMPLEQ_FIRST(&ks->ks_entries);
		for (j = 0; j < i && dkt != NULL; j++)
			dkt = SIMPLEQ_NEXT(dkt, dkt_queue);
		n = 0;
		for (; i < ks->ks_nentries && uio->uio_resid >= UIO_MX; i++) {
			if (i < 2)
				kt = &subdir_targets[i];
			else {
				/* check if ks_nentries lied to us */
				if (dkt == NULL)
					break;
				kt = &dkt->dkt_kt;
				dkt = SIMPLEQ_NEXT(dkt, dkt_queue);
			}
			if (kt->kt_tag == KFSdevice) {
				dev_t *dp = kt->kt_data;
				struct vnode *fvp;

				if (*dp == NODEV ||
				    !vfinddev(*dp, kt->kt_vtype, &fvp))
					continue;
			}
			d.d_namlen = kt->kt_namlen;
			if ((error = kernfs_setdirentfileno(&d, i, kfs,
			    ks->ks_parent, kt, ap)) != 0)
				break;
			memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
			d.d_type = kt->kt_type;
			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
				break;
			if (cookies)
				*cookies++ = i + 1;
			n++;
		}
		ncookies = n;
		break;

#ifdef IPSEC
	case KFSipsecsadir:
		/* count SA in the system */
		n = 0;
		TAILQ_FOREACH(sav, &satailq, tailq) {
			/*
			 * Count each SPI only once even when several SAs
			 * share it: scan earlier entries for a duplicate.
			 */
			for (sav2 = TAILQ_FIRST(&satailq);
			    sav2 != sav;
			    sav2 = TAILQ_NEXT(sav2, tailq)) {
				if (sav->spi == sav2->spi) {
					/* multiple SA with same SPI */
					break;
				}
			}
			if (sav == sav2 || sav->spi != sav2->spi)
				n++;
		}

		if (i >= nipsecsa_targets + n)
			return (0);

		if (ap->a_ncookies) {
			/*
			 * NOTE(review): this bounds cookies by (n - i),
			 * omitting the nipsecsa_targets fixed entries —
			 * looks inconsistent with the i bound above;
			 * confirm against later NetBSD revisions.
			 */
			ncookies = min(ncookies, (n - i));
			cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
			    M_WAITOK);
			*ap->a_cookies = cookies;
		}

		/* Fixed entries ("." / "..") first. */
		n = 0;
		for (; i < nipsecsa_targets && uio->uio_resid >= UIO_MX; i++) {
			kt = &ipsecsa_targets[i];
			d.d_namlen = kt->kt_namlen;
			if ((error = kernfs_setdirentfileno(&d, i, kfs,
			    &kern_targets[0], kt, ap)) != 0)
				break;
			memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
			d.d_type = kt->kt_type;
			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
				break;
			if (cookies)
				*cookies++ = i + 1;
			n++;
		}
		if (error) {
			ncookies = n;
			break;
		}

		/* Then one entry per distinct SPI, named in decimal. */
		TAILQ_FOREACH(sav, &satailq, tailq) {
			for (sav2 = TAILQ_FIRST(&satailq);
			    sav2 != sav;
			    sav2 = TAILQ_NEXT(sav2, tailq)) {
				if (sav->spi == sav2->spi) {
					/* multiple SA with same SPI */
					break;
				}
			}
			if (sav != sav2 && sav->spi == sav2->spi)
				continue;
			if (uio->uio_resid < UIO_MX)
				break;
			if ((error = kernfs_setdirentfileno_kt(&d, &ipsecsa_kt,
			    sav->spi, ap)) != 0)
				break;
			d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
			    "%u", ntohl(sav->spi));
			d.d_type = DT_REG;
			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
				break;
			if (cookies)
				*cookies++ = i + 1;
			n++;
			i++;
		}
		ncookies = n;
		break;

	case KFSipsecspdir:
		/* count SP in the system */
		n = 0;
		TAILQ_FOREACH(sp, &sptailq, tailq)
			n++;

		if (i >= nipsecsp_targets + n)
			return (0);

		if (ap->a_ncookies) {
			/* NOTE(review): same (n - i) bound caveat as above. */
			ncookies = min(ncookies, (n - i));
			cookies = malloc(ncookies * sizeof(off_t), M_TEMP,
			    M_WAITOK);
			*ap->a_cookies = cookies;
		}

		n = 0;
		for (; i < nipsecsp_targets && uio->uio_resid >= UIO_MX; i++) {
			kt = &ipsecsp_targets[i];
			d.d_namlen = kt->kt_namlen;
			if ((error = kernfs_setdirentfileno(&d, i, kfs,
			    &kern_targets[0], kt, ap)) != 0)
				break;
			memcpy(d.d_name, kt->kt_name, kt->kt_namlen + 1);
			d.d_type = kt->kt_type;
			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
				break;
			if (cookies)
				*cookies++ = i + 1;
			n++;
		}
		if (error) {
			ncookies = n;
			break;
		}

		/* One entry per security policy, named by policy id. */
		TAILQ_FOREACH(sp, &sptailq, tailq) {
			if (uio->uio_resid < UIO_MX)
				break;
			if ((error = kernfs_setdirentfileno_kt(&d, &ipsecsp_kt,
			    sp->id, ap)) != 0)
				break;
			d.d_namlen = snprintf(d.d_name, sizeof(d.d_name),
			    "%u", sp->id);
			d.d_type = DT_REG;
			if ((error = uiomove((caddr_t)&d, UIO_MX, uio)) != 0)
				break;
			if (cookies)
				*cookies++ = i + 1;
			n++;
			i++;
		}
		ncookies = n;
		break;
#endif

	default:
		error = ENOTDIR;
		break;
	}

	if (ap->a_ncookies) {
		/* On error, hand back no cookies and free the buffer. */
		if (error) {
			if (cookies)
				free(*ap->a_cookies, M_TEMP);
			*ap->a_ncookies = 0;
			*ap->a_cookies = NULL;
		} else
			*ap->a_ncookies = ncookies;
	}

	uio->uio_offset = i;
	return (error);
}
 30.1449 +
 30.1450 +int
 30.1451 +kernfs_inactive(v)
 30.1452 +	void *v;
 30.1453 +{
 30.1454 +	struct vop_inactive_args /* {
 30.1455 +		struct vnode *a_vp;
 30.1456 +		struct proc *a_p;
 30.1457 +	} */ *ap = v;
 30.1458 +	struct vnode *vp = ap->a_vp;
 30.1459 +	const struct kernfs_node *kfs = VTOKERN(ap->a_vp);
 30.1460 +#ifdef IPSEC
 30.1461 +	struct mbuf *m;
 30.1462 +	struct secpolicy *sp;
 30.1463 +#endif
 30.1464 +
 30.1465 +	VOP_UNLOCK(vp, 0);
 30.1466 +	switch (kfs->kfs_type) {
 30.1467 +#ifdef IPSEC
 30.1468 +	case KFSipsecsa:
 30.1469 +		m = key_setdumpsa_spi(htonl(kfs->kfs_value));
 30.1470 +		if (m)
 30.1471 +			m_freem(m);
 30.1472 +		else
 30.1473 +			vgone(vp);
 30.1474 +		break;
 30.1475 +	case KFSipsecsp:
 30.1476 +		sp = key_getspbyid(kfs->kfs_value);
 30.1477 +		if (sp)
 30.1478 +			key_freesp(sp);
 30.1479 +		else {
 30.1480 +			/* should never happen as we hold a refcnt */
 30.1481 +			vgone(vp);
 30.1482 +		}
 30.1483 +		break;
 30.1484 +#endif
 30.1485 +	default:
 30.1486 +		break;
 30.1487 +	}
 30.1488 +	return (0);
 30.1489 +}
 30.1490 +
 30.1491 +int
 30.1492 +kernfs_reclaim(v)
 30.1493 +	void *v;
 30.1494 +{
 30.1495 +	struct vop_reclaim_args /* {
 30.1496 +		struct vnode *a_vp;
 30.1497 +	} */ *ap = v;
 30.1498 +
 30.1499 +	return (kernfs_freevp(ap->a_vp));
 30.1500 +}
 30.1501 +
 30.1502 +/*
 30.1503 + * Return POSIX pathconf information applicable to special devices.
 30.1504 + */
 30.1505 +int
 30.1506 +kernfs_pathconf(v)
 30.1507 +	void *v;
 30.1508 +{
 30.1509 +	struct vop_pathconf_args /* {
 30.1510 +		struct vnode *a_vp;
 30.1511 +		int a_name;
 30.1512 +		register_t *a_retval;
 30.1513 +	} */ *ap = v;
 30.1514 +
 30.1515 +	switch (ap->a_name) {
 30.1516 +	case _PC_LINK_MAX:
 30.1517 +		*ap->a_retval = LINK_MAX;
 30.1518 +		return (0);
 30.1519 +	case _PC_MAX_CANON:
 30.1520 +		*ap->a_retval = MAX_CANON;
 30.1521 +		return (0);
 30.1522 +	case _PC_MAX_INPUT:
 30.1523 +		*ap->a_retval = MAX_INPUT;
 30.1524 +		return (0);
 30.1525 +	case _PC_PIPE_BUF:
 30.1526 +		*ap->a_retval = PIPE_BUF;
 30.1527 +		return (0);
 30.1528 +	case _PC_CHOWN_RESTRICTED:
 30.1529 +		*ap->a_retval = 1;
 30.1530 +		return (0);
 30.1531 +	case _PC_VDISABLE:
 30.1532 +		*ap->a_retval = _POSIX_VDISABLE;
 30.1533 +		return (0);
 30.1534 +	case _PC_SYNC_IO:
 30.1535 +		*ap->a_retval = 1;
 30.1536 +		return (0);
 30.1537 +	default:
 30.1538 +		return (EINVAL);
 30.1539 +	}
 30.1540 +	/* NOTREACHED */
 30.1541 +}
 30.1542 +
 30.1543 +/*
 30.1544 + * Print out the contents of a /dev/fd vnode.
 30.1545 + */
 30.1546 +/* ARGSUSED */
 30.1547 +int
 30.1548 +kernfs_print(v)
 30.1549 +	void *v;
 30.1550 +{
 30.1551 +
 30.1552 +	printf("tag VT_KERNFS, kernfs vnode\n");
 30.1553 +	return (0);
 30.1554 +}
 30.1555 +
 30.1556 +int
 30.1557 +kernfs_link(v) 
 30.1558 +	void *v;
 30.1559 +{
 30.1560 +	struct vop_link_args /* {
 30.1561 +		struct vnode *a_dvp;
 30.1562 +		struct vnode *a_vp;  
 30.1563 +		struct componentname *a_cnp;
 30.1564 +	} */ *ap = v;
 30.1565 + 
 30.1566 +	VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
 30.1567 +	vput(ap->a_dvp);
 30.1568 +	return (EROFS);
 30.1569 +}
 30.1570 +
 30.1571 +int
 30.1572 +kernfs_symlink(v)
 30.1573 +	void *v;
 30.1574 +{
 30.1575 +	struct vop_symlink_args /* {
 30.1576 +		struct vnode *a_dvp;
 30.1577 +		struct vnode **a_vpp;
 30.1578 +		struct componentname *a_cnp;
 30.1579 +		struct vattr *a_vap;
 30.1580 +		char *a_target;
 30.1581 +	} */ *ap = v;
 30.1582 +  
 30.1583 +	VOP_ABORTOP(ap->a_dvp, ap->a_cnp);
 30.1584 +	vput(ap->a_dvp);
 30.1585 +	return (EROFS);
 30.1586 +}
    31.1 --- a/tools/blktap/blktaplib.c	Tue Mar 08 22:57:09 2005 +0000
    31.2 +++ b/tools/blktap/blktaplib.c	Wed Mar 09 20:19:23 2005 +0000
    31.3 @@ -22,7 +22,6 @@
    31.4  #include <string.h>
    31.5  #include <unistd.h>
    31.6                                                                       
    31.7 -
    31.8  #define __COMPILING_BLKTAP_LIB
    31.9  #include "blktaplib.h"
   31.10  
   31.11 @@ -37,20 +36,14 @@
   31.12  
   31.13  #define BLKTAP_IOCTL_KICK 1
   31.14  
   31.15 -// this is in the header now
   31.16 -//DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t);
   31.17 -
   31.18  void got_sig_bus();
   31.19  void got_sig_int();
   31.20  
   31.21 -
   31.22  /* in kernel these are opposite, but we are a consumer now. */
   31.23  blkif_back_ring_t  fe_ring; /* slightly counterintuitive ;) */
   31.24  blkif_front_ring_t be_ring; 
   31.25  ctrl_back_ring_t   ctrl_ring;
   31.26  
   31.27 -
   31.28 -
   31.29  unsigned long mmap_vstart = 0;
   31.30  char *blktap_mem;
   31.31  int fd = 0;
   31.32 @@ -60,8 +53,6 @@ int fd = 0;
   31.33  #define BLKTAP_MMAP_PAGES \
   31.34      ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE)
   31.35  #define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES)
   31.36 -
   31.37 -
   31.38      
   31.39  int bad_count = 0;
   31.40  void bad(void)
   31.41 @@ -208,7 +199,7 @@ inline int write_req_to_be_ring(blkif_re
   31.42      blkif_request_t *req_d;
   31.43  
   31.44      //req_d = FRONT_RING_NEXT_EMPTY_REQUEST(&be_ring);
   31.45 -    req_d = RING_GET_REQUEST(BLKIF_RING, &be_ring, be_ring.req_prod_pvt);
   31.46 +    req_d = RING_GET_REQUEST(&be_ring, be_ring.req_prod_pvt);
   31.47      memcpy(req_d, req, sizeof(blkif_request_t));
   31.48      wmb();
   31.49      be_ring.req_prod_pvt++;
   31.50 @@ -221,7 +212,7 @@ inline int write_rsp_to_fe_ring(blkif_re
   31.51      blkif_response_t *rsp_d;
   31.52  
   31.53      //rsp_d = BACK_RING_NEXT_EMPTY_RESPONSE(&fe_ring);
   31.54 -    rsp_d = RING_GET_RESPONSE(BLKIF_RING, &fe_ring, fe_ring.rsp_prod_pvt);
   31.55 +    rsp_d = RING_GET_RESPONSE(&fe_ring, fe_ring.rsp_prod_pvt);
   31.56      memcpy(rsp_d, rsp, sizeof(blkif_response_t));
   31.57      wmb();
   31.58      fe_ring.rsp_prod_pvt++;
   31.59 @@ -251,7 +242,7 @@ void blktap_inject_response(blkif_respon
   31.60  {
   31.61      apply_rsp_hooks(rsp);
   31.62      write_rsp_to_fe_ring(rsp);
   31.63 -    RING_PUSH_RESPONSES(BLKIF_RING, &fe_ring);
   31.64 +    RING_PUSH_RESPONSES(&fe_ring);
   31.65      ioctl(fd, BLKTAP_IOCTL_KICK_FE);
   31.66  }
   31.67  
   31.68 @@ -371,13 +362,13 @@ int blktap_listen(void)
   31.69  
   31.70      /* assign the rings to the mapped memory */
   31.71      csring = (ctrl_sring_t *)blktap_mem;
   31.72 -    BACK_RING_INIT(CTRL_RING, &ctrl_ring, csring);
   31.73 +    BACK_RING_INIT(&ctrl_ring, csring);
   31.74      
   31.75      sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE);
   31.76 -    FRONT_RING_INIT(BLKIF_RING, &be_ring, sring);
   31.77 +    FRONT_RING_INIT(&be_ring, sring);
   31.78      
   31.79      sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE));
   31.80 -    BACK_RING_INIT(BLKIF_RING, &fe_ring, sring);
   31.81 +    BACK_RING_INIT(&fe_ring, sring);
   31.82  
   31.83      mmap_vstart = (unsigned long)blktap_mem + (BLKTAP_RING_PAGES << PAGE_SHIFT);
   31.84      
   31.85 @@ -431,7 +422,7 @@ int blktap_listen(void)
   31.86              rmb();
   31.87              for (i = ctrl_ring.req_cons; i < rp; i++)
   31.88              {
   31.89 -                msg = RING_GET_REQUEST(CTRL_RING, &ctrl_ring, i);
   31.90 +                msg = RING_GET_REQUEST(&ctrl_ring, i);
   31.91  
   31.92                  ctrl_hook = ctrl_hook_chain;
   31.93                  while (ctrl_hook != NULL)
   31.94 @@ -444,18 +435,18 @@ int blktap_listen(void)
   31.95              }
   31.96              /* Using this as a unidirectional ring. */
   31.97              ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i;
   31.98 -            RING_PUSH_RESPONSES(CTRL_RING, &ctrl_ring);
   31.99 +            RING_PUSH_RESPONSES(&ctrl_ring);
  31.100              
  31.101              /* empty the fe_ring */
  31.102              notify_fe = 0;
  31.103 -            notify_be = RING_HAS_UNCONSUMED_REQUESTS(BLKIF_RING, &fe_ring);
  31.104 +            notify_be = RING_HAS_UNCONSUMED_REQUESTS(&fe_ring);
  31.105              rp = fe_ring.sring->req_prod;
  31.106              rmb();
  31.107              for (i = fe_ring.req_cons; i != rp; i++)
  31.108              {
  31.109                  int done = 0; /* stop forwarding this request */
  31.110  
  31.111 -                req = RING_GET_REQUEST(BLKIF_RING, &fe_ring, i);
  31.112 +                req = RING_GET_REQUEST(&fe_ring, i);
  31.113  
  31.114                  DPRINTF("copying an fe request\n");
  31.115  
  31.116 @@ -489,13 +480,13 @@ int blktap_listen(void)
  31.117              fe_ring.req_cons = i;
  31.118  
  31.119              /* empty the be_ring */
  31.120 -            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(BLKIF_RING, &be_ring);
  31.121 +            notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(&be_ring);
  31.122              rp = be_ring.sring->rsp_prod;
  31.123              rmb();
  31.124              for (i = be_ring.rsp_cons; i != rp; i++)
  31.125              {
  31.126  
  31.127 -                rsp = RING_GET_RESPONSE(BLKIF_RING, &be_ring, i);
  31.128 +                rsp = RING_GET_RESPONSE(&be_ring, i);
  31.129  
  31.130                  DPRINTF("copying a be request\n");
  31.131  
  31.132 @@ -508,13 +499,13 @@ int blktap_listen(void)
  31.133  
  31.134              if (notify_be) {
  31.135                  DPRINTF("notifying be\n");
  31.136 -                RING_PUSH_REQUESTS(BLKIF_RING, &be_ring);
  31.137 +                RING_PUSH_REQUESTS(&be_ring);
  31.138                  ioctl(fd, BLKTAP_IOCTL_KICK_BE);
  31.139              }
  31.140  
  31.141              if (notify_fe) {
  31.142                  DPRINTF("notifying fe\n");
  31.143 -                RING_PUSH_RESPONSES(BLKIF_RING, &fe_ring);
  31.144 +                RING_PUSH_RESPONSES(&fe_ring);
  31.145                  ioctl(fd, BLKTAP_IOCTL_KICK_FE);
  31.146              }
  31.147          }        
    32.1 --- a/tools/libxc/Makefile	Tue Mar 08 22:57:09 2005 +0000
    32.2 +++ b/tools/libxc/Makefile	Wed Mar 09 20:19:23 2005 +0000
    32.3 @@ -20,6 +20,7 @@ SRCS     += xc_atropos.c
    32.4  SRCS     += xc_bvtsched.c
    32.5  SRCS     += xc_domain.c
    32.6  SRCS     += xc_evtchn.c
    32.7 +SRCS     += xc_gnttab.c
    32.8  SRCS     += xc_io.c
    32.9  SRCS     += xc_linux_build.c
   32.10  SRCS     += xc_plan9_build.c
    33.1 --- a/tools/libxc/xc.h	Tue Mar 08 22:57:09 2005 +0000
    33.2 +++ b/tools/libxc/xc.h	Wed Mar 09 20:19:23 2005 +0000
    33.3 @@ -411,4 +411,61 @@ void *xc_map_foreign_batch(int xc_handle
    33.4  int xc_get_pfn_list(int xc_handle, u32 domid, unsigned long *pfn_buf, 
    33.5  		    unsigned long max_pfns);
    33.6  
    33.7 +/*\
    33.8 + *  GRANT TABLE FUNCTIONS
    33.9 +\*/ 
   33.10 +
   33.11 +/**
   33.12 + * This function opens a handle to the more restricted grant table hypervisor
   33.13 + * interface. This may be used where the standard interface is not
   33.14 + * available because the domain is not privileged.
   33.15 + * This function can  be called multiple times within a single process.
   33.16 + * Multiple processes can have an open hypervisor interface at the same time.
   33.17 + *
   33.18 + * Each call to this function should have a corresponding call to
   33.19 + * xc_grant_interface_close().
   33.20 + *
   33.21 + * This function can fail if a Xen-enabled kernel is not currently running.
   33.22 + *
   33.23 + * @return a handle to the hypervisor grant table interface or -1 on failure
   33.24 + */
   33.25 +int xc_grant_interface_open(void);
   33.26 +
   33.27 +/**
   33.28 + * This function closes an open grant table hypervisor interface.
   33.29 + *
   33.30 + * This function can fail if the handle does not represent an open interface or
   33.31 + * if there were problems closing the interface.
   33.32 + *
   33.33 + * @parm xc_handle a handle to an open grant table hypervisor interface
   33.34 + * @return 0 on success, -1 otherwise.
   33.35 + */
   33.36 +int xc_grant_interface_close(int xc_handle);
   33.37 +
   33.38 +int xc_gnttab_map_grant_ref(int         xc_handle,
   33.39 +                            memory_t    host_virt_addr,
   33.40 +                            u32         dom,
   33.41 +                            u16         ref,
   33.42 +                            u16         flags,
   33.43 +                            s16        *handle,
   33.44 +                            memory_t   *dev_bus_addr);
   33.45 +
   33.46 +int xc_gnttab_unmap_grant_ref(int       xc_handle,
   33.47 +                              memory_t  host_virt_addr,
   33.48 +                              memory_t  dev_bus_addr,
   33.49 +                              u16       handle,
   33.50 +                              s16      *status);
   33.51 +
   33.52 +int xc_gnttab_setup_table(int        xc_handle,
   33.53 +                          u32        dom,
   33.54 +                          u16        nr_frames,
   33.55 +                          s16       *status,
   33.56 +                          memory_t **frame_list);
   33.57 +
   33.58 +/* Grant debug builds only: */
   33.59 +int xc_gnttab_dump_table(int        xc_handle,
   33.60 +                         u32        dom,
   33.61 +                         s16       *status);
   33.62 +
   33.63 +
   33.64  #endif /* __XC_H__ */
    34.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    34.2 +++ b/tools/libxc/xc_gnttab.c	Wed Mar 09 20:19:23 2005 +0000
    34.3 @@ -0,0 +1,143 @@
    34.4 +/******************************************************************************
    34.5 + * xc_gnttab.c
    34.6 + * 
    34.7 + * API for manipulating and accessing grant tables
    34.8 + * 
    34.9 + * Copyright (c) 2005 Christopher Clark
   34.10 + * based on xc_evtchn.c Copyright (c) 2004, K A Fraser.
   34.11 + */
   34.12 +
   34.13 +#include "xc_private.h"
   34.14 +#include "xen/grant_table.h"
   34.15 +
   34.16 +static int
   34.17 +do_gnttab_op( int xc_handle,
   34.18 +              unsigned long cmd,
   34.19 +              gnttab_op_t *op,
   34.20 +              unsigned long count )
   34.21 +{
   34.22 +    int ret = -1;
   34.23 +    privcmd_hypercall_t hypercall;
   34.24 +
   34.25 +    hypercall.op     = __HYPERVISOR_grant_table_op;
   34.26 +    hypercall.arg[0] = cmd;
   34.27 +    hypercall.arg[1] = (unsigned long)(op);
   34.28 +    hypercall.arg[2] = count;
   34.29 +
   34.30 +    if ( mlock(op, sizeof(*op)) != 0 )
   34.31 +    {
   34.32 +        PERROR("Could not lock memory for Xen hypercall");
   34.33 +        goto out1;
   34.34 +    }
   34.35 +
   34.36 +    if ( (ret = do_xen_hypercall(xc_handle, &hypercall)) < 0 )
   34.37 +    {
   34.38 +        printf("do_gnttab_op: hypercall returned error %d\n", ret);
   34.39 +        goto out2;
   34.40 +    }
   34.41 +
   34.42 + out2: (void)munlock(op, sizeof(*op));
   34.43 + out1: return ret;
   34.44 +}
   34.45 +
   34.46 +
   34.47 +int xc_gnttab_map_grant_ref(int         xc_handle,
   34.48 +                            memory_t    host_virt_addr,
   34.49 +                            u32         dom,
   34.50 +                            u16         ref,
   34.51 +                            u16         flags,
   34.52 +                            s16        *handle,
   34.53 +                            memory_t   *dev_bus_addr)
   34.54 +{
   34.55 +    gnttab_op_t     op;
   34.56 +    int             rc;
   34.57 +
   34.58 +    op.u.map_grant_ref.host_virt_addr = host_virt_addr;
   34.59 +    op.u.map_grant_ref.dom            = (domid_t)dom;
   34.60 +    op.u.map_grant_ref.ref            = ref;
   34.61 +    op.u.map_grant_ref.flags          = flags;
   34.62 + 
   34.63 +    if ( (rc = do_gnttab_op(xc_handle, GNTTABOP_map_grant_ref, &op, 1)) == 0 )
   34.64 +    {
   34.65 +        *handle         = op.u.map_grant_ref.handle;
   34.66 +        *dev_bus_addr   = op.u.map_grant_ref.dev_bus_addr;
   34.67 +    }
   34.68 +
   34.69 +    return rc;
   34.70 +}
   34.71 +
   34.72 +
   34.73 +int xc_gnttab_unmap_grant_ref(int       xc_handle,
   34.74 +                              memory_t  host_virt_addr,
   34.75 +                              memory_t  dev_bus_addr,
   34.76 +                              u16       handle,
   34.77 +                              s16      *status)
   34.78 +{
   34.79 +    gnttab_op_t     op;
   34.80 +    int             rc;
   34.81 +
   34.82 +    op.u.unmap_grant_ref.host_virt_addr = host_virt_addr;
   34.83 +    op.u.unmap_grant_ref.dev_bus_addr   = dev_bus_addr;
   34.84 +    op.u.unmap_grant_ref.handle         = handle;
   34.85 + 
   34.86 +    if ( (rc = do_gnttab_op(xc_handle, GNTTABOP_unmap_grant_ref, &op, 1)) == 0 )
   34.87 +        *status = op.u.unmap_grant_ref.status;
   34.88 +
   34.89 +    return rc;
   34.90 +}
   34.91 +
   34.92 +int xc_gnttab_setup_table(int        xc_handle,
   34.93 +                          u32        dom,
   34.94 +                          u16        nr_frames,
   34.95 +                          s16       *status,
   34.96 +                          memory_t **frame_list)
   34.97 +{
   34.98 +    gnttab_op_t     op;
   34.99 +    int             rc;
  34.100 +    int             i;
  34.101 +
  34.102 +    op.u.setup_table.dom        = (domid_t)dom;
  34.103 +    op.u.setup_table.nr_frames  = nr_frames;
  34.104 + 
  34.105 +    if ( (rc = do_gnttab_op(xc_handle, GNTTABOP_setup_table, &op, 1)) == 0 )
  34.106 +    {
  34.107 +        *status = op.u.setup_table.status;
  34.108 +        for ( i = 0; i < nr_frames; i++ )
  34.109 +        {
  34.110 +            (*frame_list)[i] = op.u.setup_table.frame_list[i];
  34.111 +        }
  34.112 +    }
  34.113 +
  34.114 +    return rc;
  34.115 +}
  34.116 +
  34.117 +int xc_gnttab_dump_table(int        xc_handle,
  34.118 +                         u32        dom,
  34.119 +                         s16       *status)
  34.120 +{
  34.121 +    gnttab_op_t     op;
  34.122 +    int             rc;
  34.123 +
  34.124 +    op.u.dump_table.dom = (domid_t)dom;
  34.125 + 
  34.126 +    printf("xc_gnttab_dump_table: domain %d\n", dom);
  34.127 +
  34.128 +    if ( (rc = do_gnttab_op(xc_handle, GNTTABOP_dump_table, &op, 1)) == 0 )
  34.129 +        *status = op.u.dump_table.status;
  34.130 +
  34.131 +    return rc;
  34.132 +}
  34.133 +
  34.134 +int xc_grant_interface_open(void)
  34.135 +{
  34.136 +    int fd = open("/proc/xen/grant", O_RDWR);
  34.137 +    if ( fd == -1 )
  34.138 +        PERROR("Could not obtain handle on grant command interface");
  34.139 +    return fd;
  34.140 +
  34.141 +}
  34.142 +
  34.143 +int xc_grant_interface_close(int xc_grant_handle)
  34.144 +{
  34.145 +    return close(xc_grant_handle);
  34.146 +}
    35.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    35.2 +++ b/tools/tests/Makefile	Wed Mar 09 20:19:23 2005 +0000
    35.3 @@ -0,0 +1,22 @@
    35.4 +
    35.5 +XEN_ROOT=../..
    35.6 +include $(XEN_ROOT)/tools/Rules.mk
    35.7 +
    35.8 +TARGET := test_x86_emulator
    35.9 +
   35.10 +CC     := gcc
   35.11 +CFLAGS := -O2 -Wall -Werror -D__TEST_HARNESS__
   35.12 +
   35.13 +$(TARGET): x86_emulate.o test_x86_emulator.o
   35.14 +	$(CC) -o $@ $^
   35.15 +
   35.16 +clean:
   35.17 +	rm -rf $(TARGET) *.o *~ core
   35.18 +
   35.19 +install:
   35.20 +
   35.21 +x86_emulate.o: $(XEN_ROOT)/xen/arch/x86/x86_emulate.c
   35.22 +	$(CC) $(CFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $<
   35.23 +
   35.24 +%.o: %.c
   35.25 +	$(CC) $(CFLAGS) -I$(XEN_ROOT)/xen/include -c -o $@ $<
    36.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    36.2 +++ b/tools/tests/test_x86_emulator.c	Wed Mar 09 20:19:23 2005 +0000
    36.3 @@ -0,0 +1,168 @@
    36.4 +
    36.5 +#include <stdio.h>
    36.6 +#include <stdlib.h>
    36.7 +#include <string.h>
    36.8 +#include <stdint.h>
    36.9 +typedef uint8_t            u8;
   36.10 +typedef uint16_t           u16;
   36.11 +typedef uint32_t           u32;
   36.12 +typedef uint64_t           u64;
   36.13 +typedef int8_t             s8;
   36.14 +typedef int16_t            s16;
   36.15 +typedef int32_t            s32;
   36.16 +typedef int64_t            s64;
   36.17 +#include <public/xen.h>
   36.18 +#include <asm-x86/x86_emulate.h>
   36.19 +
   36.20 +static int read_any(
   36.21 +    unsigned long addr,
   36.22 +    unsigned long *val,
   36.23 +    unsigned int bytes)
   36.24 +{
   36.25 +    switch ( bytes )
   36.26 +    {
   36.27 +    case 1: *val = *(u8 *)addr; break;
   36.28 +    case 2: *val = *(u16 *)addr; break;
   36.29 +    case 4: *val = *(u32 *)addr; break;
   36.30 +    case 8: *val = *(unsigned long *)addr; break;
   36.31 +    }
   36.32 +    return 0;
   36.33 +}
   36.34 +
   36.35 +static int write_any(
   36.36 +    unsigned long addr,
   36.37 +    unsigned long val,
   36.38 +    unsigned int bytes)
   36.39 +{
   36.40 +    switch ( bytes )
   36.41 +    {
   36.42 +    case 1: *(u8 *)addr = (u8)val; break;
   36.43 +    case 2: *(u16 *)addr = (u16)val; break;
   36.44 +    case 4: *(u32 *)addr = (u32)val; break;
   36.45 +    case 8: *(unsigned long *)addr = val; break;
   36.46 +    }
   36.47 +    return 0;
   36.48 +}
   36.49 +
   36.50 +static int cmpxchg_any(
   36.51 +    unsigned long addr,
   36.52 +    unsigned long old,
   36.53 +    unsigned long new,
   36.54 +    unsigned long *seen,
   36.55 +    unsigned int bytes)
   36.56 +{
   36.57 +    *seen = old;
   36.58 +    switch ( bytes )
   36.59 +    {
   36.60 +    case 1: *(u8 *)addr = (u8)new; break;
   36.61 +    case 2: *(u16 *)addr = (u16)new; break;
   36.62 +    case 4: *(u32 *)addr = (u32)new; break;
   36.63 +    case 8: *(unsigned long *)addr = new; break;
   36.64 +    }
   36.65 +    return 0;
   36.66 +}
   36.67 +
   36.68 +static struct x86_mem_emulator emulops = {
   36.69 +    read_any, write_any, read_any, write_any, cmpxchg_any
   36.70 +};
   36.71 +
   36.72 +int main(int argc, char **argv)
   36.73 +{
   36.74 +    struct xen_regs regs;
   36.75 +    char instr[] = { 0x01, 0x08 }; /* add %ecx,(%eax) */
   36.76 +    unsigned int res = 0x7FFFFFFF;
   36.77 +    unsigned long cr2;
   36.78 +    int rc;
   36.79 +
   36.80 +    printf("%-40s", "Testing addl %%ecx,(%%eax)...");
   36.81 +    instr[0] = 0x01; instr[1] = 0x08;
   36.82 +    regs.eflags = 0x200;
   36.83 +    regs.eip    = (unsigned long)&instr[0];
   36.84 +    regs.ecx    = 0x12345678;
   36.85 +    cr2         = (unsigned long)&res;
   36.86 +    res         = 0x7FFFFFFF;
   36.87 +    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
   36.88 +    if ( (rc != 0) || 
   36.89 +         (res != 0x92345677) || 
   36.90 +         (regs.eflags != 0xa94) ||
   36.91 +         (regs.eip != (unsigned long)&instr[2]) )
   36.92 +        goto fail;
   36.93 +    printf("okay\n");
   36.94 +
   36.95 +    printf("%-40s", "Testing xorl (%%eax),%%ecx...");
   36.96 +    instr[0] = 0x33; instr[1] = 0x08;
   36.97 +    regs.eflags = 0x200;
   36.98 +    regs.eip    = (unsigned long)&instr[0];
   36.99 +#ifdef __x86_64__
  36.100 +    regs.ecx    = 0xFFFFFFFF12345678UL;
  36.101 +#else
  36.102 +    regs.ecx    = 0x12345678UL;
  36.103 +#endif
  36.104 +    cr2         = (unsigned long)&res;
  36.105 +    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);
  36.106 +    if ( (rc != 0) || 
  36.107 +         (res != 0x92345677) || 
  36.108 +         (regs.ecx != 0x8000000FUL) ||
  36.109 +         (regs.eip != (unsigned long)&instr[2]) )
  36.110 +        goto fail;
  36.111 +    printf("okay\n");
  36.112 +
  36.113 +    printf("%-40s", "Testing lock cmpxchgb %%cl,(%%eax)...");
  36.114 +    instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb0; instr[3] = 0x08;
  36.115 +    regs.eflags = 0x200;
  36.116 +    regs.eip    = (unsigned long)&instr[0];
  36.117 +    regs.eax    = 0x92345677UL;
  36.118 +    regs.ecx    = 0xAA;
  36.119 +    cr2         = (unsigned long)&res;
  36.120 +    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
  36.121 +    if ( (rc != 0) || 
  36.122 +         (res != 0x923456AA) || 
  36.123 +         (regs.eflags != 0x244) ||
  36.124 +         (regs.eax != 0x92345677UL) ||
  36.125 +         (regs.eip != (unsigned long)&instr[4]) )
  36.126 +        goto fail;
  36.127 +    printf("okay\n");
  36.128 +
  36.129 +    printf("%-40s", "Testing lock cmpxchgl %%ecx,(%%eax)...");
  36.130 +    instr[0] = 0xf0; instr[1] = 0x0f; instr[2] = 0xb1; instr[3] = 0x08;
  36.131 +    regs.eflags = 0x200;
  36.132 +    regs.eip    = (unsigned long)&instr[0];
  36.133 +    regs.eax    = 0x923456AAUL;
  36.134 +    regs.ecx    = 0xDDEEFF00L;
  36.135 +    cr2         = (unsigned long)&res;
  36.136 +    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
  36.137 +    if ( (rc != 0) || 
  36.138 +         (res != 0xDDEEFF00) || 
  36.139 +         (regs.eflags != 0x244) ||
  36.140 +         (regs.eax != 0x923456AAUL) ||
  36.141 +         (regs.eip != (unsigned long)&instr[4]) )
  36.142 +        goto fail;
  36.143 +    printf("okay\n");
  36.144 +
  36.145 +    printf("%-40s", "Testing rep movsw...");
  36.146 +    instr[0] = 0xf3; instr[1] = 0x66; instr[2] = 0xa5;
  36.147 +    res         = 0x22334455;
  36.148 +    regs.eflags = 0x200;
  36.149 +    regs.ecx    = 23;
  36.150 +    regs.eip    = (unsigned long)&instr[0];
  36.151 +    regs.esi    = (unsigned long)&res + 0;
  36.152 +    regs.edi    = (unsigned long)&res + 2;
  36.153 +    regs.error_code = 0; /* read fault */
  36.154 +    cr2         = regs.esi;
  36.155 +    rc = x86_emulate_memop(&regs, cr2, &emulops, 4);    
  36.156 +    if ( (rc != 0) || 
  36.157 +         (res != 0x44554455) ||
  36.158 +         (regs.eflags != 0x200) ||
  36.159 +         (regs.ecx != 22) || 
  36.160 +         (regs.esi != ((unsigned long)&res + 2)) ||
  36.161 +         (regs.edi != ((unsigned long)&res + 4)) ||
  36.162 +         (regs.eip != (unsigned long)&instr[0]) )
  36.163 +        goto fail;
  36.164 +    printf("okay\n");
  36.165 +
  36.166 +    return 0;
  36.167 +
  36.168 + fail:
  36.169 +    printf("failed!\n");
  36.170 +    return 1;
  36.171 +}
    37.1 --- a/tools/x2d2/Makefile	Tue Mar 08 22:57:09 2005 +0000
    37.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.3 @@ -1,22 +0,0 @@
    37.4 -XEN_ROOT=../..
    37.5 -include $(XEN_ROOT)/tools/Rules.mk
    37.6 -
    37.7 -CC       = gcc
    37.8 -CFLAGS  += -Wall -Werror -O3 -fno-strict-aliasing
    37.9 -
   37.10 -CFLAGS  += -I $(XEN_XC)
   37.11 -CFLAGS  += -I $(XEN_LIBXC)
   37.12 -CFLAGS  += -I $(XEN_LIBXUTIL)
   37.13 -
   37.14 -HDRS     = $(wildcard *.h)
   37.15 -OBJS     = $(patsubst %.c,%.o,$(wildcard *.c))
   37.16 -
   37.17 -BIN      = minixend
   37.18 -
   37.19 -all: $(BIN)
   37.20 -
   37.21 -clean:
   37.22 -	$(RM) *.a *.so *.o *.rpm $(BIN)
   37.23 -
   37.24 -$(BIN): $(OBJS)
   37.25 -	$(CC) $(CFLAGS) $^ -o $@ -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -lxc -lxutil -lpthread
    38.1 --- a/tools/x2d2/cntrl_con.c	Tue Mar 08 22:57:09 2005 +0000
    38.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.3 @@ -1,457 +0,0 @@
    38.4 -#define _GNU_SOURCE
    38.5 -
    38.6 -#include <sys/types.h>
    38.7 -#include <sys/ioctl.h>
    38.8 -#include <sys/socket.h>
    38.9 -#include <assert.h>
   38.10 -#include <ctype.h>
   38.11 -#include <err.h>
   38.12 -#include <errno.h>
   38.13 -#include <pthread.h>
   38.14 -#include <netinet/in.h>
   38.15 -#include <stdarg.h>
   38.16 -#include <stddef.h>
   38.17 -#include <stdio.h>
   38.18 -#include <stdlib.h>
   38.19 -#include <string.h>
   38.20 -#include <unistd.h>
   38.21 -
   38.22 -#include "minixend.h"
   38.23 -
   38.24 -struct command {
   38.25 -	const char *name;
   38.26 -	void (*func)(struct open_connection *oc, const struct command *c,
   38.27 -		     const char *, const char *);
   38.28 -};
   38.29 -
   38.30 -static void
   38.31 -domain_created(const char *name, int mem_kb, int domid)
   38.32 -{
   38.33 -	struct domain *d;
   38.34 -	d = xmalloc(sizeof(*d));
   38.35 -	d->domid = domid;
   38.36 -	d->name = xstrdup(name);
   38.37 -	d->mem_kb = mem_kb;
   38.38 -	d->state = DOM_STATE_CREATED;
   38.39 -	d->control_evtchn = -1; /* Not connected yet. */
   38.40 -
   38.41 -	memcpy(d->netif_mac, "\xaa\x00\x00\x02\x00\x00", 6);
   38.42 -	d->netif_mac[5] = d->domid;
   38.43 -
   38.44 -	pthread_mutex_init(&d->mux, NULL);
   38.45 -	pthread_cond_init(&d->cond, NULL);
   38.46 -	pthread_create(&d->thread, NULL, domain_thread_func, d);
   38.47 -
   38.48 -	list_insert_after(&d->domain_list, &head_domain);
   38.49 -}
   38.50 -
   38.51 -static struct domain *
   38.52 -find_domain(int domain_id)
   38.53 -{
   38.54 -	struct domain *d;
   38.55 -
   38.56 -	foreach_domain(d) {
   38.57 -		if (d->domid == domain_id)
   38.58 -			return d;
   38.59 -	}
   38.60 -	return NULL;
   38.61 -}
   38.62 -
   38.63 -static int
   38.64 -free_event_port(struct domain *d, int port)
   38.65 -{
   38.66 -	if (d == NULL)
   38.67 -		return xc_evtchn_close(xc_handle, DOMID_SELF, port);
   38.68 -	else
   38.69 -		return xc_evtchn_close(xc_handle, d->domid, port);
   38.70 -}
   38.71 -
   38.72 -static char *
   38.73 -readline(struct open_connection *oc)
   38.74 -{
   38.75 -	char *end;
   38.76 -	char *res;
   38.77 -	int line_length;
   38.78 -
   38.79 -	if (oc->state == OC_STATE_ERROR)
   38.80 -		return NULL;
   38.81 -
   38.82 -	end = memchr(oc->buf, '\r', oc->buf_used);
   38.83 -	assert(end != NULL);
   38.84 -	line_length = end - oc->buf;
   38.85 -
   38.86 -	res = xmalloc(line_length + 1);
   38.87 -	memcpy(res, oc->buf, line_length);
   38.88 -	res[line_length] = 0;
   38.89 -	memmove(oc->buf, oc->buf + line_length + 2,
   38.90 -		oc->buf_used - line_length - 2);
   38.91 -
   38.92 -	oc->buf_used -= line_length + 2;
   38.93 -
   38.94 -	if (memchr(oc->buf, '\n', oc->buf_used))
   38.95 -		oc->state = OC_STATE_COMMAND_PENDING;
   38.96 -	else
   38.97 -		oc->state = OC_STATE_CONNECTED;
   38.98 -
   38.99 -	return res;
  38.100 -}
  38.101 -
  38.102 -static unsigned long
  38.103 -find_domain_shared_info_mfn(struct domain *d)
  38.104 -{
  38.105 -	xc_dominfo_t info;
  38.106 -
  38.107 -	xc_domain_getinfo(xc_handle, d->domid, 1, &info);
  38.108 -	return info.shared_info_frame;
  38.109 -}
  38.110 -
  38.111 -static void
  38.112 -send_message(struct open_connection *oc, const char *fmt, ...)
  38.113 -{
  38.114 -	char *buf;
  38.115 -	va_list ap;
  38.116 -	int size;
  38.117 -	int off;
  38.118 -	ssize_t r;
  38.119 -
  38.120 -	if (oc->state == OC_STATE_ERROR)
  38.121 -		return;
  38.122 -
  38.123 -	va_start(ap, fmt);
  38.124 -	size = vasprintf(&buf, fmt, ap);
  38.125 -	va_end(ap);
  38.126 -	if (size < 0)
  38.127 -		err(1, "preparing response to a query");
  38.128 -	assert(buf[0] == 'E' || buf[0] == 'N');
  38.129 -	assert(isdigit(buf[1]));
  38.130 -	assert(isdigit(buf[2]));
  38.131 -	assert(buf[3] == ' ' || buf[3] == '\n');
  38.132 -
  38.133 -	off = 0;
  38.134 -	while (off < size) {
  38.135 -		r = write(oc->fd, buf + off, size - off);
  38.136 -		if (r < 0) {
  38.137 -			warn("sending response to remote");
  38.138 -			oc->state = OC_STATE_ERROR;
  38.139 -			free(buf);
  38.140 -			return;
  38.141 -		}
  38.142 -		off += r;
  38.143 -	}
  38.144 -	free(buf);
  38.145 -}
  38.146 -
  38.147 -static void
  38.148 -default_command_handler(struct open_connection *oc, const struct command *ign,
  38.149 -			const char *buf, const char *args)
  38.150 -{
  38.151 -	warnx("bad command %s", buf);
  38.152 -	send_message(oc, "E00 unknown command %s\n", buf);
  38.153 -}
  38.154 -
  38.155 -static void
  38.156 -create_command_handler(struct open_connection *oc, const struct command *ign,
  38.157 -		       const char *buf, const char *args)
  38.158 -{
  38.159 -	char *name;
  38.160 -	unsigned mem_kb;
  38.161 -	int r;
  38.162 -	u32 domid = -1;
  38.163 -
  38.164 -	r = sscanf(args, "%d %a[^\n]", &mem_kb, &name);
  38.165 -	if (r != 2) {
  38.166 -		send_message(oc, "E01 failed to parse %s\n", args);
  38.167 -		return;
  38.168 -	}
  38.169 -	r = xc_domain_create(xc_handle, mem_kb, -1, 0, &domid);
  38.170 -	if (r < 0) {
  38.171 -		send_message(oc, "E02 creating domain (%s)\n",
  38.172 -			     strerror(errno));
  38.173 -		free(name);
  38.174 -		return;
  38.175 -	}
  38.176 -
  38.177 -	domain_created(name, mem_kb, domid);
  38.178 -
  38.179 -	send_message(oc, "N00 %d\n", domid);
  38.180 -	free(name);
  38.181 -}
  38.182 -
  38.183 -static void
  38.184 -build_command_handler(struct open_connection *oc, const struct command *ign,
  38.185 -		      const char *buf, const char *args)
  38.186 -{
  38.187 -	struct domain *d;
  38.188 -	int domain_id;
  38.189 -	char *image, *cmdline;
  38.190 -	int event_ports[2];
  38.191 -	int r;
  38.192 -
  38.193 -	r = sscanf(args, "%d %a[^\t] %a[^\n]", &domain_id,
  38.194 -		   &image, &cmdline);
  38.195 -	if (r != 3) {
  38.196 -		send_message(oc, "E03 failed to parse %s\n", args);
  38.197 -		return;
  38.198 -	}
  38.199 -	d = find_domain(domain_id);
  38.200 -	if (d == NULL) {
  38.201 -		send_message(oc, "E04 unknown domain %d\n", domain_id);
  38.202 -		goto out;
  38.203 -	}
  38.204 -	if (d->state != DOM_STATE_CREATED) {
  38.205 -		send_message(oc, "E05 domain %d in bad state\n", domain_id);
  38.206 -		goto out;
  38.207 -	}
  38.208 -
  38.209 -	r = allocate_event_channel(d, event_ports);
  38.210 -	if (r < 0) {
  38.211 -		send_message(oc, "E06 allocating control event channel: %s\n",
  38.212 -			     strerror(errno));
  38.213 -		goto out;
  38.214 -	}
  38.215 -
  38.216 -	r = xc_linux_build(xc_handle, domain_id, image, NULL, cmdline,
  38.217 -			   event_ports[1], 0);
  38.218 -	if (r < 0) {
  38.219 -		send_message(oc, "E07 building domain: %s\n",
  38.220 -			     strerror(errno));
  38.221 -		free_event_port(NULL, event_ports[0]);
  38.222 -		free_event_port(d, event_ports[1]);
  38.223 -		goto out;
  38.224 -	}
  38.225 -
  38.226 -	if (ioctl(evtchn_fd, EVTCHN_BIND, event_ports[0]) < 0)
  38.227 -		err(1, "binding to event control event channel");
  38.228 -
  38.229 -	d->shared_info_mfn = find_domain_shared_info_mfn(d);
  38.230 -	d->shared_info = map_domain_mem(d, d->shared_info_mfn);
  38.231 -	if (d->shared_info == NULL)
  38.232 -		err(1, "maping domain shared info page at %lx.\n",
  38.233 -		    d->shared_info_mfn);
  38.234 -	d->ctrl_if = (control_if_t *)((unsigned)d->shared_info + 2048);
  38.235 -
  38.236 -	d->control_evtchn = event_ports[0];
  38.237 -	d->state = DOM_STATE_PAUSED;
  38.238 -
  38.239 -	send_message(oc, "N00\n");
  38.240 -
  38.241 - out:
  38.242 -	free(image);
  38.243 -	free(cmdline);
  38.244 -	return;
  38.245 -}
  38.246 -
  38.247 -static void
  38.248 -unpause_command_handler(struct open_connection *oc,
  38.249 -			const struct command *ign,
  38.250 -			const char *buf,
  38.251 -			const char *args)
  38.252 -{
  38.253 -	int domain_id;
  38.254 -	int r;
  38.255 -	struct domain *d;
  38.256 -
  38.257 -	r = sscanf(args, "%d", &domain_id);
  38.258 -	if (r != 1) {
  38.259 -		send_message(oc, "E08 cannot parse %s\n", args);
  38.260 -		return;
  38.261 -	}
  38.262 -	d = find_domain(domain_id);
  38.263 -	if (d == NULL) {
  38.264 -		send_message(oc, "E09 cannot find domain %d\n", domain_id);
  38.265 -		return;
  38.266 -	}
  38.267 -	if (d->state != DOM_STATE_PAUSED) {
  38.268 -		send_message(oc, "E10 domain not paused\n");
  38.269 -		return;
  38.270 -	}
  38.271 -
  38.272 -	r = xc_domain_unpause(xc_handle, d->domid);
  38.273 -	if (r < 0) {
  38.274 -		send_message(oc, "E11 unpausing domain: %s\n",
  38.275 -			     strerror(errno));
  38.276 -		return;
  38.277 -	}
  38.278 -
  38.279 -	d->state = DOM_STATE_RUNNING;
  38.280 -	send_message(oc, "N00\n");
  38.281 -}
  38.282 -
  38.283 -static void
  38.284 -console_command_handler(struct open_connection *oc,
  38.285 -			const struct command *ign,
  38.286 -			const char *buf,
  38.287 -			const char *args)
  38.288 -{
  38.289 -	int domain_id;
  38.290 -	struct domain *d;
  38.291 -	int r;
  38.292 -	struct sockaddr_in name;
  38.293 -	socklen_t namelen;
  38.294 -
  38.295 -	r = sscanf(args, "%d", &domain_id);
  38.296 -	if (r != 1) {
  38.297 -		send_message(oc, "E12 cannot parse %s\n", args);
  38.298 -		return;
  38.299 -	}
  38.300 -	d = find_domain(domain_id);
  38.301 -	if (d == NULL) {
  38.302 -		send_message(oc, "E13 cannot find domain %d\n", domain_id);
  38.303 -		return;
  38.304 -	}
  38.305 -	if (d->cc != NULL) {
  38.306 -		send_message(oc, "E14 console already exists\n");
  38.307 -		return;
  38.308 -	}
  38.309 -
  38.310 -	d->cc = xmalloc(sizeof(*d->cc));
  38.311 -	d->cc->fd = socket(PF_INET, SOCK_STREAM, 0);
  38.312 -	if (d->cc->fd < 0)
  38.313 -		err(1, "creating console socket");
  38.314 -	d->cc->dom = d;
  38.315 -	d->cc->state = CC_STATE_PENDING;
  38.316 -	d->cc->buf_used = 0;
  38.317 -	d->cc->buf_allocated = 0;
  38.318 -	d->cc->buf = NULL;
  38.319 -
  38.320 -	r = listen(d->cc->fd, 1);
  38.321 -	if (r < 0)
  38.322 -		err(1, "listening on console socket");
  38.323 -	namelen = sizeof(name);
  38.324 -	r = getsockname(d->cc->fd, (struct sockaddr *)&name, &namelen);
  38.325 -	if (r < 0)
  38.326 -		err(1, "getting name of console socket");
  38.327 -	assert(name.sin_family == AF_INET);
  38.328 -	assert(namelen == sizeof(name));
  38.329 -	list_insert_after(&d->cc->list, &head_console);
  38.330 -	send_message(oc, "N00 %d\n", ntohs(name.sin_port));
  38.331 -}
  38.332 -
  38.333 -static void
  38.334 -plug_command_handler(struct open_connection *oc,
  38.335 -		     const struct command *ign,
  38.336 -		     const char *buf,
  38.337 -		     const char *args)
  38.338 -{
  38.339 -	unsigned domid;
  38.340 -	int r;
  38.341 -	struct domain *d;
  38.342 -
  38.343 -	r = sscanf(args, "%d", &domid);
  38.344 -	if (r != 1) {
  38.345 -		send_message(oc, "E15 cannot parse %s\n", args);
  38.346 -		return;
  38.347 -	}
  38.348 -	d = find_domain(domid);
  38.349 -	if (d == NULL) {
  38.350 -		send_message(oc, "E16 cannot find domain %d\n", domid);
  38.351 -		return;
  38.352 -	}
  38.353 -
  38.354 -	d->plugged = 1;
  38.355 -	send_message(oc, "N00\n");
  38.356 -	PRINTF(1, "set domain %d plug state to %d\n", d->domid, d->plugged);
  38.357 -}
  38.358 -
  38.359 -static void
  38.360 -destroy_command_handler(struct open_connection *oc,
  38.361 -			const struct command *ign,
  38.362 -			const char *buf,
  38.363 -			const char *args)
  38.364 -{
  38.365 -	unsigned domid;
  38.366 -	int r;
  38.367 -	struct domain *d;
  38.368 -
  38.369 -	r = sscanf(args, "%d", &domid);
  38.370 -	if (r != 1) {
  38.371 -		send_message(oc, "E17 cannot parse %s\n", args);
  38.372 -		return;
  38.373 -	}
  38.374 -	d = find_domain(domid);
  38.375 -	if (d == NULL) {
  38.376 -		send_message(oc, "E18 cannot find domain %d\n", domid);
  38.377 -		return;
  38.378 -	}
  38.379 -
  38.380 -	r = xc_domain_destroy(xc_handle, domid);
  38.381 -	if (r < 0) {
  38.382 -		send_message( oc, "E19 error destroying domain %d: %s\n",
  38.383 -			      domid, strerror(errno) );
  38.384 -		return;
  38.385 -	}
  38.386 -	d->state = DOM_STATE_DEAD;
  38.387 -
  38.388 -	send_message(oc, "N00\n");
  38.389 -}
  38.390 -
  38.391 -static void
  38.392 -list_command_handler(struct open_connection *oc,
  38.393 -		     const struct command *ign,
  38.394 -		     const char *buf,
  38.395 -		     const char *args)
  38.396 -{
  38.397 -	struct domain *d;
  38.398 -	static const char *const state_strings[] = {
  38.399 -		[DOM_STATE_CREATED] = "created",
  38.400 -		[DOM_STATE_PAUSED] = "paused",
  38.401 -		[DOM_STATE_RUNNING] = "running",
  38.402 -		[DOM_STATE_DEAD] = "dead"
  38.403 -	};
  38.404 -
  38.405 -	foreach_domain(d) {
  38.406 -		send_message(oc, "N01 %d %s %d %s\n",
  38.407 -			     d->domid,
  38.408 -			     d->name,
  38.409 -			     d->mem_kb,
  38.410 -			     state_strings[d->state]);
  38.411 -	}
  38.412 -	send_message(oc, "N00\n");
  38.413 -}
  38.414 -
  38.415 -static struct command
  38.416 -default_command = { NULL, default_command_handler };
  38.417 -
  38.418 -static struct command
  38.419 -commands[] = {
  38.420 -	{ "build", build_command_handler },
  38.421 -	{ "console", console_command_handler },
  38.422 -	{ "create", create_command_handler },
  38.423 -	{ "destroy", destroy_command_handler },
  38.424 -	{ "plug", plug_command_handler },
  38.425 -	{ "list", list_command_handler },
  38.426 -	{ "unpause", unpause_command_handler }
  38.427 -};
  38.428 -
  38.429 -void
  38.430 -process_command(struct open_connection *oc)
  38.431 -{
  38.432 -	char *buf, *b;
  38.433 -	int command_len;
  38.434 -	int x;
  38.435 -	struct command *cmd;
  38.436 -
  38.437 -	buf = readline(oc);
  38.438 -	if (buf == NULL)
  38.439 -		return;
  38.440 -	b = strchr(buf, ' ');
  38.441 -	if (b == NULL)
  38.442 -		command_len = strlen(buf);
  38.443 -	else
  38.444 -		command_len = b - buf;
  38.445 -	b = buf + command_len;
  38.446 -	while (b[0] && b[0] == ' ')
  38.447 -		b++;
  38.448 -
  38.449 -	cmd = &default_command;
  38.450 -	for (x = 0; x < sizeof(commands) / sizeof(commands[0]); x++) {
  38.451 -		if (strlen(commands[x].name) == command_len &&
  38.452 -		    memcmp(commands[x].name, buf, command_len) == 0) {
  38.453 -			cmd = &commands[x];
  38.454 -			break;
  38.455 -		}
  38.456 -	}
  38.457 -	cmd->func(oc, cmd, buf, b);
  38.458 -	free(buf);
  38.459 -	return;
  38.460 -}
    39.1 --- a/tools/x2d2/minixend.c	Tue Mar 08 22:57:09 2005 +0000
    39.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.3 @@ -1,939 +0,0 @@
    39.4 -#define _GNU_SOURCE
    39.5 -
    39.6 -#include <sys/types.h>
    39.7 -#include <sys/fcntl.h>
    39.8 -#include <sys/ioctl.h>
    39.9 -#include <sys/mman.h>
   39.10 -#include <sys/select.h>
   39.11 -#include <sys/socket.h>
   39.12 -#include <sys/wait.h>
   39.13 -#include <asm/page.h>
   39.14 -#include <assert.h>
   39.15 -#include <ctype.h>
   39.16 -#include <err.h>
   39.17 -#include <errno.h>
   39.18 -#include <netinet/in.h>
   39.19 -#include <printf.h>
   39.20 -#include <pthread.h>
   39.21 -#include <sched.h>
   39.22 -#include <stdarg.h>
   39.23 -#include <stddef.h>
   39.24 -#include <stdio.h>
   39.25 -#include <stdlib.h>
   39.26 -#include <string.h>
   39.27 -#include <unistd.h>
   39.28 -
   39.29 -#include "minixend.h"
   39.30 -
   39.31 -#define NETWORK_SCRIPT "/etc/xen/scripts/network"
   39.32 -#define VIFBRIDGE_SCRIPT "/etc/xen/scripts/vif-bridge"
   39.33 -
   39.34 -#define MINIXEND_PORT 5123
   39.35 -
   39.36 -#define mb() asm volatile ("" ::: "memory")
   39.37 -
   39.38 -static void send_control_message(int type, int subtype, int id,
   39.39 -				 int size, void *payload,
   39.40 -				 struct domain *target);
   39.41 -
   39.42 -struct list_head
   39.43 -head_domain = LIST_HEAD(&head_domain);
   39.44 -
   39.45 -static struct list_head
   39.46 -head_connection = LIST_HEAD(&head_connection);
   39.47 -
   39.48 -struct list_head
   39.49 -head_console = LIST_HEAD(&head_console);
   39.50 -
   39.51 -#define foreach_open_connection(d)                                          \
   39.52 -foreach_item(d, &head_connection, struct open_connection, connection_list)
   39.53 -
   39.54 -/* Not modified after initial start up */
   39.55 -static struct domain *dom0;
   39.56 -unsigned xc_handle;
   39.57 -static int listen_fd;
   39.58 -int evtchn_fd;
   39.59 -
   39.60 -static struct list_head
   39.61 -head_event_receiver = LIST_HEAD(&head_event_receiver);
   39.62 -
   39.63 -struct event_receiver {
   39.64 -	struct list_head list;
   39.65 -	int id;
   39.66 -	pthread_cond_t cond;
   39.67 -};
   39.68 -
   39.69 -/* We're protected by the dom0 mutex in here */
   39.70 -static struct event_receiver *
   39.71 -allocate_event_receiver(struct domain *d)
   39.72 -{
   39.73 -	static int next_message_id;
   39.74 -	struct event_receiver *work;
   39.75 -
   39.76 -	assert(d == dom0);
   39.77 -	work = xmalloc(sizeof(*work));
   39.78 -	work->id = next_message_id++;
   39.79 -	pthread_cond_init(&work->cond, NULL);
   39.80 -
   39.81 -	list_insert_after(&work->list, &head_event_receiver);
   39.82 -
   39.83 -	return work;
   39.84 -}
   39.85 -
   39.86 -static struct event_receiver *
   39.87 -find_event_receiver(int id)
   39.88 -{
   39.89 -	struct event_receiver *work;
   39.90 -	foreach_item(work, &head_event_receiver, struct event_receiver, list)
   39.91 -		if (work->id == id)
   39.92 -			return work;
   39.93 -	return NULL;
   39.94 -}
   39.95 -
   39.96 -static void
   39.97 -release_event_receiver(struct event_receiver *w)
   39.98 -{
   39.99 -	list_remove(&w->list);
  39.100 -	pthread_cond_destroy(&w->cond);
  39.101 -	free(w);
  39.102 -}
  39.103 -
  39.104 -/* Send a message to dom0, and then block awaiting a reply. */
  39.105 -/* Make sure we don't hold any domain mutexs */
  39.106 -static void
  39.107 -send_dom0_message_block(control_msg_t *msg)
  39.108 -{
  39.109 -	CONTROL_RING_IDX c;
  39.110 -	struct event_receiver *er;
  39.111 -	control_msg_t buf;
  39.112 -
  39.113 -	PRINTF(0, "sending message to dom0 and blocking for reply.\n");
  39.114 -	pthread_mutex_lock(&dom0->mux);
  39.115 -	PRINTF(0, "got dom0 lock.\n");
  39.116 -	er = allocate_event_receiver(dom0);
  39.117 -	PRINTF(0, "allocated evetn receiver.\n");
  39.118 -	msg->id = er->id;
  39.119 -	PRINTF(1, "sending message with id %d\n", msg->id);
  39.120 -	send_control_message(msg->type, msg->subtype,
  39.121 -			     msg->id, msg->length, msg->msg, dom0);
  39.122 -	xc_evtchn_send(xc_handle, dom0->control_evtchn);
  39.123 -
  39.124 -	PRINTF(0, "waiting for reply\n");
  39.125 -	pthread_cond_wait(&er->cond, &dom0->mux);
  39.126 -	PRINTF(0, "got reply\n");
  39.127 -
  39.128 -	c = dom0->rx_resp_cons % CONTROL_RING_SIZE;
  39.129 -	memcpy(&buf, &dom0->ctrl_if->rx_ring[c], sizeof(buf));
  39.130 -	assert(msg->id == buf.id);
  39.131 -	assert(msg->type == buf.type);
  39.132 -	assert(msg->subtype == buf.subtype);
  39.133 -	memcpy(msg, &buf, sizeof(*msg));
  39.134 -	dom0->rx_resp_cons++;
  39.135 -
  39.136 -	release_event_receiver(er);
  39.137 -
  39.138 -	pthread_mutex_unlock(&dom0->mux);
  39.139 -
  39.140 -	PRINTF(1, "got reply to message with id %d\n", msg->id);
  39.141 -}
  39.142 -
  39.143 -/* Allocate an interdomain event channel.  event_ports[0] is the
  39.144 -   local event port number, event_ports[1] the remote */
  39.145 -int
  39.146 -allocate_event_channel(struct domain *d, int event_ports[2])
  39.147 -{
  39.148 -	return xc_evtchn_bind_interdomain(xc_handle, DOMID_SELF,
  39.149 -					  d->domid, event_ports,
  39.150 -					  event_ports+1);
  39.151 -}
  39.152 -
  39.153 -static void
  39.154 -accept_new_connection(void)
  39.155 -{
  39.156 -	int fd;
  39.157 -	struct open_connection *oc;
  39.158 -
  39.159 -	fd = accept(listen_fd, NULL, NULL);
  39.160 -	if (fd < 0)
  39.161 -		return;
  39.162 -	oc = xmalloc(sizeof(*oc));
  39.163 -	oc->fd = fd;
  39.164 -	oc->state = OC_STATE_CONNECTED;
  39.165 -	oc->buf_used = 0;
  39.166 -	oc->buf_allocated = 16;
  39.167 -	oc->buf = xmalloc(oc->buf_allocated);
  39.168 -	list_insert_after(&oc->connection_list, &head_connection);
  39.169 -}
  39.170 -
  39.171 -static void
  39.172 -closedown_connection(struct open_connection *oc)
  39.173 -{
  39.174 -	close(oc->fd);
  39.175 -	assert(oc->buf);
  39.176 -	free(oc->buf);
  39.177 -	free(oc);
  39.178 -}
  39.179 -
  39.180 -#if 0
  39.181 -/* Hackl for the benefit of domain replay */
  39.182 -static unsigned
  39.183 -report_work(u32 *ptr, u32 val, unsigned dom, int do_direct)
  39.184 -{
  39.185 -	if (!do_direct) {
  39.186 -		int rc;
  39.187 -		asm("int $0x80" : "=a" (rc)
  39.188 -		    : "0" (264), "b" (ptr), "c" (val), "d" (dom));
  39.189 -		if (rc < 0) {
  39.190 -			errno = -rc;
  39.191 -			rc = -1;
  39.192 -		}
  39.193 -		return rc;
  39.194 -	} else {
  39.195 -		*ptr = val;
  39.196 -		return 0;
  39.197 -	}
  39.198 -}
  39.199 -#else
  39.200 -static unsigned
  39.201 -report_work(u32 *ptr, u32 val, unsigned dom, int do_direct)
  39.202 -{
  39.203 -	*ptr = val;
  39.204 -	return 0;
  39.205 -}
  39.206 -#endif
  39.207 -
  39.208 -static void
  39.209 -send_control_reply(const control_msg_t *msg, struct domain *d)
  39.210 -{
  39.211 -	CONTROL_RING_IDX c;
  39.212 -
  39.213 -	PRINTF(3,"Control reply, type %d:%d, length %d.\n",
  39.214 -	       msg->type, msg->subtype, msg->length);
  39.215 -	c = d->ctrl_if->tx_resp_prod % CONTROL_RING_SIZE;
  39.216 -	memcpy(&d->ctrl_if->tx_ring[c], msg, sizeof(*msg));
  39.217 -	report_work(&d->ctrl_if->tx_resp_prod,
  39.218 -		    d->ctrl_if->tx_resp_prod + 1,
  39.219 -		    d->domid,
  39.220 -		    0);
  39.221 -	PRINTF(4,"tx_resp_prod %ld.\n", d->ctrl_if->tx_resp_prod);
  39.222 -	assert(!d->plugged);
  39.223 -}
  39.224 -
  39.225 -static void
  39.226 -send_trivial_control_reply(const control_msg_t *msg, struct domain *d)
  39.227 -{
  39.228 -	control_msg_t rep;
  39.229 -
  39.230 -	memset(&rep, 0, sizeof(rep));
  39.231 -	rep.type = msg->type;
  39.232 -	rep.subtype = msg->subtype;
  39.233 -	rep.id = msg->id;
  39.234 -	send_control_reply(&rep, d);
  39.235 -}
  39.236 -
  39.237 -static void
  39.238 -process_console_control_message(control_msg_t *m, struct domain *d)
  39.239 -{
  39.240 -	int off;
  39.241 -	int r;
  39.242 -
  39.243 -	if (m->subtype != CMSG_CONSOLE_DATA) {
  39.244 -		warnx("unknown console message subtype %d",
  39.245 -		      m->subtype);
  39.246 -		return;
  39.247 -	}
  39.248 -
  39.249 -	if (m->length > 60) {
  39.250 -		warnx("truncating message from domain %d (was length %d)",
  39.251 -		      d->domid, m->length);
  39.252 -		m->length = 60;
  39.253 -	}
  39.254 -	PRINTF(1, "DOM%d: %.*s\n", d->domid, m->length, m->msg);
  39.255 -	send_trivial_control_reply(m, d);
  39.256 -
  39.257 -	if (d->cc) {
  39.258 -		PRINTF(5, "Have a console connection.\n");
  39.259 -		if (d->cc->state == CC_STATE_CONNECTED) {
  39.260 -			PRINTF(5, "Console is connected, sending directly.\n");
  39.261 -			for (off = 0; off < m->length; off += r) {
  39.262 -				r = write(d->cc->fd, m->msg + off,
  39.263 -					  m->length - off);
  39.264 -				if (r <= 0) {
  39.265 -					d->cc->state = CC_STATE_ERROR;
  39.266 -					break;
  39.267 -				}
  39.268 -			}
  39.269 -		} else {
  39.270 -			PRINTF(5, "Console not connected, buffering.\n");
  39.271 -			if (d->cc->buf_allocated == 0) {
  39.272 -				d->cc->buf_allocated = 60;
  39.273 -				d->cc->buf = xmalloc(d->cc->buf_allocated);
  39.274 -				d->cc->buf_used = 0;
  39.275 -			} else if (d->cc->buf_allocated <
  39.276 -				   d->cc->buf_used + m->length) {
  39.277 -				d->cc->buf_allocated += 60;
  39.278 -				d->cc->buf = xrealloc(d->cc->buf,
  39.279 -						      d->cc->buf_allocated);
  39.280 -			}
  39.281 -			assert(d->cc->buf_allocated >=
  39.282 -			       d->cc->buf_used + m->length);
  39.283 -			memcpy(d->cc->buf + d->cc->buf_used,
  39.284 -			       m->msg,
  39.285 -			       m->length);
  39.286 -			d->cc->buf_used += m->length;
  39.287 -		}
  39.288 -	}
  39.289 -}
  39.290 -
  39.291 -static void
  39.292 -process_blkif_fe_message(control_msg_t *m, struct domain *d)
  39.293 -{
  39.294 -	switch (m->subtype) {
  39.295 -	default:
  39.296 -		warnx("unknown blkif front end message subtype %d",
  39.297 -		      m->subtype);
  39.298 -	}
  39.299 -}
  39.300 -
  39.301 -static void
  39.302 -send_control_message(int type, int subtype, int id,
  39.303 -		     int size, void *payload, struct domain *target)
  39.304 -{
  39.305 -	control_msg_t msg;
  39.306 -	CONTROL_RING_IDX c;
  39.307 -
  39.308 -	msg.type = type;
  39.309 -	msg.subtype = subtype;
  39.310 -	msg.id = id;
  39.311 -	msg.length = size;
  39.312 -	memcpy(msg.msg, payload, size);
  39.313 -
  39.314 -	c = target->ctrl_if->rx_req_prod % CONTROL_RING_SIZE;
  39.315 -	memcpy(&target->ctrl_if->rx_ring[c], &msg, sizeof(msg));
  39.316 -	report_work(&target->ctrl_if->rx_req_prod,
  39.317 -		    target->ctrl_if->rx_req_prod + 1,
  39.318 -		    target->domid,
  39.319 -		    0);
  39.320 -	assert(!target->plugged);
  39.321 -}
  39.322 -
  39.323 -/* Procedure for bringing a new netif front end up:
  39.324 -
  39.325 -   -- Front end sends us NETIF_FE_DRIVER_STATUS_CHANGED
  39.326 -   -- We send back end NETIF_BE_CREATE, wait for a reply
  39.327 -   -- Back end creates a new netif for us, replies
  39.328 -   -- We send front end a NETIF_FE_DRIVER_STATUS_CHANGED message saying
  39.329 -      how many interfaces we've created for it
  39.330 -   -- We send front end a NETIF_FE_INTERFACE_STATUS_CHANGED for each
  39.331 -      netif created
  39.332 -   -- Front end sends us a NETIF_FE_INTERFACE_CONNECT for each netif
  39.333 -*/
  39.334 -static void
  39.335 -handle_netif_fe_driver_status(control_msg_t *m,
  39.336 -			      netif_fe_driver_status_t *sh,
  39.337 -			      struct domain *d)
  39.338 -{
  39.339 -	netif_fe_interface_status_t if_s;
  39.340 -	control_msg_t be_msg;
  39.341 -	netif_be_create_t *be = (void *)be_msg.msg;
  39.342 -	int r;
  39.343 -
  39.344 -	switch (sh->status) {
  39.345 -	case NETIF_DRIVER_STATUS_UP:
  39.346 -		/* Tell the back end about the new interface coming
  39.347 -		 * up. */
  39.348 -		if (d->created_netif_backend) {
  39.349 -			send_control_reply(m, d);
  39.350 -			send_control_message(CMSG_NETIF_FE,
  39.351 -					     CMSG_NETIF_FE_DRIVER_STATUS,
  39.352 -					     1,
  39.353 -					     sizeof(*sh),
  39.354 -					     sh,
  39.355 -					     d);
  39.356 -			return;
  39.357 -		}
  39.358 -		be_msg.type = CMSG_NETIF_BE;
  39.359 -		be_msg.subtype = CMSG_NETIF_BE_CREATE;
  39.360 -		be_msg.id = d->domid;
  39.361 -		be_msg.length = sizeof(*be);
  39.362 -		be->domid = d->domid;
  39.363 -		be->netif_handle = 0;
  39.364 -		memcpy(be->mac, d->netif_mac, 6);
  39.365 -
  39.366 -		PRINTF(2,"Telling back end about new front end.\n");
  39.367 -		pthread_mutex_unlock(&d->mux);
  39.368 -		send_dom0_message_block(&be_msg);
  39.369 -		pthread_mutex_lock(&d->mux);
  39.370 -		PRINTF(3,"Done.\n");
  39.371 -
  39.372 -		if (be->status != NETIF_BE_STATUS_OKAY) {
  39.373 -			/* Uh oh... can't bring back end
  39.374 -			 * up. */
  39.375 -			send_control_reply(m, d);
  39.376 -			send_control_message(CMSG_NETIF_FE,
  39.377 -					     CMSG_NETIF_FE_DRIVER_STATUS,
  39.378 -					     1,
  39.379 -					     sizeof(*sh),
  39.380 -					     sh,
  39.381 -					     d);
  39.382 -			return;
  39.383 -		}
  39.384 -		d->created_netif_backend = 1;
  39.385 -
  39.386 -		r = our_system(VIFBRIDGE_SCRIPT " up domain=%s mac=%.02x:%.02x:%.02x:%.02x:%.02x:%.02x vif=vif%d.0 bridge=xen-br0",
  39.387 -			       d->name,
  39.388 -			       d->netif_mac[0],
  39.389 -			       d->netif_mac[1],
  39.390 -			       d->netif_mac[2],
  39.391 -			       d->netif_mac[3],
  39.392 -			       d->netif_mac[4],
  39.393 -			       d->netif_mac[5],
  39.394 -			       d->domid);
  39.395 -		if (r != 0)
  39.396 -			warn("error %d running " VIFBRIDGE_SCRIPT, r);
  39.397 -
  39.398 -		/* Tell domain how many interfaces it has to deal
  39.399 -		 * with. */
  39.400 -		send_control_reply(m, d);
  39.401 -		send_control_message(CMSG_NETIF_FE,
  39.402 -				     CMSG_NETIF_FE_DRIVER_STATUS,
  39.403 -				     1,
  39.404 -				     sizeof(*sh),
  39.405 -				     sh,
  39.406 -				     d);
  39.407 -
  39.408 -		PRINTF(2,"Telling front end about its interfaces.\n");
  39.409 -		if_s.handle = 0;
  39.410 -		if_s.status = NETIF_INTERFACE_STATUS_DISCONNECTED;
  39.411 -		send_control_message(CMSG_NETIF_FE,
  39.412 -				     CMSG_NETIF_FE_INTERFACE_STATUS,
  39.413 -				     1,
  39.414 -				     sizeof(if_s),
  39.415 -				     &if_s,
  39.416 -				     d);
  39.417 -		PRINTF(3,"Done.\n");
  39.418 -
  39.419 -		break;
  39.420 -	default:
  39.421 -		warnx("unknown netif status %ld", sh->status);
  39.422 -		break;
  39.423 -	}
  39.424 -}
  39.425 -
  39.426 -static void
  39.427 -handle_netif_fe_interface_connect(control_msg_t *m,
  39.428 -				  netif_fe_interface_connect_t *ic,
  39.429 -				  struct domain *d)
  39.430 -{
  39.431 -	control_msg_t be_msg;
  39.432 -	netif_be_connect_t *bmsg = (void *)be_msg.msg;
  39.433 -	netif_fe_interface_status_t fmsg = {0};
  39.434 -	int evtchn_ports[2];
  39.435 -	int r;
  39.436 -
  39.437 -	PRINTF(4, "front end sent us an interface connect message.\n");
  39.438 -	send_trivial_control_reply(m, d);
  39.439 -
  39.440 -	r = xc_evtchn_bind_interdomain(xc_handle,
  39.441 -				       dom0->domid,
  39.442 -				       d->domid,
  39.443 -				       &evtchn_ports[0],
  39.444 -				       &evtchn_ports[1]);
  39.445 -	if (r < 0)
  39.446 -		err(1, "allocating network event channel");
  39.447 -
  39.448 -	be_msg.type = CMSG_NETIF_BE;
  39.449 -	be_msg.subtype = CMSG_NETIF_BE_CONNECT;
  39.450 -	be_msg.id = 0;
  39.451 -	be_msg.length = sizeof(*bmsg);
  39.452 -	bmsg->domid = d->domid;
  39.453 -	bmsg->netif_handle = ic->handle;
  39.454 -	bmsg->tx_shmem_frame = ic->tx_shmem_frame;
  39.455 -	bmsg->rx_shmem_frame = ic->rx_shmem_frame;
  39.456 -	bmsg->evtchn = evtchn_ports[0];
  39.457 -
  39.458 -	pthread_mutex_unlock(&d->mux);
  39.459 -	send_dom0_message_block(&be_msg);
  39.460 -	pthread_mutex_lock(&d->mux);
  39.461 -
  39.462 -	if (bmsg->status != NETIF_BE_STATUS_OKAY) {
  39.463 -		PRINTF(2, "error connected backend netif: %ld\n",
  39.464 -		       bmsg->status);
  39.465 -		abort(); /* Need to handle this */
  39.466 -	} else {
  39.467 -		PRINTF(3, "connect backend netif\n");
  39.468 -
  39.469 -		/* Tell the domain that we've connected it up. */
  39.470 -		fmsg.handle = ic->handle;
  39.471 -		fmsg.status = NETIF_INTERFACE_STATUS_CONNECTED;
  39.472 -		fmsg.evtchn = evtchn_ports[1];
  39.473 -		memcpy(fmsg.mac, d->netif_mac, 6);
  39.474 -
  39.475 -		send_control_message(CMSG_NETIF_FE,
  39.476 -				     CMSG_NETIF_FE_INTERFACE_STATUS,
  39.477 -				     0,
  39.478 -				     sizeof(fmsg),
  39.479 -				     &fmsg,
  39.480 -				     d);
  39.481 -	}
  39.482 -}
  39.483 -
  39.484 -static void
  39.485 -process_netif_fe_message(control_msg_t *m, struct domain *d)
  39.486 -{
  39.487 -	switch (m->subtype) {
  39.488 -	case CMSG_NETIF_FE_DRIVER_STATUS:
  39.489 -	{
  39.490 -		netif_fe_driver_status_t *sh =
  39.491 -			(netif_fe_driver_status_t *)m->msg;
  39.492 -		handle_netif_fe_driver_status(m, sh, d);
  39.493 -		break;
  39.494 -	}
  39.495 -	case CMSG_NETIF_FE_INTERFACE_CONNECT:
  39.496 -	{
  39.497 -		netif_fe_interface_connect_t *ic =
  39.498 -			(netif_fe_interface_connect_t *)m->msg;
  39.499 -		handle_netif_fe_interface_connect(m, ic, d);
  39.500 -		break;
  39.501 -	}
  39.502 -	default:
  39.503 -		warnx("unknown netif front end message subtype %d",
  39.504 -		      m->subtype);
  39.505 -	}
  39.506 -}
  39.507 -
  39.508 -static void
  39.509 -process_control_message(control_msg_t *msg, struct domain *d)
  39.510 -{
  39.511 -	control_msg_t m;
  39.512 -
  39.513 -	/* Don't want a malicous domain messing us about, so copy the
  39.514 -	   control mesasge into a local buffer. */
  39.515 -	memcpy(&m, msg, sizeof(m));
  39.516 -	switch (m.type) {
  39.517 -	case CMSG_CONSOLE:
  39.518 -		process_console_control_message(&m, d);
  39.519 -		break;
  39.520 -	case CMSG_BLKIF_FE:
  39.521 -		process_blkif_fe_message(&m, d);
  39.522 -		break;
  39.523 -	case CMSG_NETIF_FE:
  39.524 -		process_netif_fe_message(&m, d);
  39.525 -		break;
  39.526 -	default:
  39.527 -		warnx("unknown control message type %d", m.type);
  39.528 -	}
  39.529 -}
  39.530 -
  39.531 -static void
  39.532 -domain_did_control_event(struct domain *d)
  39.533 -{
  39.534 -	CONTROL_RING_IDX c;
  39.535 -
  39.536 -	/* Pick up and process control ring messages. */
  39.537 -	while (d->tx_req_cons != d->ctrl_if->tx_req_prod) {
  39.538 -		c = d->tx_req_cons % CONTROL_RING_SIZE;
  39.539 -		process_control_message(&d->ctrl_if->tx_ring[c], d);
  39.540 -		d->tx_req_cons++;
  39.541 -		assert(d->tx_req_cons <= d->ctrl_if->tx_req_prod);
  39.542 -		PRINTF(5, "req_cons %ld, req_prod %ld.\n",
  39.543 -		       d->tx_req_cons, d->ctrl_if->tx_req_prod);
  39.544 -	}
  39.545 -
  39.546 -	/* Take any replies off, and discard them. */
  39.547 -	if (d->rx_resp_cons != d->ctrl_if->rx_resp_prod)
  39.548 -		PRINTF(1, "discard %ld events\n",
  39.549 -		       d->ctrl_if->rx_resp_prod -
  39.550 -		       d->rx_resp_cons);
  39.551 -	d->rx_resp_cons = d->ctrl_if->rx_resp_prod;
  39.552 -}
  39.553 -
  39.554 -/* This is the main function for domain control threads */
  39.555 -void *
  39.556 -domain_thread_func(void *D)
  39.557 -{
  39.558 -	struct domain *d = D;
  39.559 -	int r;
  39.560 -	CONTROL_RING_IDX old_resp_prod, old_req_prod;
  39.561 -
  39.562 -	pthread_mutex_lock(&d->mux);
  39.563 -	for (;;) {
  39.564 -		pthread_cond_wait(&d->cond, &d->mux);
  39.565 -
  39.566 -		old_resp_prod = d->ctrl_if->tx_resp_prod;
  39.567 -		old_req_prod = d->ctrl_if->rx_req_prod;
  39.568 -
  39.569 -		domain_did_control_event(d);
  39.570 -		if (d->cc && d->cc->in_buf_used != 0 && d->plugged == 0) {
  39.571 -			r = d->cc->in_buf_used;
  39.572 -			if (r > 60)
  39.573 -				r = 60;
  39.574 -			PRINTF(1, "Sending to domain: %.*s\n",
  39.575 -			       r, d->cc->in_buf);
  39.576 -			send_control_message(CMSG_CONSOLE,
  39.577 -					     CMSG_CONSOLE_DATA,
  39.578 -					     0,
  39.579 -					     r,
  39.580 -					     d->cc->in_buf,
  39.581 -					     d);
  39.582 -			memmove(d->cc->in_buf, d->cc->in_buf + r,
  39.583 -				d->cc->in_buf_used - r);
  39.584 -			d->cc->in_buf_used -= r;
  39.585 -		}
  39.586 -
  39.587 -		if (d->ctrl_if->tx_resp_prod != old_resp_prod ||
  39.588 -		    d->ctrl_if->rx_req_prod != old_req_prod)
  39.589 -			xc_evtchn_send(xc_handle, d->control_evtchn);
  39.590 -	}
  39.591 -}
  39.592 -
  39.593 -/* This is the only thing you can do with a domain structure if you're
  39.594 -   not in the thread which controls that domain.  Domain 0 is
  39.595 -   special. */
  39.596 -void
  39.597 -signal_domain(struct domain *d)
  39.598 -{
  39.599 -	CONTROL_RING_IDX c;
  39.600 -	int id;
  39.601 -	struct event_receiver *evt;
  39.602 -
  39.603 -	pthread_mutex_lock(&d->mux);
  39.604 -	if (d == dom0) {
  39.605 -		/* Take events off of dom0's control ring, and send
  39.606 -		   them to the event receivers. */
  39.607 -		while (d->tx_req_cons != d->ctrl_if->tx_req_prod) {
  39.608 -			c = d->tx_req_cons % CONTROL_RING_SIZE;
  39.609 -			id = d->ctrl_if->tx_ring[c].id;
  39.610 -			evt = find_event_receiver(id);
  39.611 -			if (evt != NULL) {
  39.612 -				PRINTF(1, "delivering event id %d\n", evt->id);
  39.613 -				pthread_cond_broadcast(&evt->cond);
  39.614 -				pthread_mutex_unlock(&d->mux);
  39.615 -				sched_yield();
  39.616 -				pthread_mutex_lock(&d->mux);
  39.617 -			} else {
  39.618 -				warnx("unexpected message id %d discarded",
  39.619 -				      id);
  39.620 -				d->tx_req_cons++;
  39.621 -			}
  39.622 -		}
  39.623 -		while (d->rx_resp_cons != d->ctrl_if->rx_resp_prod) {
  39.624 -			c = d->rx_resp_cons % CONTROL_RING_SIZE;
  39.625 -			id = d->ctrl_if->rx_ring[c].id;
  39.626 -			evt = find_event_receiver(id);
  39.627 -			if (evt != NULL) {
  39.628 -				PRINTF(1, "delivering event rep id %d\n", evt->id);
  39.629 -				pthread_cond_broadcast(&evt->cond);
  39.630 -				pthread_mutex_unlock(&d->mux);
  39.631 -				sched_yield();
  39.632 -				pthread_mutex_lock(&d->mux);
  39.633 -			} else {
  39.634 -				warnx("unexpected message reply id %d discarded",
  39.635 -				      id);
  39.636 -				d->rx_resp_cons++;
  39.637 -			}
  39.638 -		}
  39.639 -	} else {
  39.640 -		if (d->plugged) {
  39.641 -			d->event_pending = 1;
  39.642 -		} else {
  39.643 -			pthread_cond_broadcast(&d->cond);
  39.644 -		}
  39.645 -	}
  39.646 -	pthread_mutex_unlock(&d->mux);
  39.647 -}
  39.648 -
  39.649 -static void
  39.650 -handle_evtchn_event(void)
  39.651 -{
  39.652 -	short port;
  39.653 -	struct domain *d;
  39.654 -
  39.655 -	read(evtchn_fd, &port, sizeof(short));
  39.656 -	write(evtchn_fd, &port, sizeof(short));
  39.657 -	foreach_domain (d) {
  39.658 -		if (d->control_evtchn == port) {
  39.659 -			signal_domain(d);
  39.660 -			return;
  39.661 -		}
  39.662 -	}
  39.663 -	warnx("got an event on an unknown port %d", port);
  39.664 -}
  39.665 -
  39.666 -void *
  39.667 -map_domain_mem(struct domain *d, unsigned long mfn)
  39.668 -{
  39.669 -	return xc_map_foreign_range(xc_handle, d->domid,
  39.670 -				    PAGE_SIZE, PROT_READ | PROT_WRITE,
  39.671 -				    mfn);
  39.672 -}
  39.673 -
  39.674 -static void
  39.675 -handle_console_event(struct console_connection *cc)
  39.676 -{
  39.677 -	int r;
  39.678 -	int fd;
  39.679 -
  39.680 -	switch (cc->state) {
  39.681 -	case CC_STATE_ERROR:
  39.682 -		/* Errors shouldn't get here. */
  39.683 -		abort();
  39.684 -	case CC_STATE_PENDING:
  39.685 -		fd = accept(cc->fd, NULL, NULL);
  39.686 -		if (fd >= 0) {
  39.687 -			PRINTF(3, "Accepted console connection for domain %d",
  39.688 -			       cc->dom->domid);
  39.689 -			close(cc->fd);
  39.690 -			cc->fd = fd;
  39.691 -			cc->state = CC_STATE_CONNECTED;
  39.692 -			while (cc->buf_used != 0) {
  39.693 -				r = write(cc->fd,
  39.694 -					  cc->buf,
  39.695 -					  cc->buf_used);
  39.696 -				if (r <= 0) {
  39.697 -					cc->state = CC_STATE_ERROR;
  39.698 -					break;
  39.699 -				}
  39.700 -				memmove(cc->buf,
  39.701 -					cc->buf + r,
  39.702 -					cc->buf_used - r);
  39.703 -				cc->buf_used -= r;
  39.704 -			}
  39.705 -			free(cc->buf);
  39.706 -			cc->buf = NULL;
  39.707 -			cc->buf_allocated = 0;
  39.708 -		} else {
  39.709 -			PRINTF(1, "error %s accepting console", strerror(errno));
  39.710 -		}
  39.711 -		pthread_mutex_unlock(&cc->dom->mux);
  39.712 -		break;
  39.713 -	case CC_STATE_CONNECTED:
  39.714 -		if (cc->in_buf_allocated == 0) {
  39.715 -			assert(cc->in_buf_used == 0);
  39.716 -			cc->in_buf_allocated = 128;
  39.717 -			cc->in_buf = xmalloc(cc->in_buf_allocated);
  39.718 -		}
  39.719 -		if (cc->in_buf_used == cc->in_buf_allocated) {
  39.720 -			cc->in_buf_allocated *= 2;
  39.721 -			cc->in_buf = xrealloc(cc->in_buf, cc->in_buf_allocated);
  39.722 -		}
  39.723 -		r = read(cc->fd, cc->in_buf + cc->in_buf_used,
  39.724 -			 cc->in_buf_allocated - cc->in_buf_used);
  39.725 -		if (r <= 0) {
  39.726 -			cc->state = CC_STATE_ERROR;
  39.727 -		} else {
  39.728 -			cc->in_buf_used += r;
  39.729 -		}
  39.730 -		pthread_mutex_unlock(&cc->dom->mux);
  39.731 -		signal_domain(cc->dom);
  39.732 -		break;
  39.733 -	}
  39.734 -}
  39.735 -
  39.736 -static void
  39.737 -handle_connection_event(struct open_connection *oc)
  39.738 -{
  39.739 -	int r;
  39.740 -
  39.741 -	/* We know that some amount of data is ready and waiting for
  39.742 -	   us.  Slurp it in. */
  39.743 -	if (oc->buf_used == oc->buf_allocated) {
  39.744 -		oc->buf_allocated *= 2;
  39.745 -		oc->buf = xrealloc(oc->buf, oc->buf_allocated);
  39.746 -	}
  39.747 -	r = read(oc->fd, oc->buf + oc->buf_used,
  39.748 -		 oc->buf_allocated - oc->buf_used);
  39.749 -	if (r < 0) {
  39.750 -		warn("reading command from remote");
  39.751 -		oc->state = OC_STATE_ERROR;
  39.752 -	} else if (r == 0) {
  39.753 -		warnx("reading command from remote");
  39.754 -		oc->state = OC_STATE_ERROR;
  39.755 -	} else {
  39.756 -		oc->buf_used += r;
  39.757 -		if (strchr(oc->buf, '\n'))
  39.758 -			oc->state = OC_STATE_COMMAND_PENDING;
  39.759 -	}
  39.760 -}
  39.761 -
  39.762 -static void
  39.763 -get_and_process_event(void)
  39.764 -{
  39.765 -	fd_set read_fds, except_fds;
  39.766 -	struct open_connection *oc;
  39.767 -	struct console_connection *cc;
  39.768 -	int max_fd = listen_fd;
  39.769 -	int r;
  39.770 -	struct list_head *li, *temp_li;
  39.771 -
  39.772 -	FD_ZERO(&read_fds);
  39.773 -	FD_ZERO(&except_fds);
  39.774 -	FD_SET(listen_fd, &read_fds);
  39.775 -	FD_SET(evtchn_fd, &read_fds);
  39.776 -	if (evtchn_fd > max_fd)
  39.777 -		max_fd = evtchn_fd;
  39.778 -	foreach_open_connection(oc) {
  39.779 -		FD_SET(oc->fd, &read_fds);
  39.780 -		FD_SET(oc->fd, &except_fds);
  39.781 -		if (oc->fd > max_fd)
  39.782 -			max_fd = oc->fd;
  39.783 -	}
  39.784 -	foreach_console_connection(cc) {
  39.785 -		FD_SET(cc->fd, &read_fds);
  39.786 -		FD_SET(cc->fd, &except_fds);
  39.787 -		if (cc->fd > max_fd)
  39.788 -			max_fd = cc->fd;
  39.789 -	}
  39.790 -
  39.791 -	r = select(max_fd + 1, &read_fds, NULL, &except_fds, NULL);
  39.792 -	if (r < 0)
  39.793 -		err(1, "select");
  39.794 -	if (FD_ISSET(listen_fd, &read_fds)) {
  39.795 -		accept_new_connection();
  39.796 -	} else if (FD_ISSET(evtchn_fd, &read_fds))
  39.797 -		handle_evtchn_event();
  39.798 -
  39.799 -
  39.800 -	foreach_open_connection(oc) {
  39.801 -		if (FD_ISSET(oc->fd, &read_fds))
  39.802 -			handle_connection_event(oc);
  39.803 -		if (FD_ISSET(oc->fd, &except_fds))
  39.804 -			oc->state = OC_STATE_ERROR;
  39.805 -	}
  39.806 -	list_foreach_safe(&head_console, li, temp_li) {
  39.807 -		cc = list_item(li, struct console_connection, list);
  39.808 -		if (FD_ISSET(cc->fd, &read_fds))
  39.809 -			handle_console_event(cc);
  39.810 -		if (FD_ISSET(cc->fd, &except_fds) ||
  39.811 -		    cc->state == CC_STATE_ERROR) {
  39.812 -			PRINTF(1, "Cleaning up console connection");
  39.813 -			cc->dom->cc = NULL;
  39.814 -			list_remove(&cc->list);
  39.815 -			close(cc->fd);
  39.816 -			if (cc->buf_allocated != 0)
  39.817 -				free(cc->buf);
  39.818 -			if (cc->in_buf_allocated != 0)
  39.819 -				free(cc->in_buf);
  39.820 -			free(cc);
  39.821 -		}
  39.822 -	}
  39.823 -
  39.824 -	/* Run pending stuff on the open connections. */
  39.825 -	list_foreach_safe(&head_connection, li, temp_li) {
  39.826 -		oc = list_item(li, struct open_connection, connection_list);
  39.827 -		switch (oc->state) {
  39.828 -		case OC_STATE_ERROR:
  39.829 -			list_remove(&oc->connection_list);
  39.830 -			closedown_connection(oc);
  39.831 -			break;
  39.832 -		case OC_STATE_COMMAND_PENDING:
  39.833 -			process_command(oc);
  39.834 -			break;
  39.835 -		case OC_STATE_CONNECTED:
  39.836 -			/* Don't need to do anything */
  39.837 -			break;
  39.838 -		}
  39.839 -	}
  39.840 -}
  39.841 -
  39.842 -static int
  39.843 -start_listening(void)
  39.844 -{
  39.845 -	int sock;
  39.846 -	struct sockaddr_in inaddr;
  39.847 -
  39.848 -	sock = socket(PF_INET, SOCK_STREAM, 0);
  39.849 -	if (sock < 0)
  39.850 -		err(1, "creating socket");
  39.851 -	memset(&inaddr, 0, sizeof(inaddr));
  39.852 -	inaddr.sin_family = AF_INET;
  39.853 -	inaddr.sin_port = htons(MINIXEND_PORT);
  39.854 -
  39.855 -	if (bind(sock, (struct sockaddr *)&inaddr, sizeof(inaddr)) < 0)
  39.856 -		err(1, "binding to port %d", MINIXEND_PORT);
  39.857 -	if (listen(sock, 5) < 0)
  39.858 -		err(1, "listening for connections");
  39.859 -
  39.860 -	return sock;
  39.861 -}
  39.862 -
  39.863 -static struct domain *
  39.864 -find_dom0(void)
  39.865 -{
  39.866 -	int r;
  39.867 -	xc_dominfo_t info;
  39.868 -	struct domain *work;
  39.869 -
  39.870 -	r = xc_domain_getinfo(xc_handle, 0, 1, &info);
  39.871 -	if (r < 0)
  39.872 -		err(1, "getting domain 0 information");
  39.873 -	work = xmalloc(sizeof(*work));
  39.874 -	work->control_evtchn = 2;
  39.875 -	if (ioctl(evtchn_fd, EVTCHN_BIND, 2) < 0)
  39.876 -		err(1, "binding to domain 0 control event channel");
  39.877 -
  39.878 -	work->domid = 0;
  39.879 -	work->name = strdup("dom0");
  39.880 -	work->mem_kb = info.max_memkb;
  39.881 -	work->state = DOM_STATE_RUNNING;
  39.882 -	work->shared_info_mfn = info.shared_info_frame;
  39.883 -
  39.884 -	work->shared_info = map_domain_mem(work, info.shared_info_frame);
  39.885 -	work->ctrl_if = (control_if_t *)((unsigned)work->shared_info + 2048);
  39.886 -	work->tx_req_cons = work->ctrl_if->tx_req_prod;
  39.887 -	work->rx_resp_cons = work->ctrl_if->rx_resp_prod;
  39.888 -
  39.889 -	pthread_mutex_init(&work->mux, NULL);
  39.890 -	pthread_cond_init(&work->cond, NULL);
  39.891 -
  39.892 -	list_insert_after(&work->domain_list, &head_domain);
  39.893 -
  39.894 -	return work;
  39.895 -}
  39.896 -
  39.897 -int
  39.898 -main(int argc, char *argv[])
  39.899 -{
  39.900 -	int r;
  39.901 -
  39.902 -	r = our_system(NETWORK_SCRIPT " start antispoof=no");
  39.903 -	if (r < 0)
  39.904 -		err(1, "running " NETWORK_SCRIPT);
  39.905 -	if (!WIFEXITED(r)) {
  39.906 -		if (WIFSIGNALED(r)) {
  39.907 -			errx(1, NETWORK_SCRIPT " killed by signal %d",
  39.908 -			     WTERMSIG(r));
  39.909 -		}
  39.910 -		errx(1, NETWORK_SCRIPT " terminated abnormally");
  39.911 -	}
  39.912 -	if (WEXITSTATUS(r) != 0)
  39.913 -		errx(1, NETWORK_SCRIPT " returned error status %d",
  39.914 -		     WEXITSTATUS(r));
  39.915 -
  39.916 -	xc_handle = xc_interface_open();
  39.917 -
  39.918 -	listen_fd = start_listening();
  39.919 -
  39.920 -	evtchn_fd = open("/dev/xen/evtchn", O_RDWR);
  39.921 -	if (evtchn_fd < 0)
  39.922 -		err(1, "openning /dev/xen/evtchn");
  39.923 -
  39.924 -	dom0 = find_dom0();
  39.925 -
  39.926 -	while (1) {
  39.927 -		get_and_process_event();
  39.928 -
  39.929 -		PRINTF(5, "Dom0 ring state:\n");
  39.930 -		PRINTF(5, "RX: req_prod %ld, resp_prod %ld, resp_cons %ld\n",
  39.931 -		       dom0->ctrl_if->rx_req_prod,
  39.932 -		       dom0->ctrl_if->rx_resp_prod,
  39.933 -		       dom0->rx_resp_cons);
  39.934 -		PRINTF(5, "TX: req_prod %ld, resp_prod %ld, req_cons %ld\n",
  39.935 -		       dom0->ctrl_if->tx_req_prod,
  39.936 -		       dom0->ctrl_if->tx_resp_prod,
  39.937 -		       dom0->tx_req_cons);
  39.938 -	}
  39.939 -
  39.940 -	return 0;
  39.941 -}
  39.942 -
    40.1 --- a/tools/x2d2/minixend.h	Tue Mar 08 22:57:09 2005 +0000
    40.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.3 @@ -1,154 +0,0 @@
    40.4 -#ifndef MINIXEND_H__
    40.5 -#define MINIXEND_H__
    40.6 -
    40.7 -#include <sys/types.h>
    40.8 -#include <xc.h>
    40.9 -
   40.10 -struct list_head {
   40.11 -	struct list_head *next, **pprev;
   40.12 -};
   40.13 -
   40.14 -struct open_connection {
   40.15 -	struct list_head connection_list;
   40.16 -	int fd;
   40.17 -	enum {
   40.18 -		OC_STATE_CONNECTED,
   40.19 -		OC_STATE_ERROR,
   40.20 -		OC_STATE_COMMAND_PENDING
   40.21 -	} state;
   40.22 -
   40.23 -	/* Buffer of stuff coming from the remote until we get a whole
   40.24 -	   command */
   40.25 -	int buf_used;
   40.26 -	int buf_allocated;
   40.27 -	char *buf;
   40.28 -};
   40.29 -
   40.30 -struct console_connection;
   40.31 -
   40.32 -/* Only ever accessed from the domain's controlling thread, unless
   40.33 -   it's dom0, in which case we perform a moderately complex dance to
   40.34 -   avoid needing any sort of locking at all. */
   40.35 -struct domain {
   40.36 -	struct list_head domain_list;
   40.37 -	int control_evtchn; /* the local port for the doain control
   40.38 -			       interface event channel. */
   40.39 -	int domid;
   40.40 -	char *name;
   40.41 -	int mem_kb;
   40.42 -	enum {
   40.43 -		DOM_STATE_CREATED, /* created but not built */
   40.44 -		DOM_STATE_PAUSED,  /* built but not started or paused */
   40.45 -		DOM_STATE_RUNNING, /* running normally */
   40.46 -		DOM_STATE_DEAD     /* dead; either destroyed, crashed,
   40.47 -				      or exitted. */
   40.48 -	} state;
   40.49 -
   40.50 -	unsigned long shared_info_mfn;
   40.51 -	shared_info_t *shared_info;
   40.52 -	control_if_t *ctrl_if;
   40.53 -	CONTROL_RING_IDX tx_req_cons;
   40.54 -	CONTROL_RING_IDX rx_resp_cons;
   40.55 -
   40.56 -	unsigned created_netif_backend:1;
   40.57 -	unsigned plugged:1;
   40.58 -	unsigned event_pending:1; /* True if an event arrived while
   40.59 -				     the domain was plugged. */
   40.60 -
   40.61 -	struct console_connection *cc;
   40.62 -
   40.63 -	char netif_mac[6];
   40.64 -
   40.65 -	/* Used for two purposes: waking up domain threads when
   40.66 -	   necessary, and synchronising access to dom0, which doesn't
   40.67 -	   have a domain thread. */
   40.68 -	pthread_mutex_t mux;
   40.69 -	pthread_cond_t cond;
   40.70 -
   40.71 -	pthread_t thread;
   40.72 -};
   40.73 -
   40.74 -struct console_connection {
   40.75 -	struct list_head list;
   40.76 -	int fd;
   40.77 -	struct domain *dom;
   40.78 -
   40.79 -	enum {
   40.80 -		CC_STATE_PENDING,
   40.81 -		CC_STATE_CONNECTED,
   40.82 -		CC_STATE_ERROR
   40.83 -	} state;
   40.84 -
   40.85 -	unsigned buf_allocated;
   40.86 -	unsigned buf_used;
   40.87 -	char *buf;
   40.88 -
   40.89 -	unsigned in_buf_allocated;
   40.90 -	unsigned in_buf_used;
   40.91 -	char *in_buf;
   40.92 -};
   40.93 -
   40.94 -
   40.95 -void *domain_thread_func(void *d);
   40.96 -void process_command(struct open_connection *oc);
   40.97 -
   40.98 -void *xmalloc(size_t s);
   40.99 -void *xrealloc(void *x, size_t s);
  40.100 -char *xstrdup(const char *s);
  40.101 -
  40.102 -int allocate_event_channel(struct domain *d, int event_ports[2]);
  40.103 -void *map_domain_mem(struct domain *d, unsigned long mfn);
  40.104 -void signal_domain(struct domain *d);
  40.105 -int our_system(const char *fmt, ...);
  40.106 -
  40.107 -extern unsigned xc_handle;
  40.108 -#define EVTCHN_BIND _IO('E', 2)
  40.109 -extern int evtchn_fd;
  40.110 -
  40.111 -#define list_item(head, type, field)                                        \
  40.112 -((type *)((unsigned)(head) - offsetof(type, field)))
  40.113 -
  40.114 -#define foreach_item(iter, head, type, field)                               \
  40.115 -for ((iter) = list_item((head)->next, type, field);                         \
  40.116 -     (iter) != list_item((head), type, field);                              \
  40.117 -     (iter) = list_item((iter)->field.next, type, field))
  40.118 -
  40.119 -#define list_insert_after(what, head)                                       \
  40.120 -do {                                                                        \
  40.121 -	(what)->next = (head)->next;                                        \
  40.122 -	(what)->pprev = &(head)->next;                                      \
  40.123 -	(head)->next->pprev = &(what)->next;                                \
  40.124 -	(head)->next = what;                                                \
  40.125 -} while (0)
  40.126 -
  40.127 -#define list_remove(head)                                                   \
  40.128 -(head)->next->pprev = (head)->pprev;                                        \
  40.129 -*(head)->pprev = (head)->next;
  40.130 -
  40.131 -#define list_foreach_safe(head, li, temp)                                   \
  40.132 -for ((li) = (head)->next, (temp) = (li)->next;                              \
  40.133 -     (li) != (head);                                                        \
  40.134 -     (li) = (temp), (temp) = (li)->next)
  40.135 -
  40.136 -#define LIST_HEAD(x) { (x), &(x)->next }
  40.137 -
  40.138 -
  40.139 -extern struct list_head head_domain;
  40.140 -extern struct list_head head_console;
  40.141 -
  40.142 -#define foreach_domain(d)                                                   \
  40.143 -foreach_item(d, &head_domain, struct domain, domain_list)
  40.144 -#define foreach_console_connection(cc)                                      \
  40.145 -foreach_item(cc, &head_console, struct console_connection, list)
  40.146 -
  40.147 -
  40.148 -#define CURRENT_LOG_LEVEL 0
  40.149 -
  40.150 -#define PRINTF(level, ...)                                         \
  40.151 -do {                                                               \
  40.152 -	if ((level) >= CURRENT_LOG_LEVEL)                          \
  40.153 -		printf(__VA_ARGS__);                               \
  40.154 -} while (0)
  40.155 -
  40.156 -
  40.157 -#endif /* MINIXEND_H__ */
    41.1 --- a/tools/x2d2/util.c	Tue Mar 08 22:57:09 2005 +0000
    41.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.3 @@ -1,132 +0,0 @@
    41.4 -#define _GNU_SOURCE
    41.5 -
    41.6 -#include <sys/types.h>
    41.7 -#include <sys/wait.h>
    41.8 -#include <assert.h>
    41.9 -#include <err.h>
   41.10 -#include <errno.h>
   41.11 -#include <fcntl.h>
   41.12 -#include <stdarg.h>
   41.13 -#include <stdlib.h>
   41.14 -#include <stdio.h>
   41.15 -#include <string.h>
   41.16 -#include <unistd.h>
   41.17 -
   41.18 -void *
   41.19 -xmalloc(size_t s)
   41.20 -{
   41.21 -	void *x;
   41.22 -
   41.23 -	x = malloc(s);
   41.24 -	if (x == NULL)
   41.25 -		err(1, "allocating memory");
   41.26 -	memset(x, 0, s);
   41.27 -	return x;
   41.28 -}
   41.29 -
   41.30 -void *
   41.31 -xrealloc(void *x, size_t s)
   41.32 -{
   41.33 -	void *y;
   41.34 -	y = realloc(x, s);
   41.35 -	if (y == NULL)
   41.36 -		err(1, "allocating more memory");
   41.37 -	return y;
   41.38 -}
   41.39 -
   41.40 -char *
   41.41 -xstrdup(const char *s)
   41.42 -{
   41.43 -	char *x = strdup(s);
   41.44 -	if (x == NULL)
   41.45 -		err(1, "duplicating %s", s);
   41.46 -	return x;
   41.47 -}
   41.48 -
   41.49 -/* Slightly less stupid implementation of system().  We return
   41.50 -   negative iff there is an error executing the shell; otherwise, we
   41.51 -   return the wait status as reported by waitpid(). Also, we support
   41.52 -   printf-style escapes.  We don't handle setting the SIGCHLD handler
   41.53 -   to SIGIGN, though: in that case, we have a race. */
   41.54 -int
   41.55 -our_system(const char *fmt, ...)
   41.56 -{
   41.57 -	char *cmd = NULL;
   41.58 -	int r;
   41.59 -	va_list ap;
   41.60 -	pid_t child = -1;
   41.61 -	int pip[2] = {-1, -1};
   41.62 -	int e;
   41.63 -	fd_set fds;
   41.64 -	struct timeval to;
   41.65 -	int res;
   41.66 -	pid_t c;
   41.67 -	unsigned status;
   41.68 -
   41.69 -	va_start(ap, fmt);
   41.70 -	r = vasprintf(&cmd, fmt, ap);
   41.71 -	va_end(ap);
   41.72 -	if (r < 0)
   41.73 -		return r;
   41.74 -	r = pipe(pip);
   41.75 -	if (r < 0) {
   41.76 -		res = r;
   41.77 -		goto out;
   41.78 -	}
   41.79 -	child = fork();
   41.80 -	if (child < 0) {
   41.81 -		res = child;
   41.82 -		goto out;
   41.83 -	}
   41.84 -	if (child == 0) {
   41.85 -		close(pip[0]);
   41.86 -		fcntl(pip[1], F_SETFD, 1);
   41.87 -		r = execl("/bin/sh", "/bin/sh", "-c", cmd, NULL);
   41.88 -		/* Uh oh, exec failed */
   41.89 -		write(pip[1], &r, sizeof(r));
   41.90 -		_exit(1);
   41.91 -	}
   41.92 -
   41.93 -	close(pip[1]);
   41.94 -	pip[1] = -1;
   41.95 -
   41.96 -	c = waitpid(child, &status, 0);
   41.97 -	if (c < 0) {
   41.98 -		res = c;
   41.99 -		goto out;
  41.100 -	}
  41.101 -	assert(c == child);
  41.102 -	child = -1;
  41.103 -
  41.104 -	/* Check execl result */
  41.105 -	FD_ZERO(&fds);
  41.106 -	FD_SET(pip[0], &fds);
  41.107 -	memset(&to, 0, sizeof(to));
  41.108 -	r = select(pip[0]+1, &fds, NULL, NULL, &to);
  41.109 -	if (r == 0) {
  41.110 -		res = status;
  41.111 -	} else {
  41.112 -		assert(FD_ISSET(pip[0], &fds));
  41.113 -		r = read(pip[0], &res, sizeof(res));
  41.114 -		if (r != sizeof(res))
  41.115 -			res = status;
  41.116 -	}
  41.117 -	close(pip[0]);
  41.118 -	pip[0] = -1;
  41.119 -
  41.120 - out:
  41.121 -	e = errno;
  41.122 -	if (child >= 0) {
  41.123 -		/* Not obvious what the correct thing to do here is. */
  41.124 -		/* Don't want to kill the child; that will create a
  41.125 -		   zombie. */
  41.126 -//		kill(child, 9);
  41.127 -	}
  41.128 -	if (pip[0] >= 0)
  41.129 -		close(pip[0]);
  41.130 -	if (pip[1] >= 0)
  41.131 -		close(pip[1]);
  41.132 -	free(cmd);
  41.133 -	errno = e;
  41.134 -	return res;
  41.135 -}
    42.1 --- a/tools/xcs/ctrl_interface.c	Tue Mar 08 22:57:09 2005 +0000
    42.2 +++ b/tools/xcs/ctrl_interface.c	Wed Mar 09 20:19:23 2005 +0000
    42.3 @@ -57,14 +57,14 @@ int ctrl_chan_read_request(control_chann
    42.4      control_msg_t     *smsg;
    42.5      RING_IDX          c = cc->tx_ring.req_cons;
    42.6  
    42.7 -    if ( !RING_HAS_UNCONSUMED_REQUESTS(CTRL_RING, &cc->tx_ring) )
    42.8 +    if ( !RING_HAS_UNCONSUMED_REQUESTS(&cc->tx_ring) )
    42.9      {
   42.10          DPRINTF("no request to read\n");
   42.11          return -1;
   42.12      }
   42.13      
   42.14      rmb(); /* make sure we see the data associated with the request */
   42.15 -    smsg = RING_GET_REQUEST(CTRL_RING, &cc->tx_ring, c);
   42.16 +    smsg = RING_GET_REQUEST(&cc->tx_ring, c);
   42.17      memcpy(&dmsg->msg, smsg, sizeof(*smsg));
   42.18      if ( dmsg->msg.length > sizeof(dmsg->msg.msg) )
   42.19          dmsg->msg.length = sizeof(dmsg->msg.msg);
   42.20 @@ -78,18 +78,18 @@ int ctrl_chan_write_request(control_chan
   42.21      control_msg_t *dmsg;
   42.22      RING_IDX       p = cc->rx_ring.req_prod_pvt;
   42.23      
   42.24 -    if ( RING_FULL(CTRL_RING, &cc->rx_ring) )
   42.25 +    if ( RING_FULL(&cc->rx_ring) )
   42.26      {
   42.27          DPRINTF("no space to write request");
   42.28          return -ENOSPC;
   42.29      }
   42.30  
   42.31 -    dmsg = RING_GET_REQUEST(CTRL_RING, &cc->rx_ring, p);
   42.32 +    dmsg = RING_GET_REQUEST(&cc->rx_ring, p);
   42.33      memcpy(dmsg, &smsg->msg, sizeof(*dmsg));
   42.34  
   42.35      wmb();
   42.36      cc->rx_ring.req_prod_pvt++;
   42.37 -    RING_PUSH_REQUESTS(CTRL_RING, &cc->rx_ring);
   42.38 +    RING_PUSH_REQUESTS(&cc->rx_ring);
   42.39      
   42.40      return 0;
   42.41  }
   42.42 @@ -99,14 +99,14 @@ int ctrl_chan_read_response(control_chan
   42.43      control_msg_t     *smsg;
   42.44      RING_IDX          c = cc->rx_ring.rsp_cons;
   42.45      
   42.46 -    if ( !RING_HAS_UNCONSUMED_RESPONSES(CTRL_RING, &cc->rx_ring) )
   42.47 +    if ( !RING_HAS_UNCONSUMED_RESPONSES(&cc->rx_ring) )
   42.48      {
   42.49          DPRINTF("no response to read");
   42.50          return -1;
   42.51      }
   42.52  
   42.53      rmb(); /* make sure we see the data associated with the request */
   42.54 -    smsg = RING_GET_RESPONSE(CTRL_RING, &cc->rx_ring, c);
   42.55 +    smsg = RING_GET_RESPONSE(&cc->rx_ring, c);
   42.56      memcpy(&dmsg->msg, smsg, sizeof(*smsg));
   42.57      if ( dmsg->msg.length > sizeof(dmsg->msg.msg) )
   42.58          dmsg->msg.length = sizeof(dmsg->msg.msg);
   42.59 @@ -128,29 +128,29 @@ int ctrl_chan_write_response(control_cha
   42.60          return -ENOSPC;
   42.61      }
   42.62  
   42.63 -    dmsg = RING_GET_RESPONSE(CTRL_RING, &cc->tx_ring, p);
   42.64 +    dmsg = RING_GET_RESPONSE(&cc->tx_ring, p);
   42.65      memcpy(dmsg, &smsg->msg, sizeof(*dmsg));
   42.66  
   42.67      wmb();
   42.68      cc->tx_ring.rsp_prod_pvt++;
   42.69 -    RING_PUSH_RESPONSES(CTRL_RING, &cc->tx_ring);
   42.70 +    RING_PUSH_RESPONSES(&cc->tx_ring);
   42.71      
   42.72      return 0;
   42.73  }
   42.74  
   42.75  int ctrl_chan_request_to_read(control_channel_t *cc)
   42.76  {
   42.77 -    return (RING_HAS_UNCONSUMED_REQUESTS(CTRL_RING, &cc->tx_ring));
   42.78 +    return (RING_HAS_UNCONSUMED_REQUESTS(&cc->tx_ring));
   42.79  }
   42.80  
   42.81  int ctrl_chan_space_to_write_request(control_channel_t *cc)
   42.82  {
   42.83 -    return (!(RING_FULL(CTRL_RING, &cc->rx_ring)));
   42.84 +    return (!(RING_FULL(&cc->rx_ring)));
   42.85  }
   42.86  
   42.87  int ctrl_chan_response_to_read(control_channel_t *cc)
   42.88  {
   42.89 -    return (RING_HAS_UNCONSUMED_RESPONSES(CTRL_RING, &cc->rx_ring));
   42.90 +    return (RING_HAS_UNCONSUMED_RESPONSES(&cc->rx_ring));
   42.91  }
   42.92  
   42.93  int ctrl_chan_space_to_write_response(control_channel_t *cc)
   42.94 @@ -186,8 +186,8 @@ int ctrl_chan_connect(control_channel_t 
   42.95      }
   42.96  
   42.97      /* Synchronise ring indexes. */
   42.98 -    BACK_RING_ATTACH(CTRL_RING, &cc->tx_ring, &cc->interface->tx_ring);
   42.99 -    FRONT_RING_ATTACH(CTRL_RING, &cc->rx_ring, &cc->interface->rx_ring);
  42.100 +    BACK_RING_ATTACH(&cc->tx_ring, &cc->interface->tx_ring);
  42.101 +    FRONT_RING_ATTACH(&cc->rx_ring, &cc->interface->rx_ring);
  42.102  
  42.103      cc->connected = 1;
  42.104  
    43.1 --- a/xen/Makefile	Tue Mar 08 22:57:09 2005 +0000
    43.2 +++ b/xen/Makefile	Wed Mar 09 20:19:23 2005 +0000
    43.3 @@ -46,7 +46,8 @@ clean: delete-unfresh-files
    43.4  	rm -f include/asm-*/asm-offsets.h
    43.5  	rm -f tools/figlet/*.o tools/figlet/figlet
    43.6  
    43.7 -$(TARGET): delete-unfresh-files include/xen/compile.h
    43.8 +$(TARGET): delete-unfresh-files
    43.9 +	$(MAKE) include/xen/compile.h
   43.10  	[ -e include/asm ] || ln -sf asm-$(TARGET_ARCH) include/asm
   43.11  	$(MAKE) -C arch/$(TARGET_ARCH) asm-offsets.s
   43.12  	$(MAKE) include/asm-$(TARGET_ARCH)/asm-offsets.h
    44.1 --- a/xen/arch/ia64/domain.c	Tue Mar 08 22:57:09 2005 +0000
    44.2 +++ b/xen/arch/ia64/domain.c	Wed Mar 09 20:19:23 2005 +0000
    44.3 @@ -555,9 +555,6 @@ int construct_dom0(struct domain *d,
    44.4  #if 0
    44.5      strcpy(d->name,"Domain0");
    44.6  #endif
    44.7 -    /* Set up shared-info area. */
    44.8 -    update_dom_time(d);
    44.9 -    d->shared_info->domain_time = 0;
   44.10  
   44.11  	// prepare domain0 pagetable (maps METAphysical to physical)
   44.12  	// following is roughly mm_init() in linux/kernel/fork.c
    45.1 --- a/xen/arch/ia64/patch/linux-2.6.7/time.c	Tue Mar 08 22:57:09 2005 +0000
    45.2 +++ b/xen/arch/ia64/patch/linux-2.6.7/time.c	Wed Mar 09 20:19:23 2005 +0000
    45.3 @@ -70,7 +70,7 @@
    45.4  +    return now; 
    45.5  +}
    45.6  +
    45.7 -+void update_dom_time(struct domain *d)
    45.8 ++void update_dom_time(struct exec_domain *ed)
    45.9  +{
   45.10  +// FIXME: implement this?
   45.11  +//	printf("update_dom_time: called, not implemented, skipping\n");
    46.1 --- a/xen/arch/x86/domain.c	Tue Mar 08 22:57:09 2005 +0000
    46.2 +++ b/xen/arch/x86/domain.c	Wed Mar 09 20:19:23 2005 +0000
    46.3 @@ -19,6 +19,7 @@
    46.4  #include <xen/smp.h>
    46.5  #include <xen/delay.h>
    46.6  #include <xen/softirq.h>
    46.7 +#include <xen/grant_table.h>
    46.8  #include <asm/regs.h>
    46.9  #include <asm/mc146818rtc.h>
   46.10  #include <asm/system.h>
   46.11 @@ -696,7 +697,8 @@ long do_switch_to_user(void)
   46.12          regs->rcx = stu.rcx;
   46.13      }
   46.14      
   46.15 -    return regs->rax;
   46.16 +    /* Saved %rax gets written back to regs->rax in entry.S. */
   46.17 +    return stu.rax; 
   46.18  }
   46.19  
   46.20  #elif defined(__i386__)
   46.21 @@ -713,7 +715,7 @@ long do_switch_to_user(void)
   46.22  			: /* no output */ \
   46.23  			:"r" ((_ed)->debugreg[_reg]))
   46.24  
   46.25 -void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p)
   46.26 +void context_switch(struct exec_domain *prev_p, struct exec_domain *next_p)
   46.27  {
   46.28      struct tss_struct *tss = init_tss + smp_processor_id();
   46.29      execution_context_t *stack_ec = get_execution_context();
   46.30 @@ -804,6 +806,18 @@ void switch_to(struct exec_domain *prev_
   46.31      __sti();
   46.32  
   46.33      switch_segments(stack_ec, prev_p, next_p);
   46.34 +
   46.35 +    /*
   46.36 +     * We do this late on because it doesn't need to be protected by the
   46.37 +     * schedule_lock, and because we want this to be the very last use of
   46.38 +     * 'prev' (after this point, a dying domain's info structure may be freed
   46.39 +     * without warning). 
   46.40 +     */
   46.41 +    clear_bit(EDF_RUNNING, &prev_p->ed_flags);
   46.42 +
   46.43 +    schedule_tail(next_p);
   46.44 +
   46.45 +    BUG();
   46.46  }
   46.47  
   46.48  
   46.49 @@ -956,6 +970,9 @@ void domain_relinquish_memory(struct dom
   46.50      /* Ensure that noone is running over the dead domain's page tables. */
   46.51      synchronise_pagetables(~0UL);
   46.52  
   46.53 +    /* Release mappings of other domains */
   46.54 +    gnttab_release_all_mappings( d->grant_table );
   46.55 +
   46.56      /* Exit shadow mode before deconstructing final guest page table. */
   46.57      shadow_mode_disable(d);
   46.58  
   46.59 @@ -964,16 +981,15 @@ void domain_relinquish_memory(struct dom
   46.60      {
   46.61          if ( pagetable_val(ed->arch.guest_table) != 0 )
   46.62          {
   46.63 -            put_page_and_type(
   46.64 -                &frame_table[pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT]);
   46.65 +            put_page_and_type(&frame_table[
   46.66 +                pagetable_val(ed->arch.guest_table) >> PAGE_SHIFT]);
   46.67              ed->arch.guest_table = mk_pagetable(0);
   46.68          }
   46.69  
   46.70          if ( pagetable_val(ed->arch.guest_table_user) != 0 )
   46.71          {
   46.72 -            put_page_and_type(
   46.73 -                &frame_table[pagetable_val(ed->arch.guest_table_user) >>
   46.74 -                            PAGE_SHIFT]);
   46.75 +            put_page_and_type(&frame_table[
   46.76 +                pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT]);
   46.77              ed->arch.guest_table_user = mk_pagetable(0);
   46.78          }
   46.79      }
    47.1 --- a/xen/arch/x86/domain_build.c	Tue Mar 08 22:57:09 2005 +0000
    47.2 +++ b/xen/arch/x86/domain_build.c	Wed Mar 09 20:19:23 2005 +0000
    47.3 @@ -406,9 +406,6 @@ int construct_dom0(struct domain *d,
    47.4  
    47.5  #endif /* __x86_64__ */
    47.6  
    47.7 -    /* Set up shared-info area. */
    47.8 -    update_dom_time(d);
    47.9 -    d->shared_info->domain_time = 0;
   47.10      /* Mask all upcalls... */
   47.11      for ( i = 0; i < MAX_VIRT_CPUS; i++ )
   47.12          d->shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    48.1 --- a/xen/arch/x86/mm.c	Tue Mar 08 22:57:09 2005 +0000
    48.2 +++ b/xen/arch/x86/mm.c	Wed Mar 09 20:19:23 2005 +0000
    48.3 @@ -104,7 +104,7 @@
    48.4  
    48.5  #ifdef VERBOSE
    48.6  #define MEM_LOG(_f, _a...)                           \
    48.7 -  printk("DOM%u: (file=memory.c, line=%d) " _f "\n", \
    48.8 +  printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
    48.9           current->domain->id , __LINE__ , ## _a )
   48.10  #else
   48.11  #define MEM_LOG(_f, _a...) ((void)0)
   48.12 @@ -1911,16 +1911,127 @@ int do_mmu_update(
   48.13      return rc;
   48.14  }
   48.15  
   48.16 +void update_shadow_va_mapping(unsigned long va,
   48.17 +                              unsigned long val,
   48.18 +                              struct exec_domain *ed,
   48.19 +                              struct domain *d)
   48.20 +{
   48.21 +    /* This function assumes the caller is holding the domain's BIGLOCK
   48.22 +     * and is running in a shadow mode
   48.23 +     */
   48.24 +
   48.25 +    unsigned long   sval = 0;
   48.26 +
   48.27 +    l1pte_propagate_from_guest(d, &val, &sval);
   48.28 +
   48.29 +    if ( unlikely(__put_user(sval, ((unsigned long *)(
   48.30 +        &shadow_linear_pg_table[l1_linear_offset(va)])))) )
   48.31 +    {
   48.32 +        /*
   48.33 +         * Since L2's are guranteed RW, failure indicates either that the
   48.34 +         * page was not shadowed, or that the L2 entry has not yet been
   48.35 +         * updated to reflect the shadow.
   48.36 +         */
   48.37 +        if ( shadow_mode_external(current->domain) )
   48.38 +            BUG(); // can't use linear_l2_table with external tables.
   48.39 +
   48.40 +        l2_pgentry_t gpde = linear_l2_table[l2_table_offset(va)];
   48.41 +        unsigned long gpfn = l2_pgentry_val(gpde) >> PAGE_SHIFT;
   48.42 +
   48.43 +        if (get_shadow_status(d, gpfn))
   48.44 +        {
   48.45 +            unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
   48.46 +            unsigned long *gl1e = map_domain_mem(gmfn << PAGE_SHIFT);
   48.47 +            unsigned l1_idx = l1_table_offset(va);
   48.48 +            gl1e[l1_idx] = sval;
   48.49 +            unmap_domain_mem(gl1e);
   48.50 +            put_shadow_status(d);
   48.51 +
   48.52 +            perfc_incrc(shadow_update_va_fail1);
   48.53 +        }
   48.54 +        else
   48.55 +            perfc_incrc(shadow_update_va_fail2);
   48.56 +    }
   48.57 +
   48.58 +    /*
   48.59 +     * If we're in log-dirty mode then we need to note that we've updated
   48.60 +     * the PTE in the PT-holding page. We need the machine frame number
   48.61 +     * for this.
   48.62 +     */
   48.63 +    if ( shadow_mode_log_dirty(d) )
   48.64 +        mark_dirty(d, va_to_l1mfn(va));
   48.65 +
   48.66 +    check_pagetable(d, ed->arch.guest_table, "va"); /* debug */
   48.67 +}
   48.68 +
   48.69 +int update_grant_va_mapping(unsigned long va,
   48.70 +                            unsigned long _nl1e, 
   48.71 +                            struct domain *d,
   48.72 +                            struct exec_domain *ed)
   48.73 +{
   48.74 +    /* Caller must:
   48.75 +     * . own d's BIGLOCK 
   48.76 +     * . already have 'get_page' correctly on the to-be-installed nl1e
   48.77 +     * . be responsible for flushing the TLB
   48.78 +     * . check PTE being installed isn't DISALLOWED
   48.79 +     */
   48.80 +
   48.81 +    /* Return value:
   48.82 +     * -ve : error
   48.83 +     * 0   : done
   48.84 +     * GNTUPDVA_prev_ro : done & prior mapping was ro to same frame
   48.85 +     * GNTUPDVA_prev_rw : done & prior mapping was rw to same frame
   48.86 +     */
   48.87 +
   48.88 +    int             rc = 0;
   48.89 +    l1_pgentry_t   *pl1e;
   48.90 +    unsigned long   _ol1e;
   48.91 +
   48.92 +    cleanup_writable_pagetable(d);
   48.93 +
   48.94 +    pl1e = &linear_pg_table[l1_linear_offset(va)];
   48.95 +
   48.96 +    if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
   48.97 +        rc = -EINVAL;
   48.98 +    else
   48.99 +    {
  48.100 +        l1_pgentry_t ol1e = mk_l1_pgentry(_ol1e);
  48.101 +
  48.102 +        if ( update_l1e(pl1e, ol1e, mk_l1_pgentry(_nl1e)) )
  48.103 +        {
  48.104 +            /* overwrote different mfn?  */
  48.105 +            if (((_ol1e ^ _nl1e) & (PADDR_MASK & PAGE_MASK)) != 0)
  48.106 +            {
  48.107 +                rc = 0;
  48.108 +                put_page_from_l1e(ol1e, d);
  48.109 +            }
  48.110 +            else
  48.111 +                rc = ((_ol1e & _PAGE_RW) ? GNTUPDVA_prev_rw
  48.112 +                                         : GNTUPDVA_prev_ro );
  48.113 +                /* use return code to avoid nasty grant table
  48.114 +                 * slow path in put_page_from_l1e -- caller
  48.115 +                 * must handle ref count instead. */
  48.116 +        }
  48.117 +        else
  48.118 +            rc = -EINVAL;
  48.119 +    }
  48.120 +
  48.121 +    if ( unlikely(shadow_mode_enabled(d)) )
  48.122 +        update_shadow_va_mapping(va, _nl1e, ed, d);
  48.123 +
  48.124 +    return rc;
  48.125 +}
  48.126 +
  48.127  
  48.128  int do_update_va_mapping(unsigned long va,
  48.129                           unsigned long val, 
  48.130                           unsigned long flags)
  48.131  {
  48.132 -    struct exec_domain *ed = current;
  48.133 -    struct domain *d = ed->domain;
  48.134 -    int err = 0;
  48.135 -    unsigned int cpu = ed->processor;
  48.136 -    unsigned long deferred_ops;
  48.137 +    struct exec_domain      *ed  = current;
  48.138 +    struct domain           *d   = ed->domain;
  48.139 +    unsigned int             cpu = ed->processor;
  48.140 +    unsigned long            deferred_ops;
  48.141 +    int                      rc = 0;
  48.142  
  48.143      perfc_incrc(calls_to_update_va);
  48.144  
  48.145 @@ -1941,53 +2052,10 @@ int do_update_va_mapping(unsigned long v
  48.146  
  48.147      if ( unlikely(!mod_l1_entry(&linear_pg_table[l1_linear_offset(va)],
  48.148                                  mk_l1_pgentry(val))) )
  48.149 -        err = -EINVAL;
  48.150 +        rc = -EINVAL;
  48.151  
  48.152      if ( unlikely(shadow_mode_enabled(d)) )
  48.153 -    {
  48.154 -        unsigned long sval = 0;
  48.155 -
  48.156 -        l1pte_propagate_from_guest(d, &val, &sval);
  48.157 -
  48.158 -        if ( unlikely(__put_user(sval, ((unsigned long *)(
  48.159 -            &shadow_linear_pg_table[l1_linear_offset(va)])))) )
  48.160 -        {
  48.161 -            /*
  48.162 -             * Since L2's are guranteed RW, failure indicates either that the
  48.163 -             * page was not shadowed, or that the L2 entry has not yet been
  48.164 -             * updated to reflect the shadow.
  48.165 -             */
  48.166 -            if ( shadow_mode_external(current->domain) )
  48.167 -                BUG(); // can't use linear_l2_table with external tables.
  48.168 -
  48.169 -            l2_pgentry_t gpde = linear_l2_table[l2_table_offset(va)];
  48.170 -            unsigned long gpfn = l2_pgentry_val(gpde) >> PAGE_SHIFT;
  48.171 -
  48.172 -            if (get_shadow_status(d, gpfn))
  48.173 -            {
  48.174 -                unsigned long gmfn = __gpfn_to_mfn(d, gpfn);
  48.175 -                unsigned long *gl1e = map_domain_mem(gmfn << PAGE_SHIFT);
  48.176 -                unsigned l1_idx = l1_table_offset(va);
  48.177 -                gl1e[l1_idx] = sval;
  48.178 -                unmap_domain_mem(gl1e);
  48.179 -                put_shadow_status(d);
  48.180 -
  48.181 -                perfc_incrc(shadow_update_va_fail1);
  48.182 -            }
  48.183 -            else
  48.184 -                perfc_incrc(shadow_update_va_fail2);
  48.185 -        }
  48.186 -
  48.187 -        /*
  48.188 -         * If we're in log-dirty mode then we need to note that we've updated
  48.189 -         * the PTE in the PT-holding page. We need the machine frame number
  48.190 -         * for this.
  48.191 -         */
  48.192 -        if ( shadow_mode_log_dirty(d) )
  48.193 -            mark_dirty(d, va_to_l1mfn(va));
  48.194 -  
  48.195 -        check_pagetable(d, ed->arch.guest_table, "va"); /* debug */
  48.196 -    }
  48.197 +        update_shadow_va_mapping(va, val, ed, d);
  48.198  
  48.199      deferred_ops = percpu_info[cpu].deferred_ops;
  48.200      percpu_info[cpu].deferred_ops = 0;
  48.201 @@ -2003,7 +2071,7 @@ int do_update_va_mapping(unsigned long v
  48.202      
  48.203      UNLOCK_BIGLOCK(d);
  48.204  
  48.205 -    return err;
  48.206 +    return rc;
  48.207  }
  48.208  
  48.209  int do_update_va_mapping_otherdomain(unsigned long va,
  48.210 @@ -2566,6 +2634,31 @@ void ptwr_status(void)
  48.211      page = &frame_table[pfn];
  48.212  }
  48.213  
  48.214 +void audit_pagelist(struct domain *d)
  48.215 +{
  48.216 +    struct list_head *list_ent;
  48.217 +    int xenpages, totpages;
  48.218 +
  48.219 +    list_ent = d->xenpage_list.next;
  48.220 +    for ( xenpages = 0; (list_ent != &d->xenpage_list); xenpages++ )
  48.221 +    {
  48.222 +        list_ent = list_ent->next;
  48.223 +    }
  48.224 +    list_ent = d->page_list.next;
  48.225 +    for ( totpages = 0; (list_ent != &d->page_list); totpages++ )
  48.226 +    {
  48.227 +        list_ent = list_ent->next;
  48.228 +    }
  48.229 +
  48.230 +    if ( xenpages != d->xenheap_pages ||
  48.231 +         totpages != d->tot_pages )
  48.232 +    {
   48.233 +        printk("ARGH! dom %d: xen=%d %d, pages=%d %d\n",
   48.234 +               d->id, xenpages, d->xenheap_pages, 
   48.235 +               totpages, d->tot_pages );
  48.236 +    }
  48.237 +}
  48.238 +
  48.239  void audit_domain(struct domain *d)
  48.240  {
  48.241      int ttot=0, ctot=0, io_mappings=0, lowmem_mappings=0;
  48.242 @@ -2669,6 +2762,8 @@ void audit_domain(struct domain *d)
  48.243             
  48.244      spin_lock(&d->page_alloc_lock);
  48.245  
  48.246 +    audit_pagelist(d);
  48.247 +
  48.248      /* PHASE 0 */
  48.249  
  48.250      list_ent = d->page_list.next;
  48.251 @@ -2969,13 +3064,13 @@ void audit_domain(struct domain *d)
  48.252          list_ent = frame_table[pfn].list.next;
  48.253      }
  48.254  
  48.255 -    spin_unlock(&d->page_alloc_lock);
  48.256  
  48.257      if ( pagetable_val(d->exec_domain[0]->arch.guest_table) )
  48.258          adjust(&frame_table[pagetable_val(
  48.259              d->exec_domain[0]->arch.guest_table)>>PAGE_SHIFT], 1, 1);
  48.260  
  48.261 -    printk("Audit %d: Done. pages=%d l1=%d l2=%d ctot=%d ttot=%d\n", d->id, i, l1, l2, ctot, ttot );
  48.262 +    spin_unlock(&d->page_alloc_lock);
  48.263 +    printk("Audit %d: Done. ref=%d xenpages=%d pages=%d l1=%d l2=%d ctot=%d ttot=%d\n", d->id, atomic_read(&d->refcnt), d->xenheap_pages, d->tot_pages, l1, l2, ctot, ttot );
  48.264  
  48.265      if ( d != current->domain )
  48.266          domain_unpause(d);
    49.1 --- a/xen/arch/x86/time.c	Tue Mar 08 22:57:09 2005 +0000
    49.2 +++ b/xen/arch/x86/time.c	Wed Mar 09 20:19:23 2005 +0000
    49.3 @@ -13,7 +13,9 @@
    49.4   *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
    49.5   */
    49.6  
    49.7 +#include <xen/config.h>
    49.8  #include <xen/errno.h>
    49.9 +#include <xen/event.h>
   49.10  #include <xen/sched.h>
   49.11  #include <xen/lib.h>
   49.12  #include <xen/config.h>
   49.13 @@ -272,13 +274,10 @@ s_time_t get_s_time(void)
   49.14      return now; 
   49.15  }
   49.16  
   49.17 -
   49.18 -void update_dom_time(struct domain *d)
   49.19 +static inline void __update_dom_time(struct exec_domain *ed)
   49.20  {
   49.21 +    struct domain *d  = ed->domain;
   49.22      shared_info_t *si = d->shared_info;
   49.23 -    unsigned long flags;
   49.24 -
   49.25 -    read_lock_irqsave(&time_lock, flags);
   49.26  
   49.27      spin_lock(&d->time_lock);
   49.28  
   49.29 @@ -295,10 +294,21 @@ void update_dom_time(struct domain *d)
   49.30      si->time_version2++;
   49.31  
   49.32      spin_unlock(&d->time_lock);
   49.33 -
   49.34 -    read_unlock_irqrestore(&time_lock, flags);
   49.35  }
   49.36  
   49.37 +int update_dom_time(struct exec_domain *ed)
   49.38 +{
   49.39 +    unsigned long flags;
   49.40 +
   49.41 +    if ( ed->domain->shared_info->tsc_timestamp == full_tsc_irq )
   49.42 +        return 0;
   49.43 +
   49.44 +    read_lock_irqsave(&time_lock, flags);
   49.45 +    __update_dom_time(ed);
   49.46 +    read_unlock_irqrestore(&time_lock, flags);
   49.47 +
   49.48 +    return 1;
   49.49 +}
   49.50  
   49.51  /* Set clock to <secs,usecs> after 00:00:00 UTC, 1 January, 1970. */
   49.52  void do_settime(unsigned long secs, unsigned long usecs, u64 system_time_base)
   49.53 @@ -320,9 +330,11 @@ void do_settime(unsigned long secs, unsi
   49.54      wc_sec  = secs;
   49.55      wc_usec = _usecs;
   49.56  
   49.57 +    /* Others will pick up the change at the next tick. */
   49.58 +    __update_dom_time(current);
   49.59 +    send_guest_virq(current, VIRQ_TIMER);
   49.60 +
   49.61      write_unlock_irq(&time_lock);
   49.62 -
   49.63 -    update_dom_time(current->domain);
   49.64  }
   49.65  
   49.66  
    50.1 --- a/xen/arch/x86/traps.c	Tue Mar 08 22:57:09 2005 +0000
    50.2 +++ b/xen/arch/x86/traps.c	Wed Mar 09 20:19:23 2005 +0000
    50.3 @@ -52,6 +52,7 @@
    50.4  #include <asm/i387.h>
    50.5  #include <asm/debugger.h>
    50.6  #include <asm/msr.h>
    50.7 +#include <asm/x86_emulate.h>
    50.8  
    50.9  /*
   50.10   * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
   50.11 @@ -369,8 +370,6 @@ long do_fpu_taskswitch(int set)
   50.12  
   50.13  static int emulate_privileged_op(struct xen_regs *regs)
   50.14  {
   50.15 -    extern void *decode_reg(struct xen_regs *regs, u8 b);
   50.16 -
   50.17      struct exec_domain *ed = current;
   50.18      unsigned long *reg, eip = regs->eip;
   50.19      u8 opcode;
   50.20 @@ -405,7 +404,7 @@ static int emulate_privileged_op(struct 
   50.21          eip += 1;
   50.22          if ( (opcode & 0xc0) != 0xc0 )
   50.23              goto fail;
   50.24 -        reg = decode_reg(regs, opcode & 7);
   50.25 +        reg = decode_register(opcode & 7, regs, 0);
   50.26          switch ( (opcode >> 3) & 7 )
   50.27          {
   50.28          case 0: /* Read CR0 */
   50.29 @@ -433,7 +432,7 @@ static int emulate_privileged_op(struct 
   50.30          eip += 1;
   50.31          if ( (opcode & 0xc0) != 0xc0 )
   50.32              goto fail;
   50.33 -        reg = decode_reg(regs, opcode & 7);
   50.34 +        reg = decode_register(opcode & 7, regs, 0);
   50.35          switch ( (opcode >> 3) & 7 )
   50.36          {
   50.37          case 0: /* Write CR0 */
    51.1 --- a/xen/arch/x86/x86_32/seg_fixup.c	Tue Mar 08 22:57:09 2005 +0000
    51.2 +++ b/xen/arch/x86/x86_32/seg_fixup.c	Wed Mar 09 20:19:23 2005 +0000
    51.3 @@ -28,6 +28,7 @@
    51.4  #include <xen/mm.h>
    51.5  #include <xen/perfc.h>
    51.6  #include <asm/processor.h>
    51.7 +#include <asm/x86_emulate.h>
    51.8  
    51.9  /* Make the scary benign errors go away. */
   51.10  #undef  DPRINTK
   51.11 @@ -258,24 +259,6 @@ int fixup_seg(u16 seg, unsigned long off
   51.12      return 1;
   51.13  }
   51.14  
   51.15 -/* Decode Reg field of a ModRM byte: return a pointer into a register block. */
   51.16 -void *decode_reg(struct xen_regs *regs, u8 b)
   51.17 -{
   51.18 -    switch ( b & 7 )
   51.19 -    {
   51.20 -    case 0: return &regs->eax;
   51.21 -    case 1: return &regs->ecx;
   51.22 -    case 2: return &regs->edx;
   51.23 -    case 3: return &regs->ebx;
   51.24 -    case 4: return &regs->esp;
   51.25 -    case 5: return &regs->ebp;
   51.26 -    case 6: return &regs->esi;
   51.27 -    case 7: return &regs->edi;
   51.28 -    }
   51.29 -
   51.30 -    return NULL;
   51.31 -}
   51.32 -
   51.33  /*
   51.34   * Called from the general-protection fault handler to attempt to decode
   51.35   * and emulate an instruction that depends on 4GB segments.
   51.36 @@ -402,8 +385,8 @@ int gpf_emulate_4gb(struct xen_regs *reg
   51.37      }
   51.38  
   51.39      /* Decode Reg and R/M fields. */
   51.40 -    regreg = decode_reg(regs, reg);
   51.41 -    memreg = decode_reg(regs, rm);
   51.42 +    regreg = decode_register(reg, regs, 0);
   51.43 +    memreg = decode_register(rm,  regs, 0);
   51.44  
   51.45      /* Decode Mod field. */
   51.46      switch ( modrm >> 6 )
    52.1 --- a/xen/arch/x86/x86_64/entry.S	Tue Mar 08 22:57:09 2005 +0000
    52.2 +++ b/xen/arch/x86/x86_64/entry.S	Wed Mar 09 20:19:23 2005 +0000
    52.3 @@ -243,6 +243,7 @@ FLT15:  movq  %rax,(%rsi)               
    52.4          movq  %rax,XREGS_rip+8(%rsp)
    52.5          movb  $0,TRAPBOUNCE_flags(%rdx)
    52.6          testb $TF_kernel_mode,EDOMAIN_thread_flags(%rbx)
    52.7 +        movq  %rbx,%rdi # toggle_guest_mode(current)
    52.8          jz    SYMBOL_NAME(toggle_guest_mode)
    52.9          ret
   52.10  .section .fixup,"ax"
    53.1 --- a/xen/arch/x86/x86_64/traps.c	Tue Mar 08 22:57:09 2005 +0000
    53.2 +++ b/xen/arch/x86/x86_64/traps.c	Wed Mar 09 20:19:23 2005 +0000
    53.3 @@ -248,31 +248,6 @@ void __init percpu_traps_init(void)
    53.4      wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
    53.5  }
    53.6  
    53.7 -void *decode_reg(struct xen_regs *regs, u8 b)
    53.8 -{
    53.9 -    switch ( b )
   53.10 -    {
   53.11 -    case  0: return &regs->rax;
   53.12 -    case  1: return &regs->rcx;
   53.13 -    case  2: return &regs->rdx;
   53.14 -    case  3: return &regs->rbx;
   53.15 -    case  4: return &regs->rsp;
   53.16 -    case  5: return &regs->rbp;
   53.17 -    case  6: return &regs->rsi;
   53.18 -    case  7: return &regs->rdi;
   53.19 -    case  8: return &regs->r8;
   53.20 -    case  9: return &regs->r9;
   53.21 -    case 10: return &regs->r10;
   53.22 -    case 11: return &regs->r11;
   53.23 -    case 12: return &regs->r12;
   53.24 -    case 13: return &regs->r13;
   53.25 -    case 14: return &regs->r14;
   53.26 -    case 15: return &regs->r15;
   53.27 -    }
   53.28 -
   53.29 -    return NULL;
   53.30 -}
   53.31 -
   53.32  long do_set_callbacks(unsigned long event_address,
   53.33                        unsigned long failsafe_address,
   53.34                        unsigned long syscall_address)
    54.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    54.2 +++ b/xen/arch/x86/x86_emulate.c	Wed Mar 09 20:19:23 2005 +0000
    54.3 @@ -0,0 +1,968 @@
    54.4 +/******************************************************************************
    54.5 + * x86_emulate.c
    54.6 + * 
    54.7 + * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
    54.8 + * 
    54.9 + * Copyright (c) 2005 Keir Fraser
   54.10 + */
   54.11 +
   54.12 +#ifdef __TEST_HARNESS__
   54.13 +#include <stdio.h>
   54.14 +#include <stdint.h>
   54.15 +typedef uint8_t            u8;
   54.16 +typedef uint16_t           u16;
   54.17 +typedef uint32_t           u32;
   54.18 +typedef uint64_t           u64;
   54.19 +typedef int8_t             s8;
   54.20 +typedef int16_t            s16;
   54.21 +typedef int32_t            s32;
   54.22 +typedef int64_t            s64;
   54.23 +#include <public/xen.h>
   54.24 +#else
   54.25 +#include <xen/config.h>
   54.26 +#include <xen/types.h>
   54.27 +#include <xen/lib.h>
   54.28 +#include <asm/regs.h>
   54.29 +#endif
   54.30 +#include <asm-x86/x86_emulate.h>
   54.31 +
   54.32 +/*
   54.33 + * Opcode effective-address decode tables.
   54.34 + * Note that we only emulate instructions that have at least one memory
   54.35 + * operand (excluding implicit stack references). We assume that stack
   54.36 + * references and instruction fetches will never occur in special memory
   54.37 + * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
   54.38 + * not be handled.
   54.39 + */
   54.40 +
   54.41 +/* Operand sizes: 8-bit operands or specified/overridden size. */
   54.42 +#define ByteOp      (1<<0) /* 8-bit operands. */
   54.43 +/* Destination operand type. */
   54.44 +#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
   54.45 +#define DstReg      (2<<1) /* Register operand. */
   54.46 +#define DstMem      (3<<1) /* Memory operand. */
   54.47 +#define DstMask     (3<<1)
   54.48 +/* Source operand type. */
   54.49 +#define SrcNone     (0<<3) /* No source operand. */
   54.50 +#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
   54.51 +#define SrcReg      (1<<3) /* Register operand. */
   54.52 +#define SrcMem      (2<<3) /* Memory operand. */
   54.53 +#define SrcImm      (3<<3) /* Immediate operand. */
   54.54 +#define SrcImmByte  (4<<3) /* 8-bit sign-extended immediate operand. */
   54.55 +#define SrcMask     (7<<3)
   54.56 +/* Generic ModRM decode. */
   54.57 +#define ModRM       (1<<6)
   54.58 +/* Destination is only written; never read. */
   54.59 +#define Mov         (1<<7)
   54.60 +
   54.61 +static u8 opcode_table[256] = {
   54.62 +    /* 0x00 - 0x07 */
   54.63 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   54.64 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   54.65 +    0, 0, 0, 0,
   54.66 +    /* 0x08 - 0x0F */
   54.67 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   54.68 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   54.69 +    0, 0, 0, 0,
   54.70 +    /* 0x10 - 0x17 */
   54.71 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   54.72 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   54.73 +    0, 0, 0, 0,
   54.74 +    /* 0x18 - 0x1F */
   54.75 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   54.76 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   54.77 +    0, 0, 0, 0,
   54.78 +    /* 0x20 - 0x27 */
   54.79 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   54.80 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   54.81 +    0, 0, 0, 0,
   54.82 +    /* 0x28 - 0x2F */
   54.83 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   54.84 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   54.85 +    0, 0, 0, 0,
   54.86 +    /* 0x30 - 0x37 */
   54.87 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   54.88 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   54.89 +    0, 0, 0, 0,
   54.90 +    /* 0x38 - 0x3F */
   54.91 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
   54.92 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
   54.93 +    0, 0, 0, 0,
   54.94 +    /* 0x40 - 0x4F */
   54.95 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   54.96 +    /* 0x50 - 0x5F */
   54.97 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   54.98 +    /* 0x60 - 0x6F */
   54.99 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.100 +    /* 0x70 - 0x7F */
  54.101 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.102 +    /* 0x80 - 0x87 */
  54.103 +    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
  54.104 +    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
  54.105 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  54.106 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  54.107 +    /* 0x88 - 0x8F */
  54.108 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
  54.109 +    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
  54.110 +    0, 0, 0, DstMem|SrcNone|ModRM|Mov,
  54.111 +    /* 0x90 - 0x9F */
  54.112 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.113 +    /* 0xA0 - 0xA7 */
  54.114 +    ByteOp|DstReg|SrcMem|Mov, DstReg|SrcMem|Mov,
  54.115 +    ByteOp|DstMem|SrcReg|Mov, DstMem|SrcReg|Mov,
  54.116 +    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  54.117 +    ByteOp|ImplicitOps, ImplicitOps,
  54.118 +    /* 0xA8 - 0xAF */
  54.119 +    0, 0, ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  54.120 +    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
  54.121 +    ByteOp|ImplicitOps, ImplicitOps,
  54.122 +    /* 0xB0 - 0xBF */
  54.123 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.124 +    /* 0xC0 - 0xC7 */
  54.125 +    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
  54.126 +    0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
  54.127 +    /* 0xC8 - 0xCF */
  54.128 +    0, 0, 0, 0, 0, 0, 0, 0,
  54.129 +    /* 0xD0 - 0xD7 */
  54.130 +    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
  54.131 +    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM, 
  54.132 +    0, 0, 0, 0,
  54.133 +    /* 0xD8 - 0xDF */
  54.134 +    0, 0, 0, 0, 0, 0, 0, 0,
  54.135 +    /* 0xE0 - 0xEF */
  54.136 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.137 +    /* 0xF0 - 0xF7 */
  54.138 +    0, 0, 0, 0,
  54.139 +    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM,
  54.140 +    /* 0xF8 - 0xFF */
  54.141 +    0, 0, 0, 0,
  54.142 +    0, 0, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
  54.143 +};
  54.144 +
  54.145 +static u8 twobyte_table[256] = {
  54.146 +    /* 0x00 - 0x0F */
  54.147 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0,
  54.148 +    /* 0x10 - 0x1F */
  54.149 +    0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps|ModRM, 0, 0, 0, 0, 0, 0, 0,
  54.150 +    /* 0x20 - 0x2F */
  54.151 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.152 +    /* 0x30 - 0x3F */
  54.153 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.154 +    /* 0x40 - 0x47 */
  54.155 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  54.156 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  54.157 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  54.158 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  54.159 +    /* 0x48 - 0x4F */
  54.160 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  54.161 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  54.162 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  54.163 +    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
  54.164 +    /* 0x50 - 0x5F */
  54.165 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.166 +    /* 0x60 - 0x6F */
  54.167 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.168 +    /* 0x70 - 0x7F */
  54.169 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.170 +    /* 0x80 - 0x8F */
  54.171 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.172 +    /* 0x90 - 0x9F */
  54.173 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.174 +    /* 0xA0 - 0xA7 */
  54.175 +    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0, 
  54.176 +    /* 0xA8 - 0xAF */
  54.177 +    0, 0, 0, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
  54.178 +    /* 0xB0 - 0xB7 */
  54.179 +    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, DstMem|SrcReg|ModRM,
  54.180 +    0, 0, 0, 0,
  54.181 +    /* 0xB8 - 0xBF */
  54.182 +    0, 0, DstMem|SrcImmByte|ModRM, DstMem|SrcReg|ModRM, 0, 0, 0, 0,
  54.183 +    /* 0xC0 - 0xCF */
  54.184 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.185 +    /* 0xD0 - 0xDF */
  54.186 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.187 +    /* 0xE0 - 0xEF */
  54.188 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  54.189 +    /* 0xF0 - 0xFF */
  54.190 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  54.191 +};
  54.192 +
  54.193 +/* Type, address-of, and value of an instruction's operand. */
  54.194 +struct operand {
  54.195 +    enum { OP_REG, OP_MEM, OP_IMM } type;
  54.196 +    unsigned int  bytes;
  54.197 +    unsigned long val, orig_val, *ptr;
  54.198 +};
  54.199 +
  54.200 +/* EFLAGS bit definitions. */
  54.201 +#define EFLG_OF (1<<11)
  54.202 +#define EFLG_DF (1<<10)
  54.203 +#define EFLG_SF (1<<7)
  54.204 +#define EFLG_ZF (1<<6)
  54.205 +#define EFLG_AF (1<<4)
  54.206 +#define EFLG_PF (1<<2)
  54.207 +#define EFLG_CF (1<<0)
  54.208 +
  54.209 +/*
  54.210 + * Instruction emulation:
  54.211 + * Most instructions are emulated directly via a fragment of inline assembly
  54.212 + * code. This allows us to save/restore EFLAGS and thus very easily pick up
  54.213 + * any modified flags.
  54.214 + */
  54.215 +
  54.216 +#if defined(__x86_64__)
  54.217 +#define _LO32 "k"          /* force 32-bit operand */
  54.218 +#define _STK  "%%rsp"      /* stack pointer */
  54.219 +#elif defined(__i386__)
  54.220 +#define _LO32 ""           /* force 32-bit operand */
  54.221 +#define _STK  "%%esp"      /* stack pointer */
  54.222 +#endif
  54.223 +
  54.224 +/*
  54.225 + * These EFLAGS bits are restored from saved value during emulation, and
  54.226 + * any changes are written back to the saved value after emulation.
  54.227 + */
  54.228 +#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
  54.229 +
  54.230 +/* Before executing instruction: restore necessary bits in EFLAGS. */
  54.231 +/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~msk; */
  54.232 +#define _PRE_EFLAGS(_sav, _msk, _tmp)           \
  54.233 +"push %"_sav"; "                                \
  54.234 +"movl %"_msk",%"_LO32 _tmp"; "                  \
  54.235 +"andl %"_LO32 _tmp",("_STK"); "                 \
  54.236 +"notl %"_LO32 _tmp"; "                          \
  54.237 +"andl %"_LO32 _tmp",%"_sav"; "                  \
  54.238 +"pushf; "                                       \
  54.239 +"andl %"_LO32 _tmp",("_STK"); "                 \
  54.240 +"pop  %"_tmp"; "                                \
  54.241 +"orl  %"_LO32 _tmp",("_STK"); "                 \
  54.242 +"popf; "
  54.243 +
  54.244 +/* After executing instruction: write-back necessary bits in EFLAGS. */
  54.245 +/* _sav |= EFLAGS & _msk; */
  54.246 +#define _POST_EFLAGS(_sav, _msk, _tmp)          \
  54.247 +"pushf; "                                       \
  54.248 +"pop  %"_tmp"; "                                \
  54.249 +"andl %"_msk",%"_LO32 _tmp"; "                  \
  54.250 +"orl  %"_LO32 _tmp",%"_sav"; "
  54.251 +
  54.252 +/* Raw emulation: instruction has two explicit operands. */
  54.253 +#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy)\
  54.254 +do{ unsigned long _tmp;                                                    \
  54.255 +    switch ( (_dst).bytes )                                                \
  54.256 +    {                                                                      \
  54.257 +    case 2:                                                                \
  54.258 +        __asm__ __volatile__ (                                             \
  54.259 +            _PRE_EFLAGS("0","4","2")                                       \
  54.260 +            _op"w %"_wx"3,%1; "                                            \
  54.261 +            _POST_EFLAGS("0","4","2")                                      \
  54.262 +            : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp)              \
  54.263 +            : _wy ((_src).val), "i" (EFLAGS_MASK) );                       \
  54.264 +        break;                                                             \
  54.265 +    case 4:                                                                \
  54.266 +        __asm__ __volatile__ (                                             \
  54.267 +            _PRE_EFLAGS("0","4","2")                                       \
  54.268 +            _op"l %"_lx"3,%1; "                                            \
  54.269 +            _POST_EFLAGS("0","4","2")                                      \
  54.270 +            : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp)              \
  54.271 +            : _ly ((_src).val), "i" (EFLAGS_MASK) );                       \
  54.272 +        break;                                                             \
  54.273 +    case 8:                                                                \
  54.274 +        __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy);           \
  54.275 +        break;                                                             \
  54.276 +    }                                                                      \
  54.277 +} while (0)
  54.278 +#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\
  54.279 +do{ unsigned long _tmp;                                                    \
  54.280 +    switch ( (_dst).bytes )                                                \
  54.281 +    {                                                                      \
  54.282 +    case 1:                                                                \
  54.283 +        __asm__ __volatile__ (                                             \
  54.284 +            _PRE_EFLAGS("0","4","2")                                       \
  54.285 +            _op"b %"_bx"3,%1; "                                            \
  54.286 +            _POST_EFLAGS("0","4","2")                                      \
  54.287 +            : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp)              \
  54.288 +            : _by ((_src).val), "i" (EFLAGS_MASK) );                       \
  54.289 +        break;                                                             \
  54.290 +    default:                                                               \
  54.291 +        __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy);\
  54.292 +        break;                                                             \
  54.293 +    }                                                                      \
  54.294 +} while (0)
  54.295 +/* Source operand is byte-sized and may be restricted to just %cl. */
  54.296 +#define emulate_2op_SrcB(_op, _src, _dst, _eflags)                         \
  54.297 +    __emulate_2op(_op, _src, _dst, _eflags,                                \
  54.298 +                  "b", "c", "b", "c", "b", "c", "b", "c")
  54.299 +/* Source operand is byte, word, long or quad sized. */
  54.300 +#define emulate_2op_SrcV(_op, _src, _dst, _eflags)                         \
  54.301 +    __emulate_2op(_op, _src, _dst, _eflags,                                \
  54.302 +                  "b", "q", "w", "r", _LO32, "r", "", "r")
  54.303 +/* Source operand is word, long or quad sized. */
  54.304 +#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)                  \
  54.305 +    __emulate_2op_nobyte(_op, _src, _dst, _eflags,                         \
  54.306 +                  "w", "r", _LO32, "r", "", "r")
  54.307 +
  54.308 +/* Instruction has only one explicit operand (no source operand). */
  54.309 +#define emulate_1op(_op,_dst,_eflags)                                      \
  54.310 +do{ unsigned long _tmp;                                                    \
  54.311 +    switch ( (_dst).bytes )                                                \
  54.312 +    {                                                                      \
  54.313 +    case 1:                                                                \
  54.314 +        __asm__ __volatile__ (                                             \
  54.315 +            _PRE_EFLAGS("0","3","2")                                       \
  54.316 +            _op"b %1; "                                                    \
  54.317 +            _POST_EFLAGS("0","3","2")                                      \
  54.318 +            : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp)              \
  54.319 +            : "i" (EFLAGS_MASK) );                                         \
  54.320 +        break;                                                             \
  54.321 +    case 2:                                                                \
  54.322 +        __asm__ __volatile__ (                                             \
  54.323 +            _PRE_EFLAGS("0","3","2")                                       \
  54.324 +            _op"w %1; "                                                    \
  54.325 +            _POST_EFLAGS("0","3","2")                                      \
  54.326 +            : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp)              \
  54.327 +            : "i" (EFLAGS_MASK) );                                         \
  54.328 +        break;                                                             \
  54.329 +    case 4:                                                                \
  54.330 +        __asm__ __volatile__ (                                             \
  54.331 +            _PRE_EFLAGS("0","3","2")                                       \
  54.332 +            _op"l %1; "                                                    \
  54.333 +            _POST_EFLAGS("0","3","2")                                      \
  54.334 +            : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp)              \
  54.335 +            : "i" (EFLAGS_MASK) );                                         \
  54.336 +        break;                                                             \
  54.337 +    case 8:                                                                \
  54.338 +        __emulate_1op_8byte(_op, _dst, _eflags);                           \
  54.339 +        break;                                                             \
  54.340 +    }                                                                      \
  54.341 +} while (0)
  54.342 +
  54.343 +/* Emulate an instruction with quadword operands (x86/64 only). */
  54.344 +#if defined(__x86_64__)
  54.345 +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)         \
  54.346 +do{ __asm__ __volatile__ (                                              \
  54.347 +        _PRE_EFLAGS("0","4","2")                                        \
  54.348 +        _op"q %"_qx"3,%1; "                                             \
  54.349 +        _POST_EFLAGS("0","4","2")                                       \
  54.350 +        : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp)               \
  54.351 +        : _qy ((_src).val), "i" (EFLAGS_MASK) );                        \
  54.352 +} while (0)
  54.353 +#define __emulate_1op_8byte(_op, _dst, _eflags)                         \
  54.354 +do{ __asm__ __volatile__ (                                              \
  54.355 +        _PRE_EFLAGS("0","3","2")                                        \
  54.356 +        _op"q %1; "                                                     \
  54.357 +        _POST_EFLAGS("0","3","2")                                       \
  54.358 +        : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp)               \
  54.359 +        : "i" (EFLAGS_MASK) );                                          \
  54.360 +} while (0)
  54.361 +#elif defined(__i386__)
  54.362 +#define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy)
  54.363 +#define __emulate_1op_8byte(_op, _dst, _eflags)
  54.364 +#endif /* __i386__ */
  54.365 +
  54.366 +/* Fetch next part of the instruction being emulated. */
  54.367 +#define insn_fetch(_type, _size, _eip) \
  54.368 +({ unsigned long _x; \
  54.369 +   if ( ops->read_std((unsigned long)(_eip), &_x, (_size)) ) \
  54.370 +       goto done; \
  54.371 +   (_eip) += (_size); \
  54.372 +   (_type)_x; \
  54.373 +})
  54.374 +
  54.375 +#define DPRINTF(_f, _a...) printf( _f , ## _a )
  54.376 +
  54.377 +void *
  54.378 +decode_register(
  54.379 +    u8 modrm_reg, struct xen_regs *regs, int highbyte_regs)
  54.380 +{
  54.381 +    void *p;
  54.382 +
  54.383 +    switch ( modrm_reg )
  54.384 +    {
  54.385 +    case  0: p = &regs->eax; break;
  54.386 +    case  1: p = &regs->ecx; break;
  54.387 +    case  2: p = &regs->edx; break;
  54.388 +    case  3: p = &regs->ebx; break;
  54.389 +    case  4: p = (highbyte_regs ?
  54.390 +                  ((unsigned char *)&regs->eax + 1) : 
  54.391 +                  (unsigned char *)&regs->esp); break;
  54.392 +    case  5: p = (highbyte_regs ?
  54.393 +                  ((unsigned char *)&regs->ecx + 1) : 
  54.394 +                  (unsigned char *)&regs->ebp); break;
  54.395 +    case  6: p = (highbyte_regs ?
  54.396 +                  ((unsigned char *)&regs->edx + 1) : 
  54.397 +                  (unsigned char *)&regs->esi); break;
  54.398 +    case  7: p = (highbyte_regs ?
  54.399 +                  ((unsigned char *)&regs->ebx + 1) : 
  54.400 +                  (unsigned char *)&regs->edi); break;
  54.401 +#if defined(__x86_64__)
  54.402 +    case  8: p = &regs->r8;  break;
  54.403 +    case  9: p = &regs->r9;  break;
  54.404 +    case 10: p = &regs->r10; break;
  54.405 +    case 11: p = &regs->r11; break;
  54.406 +    case 12: p = &regs->r12; break;
  54.407 +    case 13: p = &regs->r13; break;
  54.408 +    case 14: p = &regs->r14; break;
  54.409 +    case 15: p = &regs->r15; break;
  54.410 +#endif
  54.411 +    default: p = NULL; break;
  54.412 +    }
  54.413 +
  54.414 +    return p;
  54.415 +}
  54.416 +
  54.417 +int 
  54.418 +x86_emulate_memop(
  54.419 +    struct xen_regs *regs,
  54.420 +    unsigned long cr2,
  54.421 +    struct x86_mem_emulator *ops,
  54.422 +    int mode)
  54.423 +{
  54.424 +    u8 b, d, sib, twobyte = 0, rex_prefix = 0;
  54.425 +    u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
  54.426 +    unsigned int op_bytes = (mode == 8) ? 4 : mode, ad_bytes = mode;
  54.427 +    unsigned int lock_prefix = 0, rep_prefix = 0;
  54.428 +    struct operand src, dst;
  54.429 +
  54.430 +    /* Shadow copy of register state. Committed on successful emulation. */
  54.431 +    struct xen_regs _regs = *regs;
  54.432 +
  54.433 +    /* Legacy prefixes. */
  54.434 + next_prefix:
  54.435 +    switch ( b = insn_fetch(u8, 1, _regs.eip) )
  54.436 +    {
  54.437 +    case 0x66: /* operand-size override */
  54.438 +        op_bytes ^= 6;                    /* switch between 2/4 bytes */
  54.439 +        goto next_prefix;
  54.440 +    case 0x67: /* address-size override */
  54.441 +        ad_bytes ^= (mode == 8) ? 12 : 6; /* switch between 2/4 or 4/8 bytes */
  54.442 +        goto next_prefix;
  54.443 +    case 0x2e: /* CS override */
  54.444 +    case 0x3e: /* DS override */
  54.445 +    case 0x26: /* ES override */
  54.446 +    case 0x64: /* FS override */
  54.447 +    case 0x65: /* GS override */
  54.448 +    case 0x36: /* SS override */
  54.449 +        DPRINTF("Warning: ignoring a segment override. Probably okay. :-)\n");
  54.450 +        goto next_prefix;
  54.451 +    case 0xf0: /* LOCK */
  54.452 +        lock_prefix = 1;
  54.453 +        goto next_prefix;
  54.454 +    case 0xf3: /* REP/REPE/REPZ */
  54.455 +        rep_prefix = 1;
  54.456 +        goto next_prefix;
  54.457 +    case 0xf2: /* REPNE/REPNZ */
  54.458 +        goto next_prefix;
  54.459 +    }
  54.460 +
  54.461 +    if ( ad_bytes == 2 )
  54.462 +    {
  54.463 +        DPRINTF("Cannot parse 16-bit effective addresses.\n");
  54.464 +        goto cannot_emulate;
  54.465 +    }
  54.466 +
  54.467 +    /* REX prefix. */
  54.468 +    if ( (mode == 8) && ((b & 0xf0) == 0x40) )
  54.469 +    {
  54.470 +        rex_prefix = b;
  54.471 +        if ( b & 8 )
  54.472 +            op_bytes = 8;          /* REX.W */
  54.473 +        modrm_reg = (b & 4) << 1;  /* REX.R */
  54.474 +        /* REX.B and REX.X do not need to be decoded. */
  54.475 +        b = insn_fetch(u8, 1, _regs.eip);
  54.476 +    }
  54.477 +
  54.478 +    /* Opcode byte(s). */
  54.479 +    d = opcode_table[b];
  54.480 +    if ( d == 0 )
  54.481 +    {
  54.482 +        /* Two-byte opcode? */
  54.483 +        if ( b == 0x0f )
  54.484 +        {
  54.485 +            twobyte = 1;
  54.486 +            b = insn_fetch(u8, 1, _regs.eip);
  54.487 +            d = twobyte_table[b];
  54.488 +        }
  54.489 +
  54.490 +        /* Unrecognised? */
  54.491 +        if ( d == 0 )
  54.492 +            goto cannot_emulate;
  54.493 +    }
  54.494 +
  54.495 +    /* ModRM and SIB bytes. */
  54.496 +    if ( d & ModRM )
  54.497 +    {
  54.498 +        modrm = insn_fetch(u8, 1, _regs.eip);
  54.499 +        modrm_mod |= (modrm & 0xc0) >> 6;
  54.500 +        modrm_reg |= (modrm & 0x38) >> 3;
  54.501 +        modrm_rm  |= (modrm & 0x07);
  54.502 +        switch ( modrm_mod )
  54.503 +        {
  54.504 +        case 0:
  54.505 +            if ( (modrm_rm == 4) && 
  54.506 +                 (((sib = insn_fetch(u8, 1, _regs.eip)) & 7) == 5) )
  54.507 +                _regs.eip += 4; /* skip disp32 specified by SIB.base */
  54.508 +            else if ( modrm_rm == 5 )
  54.509 +                _regs.eip += 4; /* skip disp32 */
  54.510 +            break;
  54.511 +        case 1:
  54.512 +            if ( modrm_rm == 4 )
  54.513 +                sib = insn_fetch(u8, 1, _regs.eip);
  54.514 +            _regs.eip += 1; /* skip disp8 */
  54.515 +            break;
  54.516 +        case 2:
  54.517 +            if ( modrm_rm == 4 )
  54.518 +                sib = insn_fetch(u8, 1, _regs.eip);
  54.519 +            _regs.eip += 4; /* skip disp32 */
  54.520 +            break;
  54.521 +        case 3:
  54.522 +            DPRINTF("Cannot parse ModRM.mod == 3.\n");
  54.523 +            goto cannot_emulate;
  54.524 +        }
  54.525 +    }
  54.526 +
  54.527 +    /* Decode and fetch the destination operand: register or memory. */
  54.528 +    switch ( d & DstMask )
  54.529 +    {
  54.530 +    case ImplicitOps:
  54.531 +        /* Special instructions do their own operand decoding. */
  54.532 +        goto special_insn;
  54.533 +    case DstReg:
  54.534 +        dst.type = OP_REG;
  54.535 +        if ( d & ByteOp )
  54.536 +        {
  54.537 +            dst.ptr = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
  54.538 +            dst.val = *(u8 *)dst.ptr;
  54.539 +            dst.bytes = 1;
  54.540 +        }
  54.541 +        else
  54.542 +        {
  54.543 +            dst.ptr = decode_register(modrm_reg, &_regs, 0);
  54.544 +            switch ( (dst.bytes = op_bytes) )
  54.545 +            {
  54.546 +            case 2: dst.val = *(u16 *)dst.ptr; break;
  54.547 +            case 4: dst.val = *(u32 *)dst.ptr; break;
  54.548 +            case 8: dst.val = *(u64 *)dst.ptr; break;
  54.549 +            }
  54.550 +        }
  54.551 +        break;
  54.552 +    case DstMem:
  54.553 +        dst.type  = OP_MEM;
  54.554 +        dst.ptr   = (unsigned long *)cr2;
  54.555 +        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
  54.556 +        if ( !(d & Mov) && /* optimisation - avoid slow emulated read */
  54.557 +             ops->read_emulated((unsigned long)dst.ptr, &dst.val, dst.bytes) )
  54.558 +             goto done;
  54.559 +        break;
  54.560 +    }
  54.561 +    dst.orig_val = dst.val;
  54.562 +
  54.563 +    /* Decode and fetch the source operand: register, memory or immediate. */
  54.564 +    switch ( d & SrcMask )
  54.565 +    {
  54.566 +    case SrcNone:
  54.567 +        break;
  54.568 +    case SrcReg:
  54.569 +        src.type = OP_REG;
  54.570 +        if ( d & ByteOp )
  54.571 +        {
  54.572 +            src.ptr = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
  54.573 +            src.val = src.orig_val = *(u8 *)src.ptr;
  54.574 +            src.bytes = 1;
  54.575 +        }
  54.576 +        else
  54.577 +        {
  54.578 +            src.ptr = decode_register(modrm_reg, &_regs, 0);
  54.579 +            switch ( (src.bytes = op_bytes) )
  54.580 +            {
  54.581 +            case 2: src.val = src.orig_val = *(u16 *)src.ptr; break;
  54.582 +            case 4: src.val = src.orig_val = *(u32 *)src.ptr; break;
  54.583 +            case 8: src.val = src.orig_val = *(u64 *)src.ptr; break;
  54.584 +            }
  54.585 +        }
  54.586 +        break;
  54.587 +    case SrcMem:
  54.588 +        src.type  = OP_MEM;
  54.589 +        src.ptr   = (unsigned long *)cr2;
  54.590 +        src.bytes = (d & ByteOp) ? 1 : op_bytes;
  54.591 +        if ( ops->read_emulated((unsigned long)src.ptr, &src.val, src.bytes) )
  54.592 +            goto done;
  54.593 +        src.orig_val = src.val;
  54.594 +        break;
  54.595 +    case SrcImm:
  54.596 +        src.type  = OP_IMM;
  54.597 +        src.ptr   = (unsigned long *)_regs.eip;
  54.598 +        src.bytes = (d & ByteOp) ? 1 : op_bytes;
  54.599 +        if ( src.bytes == 8 ) src.bytes = 4;
  54.600 +        /* NB. Immediates are sign-extended as necessary. */
  54.601 +        switch ( src.bytes )
  54.602 +        {
  54.603 +        case 1: src.val = insn_fetch(s8,  1, _regs.eip); break;
  54.604 +        case 2: src.val = insn_fetch(s16, 2, _regs.eip); break;
  54.605 +        case 4: src.val = insn_fetch(s32, 4, _regs.eip); break;
  54.606 +        }
  54.607 +        break;
  54.608 +    case SrcImmByte:
  54.609 +        src.type  = OP_IMM;
  54.610 +        src.ptr   = (unsigned long *)_regs.eip;
  54.611 +        src.bytes = 1;
  54.612 +        src.val   = insn_fetch(s8,  1, _regs.eip);
  54.613 +        break;
  54.614 +    }
  54.615 +
  54.616 +    if ( twobyte )
  54.617 +        goto twobyte_insn;
  54.618 +
  54.619 +    switch ( b )
  54.620 +    {
  54.621 +    case 0x00 ... 0x05: add: /* add */
  54.622 +        emulate_2op_SrcV("add", src, dst, _regs.eflags);
  54.623 +        break;
  54.624 +    case 0x08 ... 0x0d: or:  /* or */
  54.625 +        emulate_2op_SrcV("or", src, dst, _regs.eflags);
  54.626 +        break;
  54.627 +    case 0x10 ... 0x15: adc: /* adc */
  54.628 +        emulate_2op_SrcV("adc", src, dst, _regs.eflags);
  54.629 +        break;
  54.630 +    case 0x18 ... 0x1d: sbb: /* sbb */
  54.631 +        emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
  54.632 +        break;
  54.633 +    case 0x20 ... 0x25: and: /* and */
  54.634 +        emulate_2op_SrcV("and", src, dst, _regs.eflags);
  54.635 +        break;
  54.636 +    case 0x28 ... 0x2d: sub: /* sub */
  54.637 +        emulate_2op_SrcV("sub", src, dst, _regs.eflags);
  54.638 +        break;
  54.639 +    case 0x30 ... 0x35: xor: /* xor */
  54.640 +        emulate_2op_SrcV("xor", src, dst, _regs.eflags);
  54.641 +        break;
  54.642 +    case 0x38 ... 0x3d: cmp: /* cmp */
  54.643 +        emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
  54.644 +        break;
  54.645 +    case 0x80 ... 0x83: /* Grp1 */
  54.646 +        switch ( modrm_reg )
  54.647 +        {
  54.648 +        case 0: goto add;
  54.649 +        case 1: goto or;
  54.650 +        case 2: goto adc;
  54.651 +        case 3: goto sbb;
  54.652 +        case 4: goto and;
  54.653 +        case 5: goto sub;
  54.654 +        case 6: goto xor;
  54.655 +        case 7: goto cmp;
  54.656 +        }
  54.657 +        break;
  54.658 +    case 0x84 ... 0x85: test: /* test */
  54.659 +        emulate_2op_SrcV("test", src, dst, _regs.eflags);
  54.660 +        break;
  54.661 +    case 0x86 ... 0x87: /* xchg */
  54.662 +        src.val ^= dst.val;
  54.663 +        dst.val ^= src.val;
  54.664 +        src.val ^= dst.val;
  54.665 +        break;
  54.666 +    case 0xa0 ... 0xa1: /* mov */
  54.667 +        dst.ptr = (unsigned long *)&_regs.eax;
  54.668 +        dst.val = src.val;
  54.669 +        _regs.eip += ad_bytes; /* skip src displacement */
  54.670 +        break;
  54.671 +    case 0xa2 ... 0xa3: /* mov */
  54.672 +        dst.val = (unsigned long)_regs.eax;
  54.673 +        _regs.eip += ad_bytes; /* skip dst displacement */
  54.674 +        break;
  54.675 +    case 0x88 ... 0x8b: /* mov */
  54.676 +    case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
  54.677 +        dst.val = src.val;
  54.678 +        break;
  54.679 +    case 0x8f: /* pop (sole member of Grp1a) */
  54.680 +        /* 64-bit mode: POP defaults to 64-bit operands. */
  54.681 +        if ( (mode == 8) && (dst.bytes == 4) )
  54.682 +            dst.bytes = 8;
  54.683 +        if ( ops->read_std(_regs.esp, &dst.val, dst.bytes) )
  54.684 +            goto done;
  54.685 +        _regs.esp += dst.bytes;
  54.686 +        break;
  54.687 +    case 0xc0 ... 0xc1: grp2: /* Grp2 */
  54.688 +        switch ( modrm_reg )
  54.689 +        {
  54.690 +        case 0: /* rol */
  54.691 +            emulate_2op_SrcB("rol", src, dst, _regs.eflags);
  54.692 +            break;
  54.693 +        case 1: /* ror */
  54.694 +            emulate_2op_SrcB("ror", src, dst, _regs.eflags);
  54.695 +            break;
  54.696 +        case 2: /* rcl */
  54.697 +            emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
  54.698 +            break;
  54.699 +        case 3: /* rcr */
  54.700 +            emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
  54.701 +            break;
  54.702 +        case 4: /* sal/shl */
  54.703 +        case 6: /* sal/shl */
  54.704 +            emulate_2op_SrcB("sal", src, dst, _regs.eflags);
  54.705 +            break;
  54.706 +        case 5: /* shr */
  54.707 +            emulate_2op_SrcB("shr", src, dst, _regs.eflags);
  54.708 +            break;
  54.709 +        case 7: /* sar */
  54.710 +            emulate_2op_SrcB("sar", src, dst, _regs.eflags);
  54.711 +            break;
  54.712 +        }
  54.713 +        break;
  54.714 +    case 0xd0 ... 0xd1: /* Grp2 */
  54.715 +        src.val = 1;
  54.716 +        goto grp2;
  54.717 +    case 0xd2 ... 0xd3: /* Grp2 */
  54.718 +        src.val = _regs.ecx;
  54.719 +        goto grp2;
  54.720 +    case 0xf6 ... 0xf7: /* Grp3 */
  54.721 +        switch ( modrm_reg )
  54.722 +        {
  54.723 +        case 0 ... 1: /* test */
  54.724 +            /* Special case in Grp3: test has an immediate source operand. */
  54.725 +            src.type = OP_IMM;
  54.726 +            src.ptr  = (unsigned long *)_regs.eip;
  54.727 +            src.bytes = (d & ByteOp) ? 1 : op_bytes;
  54.728 +            if ( src.bytes == 8 ) src.bytes = 4;
  54.729 +            switch ( src.bytes )
  54.730 +            {
  54.731 +            case 1: src.val = insn_fetch(s8,  1, _regs.eip); break;
  54.732 +            case 2: src.val = insn_fetch(s16, 2, _regs.eip); break;
  54.733 +            case 4: src.val = insn_fetch(s32, 4, _regs.eip); break;
  54.734 +            }
  54.735 +            goto test;
  54.736 +        case 2: /* not */
  54.737 +            dst.val = ~dst.val;
  54.738 +            break;
  54.739 +        case 3: /* neg */
  54.740 +            emulate_1op("neg", dst, _regs.eflags);
  54.741 +            break;
  54.742 +        default:
  54.743 +            goto cannot_emulate;
  54.744 +        }
  54.745 +        break;
  54.746 +    case 0xfe ... 0xff: /* Grp4/Grp5 */
  54.747 +        switch ( modrm_reg )
  54.748 +        {
  54.749 +        case 0: /* inc */
  54.750 +            emulate_1op("inc", dst, _regs.eflags);
  54.751 +            break;
  54.752 +        case 1: /* dec */
  54.753 +            emulate_1op("dec", dst, _regs.eflags);
  54.754 +            break;
  54.755 +        case 6: /* push */
  54.756 +            /* 64-bit mode: PUSH defaults to 64-bit operands. */
  54.757 +            if ( (mode == 8) && (dst.bytes == 4) )
  54.758 +            {
  54.759 +                dst.bytes = 8;
  54.760 +                if ( ops->read_std((unsigned long)dst.ptr, &dst.val, 8) )
  54.761 +                    goto done;
  54.762 +            }
  54.763 +            _regs.esp -= dst.bytes;
  54.764 +            if ( ops->write_std(_regs.esp, dst.val, dst.bytes) )
  54.765 +                goto done;
  54.766 +            dst.val = dst.orig_val; /* skanky: disable writeback */
  54.767 +            break;
  54.768 +        default:
  54.769 +            goto cannot_emulate;
  54.770 +        }
  54.771 +        break;
  54.772 +    }
  54.773 +
  54.774 + writeback:
  54.775 +    if ( (d & Mov) || (dst.orig_val != dst.val) )
  54.776 +    {
  54.777 +        switch ( dst.type )
  54.778 +        {
  54.779 +        case OP_REG:
  54.780 +            /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
  54.781 +            switch ( dst.bytes )
  54.782 +            {
  54.783 +            case 1: *(u8  *)dst.ptr = (u8)dst.val; break;
  54.784 +            case 2: *(u16 *)dst.ptr = (u16)dst.val; break;
  54.785 +            case 4: *dst.ptr = (u32)dst.val; break; /* 64b mode: zero-extend */
  54.786 +            case 8: *dst.ptr = dst.val; break;
  54.787 +            }
  54.788 +            break;
  54.789 +        case OP_MEM:
  54.790 +            if ( lock_prefix )
  54.791 +            {
  54.792 +                unsigned long seen;
  54.793 +                if ( ops->cmpxchg_emulated((unsigned long)dst.ptr,
  54.794 +                                           dst.orig_val, dst.val,
  54.795 +                                           &seen, dst.bytes) )
  54.796 +                    goto done;
  54.797 +                if ( seen != dst.orig_val )
  54.798 +                    goto done; /* Try again... */
  54.799 +            }
  54.800 +            else
  54.801 +            {
  54.802 +                if ( ops->write_emulated((unsigned long)dst.ptr,
  54.803 +                                         dst.val, dst.bytes) )
  54.804 +                    goto done;
  54.805 +            }
  54.806 +            break;
  54.807 +        default:
  54.808 +            break;
  54.809 +        }
  54.810 +    }
  54.811 +
  54.812 +    /* Commit shadow register state. */
  54.813 +    *regs = _regs;
  54.814 +
  54.815 + done:
  54.816 +    return 0;
  54.817 +
  54.818 + special_insn:
  54.819 +    if ( twobyte )
  54.820 +        goto twobyte_special_insn;
  54.821 +    if ( rep_prefix )
  54.822 +    {
  54.823 +        if ( _regs.ecx == 0 )
  54.824 +        {
  54.825 +            regs->eip = _regs.eip;
  54.826 +            goto done;
  54.827 +        }
  54.828 +        _regs.ecx--;
  54.829 +        _regs.eip = regs->eip;
  54.830 +    }
  54.831 +    switch ( b )
  54.832 +    {
  54.833 +    case 0xa4 ... 0xa5: /* movs */
  54.834 +        dst.type  = OP_MEM;
  54.835 +        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
  54.836 +        if ( _regs.error_code & 2 )
  54.837 +        {
  54.838 +            /* Write fault: destination is special memory. */
  54.839 +            dst.ptr = (unsigned long *)cr2;
  54.840 +            if ( ops->read_std(_regs.esi - _regs.edi + cr2, 
  54.841 +                               &dst.val, dst.bytes) )
  54.842 +                goto done;
  54.843 +        }
  54.844 +        else
  54.845 +        {
  54.846 +            /* Read fault: source is special memory. */
  54.847 +            dst.ptr = (unsigned long *)(_regs.edi - _regs.esi + cr2);
  54.848 +            if ( ops->read_emulated(cr2, &dst.val, dst.bytes) )
  54.849 +                goto done;
  54.850 +        }
  54.851 +        _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
  54.852 +        _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
  54.853 +        break;
  54.854 +    case 0xa6 ... 0xa7: /* cmps */
  54.855 +        DPRINTF("Urk! I don't handle CMPS.\n");
  54.856 +        goto cannot_emulate;
  54.857 +    case 0xaa ... 0xab: /* stos */
  54.858 +        dst.type  = OP_MEM;
  54.859 +        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
  54.860 +        dst.ptr   = (unsigned long *)cr2;
  54.861 +        dst.val   = _regs.eax;
  54.862 +        _regs.edi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
  54.863 +        break;
  54.864 +    case 0xac ... 0xad: /* lods */
  54.865 +        dst.type  = OP_REG;
  54.866 +        dst.bytes = (d & ByteOp) ? 1 : op_bytes;
  54.867 +        dst.ptr   = (unsigned long *)&_regs.eax;
  54.868 +        if ( ops->read_emulated(cr2, &dst.val, dst.bytes) )
  54.869 +            goto done;
  54.870 +        _regs.esi += (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes;
  54.871 +        break;
  54.872 +    case 0xae ... 0xaf: /* scas */
  54.873 +        DPRINTF("Urk! I don't handle SCAS.\n");
  54.874 +        goto cannot_emulate;
  54.875 +    }
  54.876 +    goto writeback;
  54.877 +
  54.878 + twobyte_insn:
  54.879 +    switch ( b )
  54.880 +    {
  54.881 +    case 0x40 ... 0x4f: /* cmov */
  54.882 +        dst.val = dst.orig_val = src.val;
  54.883 +        d &= ~Mov; /* default to no move */
  54.884 +        /* First, assume we're decoding an even cmov opcode (lsb == 0). */
  54.885 +        switch ( (b & 15) >> 1 )
  54.886 +        {
  54.887 +        case 0: /* cmovo */
  54.888 +            d |= (_regs.eflags & EFLG_OF) ? Mov : 0;
  54.889 +            break;
  54.890 +        case 1: /* cmovb/cmovc/cmovnae */
  54.891 +            d |= (_regs.eflags & EFLG_CF) ? Mov : 0;
  54.892 +            break;
  54.893 +        case 2: /* cmovz/cmove */
  54.894 +            d |= (_regs.eflags & EFLG_ZF) ? Mov : 0;
  54.895 +            break;
  54.896 +        case 3: /* cmovbe/cmovna */
  54.897 +            d |= (_regs.eflags & (EFLG_CF|EFLG_ZF)) ? Mov : 0;
  54.898 +            break;
  54.899 +        case 4: /* cmovs */
  54.900 +            d |= (_regs.eflags & EFLG_SF) ? Mov : 0;
  54.901 +            break;
  54.902 +        case 5: /* cmovp/cmovpe */
  54.903 +            d |= (_regs.eflags & EFLG_PF) ? Mov : 0;
  54.904 +            break;
  54.905 +        case 7: /* cmovle/cmovng */
  54.906 +            d |= (_regs.eflags & EFLG_ZF) ? Mov : 0;
  54.907 +            /* fall through */
  54.908 +        case 6: /* cmovl/cmovnge */
  54.909 +            d |= (!(_regs.eflags & EFLG_SF) != !(_regs.eflags & EFLG_OF)) ?
  54.910 +                Mov : 0;
  54.911 +            break;
  54.912 +        }
  54.913 +        /* Odd cmov opcodes (lsb == 1) have inverted sense. */
  54.914 +        d ^= (b & 1) ? Mov : 0;
  54.915 +        break;
  54.916 +    case 0xb0 ... 0xb1: /* cmpxchg */
  54.917 +        /* Save real source value, then compare EAX against destination. */
  54.918 +        src.orig_val = src.val;
  54.919 +        src.val = _regs.eax;
  54.920 +        emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
  54.921 +        /* Always write back. The question is: where to? */
  54.922 +        d |= Mov;
  54.923 +        if ( _regs.eflags & EFLG_ZF )
  54.924 +        {
  54.925 +            /* Success: write back to memory. */
  54.926 +            dst.val = src.orig_val;
  54.927 +        }
  54.928 +        else
  54.929 +        {
  54.930 +            /* Failure: write the value we saw to EAX. */
  54.931 +            dst.type = OP_REG;
  54.932 +            dst.ptr  = (unsigned long *)&_regs.eax;
  54.933 +        }
  54.934 +        break;
  54.935 +    case 0xa3: bt: /* bt */
  54.936 +        src.val &= (1UL << (1 << dst.bytes)) - 1; /* only subword offset */
  54.937 +        emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
  54.938 +        break;
  54.939 +    case 0xb3: btr: /* btr */
  54.940 +        src.val &= (1UL << (1 << dst.bytes)) - 1; /* only subword offset */
  54.941 +        emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
  54.942 +        break;
  54.943 +    case 0xab: bts: /* bts */
  54.944 +        src.val &= (1UL << (1 << dst.bytes)) - 1; /* only subword offset */
  54.945 +        emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
  54.946 +        break;
  54.947 +    case 0xbb: btc: /* btc */
  54.948 +        src.val &= (1UL << (1 << dst.bytes)) - 1; /* only subword offset */
  54.949 +        emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
  54.950 +        break;
  54.951 +    case 0xba: /* Grp8 */
  54.952 +        switch ( modrm_reg >> 1 )
  54.953 +        {
  54.954 +        case 0: goto bt;
  54.955 +        case 1: goto bts;
  54.956 +        case 2: goto btr;
  54.957 +        case 3: goto btc;
  54.958 +        }
  54.959 +        break;
  54.960 +    }
  54.961 +    goto writeback;
  54.962 +
  54.963 + twobyte_special_insn:
  54.964 +    /* Only prefetch instructions get here, so nothing to do. */
  54.965 +    dst.orig_val = dst.val; /* disable writeback */
  54.966 +    goto writeback;
  54.967 +
  54.968 + cannot_emulate:
  54.969 +    DPRINTF("Cannot emulate %02x\n", b);
  54.970 +    return -1;
  54.971 +}
    55.1 --- a/xen/common/domain.c	Tue Mar 08 22:57:09 2005 +0000
    55.2 +++ b/xen/common/domain.c	Wed Mar 09 20:19:23 2005 +0000
    55.3 @@ -283,9 +283,6 @@ int final_setup_guest(struct domain *p, 
    55.4      if ( (rc = arch_final_setup_guest(p->exec_domain[0],c)) != 0 )
    55.5          goto out;
    55.6  
    55.7 -    /* Set up the shared info structure. */
    55.8 -    update_dom_time(p);
    55.9 -
   55.10      set_bit(DF_CONSTRUCTED, &p->d_flags);
   55.11  
   55.12   out:    
   55.13 @@ -339,9 +336,6 @@ long do_boot_vcpu(unsigned long vcpu, fu
   55.14          goto out;
   55.15      }
   55.16  
   55.17 -    /* Set up the shared info structure. */
   55.18 -    update_dom_time(d);
   55.19 -
   55.20      /* domain_unpause_by_systemcontroller */
   55.21      if ( test_and_clear_bit(EDF_CTRLPAUSE, &ed->ed_flags) )
   55.22          domain_wake(ed);
    56.1 --- a/xen/common/grant_table.c	Tue Mar 08 22:57:09 2005 +0000
    56.2 +++ b/xen/common/grant_table.c	Wed Mar 09 20:19:23 2005 +0000
    56.3 @@ -4,6 +4,7 @@
    56.4   * Mechanism for granting foreign access to page frames, and receiving
    56.5   * page-ownership transfers.
    56.6   * 
    56.7 + * Copyright (c) 2005 Christopher Clark
    56.8   * Copyright (c) 2004 K A Fraser
    56.9   * 
   56.10   * This program is free software; you can redistribute it and/or modify
   56.11 @@ -21,8 +22,12 @@
   56.12   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   56.13   */
   56.14  
   56.15 +#define GRANT_DEBUG 1
   56.16 +
   56.17  #include <xen/config.h>
   56.18  #include <xen/sched.h>
   56.19 +#include <asm-x86/mm.h>
   56.20 +#include <asm-x86/shadow.h>
   56.21  
   56.22  #define PIN_FAIL(_rc, _f, _a...)   \
   56.23      do {                           \
   56.24 @@ -50,19 +55,24 @@ put_maptrack_handle(
   56.25      t->maptrack_head = handle;
   56.26  }
   56.27  
   56.28 -static void
   56.29 +static int
   56.30  __gnttab_map_grant_ref(
   56.31 -    gnttab_map_grant_ref_t *uop)
   56.32 +    gnttab_map_grant_ref_t *uop,
   56.33 +    unsigned long *va)
   56.34  {
   56.35 -    domid_t        dom, sdom;
   56.36 -    grant_ref_t    ref;
   56.37 -    struct domain *ld, *rd;
   56.38 -    u16            flags, sflags;
   56.39 -    int            handle;
   56.40 +    domid_t               dom, sdom;
   56.41 +    grant_ref_t           ref;
   56.42 +    struct domain        *ld, *rd;
   56.43 +    struct exec_domain   *led;
   56.44 +    u16                   flags, sflags;
   56.45 +    int                   handle;
   56.46      active_grant_entry_t *act;
   56.47 -    grant_entry_t *sha;
   56.48 -    s16            rc = 0;
   56.49 -    unsigned long  frame;
   56.50 +    grant_entry_t        *sha;
   56.51 +    s16                   rc = 0;
   56.52 +    unsigned long         frame = 0, host_virt_addr;
   56.53 +
   56.54 +    /* Returns 0 if TLB flush / invalidate required by caller.
   56.55 +     * va will indicate the address to be invalidated. */
   56.56  
   56.57      /*
   56.58       * We bound the number of times we retry CMPXCHG on memory locations that
   56.59 @@ -74,23 +84,33 @@ static void
   56.60       */
   56.61      int            retries = 0;
   56.62  
   56.63 -    ld = current->domain;
   56.64 +    led = current;
   56.65 +    ld = led->domain;
   56.66  
   56.67      /* Bitwise-OR avoids short-circuiting which screws control flow. */
   56.68      if ( unlikely(__get_user(dom, &uop->dom) |
   56.69                    __get_user(ref, &uop->ref) |
   56.70 +                  __get_user(host_virt_addr, &uop->host_virt_addr) |
   56.71                    __get_user(flags, &uop->flags)) )
   56.72      {
   56.73          DPRINTK("Fault while reading gnttab_map_grant_ref_t.\n");
   56.74 -        return; /* don't set status */
   56.75 +        return -EFAULT; /* don't set status */
   56.76      }
   56.77  
   56.78 -    if ( unlikely(ref >= NR_GRANT_ENTRIES) || 
   56.79 +    if ( ((host_virt_addr != 0) || (flags & GNTMAP_host_map) ) &&
   56.80 +         unlikely(!__addr_ok(host_virt_addr)))
   56.81 +    {
   56.82 +        DPRINTK("Bad virtual address (%x) or flags (%x).\n", host_virt_addr, flags);
   56.83 +        (void)__put_user(GNTST_bad_virt_addr, &uop->handle);
   56.84 +        return GNTST_bad_gntref;
   56.85 +    }
   56.86 +
   56.87 +    if ( unlikely(ref >= NR_GRANT_ENTRIES) ||
   56.88           unlikely((flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
   56.89      {
   56.90          DPRINTK("Bad ref (%d) or flags (%x).\n", ref, flags);
   56.91          (void)__put_user(GNTST_bad_gntref, &uop->handle);
   56.92 -        return;
   56.93 +        return GNTST_bad_gntref;
   56.94      }
   56.95  
   56.96      if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
   56.97 @@ -100,7 +120,7 @@ static void
   56.98              put_domain(rd);
   56.99          DPRINTK("Could not find domain %d\n", dom);
  56.100          (void)__put_user(GNTST_bad_domain, &uop->handle);
  56.101 -        return;
  56.102 +        return GNTST_bad_domain;
  56.103      }
  56.104  
  56.105      if ( unlikely((handle = get_maptrack_handle(ld->grant_table)) == -1) )
  56.106 @@ -108,14 +128,16 @@ static void
  56.107          put_domain(rd);
  56.108          DPRINTK("No more map handles available\n");
  56.109          (void)__put_user(GNTST_no_device_space, &uop->handle);
  56.110 -        return;
  56.111 +        return GNTST_no_device_space;
  56.112      }
  56.113 +    DPRINTK("Mapping grant ref (%hu) for domain (%hu) with flags (%x)\n",
  56.114 +            ref, dom, flags);
  56.115  
  56.116      act = &rd->grant_table->active[ref];
  56.117      sha = &rd->grant_table->shared[ref];
  56.118  
  56.119      spin_lock(&rd->grant_table->lock);
  56.120 -    
  56.121 +
  56.122      if ( act->pin == 0 )
  56.123      {
  56.124          /* CASE 1: Activating a previously inactive entry. */
  56.125 @@ -148,7 +170,7 @@ static void
  56.126  
  56.127              /* NB. prev_scombo is updated in place to seen value. */
  56.128              if ( unlikely(cmpxchg_user((u32 *)&sha->flags,
  56.129 -                                       prev_scombo, 
  56.130 +                                       prev_scombo,
  56.131                                         new_scombo)) )
  56.132                  PIN_FAIL(GNTST_general_error,
  56.133                           "Fault while modifying shared flags and domid.\n");
  56.134 @@ -168,16 +190,18 @@ static void
  56.135          }
  56.136  
  56.137          /* rmb(); */ /* not on x86 */
  56.138 -        frame = sha->frame;
  56.139 -        if ( unlikely(!pfn_is_ram(frame)) || 
  56.140 -             unlikely(!((flags & GNTMAP_readonly) ? 
  56.141 -                        get_page(&frame_table[frame], rd) : 
  56.142 -                        get_page_and_type(&frame_table[frame], rd, 
  56.143 +
  56.144 +        frame = __translate_gpfn_to_mfn(rd, sha->frame);
  56.145 +
  56.146 +        if ( unlikely(!pfn_is_ram(frame)) ||
  56.147 +             unlikely(!((flags & GNTMAP_readonly) ?
  56.148 +                        get_page(&frame_table[frame], rd) :
  56.149 +                        get_page_and_type(&frame_table[frame], rd,
  56.150                                            PGT_writable_page))) )
  56.151          {
  56.152              clear_bit(_GTF_writing, &sha->flags);
  56.153              clear_bit(_GTF_reading, &sha->flags);
  56.154 -            PIN_FAIL(GNTST_general_error, 
  56.155 +            PIN_FAIL(GNTST_general_error,
  56.156                       "Could not pin the granted frame!\n");
  56.157          }
  56.158  
  56.159 @@ -230,7 +254,9 @@ static void
  56.160                  sflags = prev_sflags;
  56.161              }
  56.162  
  56.163 -            if ( unlikely(!get_page_type(&frame_table[act->frame],
  56.164 +            frame = act->frame;
  56.165 +
  56.166 +            if ( unlikely(!get_page_type(&frame_table[frame],
  56.167                                           PGT_writable_page)) )
  56.168              {
  56.169                  clear_bit(_GTF_writing, &sha->flags);
  56.170 @@ -251,34 +277,91 @@ static void
  56.171      ld->grant_table->maptrack[handle].ref_and_flags =
  56.172          (ref << MAPTRACK_REF_SHIFT) | (flags & MAPTRACK_GNTMAP_MASK);
  56.173  
  56.174 +    if ( (host_virt_addr != 0) && (flags & GNTMAP_host_map) )
  56.175 +    {
  56.176 +        /* Write update into the pagetable
  56.177 +         */
  56.178 +        if ( 0 > (rc = update_grant_va_mapping( host_virt_addr,
  56.179 +                                (frame << PAGE_SHIFT) | _PAGE_PRESENT  |
  56.180 +                                                        _PAGE_ACCESSED |
  56.181 +                                                        _PAGE_DIRTY    |
  56.182 +                       ((flags & GNTMAP_readonly) ? 0 : _PAGE_RW),
  56.183 +                       ld, led )) )
  56.184 +        {
  56.185 +            /* Abort. */
  56.186 +            act->pin -= (flags & GNTMAP_readonly) ?
  56.187 +                GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  56.188 +
  56.189 +            if ( flags & GNTMAP_readonly )
  56.190 +                act->pin -= GNTPIN_hstr_inc;
  56.191 +            else
  56.192 +            {
  56.193 +                act->pin -= GNTPIN_hstw_inc;
  56.194 +                if ( (act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) == 0 )
  56.195 +                    put_page_type(&frame_table[frame]);
  56.196 +
  56.197 +                if ( act->pin == 0 )
  56.198 +                    put_page(&frame_table[frame]);
  56.199 +            }
  56.200 +            goto fail;
  56.201 +        }
  56.202 +
  56.203 +        if ( rc == GNTUPDVA_prev_ro )
  56.204 +            act->pin -= GNTPIN_hstr_inc;
  56.205 +
  56.206 +        if ( rc == GNTUPDVA_prev_rw ) 
  56.207 +        {
  56.208 +            act->pin -= GNTPIN_hstw_inc;
  56.209 +            put_page_type(&frame_table[frame]);
  56.210 +        }
  56.211 +        rc = 0;
  56.212 +        *va = host_virt_addr;
  56.213 +
  56.214 +        /* IMPORTANT: must flush / invalidate entry in TLB.
  56.215 +         * This is done in the outer gnttab_map_grant_ref when return 0.
  56.216 +         */
  56.217 +    }
  56.218 +
  56.219 +    if ( flags & GNTMAP_device_map )
  56.220 +        (void)__put_user(frame,  &uop->dev_bus_addr);
  56.221 +
  56.222      /* Unchecked and unconditional. */
  56.223      (void)__put_user(handle, &uop->handle);
  56.224 -    (void)__put_user(act->frame,  &uop->dev_bus_addr);
  56.225  
  56.226      spin_unlock(&rd->grant_table->lock);
  56.227      put_domain(rd);
  56.228 -    return;
  56.229 +    return 0;
  56.230  
  56.231   fail:
  56.232      (void)__put_user(rc, &uop->handle);
  56.233      spin_unlock(&rd->grant_table->lock);
  56.234      put_domain(rd);
  56.235 -    put_maptrack_handle(ld->grant_table, handle);
  56.236 +    put_maptrack_handle(ld->grant_table, handle); //cwc22: check this
  56.237 +    return rc;
  56.238  }
  56.239  
  56.240  static long
  56.241  gnttab_map_grant_ref(
  56.242      gnttab_map_grant_ref_t *uop, unsigned int count)
  56.243  {
  56.244 -    int i;
  56.245 +    int i, flush = 0;
  56.246 +    unsigned long va;
  56.247 +
  56.248      for ( i = 0; i < count; i++ )
  56.249 -        __gnttab_map_grant_ref(&uop[i]);
  56.250 +        if ( __gnttab_map_grant_ref(&uop[i], &va) == 0)
  56.251 +            flush++;
  56.252 +
  56.253 +    if ( flush == 1 )
  56.254 +        __flush_tlb_one(va);
  56.255 +    else if ( flush )
  56.256 +        local_flush_tlb();
  56.257      return 0;
  56.258  }
  56.259  
  56.260 -static void
  56.261 +static int
  56.262  __gnttab_unmap_grant_ref(
  56.263 -    gnttab_unmap_grant_ref_t *uop)
  56.264 +    gnttab_unmap_grant_ref_t *uop,
  56.265 +    unsigned long *va)
  56.266  {
  56.267      domid_t        dom;
  56.268      grant_ref_t    ref;
  56.269 @@ -288,7 +371,7 @@ static void
  56.270      active_grant_entry_t *act;
  56.271      grant_entry_t *sha;
  56.272      grant_mapping_t *map;
  56.273 -    s16            rc = 0;
  56.274 +    s16            rc = -EFAULT;
  56.275      unsigned long  frame, virt;
  56.276  
  56.277      ld = current->domain;
  56.278 @@ -299,7 +382,7 @@ static void
  56.279                    __get_user(handle, &uop->handle)) )
  56.280      {
  56.281          DPRINTK("Fault while reading gnttab_unmap_grant_ref_t.\n");
  56.282 -        return; /* don't set status */
  56.283 +        return -EFAULT; /* don't set status */
  56.284      }
  56.285  
  56.286      map = &ld->grant_table->maptrack[handle];
  56.287 @@ -309,7 +392,7 @@ static void
  56.288      {
  56.289          DPRINTK("Bad handle (%d).\n", handle);
  56.290          (void)__put_user(GNTST_bad_handle, &uop->status);
  56.291 -        return;
  56.292 +        return GNTST_bad_handle;
  56.293      }
  56.294  
  56.295      dom = map->domid;
  56.296 @@ -322,8 +405,10 @@ static void
  56.297              put_domain(rd);
  56.298          DPRINTK("Could not find domain %d\n", dom);
  56.299          (void)__put_user(GNTST_bad_domain, &uop->status);
  56.300 -        return;
  56.301 +        return GNTST_bad_domain;
  56.302      }
  56.303 +    DPRINTK("Unmapping grant ref (%hu) for domain (%hu) with handle (%hu)\n",
  56.304 +            ref, dom, handle);
  56.305  
  56.306      act = &rd->grant_table->active[ref];
  56.307      sha = &rd->grant_table->shared[ref];
  56.308 @@ -344,12 +429,53 @@ static void
  56.309          frame = act->frame;
  56.310      }
  56.311  
  56.312 -    if ( (virt != 0) && (map->ref_and_flags & GNTMAP_host_map) )
  56.313 +    if ( (virt != 0) &&
  56.314 +         (map->ref_and_flags & GNTMAP_host_map) &&
  56.315 +         ((act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask)) > 0))
  56.316      {
  56.317 -        act->pin -= (map->ref_and_flags & GNTMAP_readonly) ?
  56.318 -            GNTPIN_hstr_inc : GNTPIN_hstw_inc;
  56.319 +        l1_pgentry_t   *pl1e;
  56.320 +        unsigned long   _ol1e;
  56.321 +
  56.322 +        pl1e = &linear_pg_table[l1_linear_offset(virt)];
  56.323 +                                                                                            
  56.324 +        if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) )
  56.325 +        {
  56.326 +            DPRINTK("Could not find PTE entry for address %x\n", virt);
  56.327 +            rc = -EINVAL;
  56.328 +            goto fail;
  56.329 +        }
  56.330 +
  56.331 +        /* check that the virtual address supplied is actually
  56.332 +         * mapped to act->frame.
  56.333 +         */
  56.334 +        if ( unlikely((_ol1e >> PAGE_SHIFT) != frame ))
  56.335 +        {
  56.336 +            DPRINTK("PTE entry %x for address %x doesn't match frame %x\n",
  56.337 +                    _ol1e, virt, frame);
  56.338 +            rc = -EINVAL;
  56.339 +            goto fail;
  56.340 +        }
  56.341 +
  56.342 +        /* This code _requires_ that the act->pin bits are updated
  56.343 +         * if a mapping is ever switched between RO and RW.
  56.344 +         */
  56.345 +        act->pin -= ( _ol1e & _PAGE_RW ) ? GNTPIN_hstw_inc
  56.346 +                                         : GNTPIN_hstr_inc;
  56.347 +
  56.348 +        /* Delete pagetable entry
  56.349 +         */
  56.350 +        if ( unlikely(__put_user(0, (unsigned long *)pl1e)))
  56.351 +        {
  56.352 +            DPRINTK("Cannot delete PTE entry at %x for virtual address %x\n",
  56.353 +                    pl1e, virt);
  56.354 +            rc = -EINVAL;
  56.355 +            goto fail;
  56.356 +        }
  56.357 +        rc = 0;
  56.358 +        *va = virt;
  56.359      }
  56.360  
  56.361 +    /* If the last writable mapping has been removed, put_page_type */
  56.362      if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) &&
  56.363                !(map->ref_and_flags & GNTMAP_readonly) )
  56.364      {
  56.365 @@ -367,15 +493,24 @@ static void
  56.366      (void)__put_user(rc, &uop->status);
  56.367      spin_unlock(&rd->grant_table->lock);
  56.368      put_domain(rd);
  56.369 +    return rc;
  56.370  }
  56.371  
  56.372  static long
  56.373  gnttab_unmap_grant_ref(
  56.374      gnttab_unmap_grant_ref_t *uop, unsigned int count)
  56.375  {
  56.376 -    int i;
  56.377 +    int i, flush = 0;
  56.378 +    unsigned long va = 0;
  56.379 +
  56.380      for ( i = 0; i < count; i++ )
  56.381 -        __gnttab_unmap_grant_ref(&uop[i]);
  56.382 +        if ( __gnttab_unmap_grant_ref(&uop[i], &va) == 0)
  56.383 +            flush++;
  56.384 +
  56.385 +    if ( flush == 1 )
  56.386 +        __flush_tlb_one(va);
  56.387 +    else if ( flush )
  56.388 +        local_flush_tlb();
  56.389      return 0;
  56.390  }
  56.391  
  56.392 @@ -423,7 +558,7 @@ gnttab_setup_table(
  56.393      {
  56.394          ASSERT(d->grant_table != NULL);
  56.395          (void)put_user(GNTST_okay, &uop->status);
  56.396 -        (void)put_user(virt_to_phys(d->grant_table) >> PAGE_SHIFT,
  56.397 +        (void)put_user(virt_to_phys(d->grant_table->shared) >> PAGE_SHIFT,
  56.398                         &uop->frame_list[0]);
  56.399      }
  56.400  
  56.401 @@ -431,15 +566,98 @@ gnttab_setup_table(
  56.402      return 0;
  56.403  }
  56.404  
  56.405 +#ifdef GRANT_DEBUG
  56.406 +static int
  56.407 +gnttab_dump_table(gnttab_dump_table_t *uop)
  56.408 +{
  56.409 +    grant_table_t        *gt;
  56.410 +    gnttab_dump_table_t   op;
  56.411 +    struct domain        *d;
  56.412 +    u32                   shared_mfn;
  56.413 +    active_grant_entry_t *act;
  56.414 +    grant_entry_t         sha_copy;
  56.415 +    grant_mapping_t      *maptrack;
  56.416 +    int                   i;
  56.417 +
  56.418 +
  56.419 +    if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
  56.420 +    {
  56.421 +        DPRINTK("Fault while reading gnttab_dump_table_t.\n");
  56.422 +        return -EFAULT;
  56.423 +    }
  56.424 +
  56.425 +    if ( op.dom == DOMID_SELF )
  56.426 +    {
  56.427 +        op.dom = current->domain->id;
  56.428 +    }
  56.429 +
  56.430 +    if ( unlikely((d = find_domain_by_id(op.dom)) == NULL) )
  56.431 +    {
  56.432 +        DPRINTK("Bad domid %d.\n", op.dom);
  56.433 +        (void)put_user(GNTST_bad_domain, &uop->status);
  56.434 +        return 0;
  56.435 +    }
  56.436 +
  56.437 +    ASSERT(d->grant_table != NULL);
  56.438 +    gt = d->grant_table;
  56.439 +    (void)put_user(GNTST_okay, &uop->status);
  56.440 +
  56.441 +    shared_mfn = virt_to_phys(d->grant_table->shared);
  56.442 +
  56.443 +    DPRINTK("Grant table for dom (%hu) MFN (%x)\n",
  56.444 +            op.dom, shared_mfn);
  56.445 +
  56.446 +    spin_lock(&gt->lock);
  56.447 +
  56.448 +    ASSERT(d->grant_table->active != NULL);
  56.449 +    ASSERT(d->grant_table->shared != NULL);
  56.450 +
  56.451 +    for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
  56.452 +    {
  56.453 +        act      = &gt->active[i];
  56.454 +        sha_copy =  gt->shared[i];
  56.455 +
  56.456 +        if ( act->pin || act->domid || act->frame ||
  56.457 +             sha_copy.flags || sha_copy.domid || sha_copy.frame )
  56.458 +        {
  56.459 +            DPRINTK("Grant: dom (%hu) ACTIVE (%d) pin:(%x) dom:(%hu) frame:(%u)\n",
  56.460 +                    op.dom, i, act->pin, act->domid, act->frame);
  56.461 +            DPRINTK("Grant: dom (%hu) SHARED (%d) flags:(%hx) dom:(%hu) frame:(%u)\n",
  56.462 +                    op.dom, i, sha_copy.flags, sha_copy.domid, sha_copy.frame);
  56.463 +
  56.464 +        }
  56.465 +
  56.466 +    }
  56.467 +
  56.468 +    ASSERT(d->grant_table->maptrack != NULL);
  56.469 +
  56.470 +    for ( i = 0; i < NR_MAPTRACK_ENTRIES; i++ )
  56.471 +    {
  56.472 +        maptrack = &gt->maptrack[i];
  56.473 +
  56.474 +        if ( maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK )
  56.475 +        {
  56.476 +            DPRINTK("Grant: dom (%hu) MAP (%d) ref:(%hu) flags:(%x) dom:(%hu)\n",
  56.477 +                    op.dom, i,
  56.478 +                    maptrack->ref_and_flags >> MAPTRACK_REF_SHIFT,
  56.479 +                    maptrack->ref_and_flags & MAPTRACK_GNTMAP_MASK,
  56.480 +                    maptrack->domid);
  56.481 +        }
  56.482 +    }
  56.483 +
  56.484 +    spin_unlock(&gt->lock);
  56.485 +
  56.486 +    put_domain(d);
  56.487 +    return 0;
  56.488 +}
  56.489 +#endif
  56.490 +
  56.491  long 
  56.492  do_grant_table_op(
  56.493      unsigned int cmd, void *uop, unsigned int count)
  56.494  {
  56.495      long rc;
  56.496  
  56.497 -    /* XXX stubbed out XXX */
  56.498 -    return -ENOSYS;
  56.499 -
  56.500      if ( count > 512 )
  56.501          return -EINVAL;
  56.502  
  56.503 @@ -462,6 +680,11 @@ do_grant_table_op(
  56.504      case GNTTABOP_setup_table:
  56.505          rc = gnttab_setup_table((gnttab_setup_table_t *)uop, count);
  56.506          break;
  56.507 +#ifdef GRANT_DEBUG
  56.508 +    case GNTTABOP_dump_table:
  56.509 +        rc = gnttab_dump_table((gnttab_dump_table_t *)uop);
  56.510 +        break;
  56.511 +#endif
  56.512      default:
  56.513          rc = -ENOSYS;
  56.514          break;
  56.515 @@ -476,6 +699,11 @@ int
  56.516  gnttab_check_unmap(
  56.517      struct domain *rd, struct domain *ld, unsigned long frame, int readonly)
  56.518  {
  56.519 +    /* TODO: beat the caller around the head with a brick.
  56.520 +     *       have to walk the grant tables to find this thing.
  56.521 +     */
  56.522 +    /*DPRINTK("gnttab_check_unmap remote dom(%d) local dom(%d) frame (%x) flags(%x).\n",
  56.523 +            rd->id, ld->id, frame, readonly);*/
  56.524      return 0;
  56.525  }
  56.526  
  56.527 @@ -552,8 +780,17 @@ gnttab_prepare_for_transfer(
  56.528  
  56.529  void 
  56.530  gnttab_notify_transfer(
  56.531 -    struct domain *rd, grant_ref_t ref, unsigned long frame)
  56.532 +    struct domain *rd, grant_ref_t ref, unsigned long sframe)
  56.533  {
  56.534 +    unsigned long frame;
  56.535 +
  56.536 +    /* cwc22
  56.537 +     * TODO: this requires that the machine_to_phys_mapping
  56.538 +     *       has already been updated, so the accept_transfer hypercall
  56.539 +     *       must do this.
  56.540 +     */
  56.541 +    frame = __mfn_to_gpfn(rd, sframe);
  56.542 +
  56.543      wmb(); /* Ensure that the reassignment is globally visible. */
  56.544      rd->grant_table->shared[ref].frame = frame;
  56.545  }
  56.546 @@ -608,6 +845,74 @@ grant_table_create(
  56.547  }
  56.548  
  56.549  void
  56.550 +gnttab_release_all_mappings(grant_table_t *gt)
  56.551 +{
  56.552 +    grant_mapping_t        *map;
  56.553 +    domid_t                 dom;
  56.554 +    grant_ref_t             ref;
  56.555 +    u16                     handle;
  56.556 +    u32                     pincount;
  56.557 +    struct domain          *ld, *rd;
  56.558 +    unsigned long           frame;
  56.559 +    active_grant_entry_t   *act;
  56.560 +    grant_entry_t          *sha;
  56.561 +
  56.562 +    ld = current->domain;
  56.563 +
  56.564 +    for ( handle = 0; handle < NR_MAPTRACK_ENTRIES; handle++ )
  56.565 +    {
  56.566 +        map = &gt->maptrack[handle];
  56.567 +                                                                                        
  56.568 +        if ( map->ref_and_flags & MAPTRACK_GNTMAP_MASK )
  56.569 +        {
  56.570 +            dom = map->domid;
  56.571 +            ref = map->ref_and_flags >> MAPTRACK_REF_SHIFT;
  56.572 +
  56.573 +            DPRINTK("Grant release (%hu) ref:(%hu) flags:(%x) dom:(%hu)\n",
  56.574 +                    handle, ref,
  56.575 +                    map->ref_and_flags & MAPTRACK_GNTMAP_MASK, dom);
  56.576 +
  56.577 +            if ( unlikely((rd = find_domain_by_id(dom)) == NULL) ||
  56.578 +                 unlikely(ld == rd) )
  56.579 +            {
  56.580 +                if ( rd != NULL )
  56.581 +                    put_domain(rd);
  56.582 +
  56.583 +                printk(KERN_WARNING "Grant release: Could not find domain %d\n", dom);
  56.584 +                continue;
  56.585 +            }
  56.586 +
  56.587 +            act = &rd->grant_table->active[ref];
  56.588 +            sha = &rd->grant_table->shared[ref];
  56.589 +
  56.590 +            spin_lock(&rd->grant_table->lock);
  56.591 +
  56.592 +            frame = act->frame;
  56.593 +
  56.594 +            pincount = ((act->pin & GNTPIN_hstw_mask) >> GNTPIN_hstw_shift) +
  56.595 +                       ((act->pin & GNTPIN_devw_mask) >> GNTPIN_devw_shift);
  56.596 +
  56.597 +            if ( pincount > 0 )
  56.598 +                put_page_type(&frame_table[frame]);
  56.599 +
  56.600 +            if (act->pin)
  56.601 +                put_page(&frame_table[frame]);
  56.602 +
  56.603 +            act->pin = 0;
  56.604 +
  56.605 +            clear_bit(_GTF_reading, &sha->flags);
  56.606 +            clear_bit(_GTF_writing, &sha->flags);
  56.607 +
  56.608 +            spin_unlock(&rd->grant_table->lock);
  56.609 +
  56.610 +            map->ref_and_flags = 0;
  56.611 +
  56.612 +            put_domain(rd);
  56.613 +        }
  56.614 +    }
  56.615 +}
  56.616 +
  56.617 +void
  56.618  grant_table_destroy(
  56.619      struct domain *d)
  56.620  {
  56.621 @@ -615,6 +920,9 @@ grant_table_destroy(
  56.622  
  56.623      if ( (t = d->grant_table) != NULL )
  56.624      {
  56.625 +        if ( t->maptrack != NULL )
  56.626 +            gnttab_release_all_mappings(t);
  56.627 +
  56.628          /* Free memory relating to this grant table. */
  56.629          d->grant_table = NULL;
  56.630          free_xenheap_page((unsigned long)t->shared);
  56.631 @@ -629,6 +937,7 @@ grant_table_init(
  56.632      void)
  56.633  {
  56.634      /* Nothing. */
  56.635 +    DPRINTK("Grant table init\n");
  56.636  }
  56.637  
  56.638  /*
    57.1 --- a/xen/common/page_alloc.c	Tue Mar 08 22:57:09 2005 +0000
    57.2 +++ b/xen/common/page_alloc.c	Wed Mar 09 20:19:23 2005 +0000
    57.3 @@ -393,6 +393,13 @@ void init_xenheap_pages(unsigned long ps
    57.4  
    57.5      memguard_guard_range(__va(ps), pe - ps);
    57.6  
    57.7 +    /*
    57.8 +     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
    57.9 +     * prevent merging of power-of-two blocks across the zone boundary.
   57.10 +     */
   57.11 +    if ( !IS_XEN_HEAP_FRAME(phys_to_page(pe)) )
   57.12 +        pe -= PAGE_SIZE;
   57.13 +
   57.14      local_irq_save(flags);
   57.15      init_heap_pages(MEMZONE_XEN, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
   57.16      local_irq_restore(flags);
    58.1 --- a/xen/common/physdev.c	Tue Mar 08 22:57:09 2005 +0000
    58.2 +++ b/xen/common/physdev.c	Wed Mar 09 20:19:23 2005 +0000
    58.3 @@ -85,31 +85,93 @@ static phys_dev_t *find_pdev(struct doma
    58.4  }
    58.5  
    58.6  /* Add a device to a per-domain device-access list. */
    58.7 -static void add_dev_to_task(struct domain *p, 
    58.8 -                            struct pci_dev *dev, int acc)
    58.9 +static int add_dev_to_task(struct domain *p, struct pci_dev *dev, 
   58.10 +                           int acc)
   58.11  {
   58.12 -    phys_dev_t *pdev;
   58.13 +    phys_dev_t *physdev;
   58.14      
   58.15 -    if ( (pdev = find_pdev(p, dev)) )
   58.16 +    if ( (physdev = xmalloc(phys_dev_t)) == NULL )
   58.17      {
   58.18 -        /* Sevice already on list: update access permissions. */
   58.19 -        pdev->flags = acc;
   58.20 -        return;
   58.21 +        INFO("Error allocating pdev structure.\n");
   58.22 +        return -ENOMEM;
   58.23 +    }
   58.24 +    
   58.25 +    physdev->dev = dev;
   58.26 +    physdev->flags = acc;
   58.27 +    physdev->state = 0;
   58.28 +    list_add(&physdev->node, &p->pcidev_list);
   58.29 +
   58.30 +    if ( acc == ACC_WRITE )
   58.31 +        physdev->owner = p;
   58.32 +
   58.33 +    return 0;
   58.34 +}
   58.35 +
   58.36 +/* Remove a device from a per-domain device-access list. */
   58.37 +static void remove_dev_from_task(struct domain *p, struct pci_dev *dev)
   58.38 +{
   58.39 +    phys_dev_t *physdev = find_pdev(p, dev);
   58.40 +
   58.41 +    if ( physdev == NULL )
   58.42 +        BUG();
   58.43 +    
   58.44 +    list_del(&physdev->node);
   58.45 +
   58.46 +    xfree(physdev);
   58.47 +}
   58.48 +
   58.49 +static int setup_ioport_memory_access(domid_t dom, struct domain* p, 
   58.50 +                                      struct exec_domain* ed,
   58.51 +                                      struct pci_dev *pdev)
   58.52 +{
   58.53 +    struct exec_domain* edc;
   58.54 +    int i, j;
   58.55 +
   58.56 +    /* Now, setup access to the IO ports and memory regions for the device. */
   58.57 +    if ( ed->arch.io_bitmap == NULL )
   58.58 +    {
   58.59 +        if ( (ed->arch.io_bitmap = xmalloc_array(u8, IOBMP_BYTES)) == NULL )
   58.60 +            return -ENOMEM;
   58.61 +
   58.62 +        memset(ed->arch.io_bitmap, 0xFF, IOBMP_BYTES);
   58.63 +
   58.64 +        ed->arch.io_bitmap_sel = ~0ULL;
   58.65 +
   58.66 +        for_each_exec_domain(p, edc) {
   58.67 +            if (edc == ed)
   58.68 +                continue;
   58.69 +            edc->arch.io_bitmap = ed->arch.io_bitmap;
   58.70 +        }
   58.71      }
   58.72  
   58.73 -    if ( (pdev = xmalloc(phys_dev_t)) == NULL )
   58.74 +    for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
   58.75      {
   58.76 -        INFO("Error allocating pdev structure.\n");
   58.77 -        return;
   58.78 +        struct resource *r = &pdev->resource[i];
   58.79 +        
   58.80 +        if ( r->flags & IORESOURCE_IO )
   58.81 +        {
   58.82 +            /* Give the domain access to the IO ports it needs.  Currently,
   58.83 +             * this will allow all processes in that domain access to those
   58.84 +             * ports as well.  This will do for now, since driver domains don't
   58.85 +             * run untrusted processes! */
   58.86 +            INFO("Giving domain %u IO resources (%lx - %lx) "
   58.87 +                 "for device %s\n", dom, r->start, r->end, pdev->slot_name);
   58.88 +            for ( j = r->start; j < r->end + 1; j++ )
   58.89 +            {
   58.90 +                clear_bit(j, ed->arch.io_bitmap);
   58.91 +                clear_bit(j / IOBMP_BITS_PER_SELBIT, &ed->arch.io_bitmap_sel);
   58.92 +            }
   58.93 +        }
   58.94 +        /* rights to IO memory regions are checked when the domain maps them */
   58.95      }
   58.96 -    
   58.97 -    pdev->dev = dev;
   58.98 -    pdev->flags = acc;
   58.99 -    pdev->state = 0;
  58.100 -    list_add(&pdev->node, &p->pcidev_list);
  58.101  
  58.102 -    if ( acc == ACC_WRITE )
  58.103 -        pdev->owner = p;
  58.104 +    for_each_exec_domain(p, edc) {
  58.105 +        if (edc == ed)
  58.106 +            continue;
  58.107 +        edc->arch.io_bitmap_sel = ed->arch.io_bitmap_sel;
  58.108 +    }
  58.109 +
  58.110 +    return 0;
  58.111  }
  58.112  
  58.113  /*
  58.114 @@ -120,13 +182,15 @@ static void add_dev_to_task(struct domai
  58.115   * bridge, then the domain should get access to all the leaf devices below
  58.116   * that bridge (XXX this is unimplemented!).
  58.117   */
  58.118 -int physdev_pci_access_modify(
  58.119 -    domid_t dom, int bus, int dev, int func, int enable)
  58.120 +int physdev_pci_access_modify(domid_t dom, int bus, int dev, int func, 
  58.121 +                              int enable)
  58.122  {
  58.123      struct domain *p;
  58.124 -    struct exec_domain *ed, *edc;
  58.125 +    struct exec_domain *ed;
  58.126      struct pci_dev *pdev;
  58.127 -    int i, j, rc = 0;
  58.128 +    phys_dev_t *physdev;
  58.129 +    int rc = 0;
  58.130 +    int oldacc = -1, allocated_physdev = 0;
  58.131  
  58.132      if ( !IS_PRIV(current->domain) )
  58.133          BUG();
  58.134 @@ -158,66 +222,47 @@ int physdev_pci_access_modify(
  58.135      {
  58.136          INFO("  dev does not exist\n");
  58.137          rc = -ENODEV;
  58.138 -        goto out;
  58.139 +        goto clear_privilege;
  58.140      }
  58.141 -    add_dev_to_task(p, pdev, ACC_WRITE);
  58.142 +    
  58.143 +    if ( (physdev = find_pdev(p, pdev)) != NULL) {
   56.144 +        /* Device already on list: update access permissions. */
  58.145 +        oldacc = physdev->flags;
  58.146 +        physdev->flags = ACC_WRITE;
  58.147 +    } else {
  58.148 +        if ( (rc = add_dev_to_task(p, pdev, ACC_WRITE)) < 0)
  58.149 +            goto clear_privilege;
  58.150 +        allocated_physdev = 1;
  58.151 +    }
  58.152  
  58.153      INFO("  add RW %02x:%02x:%02x\n", pdev->bus->number,
  58.154           PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  58.155  
  58.156      /* Is the device a bridge or cardbus? */
  58.157 -    if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL )
  58.158 +    if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL ) {
  58.159          INFO("XXX can't give access to bridge devices yet\n");
  58.160 -
  58.161 -    /* Now, setup access to the IO ports and memory regions for the device. */
  58.162 -
  58.163 -    if ( ed->arch.io_bitmap == NULL )
  58.164 -    {
  58.165 -        if ( (ed->arch.io_bitmap = xmalloc_array(u8, IOBMP_BYTES)) == NULL )
  58.166 -        {
  58.167 -            rc = -ENOMEM;
  58.168 -            goto out;
  58.169 -        }
  58.170 -        memset(ed->arch.io_bitmap, 0xFF, IOBMP_BYTES);
  58.171 -
  58.172 -        ed->arch.io_bitmap_sel = ~0ULL;
  58.173 -
  58.174 -        for_each_exec_domain(p, edc) {
  58.175 -            if (edc == ed)
  58.176 -                continue;
  58.177 -            edc->arch.io_bitmap = ed->arch.io_bitmap;
  58.178 -        }
  58.179 +        rc = -EPERM;
  58.180 +        goto remove_dev;
  58.181      }
  58.182  
  58.183 -    for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
  58.184 -    {
  58.185 -        struct resource *r = &pdev->resource[i];
  58.186 -        
  58.187 -        if ( r->flags & IORESOURCE_IO )
  58.188 -        {
  58.189 -            /* Give the domain access to the IO ports it needs.  Currently,
  58.190 -             * this will allow all processes in that domain access to those
  58.191 -             * ports as well.  This will do for now, since driver domains don't
  58.192 -             * run untrusted processes! */
  58.193 -            INFO("Giving domain %u IO resources (%lx - %lx) "
  58.194 -                 "for device %s\n", dom, r->start, r->end, pdev->slot_name);
  58.195 -            for ( j = r->start; j < r->end + 1; j++ )
  58.196 -            {
  58.197 -                clear_bit(j, ed->arch.io_bitmap);
  58.198 -                clear_bit(j / IOBMP_BITS_PER_SELBIT, &ed->arch.io_bitmap_sel);
  58.199 -            }
  58.200 -        }
  58.201 +    if ( (rc = setup_ioport_memory_access(dom, p, ed, pdev)) < 0 )
  58.202 +        goto remove_dev;
  58.203  
  58.204 -        /* rights to IO memory regions are checked when the domain maps them */
  58.205 -    }
  58.206 +    put_domain(p);
  58.207 +    return rc;
  58.208  
  58.209 -    for_each_exec_domain(p, edc) {
  58.210 -        if (edc == ed)
  58.211 -            continue;
  58.212 -        edc->arch.io_bitmap_sel = ed->arch.io_bitmap_sel;
  58.213 +remove_dev:
  58.214 +    if (allocated_physdev) {
  58.215 +        /* new device was added - remove it from the list */
  58.216 +        remove_dev_from_task(p, pdev);
  58.217 +    } else {
  58.218 +        /* device already existed - just undo the access changes */
  58.219 +        physdev->flags = oldacc;
  58.220      }
  58.221 -
  58.222 - out:
  58.223 +    
  58.224 +clear_privilege:
  58.225 +    clear_bit(DF_PHYSDEV, &p->d_flags);
  58.226 +    clear_bit(DF_PRIVILEGED, &p->d_flags);
  58.227      put_domain(p);
  58.228      return rc;
  58.229  }
  58.230 @@ -708,7 +753,9 @@ long do_physdev_op(physdev_op_t *uop)
  58.231          break;
  58.232      }
  58.233  
  58.234 -    copy_to_user(uop, &op, sizeof(op));
  58.235 +    if (copy_to_user(uop, &op, sizeof(op)))
  58.236 +        ret = -EFAULT;
  58.237 +
  58.238      return ret;
  58.239  }
  58.240  
  58.241 @@ -764,7 +811,12 @@ void physdev_init_dom0(struct domain *p)
  58.242          if ( (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) &&
  58.243               (dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) )
  58.244              continue;
  58.245 -        pdev = xmalloc(phys_dev_t);
  58.246 +
  58.247 +        if ( (pdev = xmalloc(phys_dev_t)) == NULL ) {
  58.248 +            INFO("failed to allocate physical device structure!\n");
  58.249 +            break;
  58.250 +        }
  58.251 +
  58.252          pdev->dev = dev;
  58.253          pdev->flags = ACC_WRITE;
  58.254          pdev->state = 0;
    59.1 --- a/xen/common/schedule.c	Tue Mar 08 22:57:09 2005 +0000
    59.2 +++ b/xen/common/schedule.c	Wed Mar 09 20:19:23 2005 +0000
    59.3 @@ -414,10 +414,6 @@ void __enter_scheduler(void)
    59.4  
    59.5      spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    59.6  
    59.7 -    /* Ensure that the domain has an up-to-date time base. */
    59.8 -    if ( !is_idle_task(next->domain) )
    59.9 -        update_dom_time(next->domain);
   59.10 -
   59.11      if ( unlikely(prev == next) )
   59.12          return;
   59.13      
   59.14 @@ -450,23 +446,11 @@ void __enter_scheduler(void)
   59.15  
   59.16      TRACE_2D(TRC_SCHED_SWITCH, next->domain->id, next);
   59.17  
   59.18 -    switch_to(prev, next);
   59.19 +    /* Ensure that the domain has an up-to-date time base. */
   59.20 +    if ( !is_idle_task(next->domain) && update_dom_time(next) )
   59.21 +        send_guest_virq(next, VIRQ_TIMER);
   59.22  
   59.23 -    /*
   59.24 -     * We do this late on because it doesn't need to be protected by the
   59.25 -     * schedule_lock, and because we want this to be the very last use of
   59.26 -     * 'prev' (after this point, a dying domain's info structure may be freed
   59.27 -     * without warning). 
   59.28 -     */
   59.29 -    clear_bit(EDF_RUNNING, &prev->ed_flags);
   59.30 -
   59.31 -    /* Mark a timer event for the newly-scheduled domain. */
   59.32 -    if ( !is_idle_task(next->domain) )
   59.33 -        send_guest_virq(next, VIRQ_TIMER);
   59.34 -    
   59.35 -    schedule_tail(next);
   59.36 -
   59.37 -    BUG();
   59.38 +    context_switch(prev, next);
   59.39  }
   59.40  
   59.41  /* No locking needed -- pointer comparison is safe :-) */
   59.42 @@ -499,11 +483,8 @@ static void t_timer_fn(unsigned long unu
   59.43  
   59.44      TRACE_0D(TRC_SCHED_T_TIMER_FN);
   59.45  
   59.46 -    if ( !is_idle_task(ed->domain) )
   59.47 -    {
   59.48 -        update_dom_time(ed->domain);
   59.49 +    if ( !is_idle_task(ed->domain) && update_dom_time(ed) )
   59.50          send_guest_virq(ed, VIRQ_TIMER);
   59.51 -    }
   59.52  
   59.53      t_timer[ed->processor].expires = NOW() + MILLISECS(10);
   59.54      add_ac_timer(&t_timer[ed->processor]);
   59.55 @@ -515,7 +496,7 @@ static void dom_timer_fn(unsigned long d
   59.56      struct exec_domain *ed = (struct exec_domain *)data;
   59.57  
   59.58      TRACE_0D(TRC_SCHED_DOM_TIMER_FN);
   59.59 -    update_dom_time(ed->domain);
   59.60 +    (void)update_dom_time(ed);
   59.61      send_guest_virq(ed, VIRQ_TIMER);
   59.62  }
   59.63  
    60.1 --- a/xen/include/asm-x86/mm.h	Tue Mar 08 22:57:09 2005 +0000
    60.2 +++ b/xen/include/asm-x86/mm.h	Wed Mar 09 20:19:23 2005 +0000
    60.3 @@ -336,4 +336,14 @@ void audit_domains(void);
    60.4  
    60.5  void propagate_page_fault(unsigned long addr, u16 error_code);
    60.6  
    60.7 +/* update_grant_va_mapping
    60.8 + * Caller must own d's BIGLOCK, is responsible for flushing the TLB,
    60.9 + * and have already get_page'd */
   60.10 +int update_grant_va_mapping(unsigned long va,
   60.11 +                            unsigned long val,
   60.12 +                            struct domain *d,
   60.13 +                            struct exec_domain *ed);
   60.14 +#define GNTUPDVA_prev_ro 1
   60.15 +#define GNTUPDVA_prev_rw 2
   60.16 +
   60.17  #endif /* __ASM_X86_MM_H__ */
    61.1 --- a/xen/include/asm-x86/shadow.h	Tue Mar 08 22:57:09 2005 +0000
    61.2 +++ b/xen/include/asm-x86/shadow.h	Wed Mar 09 20:19:23 2005 +0000
    61.3 @@ -61,6 +61,62 @@ extern void vmx_shadow_clear_state(struc
    61.4        ? phys_to_machine_mapping(gpfn)                  \
    61.5        : (gpfn) )
    61.6  
    61.7 +#define __translate_gpfn_to_mfn(_d, gpfn)              \
    61.8 +    ( (shadow_mode_translate(_d))                      \
    61.9 +      ? translate_gpfn_to_mfn(_d, gpfn)                \
   61.10 +      : (gpfn) )
   61.11 +
   61.12 +static inline unsigned long
   61.13 +translate_gpfn_to_mfn(struct domain *rd, unsigned long gpfn)
   61.14 +{
   61.15 +    unsigned long       ma_of_phys_to_mach;
   61.16 +    l2_pgentry_t       *l2_table;
   61.17 +    l2_pgentry_t        l2_entry;
   61.18 +    unsigned long       ma_of_l1_table;
   61.19 +    l1_pgentry_t       *l1_table;
   61.20 +    l1_pgentry_t        pte;
   61.21 +    unsigned long       mfn = 0;
   61.22 +
   61.23 +    /*
   61.24 +     * translation of: (domain, gpfn) -> mfn
   61.25 +     * where domain != current, and is in translate shadow mode
   61.26 +     */
   61.27 +
   61.28 +    ASSERT( shadow_mode_translate(rd) );
   61.29 +
   61.30 +    shadow_lock(rd);
   61.31 +
   61.32 +    /* TODO: check using shadow_lock is correct
   61.33 +     * TODO: move arch.phys_table from exec_domain to domain
   61.34 +     *       - use of zero index is a hack - FIXME
   61.35 +     */
   61.36 +
   61.37 +    ma_of_phys_to_mach = pagetable_val( (rd->exec_domain[0])->arch.phys_table );
   61.38 +
   61.39 +    l2_table = (l2_pgentry_t *) map_domain_mem( ma_of_phys_to_mach );
   61.40 +    l2_entry = l2_table[ gpfn >> (L2_PAGETABLE_SHIFT - PAGE_SHIFT) ];
   61.41 +
   61.42 +    unmap_domain_mem( l2_table );
   61.43 +
   61.44 +    if ( l2_pgentry_val(l2_entry) == 0 )
   61.45 +        goto unlock_out;
   61.46 +
   61.47 +    ma_of_l1_table = l2_pgentry_to_phys( l2_entry );
   61.48 +
   61.49 +    l1_table = (l1_pgentry_t *) map_domain_mem( ma_of_l1_table );
   61.50 +    pte      = l1_table[ (gpfn >> (L1_PAGETABLE_SHIFT - PAGE_SHIFT)) &
   61.51 +                         (L1_PAGETABLE_ENTRIES - 1 ) ];
   61.52 +
   61.53 +    unmap_domain_mem( l1_table );
   61.54 +
   61.55 +    mfn = l1_pgentry_to_pfn(pte);
   61.56 +
   61.57 +unlock_out:
   61.58 +    shadow_unlock(rd);
   61.59 +
   61.60 +    return mfn;
   61.61 +}
   61.62 +
   61.63  extern void __shadow_mode_disable(struct domain *d);
   61.64  static inline void shadow_mode_disable(struct domain *d)
   61.65  {
    62.1 --- a/xen/include/asm-x86/x86_64/string.h	Tue Mar 08 22:57:09 2005 +0000
    62.2 +++ b/xen/include/asm-x86/x86_64/string.h	Wed Mar 09 20:19:23 2005 +0000
    62.3 @@ -1,69 +1,6 @@
    62.4  #ifndef _X86_64_STRING_H_
    62.5  #define _X86_64_STRING_H_
    62.6  
    62.7 -#ifdef __KERNEL__
    62.8 -
    62.9 -#define struct_cpy(x,y) (*(x)=*(y))
   62.10 -
   62.11 -/* Written 2002 by Andi Kleen */ 
   62.12 -
   62.13 -/* Only used for special circumstances. Stolen from i386/string.h */ 
   62.14 -static inline void * __inline_memcpy(void * to, const void * from, size_t n)
   62.15 -{
   62.16 -unsigned long d0, d1, d2;
   62.17 -__asm__ __volatile__(
   62.18 -	"rep ; movsl\n\t"
   62.19 -	"testb $2,%b4\n\t"
   62.20 -	"je 1f\n\t"
   62.21 -	"movsw\n"
   62.22 -	"1:\ttestb $1,%b4\n\t"
   62.23 -	"je 2f\n\t"
   62.24 -	"movsb\n"
   62.25 -	"2:"
   62.26 -	: "=&c" (d0), "=&D" (d1), "=&S" (d2)
   62.27 -	:"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
   62.28 -	: "memory");
   62.29 -return (to);
   62.30 -}
   62.31 -
   62.32 -/* Even with __builtin_ the compiler may decide to use the out of line
   62.33 -   function. */
   62.34 -
   62.35 -#define __HAVE_ARCH_MEMCPY 1
   62.36 -extern void *__memcpy(void *to, const void *from, size_t len); 
   62.37 -#define memcpy(dst,src,len) \
   62.38 -	({ size_t __len = (len);				\
   62.39 -	   void *__ret;						\
   62.40 -	   if (__builtin_constant_p(len) && __len >= 64)	\
   62.41 -		 __ret = __memcpy((dst),(src),__len);		\
   62.42 -	   else							\
   62.43 -		 __ret = __builtin_memcpy((dst),(src),__len);	\
   62.44 -	   __ret; }) 
   62.45 -
   62.46 -
   62.47 -#define __HAVE_ARCH_MEMSET
   62.48 -#define memset __builtin_memset
   62.49 -
   62.50 -#define __HAVE_ARCH_MEMMOVE
   62.51 -void * memmove(void * dest,const void *src,size_t count);
   62.52 -
   62.53 -/* Use C out of line version for memcmp */ 
   62.54 -#define memcmp __builtin_memcmp
   62.55 -int memcmp(const void * cs,const void * ct,size_t count);
   62.56 -
   62.57 -/* out of line string functions use always C versions */ 
   62.58 -#define strlen __builtin_strlen
   62.59 -size_t strlen(const char * s);
   62.60 -
   62.61 -#define strcpy __builtin_strcpy
   62.62 -char * strcpy(char * dest,const char *src);
   62.63 -
   62.64 -#define strcat __builtin_strcat
   62.65 -char * strcat(char * dest, const char * src);
   62.66 -
   62.67 -#define strcmp __builtin_strcmp
   62.68 -int strcmp(const char * cs,const char * ct);
   62.69 -
   62.70 -#endif /* __KERNEL__ */
   62.71 +/* nothing */
   62.72  
   62.73  #endif
    63.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    63.2 +++ b/xen/include/asm-x86/x86_emulate.h	Wed Mar 09 20:19:23 2005 +0000
    63.3 @@ -0,0 +1,131 @@
    63.4 +/******************************************************************************
    63.5 + * x86_emulate.h
    63.6 + * 
    63.7 + * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
    63.8 + * 
    63.9 + * Copyright (c) 2005 Keir Fraser
   63.10 + */
   63.11 +
   63.12 +#ifndef __X86_EMULATE_H__
   63.13 +#define __X86_EMULATE_H__
   63.14 +
   63.15 +/*
   63.16 + * x86_mem_emulator:
   63.17 + * 
   63.18 + * These operations represent the instruction emulator's interface to memory.
   63.19 + * There are two categories of operation: those that act on ordinary memory
   63.20 + * regions (*_std), and those that act on memory regions known to require
   63.21 + * special treatment or emulation (*_emulated).
   63.22 + * 
   63.23 + * The emulator assumes that an instruction accesses only one 'emulated memory'
   63.24 + * location, and that this is one of its data operands. Instruction fetches and
   63.25 + * stack operations are assumed never to access emulated memory. The emulator
   63.26 + * automatically deduces which operand of a string-move operation is accessing
   63.27 + * emulated memory, and requires that the other operand accesses normal memory.
   63.28 + * 
   63.29 + * NOTES:
   63.30 + *  1. The emulator isn't very smart about emulated vs. standard memory.
   63.31 + *     'Emulated memory' access addresses should be checked for sanity.
   63.32 + *     'Normal memory' accesses may fault, and the caller must arrange to
   63.33 + *     detect and handle reentrancy into the emulator via recursive faults.
   63.34 + *     Accesses may be unaligned and may cross page boundaries.
   63.35 + *  2. If the access fails (cannot emulate, or a standard access faults) then
   63.36 + *     it is up to the memop to propagate the fault to the guest VM via
   63.37 + *     some out-of-band mechanism, unknown to the emulator. The memop signals
   63.38 + *     failure by returning a non-zero value to the emulator, which will then
   63.39 + *     immediately bail.
   63.40 + */
   63.41 +struct x86_mem_emulator
   63.42 +{
   63.43 +    /*
   63.44 +     * read_std: Read bytes of standard (non-emulated/special) memory.
   63.45 +     *           Used for instruction fetch, stack operations, and others.
   63.46 +     *  @addr:  [IN ] Linear address from which to read.
   63.47 +     *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
   63.48 +     *  @bytes: [IN ] Number of bytes to read from memory.
   63.49 +     */
   63.50 +    int (*read_std)(
   63.51 +        unsigned long addr,
   63.52 +        unsigned long *val,
   63.53 +        unsigned int bytes);
   63.54 +
   63.55 +    /*
   63.56 +     * write_std: Write bytes of standard (non-emulated/special) memory.
   63.57 +     *            Used for stack operations, and others.
   63.58 +     *  @addr:  [IN ] Linear address to which to write.
   63.59 +     *  @val:   [IN ] Value to write to memory (low-order bytes used as req'd).
   63.60 +     *  @bytes: [IN ] Number of bytes to write to memory.
   63.61 +     */
   63.62 +    int (*write_std)(
   63.63 +        unsigned long addr,
   63.64 +        unsigned long val,
   63.65 +        unsigned int bytes);
   63.66 +
   63.67 +    /*
   63.68 +     * read_emulated: Read bytes from emulated/special memory area.
   63.69 +     *  @addr:  [IN ] Linear address from which to read.
   63.70 +     *  @val:   [OUT] Value read from memory, zero-extended to 'u_long'.
   63.71 +     *  @bytes: [IN ] Number of bytes to read from memory.
   63.72 +     */
   63.73 +    int (*read_emulated)(
   63.74 +        unsigned long addr,
   63.75 +        unsigned long *val,
   63.76 +        unsigned int bytes);
   63.77 +
   63.78 +    /*
   63.79 +     * write_emulated: Write bytes to emulated/special memory area.
   63.80 +     *  @addr:  [IN ] Linear address to which to write.
   63.81 +     *  @val:   [IN ] Value to write to memory (low-order bytes used as req'd).
   63.82 +     *  @bytes: [IN ] Number of bytes to write to memory.
   63.83 +     */
   63.84 +    int (*write_emulated)(
   63.85 +        unsigned long addr,
   63.86 +        unsigned long val,
   63.87 +        unsigned int bytes);
   63.88 +
   63.89 +    /*
   63.90 +     * cmpxchg_emulated: Emulate an atomic (LOCKed) CMPXCHG operation on an
   63.91 +     *                   emulated/special memory area.
   63.92 +     *  @addr:  [IN ] Linear address to access.
   63.93 +     *  @old:   [IN ] Value expected to be current at @addr.
   63.94 +     *  @new:   [IN ] Value to write to @addr.
   63.95 +     *  @seen:  [OUT] Value actually seen at @addr, zero-extended to 'u_long'.
   63.96 +     *  @bytes: [IN ] Number of bytes to access using CMPXCHG.
   63.97 +     */
   63.98 +    int (*cmpxchg_emulated)(
   63.99 +        unsigned long addr,
  63.100 +        unsigned long old, 
  63.101 +        unsigned long new,
  63.102 +        unsigned long *seen,
  63.103 +        unsigned int bytes);
  63.104 +};
  63.105 +
  63.106 +
  63.107 +struct xen_regs;
  63.108 +
  63.109 +/*
  63.110 + * x86_emulate_memop: Emulate an instruction that faulted attempting to
  63.111 + *                    read/write a 'special' memory area.
  63.112 + *  @regs: Register state at time of fault.
  63.113 + *  @cr2:  Linear faulting address.
  63.114 + *  @ops:  Interface to access special memory.
  63.115 + *  @mode: Current execution mode, represented by the default size of memory
  63.116 + *         addresses, in bytes. Valid values are 2, 4 and 8 (x86/64 only).
  63.117 + */
  63.118 +extern int
  63.119 +x86_emulate_memop(
  63.120 +    struct xen_regs *regs,
  63.121 +    unsigned long cr2,
  63.122 +    struct x86_mem_emulator *ops,
  63.123 +    int mode);
  63.124 +
  63.125 +/*
  63.126 + * Given the 'reg' portion of a ModRM byte, and a register block, return a
  63.127 + * pointer into the block that addresses the relevant register.
  63.128 + * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  63.129 + */
  63.130 +extern void *
  63.131 +decode_register(
  63.132 +    u8 modrm_reg, struct xen_regs *regs, int highbyte_regs);
  63.133 +
  63.134 +#endif /* __X86_EMULATE_H__ */
    64.1 --- a/xen/include/public/arch-x86_64.h	Tue Mar 08 22:57:09 2005 +0000
    64.2 +++ b/xen/include/public/arch-x86_64.h	Wed Mar 09 20:19:23 2005 +0000
    64.3 @@ -95,18 +95,18 @@
    64.4  
    64.5  /*
    64.6   * int HYPERVISOR_switch_to_user(void)
    64.7 - *  All arguments are on the kernel stack, in the following format.
    64.8 + * All arguments are on the kernel stack, in the following format.
    64.9   * Never returns if successful. Current kernel context is lost.
   64.10   * If flags contains ECF_IN_SYSCALL:
   64.11 - *   Restore RIP, RFLAGS, RSP. 
   64.12 + *   Restore RAX, RIP, RFLAGS, RSP. 
   64.13   *   Discard R11, RCX, CS, SS.
   64.14   * Otherwise:
   64.15 - *   Restore R11, RCX, CS:RIP, RFLAGS, SS:RSP.
   64.16 + *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
   64.17   * All other registers are saved on hypercall entry and restored to user.
   64.18   */
   64.19  struct switch_to_user {
   64.20      /* Top of stack (%rsp at point of hypercall). */
   64.21 -    u64 r11, rcx, flags, rip, cs, rflags, rsp, ss;
   64.22 +    u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
   64.23      /* Bottom of switch_to_user stack frame. */
   64.24  } PACKED;
   64.25  
    65.1 --- a/xen/include/public/grant_table.h	Tue Mar 08 22:57:09 2005 +0000
    65.2 +++ b/xen/include/public/grant_table.h	Wed Mar 09 20:19:23 2005 +0000
    65.3 @@ -207,6 +207,19 @@ typedef struct {
    65.4  } PACKED gnttab_setup_table_t; /* 16 bytes */
    65.5  
    65.6  /*
    65.7 + * GNTTABOP_dump_table: Dump the contents of the grant table to the
    65.8 + * xen console. Debugging use only.
    65.9 + */
   65.10 +#define GNTTABOP_dump_table           3
   65.11 +typedef struct {
   65.12 +    /* IN parameters. */
   65.13 +    domid_t     dom;                  /*  0 */
   65.14 +    /* OUT parameters. */
   65.15 +    s16         status;               /* 2: GNTST_* */
   65.16 +} PACKED gnttab_dump_table_t; /* 4 bytes */
   65.17 +
   65.18 +
   65.19 +/*
   65.20   * Bitfield values for update_pin_status.flags.
   65.21   */
   65.22   /* Map the grant entry for access by I/O devices. */
   65.23 @@ -233,9 +246,10 @@ typedef struct {
   65.24  #define GNTST_general_error    (-1) /* General undefined error.              */
   65.25  #define GNTST_bad_domain       (-2) /* Unrecognsed domain id.                */
   65.26  #define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
   65.27 -#define GNTST_bad_handle       (-3) /* Unrecognised or inappropriate handle. */
   65.28 -#define GNTST_no_device_space  (-4) /* Out of space in I/O MMU.              */
   65.29 -#define GNTST_permission_denied (-5) /* Not enough privilege for operation.  */
   65.30 +#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
   65.31 +#define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
   65.32 +#define GNTST_no_device_space  (-6) /* Out of space in I/O MMU.              */
   65.33 +#define GNTST_permission_denied (-7) /* Not enough privilege for operation.  */
   65.34  
   65.35  #define GNTTABOP_error_msgs {                   \
   65.36      "okay",                                     \
   65.37 @@ -243,9 +257,20 @@ typedef struct {
   65.38      "unrecognised domain id",                   \
   65.39      "invalid grant reference",                  \
   65.40      "invalid mapping handle",                   \
   65.41 +    "invalid virtual address",                  \
   65.42      "no spare translation slot in the I/O MMU", \
   65.43      "permission denied"                         \
   65.44  }
   65.45          
   65.46 +
   65.47 +typedef struct {
   65.48 +    union {                           /*  0 */
   65.49 +        gnttab_map_grant_ref_t    map_grant_ref;
   65.50 +        gnttab_unmap_grant_ref_t  unmap_grant_ref;
   65.51 +        gnttab_setup_table_t      setup_table;
   65.52 +        gnttab_dump_table_t       dump_table;
   65.53 +        u8                        __dummy[24];
   65.54 +    } PACKED u;
   65.55 +} PACKED gnttab_op_t; /* 32 bytes */
   65.56  
   65.57  #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
    66.1 --- a/xen/include/public/io/blkif.h	Tue Mar 08 22:57:09 2005 +0000
    66.2 +++ b/xen/include/public/io/blkif.h	Wed Mar 09 20:19:23 2005 +0000
    66.3 @@ -57,8 +57,7 @@ typedef struct {
    66.4   * Generate blkif ring structures and types.
    66.5   */
    66.6  
    66.7 -#define BLKIF_RING RING_PARAMS(blkif_request_t, blkif_response_t, PAGE_SIZE)
    66.8 -DEFINE_RING_TYPES(blkif, BLKIF_RING);
    66.9 +DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t, PAGE_SIZE);
   66.10  
   66.11  /*
   66.12   * BLKIF_OP_PROBE:
    67.1 --- a/xen/include/public/io/domain_controller.h	Tue Mar 08 22:57:09 2005 +0000
    67.2 +++ b/xen/include/public/io/domain_controller.h	Wed Mar 09 20:19:23 2005 +0000
    67.3 @@ -45,9 +45,8 @@ typedef u32 CONTROL_RING_IDX;
    67.4   * CONTROL_RING_MEM is currently an 8-slot ring of ctrl_msg_t structs and
    67.5   * two 32-bit counters:  (64 * 8) + (2 * 4) = 520
    67.6   */
    67.7 -#define CONTROL_RING_MEM 520 
    67.8 -#define CTRL_RING RING_PARAMS(control_msg_t, control_msg_t, CONTROL_RING_MEM)
    67.9 -DEFINE_RING_TYPES(ctrl, CTRL_RING);
   67.10 +#define CONTROL_RING_MEM 520
   67.11 +DEFINE_RING_TYPES(ctrl, control_msg_t, control_msg_t, CONTROL_RING_MEM);
   67.12  
   67.13  typedef struct {
   67.14      ctrl_sring_t tx_ring; /*    0: guest -> controller  */
    68.1 --- a/xen/include/public/io/ring.h	Tue Mar 08 22:57:09 2005 +0000
    68.2 +++ b/xen/include/public/io/ring.h	Wed Mar 09 20:19:23 2005 +0000
    68.3 @@ -8,49 +8,21 @@
    68.4  
    68.5  typedef unsigned int RING_IDX;
    68.6  
    68.7 -/* This is horrible: it rounds a 32-bit unsigned constant down to the
    68.8 - * nearest power of two, by finding the highest set bit. */
    68.9 -#define __RD2PO2(_x) (((_x) & 0x80000000) ? 0x80000000 :                \
   68.10 -                      ((_x) & 0x40000000) ? 0x40000000 :                \
   68.11 -                      ((_x) & 0x20000000) ? 0x20000000 :                \
   68.12 -                      ((_x) & 0x10000000) ? 0x10000000 :                \
   68.13 -                      ((_x) & 0x08000000) ? 0x08000000 :                \
   68.14 -                      ((_x) & 0x04000000) ? 0x04000000 :                \
   68.15 -                      ((_x) & 0x02000000) ? 0x02000000 :                \
   68.16 -                      ((_x) & 0x01000000) ? 0x01000000 :                \
   68.17 -                      ((_x) & 0x00800000) ? 0x00800000 :                \
   68.18 -                      ((_x) & 0x00400000) ? 0x00400000 :                \
   68.19 -                      ((_x) & 0x00200000) ? 0x00200000 :                \
   68.20 -                      ((_x) & 0x00100000) ? 0x00100000 :                \
   68.21 -                      ((_x) & 0x00080000) ? 0x00080000 :                \
   68.22 -                      ((_x) & 0x00040000) ? 0x00040000 :                \
   68.23 -                      ((_x) & 0x00020000) ? 0x00020000 :                \
   68.24 -                      ((_x) & 0x00010000) ? 0x00010000 :                \
   68.25 -                      ((_x) & 0x00008000) ? 0x00008000 :                \
   68.26 -                      ((_x) & 0x00004000) ? 0x00004000 :                \
   68.27 -                      ((_x) & 0x00002000) ? 0x00002000 :                \
   68.28 -                      ((_x) & 0x00001000) ? 0x00001000 :                \
   68.29 -                      ((_x) & 0x00000800) ? 0x00000800 :                \
   68.30 -                      ((_x) & 0x00000400) ? 0x00000400 :                \
   68.31 -                      ((_x) & 0x00000200) ? 0x00000200 :                \
   68.32 -                      ((_x) & 0x00000100) ? 0x00000100 :                \
   68.33 -                      ((_x) & 0x00000080) ? 0x00000080 :                \
   68.34 -                      ((_x) & 0x00000040) ? 0x00000040 :                \
   68.35 -                      ((_x) & 0x00000020) ? 0x00000020 :                \
   68.36 -                      ((_x) & 0x00000010) ? 0x00000010 :                \
   68.37 -                      ((_x) & 0x00000008) ? 0x00000008 :                \
   68.38 -                      ((_x) & 0x00000004) ? 0x00000004 :                \
   68.39 -                      ((_x) & 0x00000002) ? 0x00000002 :                \
   68.40 -                      ((_x) & 0x00000001) ? 0x00000001 : 0x00000000)
   68.41 +/* Round a 32-bit unsigned constant down to the nearest power of two. */
   68.42 +#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
   68.43 +#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
   68.44 +#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
   68.45 +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
   68.46 +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
   68.47  
   68.48 -/* Given a shared ring, tell me how many entries there are in it.  The
   68.49 - * rule is: a ring contains as many entries as will fit, rounded down to
   68.50 - * the nearest power of two (so we can mask with (size-1) to loop
   68.51 - * around) */
   68.52 -#define __SRING_SIZE(__params, __esize)                                 \
   68.53 -    __RD2PO2((sizeof((__params)->size) - (2 * sizeof(RING_IDX))) / (__esize))
   68.54 -#define SRING_SIZE(__params, __sringp)                                  \
   68.55 -    __SRING_SIZE(__params, sizeof (__sringp)->ring[0])
   68.56 +/*
   68.57 + * Calculate size of a shared ring, given the total available space for the
   68.58 + * ring and indexes (_sz), and the name tag of the request/response structure.
   68.59 + * A ring contains as many entries as will fit, rounded down to the nearest
   68.60 + * power of two (so we can mask with (size-1) to loop around).
   68.61 + */
   68.62 +#define __RING_SIZE(_name, _sz)                                         \
   68.63 +    (__RD32(((_sz) - 2*sizeof(RING_IDX)) / sizeof(union _name##_sring_entry)))
   68.64  
   68.65  /*
   68.66   *  Macros to make the correct C datatypes for a new kind of ring.
   68.67 @@ -62,8 +34,7 @@ typedef unsigned int RING_IDX;
   68.68   *
   68.69   *  In a header where you want the ring datatype declared, you then do:
   68.70   *
   68.71 - *   #define MY_RING RING_PARAMS(request_t, response_t, PAGE_SIZE)
   68.72 - *   DEFINE_RING_TYPES(mytag, MY_RING);
   68.73 + *     DEFINE_RING_TYPES(mytag, request_t, response_t, PAGE_SIZE);
   68.74   *
   68.75   *  These expand out to give you a set of types, as you can see below.
   68.76   *  The most important of these are:
   68.77 @@ -72,51 +43,29 @@ typedef unsigned int RING_IDX;
   68.78   *     mytag_front_ring_t - The 'front' half of the ring.
   68.79   *     mytag_back_ring_t  - The 'back' half of the ring.
   68.80   *
   68.81 - *  Use the RING_PARAMS define (MY_RING above) as a first parameter on all
   68.82 - *  the ring functions.  To initialize a ring in your code, on the front 
   68.83 - *  half, you do a:
   68.84 + *  To initialize a ring in your code, on the front half, you do:
   68.85   *
   68.86   *      mytag_front_ring_t front_ring;
   68.87   *
   68.88 - *      SHARED_RING_INIT(MY_RING, (mytag_sring_t *)shared_page)
   68.89 - *      FRONT_RING_INIT(MY_RING, &front_ring, (mytag_sring_t *)shared_page)
   68.90 + *      SHARED_RING_INIT((mytag_sring_t *)shared_page);
   68.91 + *      FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page);
   68.92   *
   68.93   *  Initializing the back follows similarly...
   68.94   */
   68.95           
   68.96 -/*  NB: RING SIZING. (a note to ease future debugging...)
   68.97 - *
   68.98 - *  Passing size information into the ring macros is made difficult by 
   68.99 - *  the lack of a reasonable constant declaration in C.  To get around this,
  68.100 - *  the RING_PARAMS define places the requested size of the ring as the 
  68.101 - *  static size of the 'size' array in the anonymous RING_PARAMS struct.
  68.102 - *  While this struct is never actually instantiated, __SRING_SIZE is 
  68.103 - *  able to use sizeof() to get at the constant size.
  68.104 - */
  68.105 -
  68.106 -#define RING_PARAMS(__req_t, __rsp_t, __size)                           \
  68.107 -((struct {                                                              \
  68.108 -    char size[__size];                                                  \
  68.109 -    __req_t req;                                                        \
  68.110 -    __rsp_t rsp;                                                        \
  68.111 -                                                                        \
  68.112 -} *) 0)
  68.113 -
  68.114 -
  68.115 -#define DEFINE_RING_TYPES(__name, __params)                             \
  68.116 +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t, __size)             \
  68.117                                                                          \
  68.118  /* Shared ring entry */                                                 \
  68.119  union __name##_sring_entry {                                            \
  68.120 -    typeof ((__params)->req) req;                                       \
  68.121 -    typeof ((__params)->rsp) rsp;                                       \
  68.122 +    __req_t req;                                                        \
  68.123 +    __rsp_t rsp;                                                        \
  68.124  } PACKED;                                                               \
  68.125                                                                          \
  68.126  /* Shared ring page */                                                  \
  68.127  struct __name##_sring {                                                 \
  68.128      RING_IDX req_prod;                                                  \
  68.129      RING_IDX rsp_prod;                                                  \
  68.130 -    union __name##_sring_entry                                          \
  68.131 -        ring[__SRING_SIZE(__params, sizeof (union __name##_sring_entry))];        \
  68.132 +    union __name##_sring_entry ring[__RING_SIZE(__name, __size)];       \
  68.133  } PACKED;                                                               \
  68.134                                                                          \
  68.135  /* "Front" end's private variables */                                   \
  68.136 @@ -149,106 +98,97 @@ typedef struct __name##_back_ring __name
  68.137   * 
  68.138   *   N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.  
  68.139   *   This is OK in 1-for-1 request-response situations where the 
  68.140 - *   requestor (front end) never has more than SRING_SIZE()-1
  68.141 + *   requestor (front end) never has more than RING_SIZE()-1
  68.142   *   outstanding requests.
  68.143   */
  68.144  
  68.145  
  68.146  /* Initialising empty rings */
  68.147 -#define SHARED_RING_INIT(_p, _s) do {                                   \
  68.148 +#define SHARED_RING_INIT(_s) do {                                       \
  68.149      (_s)->req_prod = 0;                                                 \
  68.150      (_s)->rsp_prod = 0;                                                 \
  68.151  } while(0)
  68.152  
  68.153 -#define FRONT_RING_INIT(_p, _r, _s) do {                                \
  68.154 +#define FRONT_RING_INIT(_r, _s) do {                                    \
  68.155      (_r)->req_prod_pvt = 0;                                             \
  68.156      (_r)->rsp_cons = 0;                                                 \
  68.157      (_r)->sring = (_s);                                                 \
  68.158  } while (0)
  68.159  
  68.160 -#define BACK_RING_INIT(_p, _r, _s) do {                                 \
  68.161 +#define BACK_RING_INIT(_r, _s) do {                                     \
  68.162      (_r)->rsp_prod_pvt = 0;                                             \
  68.163      (_r)->req_cons = 0;                                                 \
  68.164      (_r)->sring = (_s);                                                 \
  68.165  } while (0)
  68.166  
  68.167  /* Initialize to existing shared indexes -- for recovery */
  68.168 -#define FRONT_RING_ATTACH(_p, _r, _s) do {                              \
  68.169 +#define FRONT_RING_ATTACH(_r, _s) do {                                  \
  68.170      (_r)->sring = (_s);                                                 \
  68.171      (_r)->req_prod_pvt = (_s)->req_prod;                                \
  68.172      (_r)->rsp_cons = (_s)->rsp_prod;                                    \
  68.173  } while (0)
  68.174  
  68.175 -#define BACK_RING_ATTACH(_p, _r, _s) do {                               \
  68.176 +#define BACK_RING_ATTACH(_r, _s) do {                                   \
  68.177      (_r)->sring = (_s);                                                 \
  68.178      (_r)->rsp_prod_pvt = (_s)->rsp_prod;                                \
  68.179      (_r)->req_cons = (_s)->req_prod;                                    \
  68.180  } while (0)
  68.181  
  68.182 -
  68.183 -/* How to mask off a number for use as an offset into a ring 
  68.184 - * N.B. This evalutes its second argument once but its first often */
  68.185 -#define __SHARED_RING_MASK(_p, _s, _i)                                  \
  68.186 -    ((_i) & (SRING_SIZE((_p), (_s)) - 1))
  68.187 -
  68.188  /* How big is this ring? */
  68.189 -#define RING_SIZE(_p, _r) SRING_SIZE((_p), (_r)->sring)
  68.190 +#define RING_SIZE(_r)                                                   \
  68.191 +    (sizeof((_r)->sring->ring)/sizeof((_r)->sring->ring[0]))
  68.192  
  68.193  /* How many empty slots are on a ring? */
  68.194 -#define RING_PENDING_REQUESTS(_p, _r)                                   \
  68.195 +#define RING_PENDING_REQUESTS(_r)                                       \
  68.196     ( ((_r)->req_prod_pvt - (_r)->rsp_cons) )
  68.197     
  68.198  /* Test if there is an empty slot available on the front ring. 
  68.199   * (This is only meaningful from the front. )
  68.200   */
  68.201 -#define RING_FULL(_p, _r)                                               \
  68.202 -    (((_r)->req_prod_pvt - (_r)->rsp_cons) == SRING_SIZE((_p), (_r)->sring))
  68.203 +#define RING_FULL(_r)                                                   \
  68.204 +    (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r))
  68.205  
  68.206  /* Test if there are outstanding messages to be processed on a ring. */
  68.207 -#define RING_HAS_UNCONSUMED_RESPONSES(_p, _r)                           \
  68.208 +#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
  68.209     ( (_r)->rsp_cons != (_r)->sring->rsp_prod )
  68.210     
  68.211 -#define RING_HAS_UNCONSUMED_REQUESTS(_p, _r)                            \
  68.212 +#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
  68.213     ( ((_r)->req_cons != (_r)->sring->req_prod ) &&                      \
  68.214       (((_r)->req_cons - (_r)->rsp_prod_pvt) !=                          \
  68.215 -      SRING_SIZE((_p), (_r)->sring)) )
  68.216 +      RING_SIZE(_r)) )
  68.217        
  68.218  /* Test if there are messages waiting to be pushed. */
  68.219 -#define RING_HAS_UNPUSHED_REQUESTS(_p, _r)                              \
  68.220 +#define RING_HAS_UNPUSHED_REQUESTS(_r)                                  \
  68.221     ( (_r)->req_prod_pvt != (_r)->sring->req_prod )
  68.222     
  68.223 -#define RING_HAS_UNPUSHED_RESPONSES(_p, _r)                             \
  68.224 +#define RING_HAS_UNPUSHED_RESPONSES(_r)                                 \
  68.225     ( (_r)->rsp_prod_pvt != (_r)->sring->rsp_prod )
  68.226 -   
  68.227  
  68.228  /* Copy the private producer pointer into the shared ring so the other end 
  68.229   * can see the updates we've made. */
  68.230 -#define RING_PUSH_REQUESTS(_p, _r) do {                                 \
  68.231 +#define RING_PUSH_REQUESTS(_r) do {                                     \
  68.232      wmb();                                                              \
  68.233      (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
  68.234  } while (0)
  68.235  
  68.236 -#define RING_PUSH_RESPONSES(_p, _r) do {                                \
  68.237 +#define RING_PUSH_RESPONSES(_r) do {                                    \
  68.238      wmb();                                                              \
  68.239      (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
  68.240  } while (0)
  68.241  
  68.242 -/* Direct access to individual ring elements, by index.  
  68.243 - */
  68.244 -#define RING_GET_REQUEST(_p, _r, _idx)                                  \
  68.245 +/* Direct access to individual ring elements, by index. */
  68.246 +#define RING_GET_REQUEST(_r, _idx)                                      \
  68.247   (&((_r)->sring->ring[                                                  \
  68.248 -     __SHARED_RING_MASK((_p), (_r)->sring, (_idx))                      \
  68.249 +     ((_idx) & (RING_SIZE(_r) - 1))                                     \
  68.250       ].req))
  68.251  
  68.252 -#define RING_GET_RESPONSE(_p, _r, _idx)                                 \
  68.253 +#define RING_GET_RESPONSE(_r, _idx)                                     \
  68.254   (&((_r)->sring->ring[                                                  \
  68.255 -     __SHARED_RING_MASK((_p), (_r)->sring, (_idx))                      \
  68.256 +     ((_idx) & (RING_SIZE(_r) - 1))                                     \
  68.257       ].rsp))   
  68.258      
  68.259 -/* Loop termination condition: Would the specified index overflow the 
  68.260 - * ring? 
  68.261 - */
  68.262 -#define RING_REQUEST_CONS_OVERFLOW(_p, _r, _cons)                      \
  68.263 -    (((_cons) - (_r)->rsp_prod_pvt) >= SRING_SIZE((_p), (_r)->sring))
  68.264 +/* Loop termination condition: Would the specified index overflow the ring? */
  68.265 +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
  68.266 +    (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
  68.267  
  68.268  #endif /* __XEN_PUBLIC_IO_RING_H__ */
    69.1 --- a/xen/include/public/io/usbif.h	Tue Mar 08 22:57:09 2005 +0000
    69.2 +++ b/xen/include/public/io/usbif.h	Wed Mar 09 20:19:23 2005 +0000
    69.3 @@ -54,8 +54,7 @@ typedef struct {
    69.4  #define USBIF_RSP_ERROR  -1 /* non-specific 'error' */
    69.5  #define USBIF_RSP_OKAY    0 /* non-specific 'okay'  */
    69.6  
    69.7 -#define USBIF_RING RING_PARAMS(usbif_request_t, usbif_response_t, PAGE_SIZE)
    69.8 -DEFINE_RING_TYPES(usbif, USBIF_RING);
    69.9 +DEFINE_RING_TYPES(usbif, usbif_request_t, usbif_response_t, PAGE_SIZE);
   69.10  
   69.11  typedef struct {
   69.12      unsigned long length; /* IN = expected, OUT = actual */
    70.1 --- a/xen/include/xen/grant_table.h	Tue Mar 08 22:57:09 2005 +0000
    70.2 +++ b/xen/include/xen/grant_table.h	Wed Mar 09 20:19:23 2005 +0000
    70.3 @@ -105,4 +105,9 @@ void
    70.4  gnttab_notify_transfer(
    70.5      struct domain *rd, grant_ref_t ref, unsigned long frame);
    70.6  
    70.7 +/* Pre-domain destruction release of all mappings of other domains */
    70.8 +void
    70.9 +gnttab_release_all_mappings(grant_table_t *gt);
   70.10 +
   70.11 +
   70.12  #endif /* __XEN_GRANT_H__ */
    71.1 --- a/xen/include/xen/sched.h	Tue Mar 08 22:57:09 2005 +0000
    71.2 +++ b/xen/include/xen/sched.h	Wed Mar 09 20:19:23 2005 +0000
    71.3 @@ -251,8 +251,9 @@ void domain_sleep(struct exec_domain *d)
    71.4  
    71.5  void __enter_scheduler(void);
    71.6  
    71.7 -extern void switch_to(struct exec_domain *prev, 
    71.8 -                      struct exec_domain *next);
    71.9 +extern void context_switch(
   71.10 +    struct exec_domain *prev, 
   71.11 +    struct exec_domain *next);
   71.12  
   71.13  void domain_init(void);
   71.14  
    72.1 --- a/xen/include/xen/time.h	Tue Mar 08 22:57:09 2005 +0000
    72.2 +++ b/xen/include/xen/time.h	Wed Mar 09 20:19:23 2005 +0000
    72.3 @@ -54,7 +54,7 @@ s_time_t get_s_time(void);
    72.4  #define MILLISECS(_ms)  (((s_time_t)(_ms)) * 1000000ULL )
    72.5  #define MICROSECS(_us)  (((s_time_t)(_us)) * 1000ULL )
    72.6  
    72.7 -extern void update_dom_time(struct domain *d);
    72.8 +extern int update_dom_time(struct exec_domain *ed);
    72.9  extern void do_settime(unsigned long secs, unsigned long usecs, 
   72.10                         u64 system_time_base);
   72.11