direct-io.hg

changeset 3430:10e3000a78b4

bitkeeper revision 1.1159.1.531 (41e647dade9qic3amH6tFdmo5cAYRw)

Merge tempest.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into tempest.cl.cam.ac.uk:/local/scratch/smh22/xen-unstable.bk
author smh22@tempest.cl.cam.ac.uk
date Thu Jan 13 10:05:14 2005 +0000 (2005-01-13)
parents b25937a57863 cd78cc21face
children 657d51f27c5b
files .hgtags BitKeeper/etc/ignore buildconfigs/mk.linux-2.6-xen0 buildconfigs/mk.linux-2.6-xenU linux-2.6.10-xen-sparse/arch/xen/kernel/fixup.c linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c linux-2.6.10-xen-sparse/drivers/xen/blkfront/block.h linux-2.6.10-xen-sparse/drivers/xen/blkfront/vbd.c tools/Makefile tools/ioemu/Makefile tools/ioemu/gui/Makefile tools/ioemu/iodev/Makefile tools/ioemu/memory/Makefile tools/ioemu/memory/misc_mem.cc tools/libxc/xc.h tools/libxc/xc_linux_save.c tools/misc/netfix tools/misc/xend tools/misc/xensv tools/misc/xm tools/python/Makefile xen/arch/x86/memory.c xen/arch/x86/traps.c xen/arch/x86/x86_32/seg_fixup.c xen/common/string.c xen/include/asm-x86/mm.h xen/include/asm-x86/processor.h xen/include/asm-x86/x86_32/string.h
line diff
     1.1 --- a/.hgtags	Tue Jan 11 09:28:54 2005 +0000
     1.2 +++ b/.hgtags	Thu Jan 13 10:05:14 2005 +0000
     1.3 @@ -1,6 +1,7 @@
     1.4  42882b3e0dda89f3a8ec00da568f86e9b3c230f1 RELEASE-2.0.0
     1.5  475a162b66e2c19b1e9468b234a4ba705334905e RELEASE-2.0.1
     1.6  dc2f08429f17e6614fd2f1ab88cc09ca0a850f32 RELEASE-2.0.2
     1.7 +6e1bbc13911751efa0b1c018425c1b085820fa02 RELEASE-2.0.3
     1.8  487b2ee37d1cecb5f3e7a546b05ad097a0226f2f beta1
     1.9  3d330e41f41ce1bc118c02346e18949ad5d67f6b latest-semistable
    1.10  30c521db4c71960b0cf1d9c9e1b658e77b535a3e latest-stable
     2.1 --- a/BitKeeper/etc/ignore	Tue Jan 11 09:28:54 2005 +0000
     2.2 +++ b/BitKeeper/etc/ignore	Thu Jan 13 10:05:14 2005 +0000
     2.3 @@ -20,29 +20,9 @@ docs/*.log
     2.4  docs/*.pdf
     2.5  docs/*.ps
     2.6  docs/*.toc
     2.7 -docs/figs/xenserver.eps
     2.8  docs/html/*
     2.9 -docs/interface/WARNINGS
    2.10 -docs/interface/images.pl
    2.11 -docs/interface/images.tex
    2.12 -docs/interface/img1.png
    2.13 -docs/interface/index.html
    2.14 -docs/interface/interface.css
    2.15 -docs/interface/interface.html
    2.16 -docs/interface/labels.pl
    2.17  docs/pdf/*
    2.18  docs/ps/*
    2.19 -docs/user/WARNINGS
    2.20 -docs/user/images.pl
    2.21 -docs/user/images.tex
    2.22 -docs/user/img1.png
    2.23 -docs/user/img2.png
    2.24 -docs/user/img3.png
    2.25 -docs/user/index.html
    2.26 -docs/user/internals.pl
    2.27 -docs/user/labels.pl
    2.28 -docs/user/user.css
    2.29 -docs/user/user.html
    2.30  extras/mini-os/h/hypervisor-ifs
    2.31  install/*
    2.32  linux-*-xen0/*
    2.33 @@ -61,6 +41,7 @@ pristine-*
    2.34  tools/*/build/lib*/*.py
    2.35  tools/balloon/balloon
    2.36  tools/check/.*
    2.37 +tools/ioemu/iodev/device-model
    2.38  tools/libxc/xen/*
    2.39  tools/misc/miniterm/miniterm
    2.40  tools/misc/xen_cpuperf
     3.1 --- a/buildconfigs/mk.linux-2.6-xen0	Tue Jan 11 09:28:54 2005 +0000
     3.2 +++ b/buildconfigs/mk.linux-2.6-xen0	Thu Jan 13 10:05:14 2005 +0000
     3.3 @@ -11,12 +11,14 @@ include buildconfigs/Rules.mk
     3.4  
     3.5  .PHONY: build clean delete
     3.6  
     3.7 +# The real action starts here!
     3.8  build: $(LINUX_DIR)/include/linux/autoconf.h
     3.9 -	$(MAKE) -C $(LINUX_DIR) ARCH=xen modules
    3.10 -	$(MAKE) -C $(LINUX_DIR) ARCH=xen INSTALL_MOD_PATH=$(INSTALL_DIR) modules_install
    3.11 +	if grep "^CONFIG_MODULES=" $(LINUX_DIR)/.config ; then \
    3.12 +	    $(MAKE) -C $(LINUX_DIR) ARCH=xen modules ; \
    3.13 +	    $(MAKE) -C $(LINUX_DIR) ARCH=xen INSTALL_MOD_PATH=$(INSTALL_DIR) modules_install ; \
    3.14 +	fi
    3.15  	$(MAKE) -C $(LINUX_DIR) ARCH=xen INSTALL_PATH=$(INSTALL_DIR) install
    3.16  
    3.17 -# The real action starts here!
    3.18  $(LINUX_DIR)/include/linux/autoconf.h: pristine-linux-$(LINUX_VER)
    3.19  	rm -rf $(LINUX_DIR)
    3.20  	cp -al $< $(LINUX_DIR)
     4.1 --- a/buildconfigs/mk.linux-2.6-xenU	Tue Jan 11 09:28:54 2005 +0000
     4.2 +++ b/buildconfigs/mk.linux-2.6-xenU	Thu Jan 13 10:05:14 2005 +0000
     4.3 @@ -13,11 +13,12 @@ include buildconfigs/Rules.mk
     4.4  
     4.5  # The real action starts here!
     4.6  build: $(LINUX_DIR)/include/linux/autoconf.h
     4.7 -	$(MAKE) -C $(LINUX_DIR) ARCH=xen modules
     4.8 -	$(MAKE) -C $(LINUX_DIR) ARCH=xen INSTALL_MOD_PATH=$(INSTALL_DIR) modules_install
     4.9 +	if grep "^CONFIG_MODULES=" $(LINUX_DIR)/.config ; then \
    4.10 +	    $(MAKE) -C $(LINUX_DIR) ARCH=xen modules ; \
    4.11 +	    $(MAKE) -C $(LINUX_DIR) ARCH=xen INSTALL_MOD_PATH=$(INSTALL_DIR) modules_install ; \
    4.12 +	fi
    4.13  	$(MAKE) -C $(LINUX_DIR) ARCH=xen INSTALL_PATH=$(INSTALL_DIR) install
    4.14  
    4.15 -
    4.16  $(LINUX_DIR)/include/linux/autoconf.h: pristine-linux-$(LINUX_VER)
    4.17  	rm -rf $(LINUX_DIR)
    4.18  	cp -al $< $(LINUX_DIR)
     5.1 --- a/linux-2.6.10-xen-sparse/arch/xen/kernel/fixup.c	Tue Jan 11 09:28:54 2005 +0000
     5.2 +++ b/linux-2.6.10-xen-sparse/arch/xen/kernel/fixup.c	Thu Jan 13 10:05:14 2005 +0000
     5.3 @@ -33,10 +33,17 @@
     5.4  #include <linux/slab.h>
     5.5  #include <linux/kernel.h>
     5.6  #include <linux/delay.h>
     5.7 +#include <linux/version.h>
     5.8  
     5.9  #define DP(_f) printk(KERN_ALERT "  " _f "\n")
    5.10  
    5.11 -asmlinkage void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
    5.12 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    5.13 +#define __LINKAGE fastcall
    5.14 +#else
    5.15 +#define __LINKAGE asmlinkage
    5.16 +#endif
    5.17 +
    5.18 +__LINKAGE void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
    5.19  {
    5.20      static unsigned long printed = 0;
    5.21      int i;
     6.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c	Tue Jan 11 09:28:54 2005 +0000
     6.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c	Thu Jan 13 10:05:14 2005 +0000
     6.3 @@ -62,21 +62,6 @@ static void vbd_update(void){};
     6.4  #define BLKIF_STATE_DISCONNECTED 1
     6.5  #define BLKIF_STATE_CONNECTED    2
     6.6  
     6.7 -#ifdef VERBOSE
     6.8 -static char *blkif_state_name[] = {
     6.9 -    [BLKIF_STATE_CLOSED]       = "closed",
    6.10 -    [BLKIF_STATE_DISCONNECTED] = "disconnected",
    6.11 -    [BLKIF_STATE_CONNECTED]    = "connected",
    6.12 -};
    6.13 -
    6.14 -static char * blkif_status_name[] = {
    6.15 -    [BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
    6.16 -    [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
    6.17 -    [BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
    6.18 -    [BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
    6.19 -};
    6.20 -#endif
    6.21 -
    6.22  #define WPRINTK(fmt, args...) printk(KERN_WARNING "xen_blk: " fmt, ##args)
    6.23  
    6.24  static int blkif_handle = 0;
    6.25 @@ -331,11 +316,11 @@ static int blkif_queue_request(struct re
    6.26      blkif_request_t *ring_req;
    6.27      struct bio *bio;
    6.28      struct bio_vec *bvec;
    6.29 -    int idx, s;
    6.30 +    int idx;
    6.31      unsigned long id;
    6.32      unsigned int fsect, lsect;
    6.33  
    6.34 -    if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
    6.35 +    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
    6.36          return 1;
    6.37  
    6.38      /* Fill out a communications ring structure. */
    6.39 @@ -349,29 +334,25 @@ static int blkif_queue_request(struct re
    6.40      ring_req->sector_number = (blkif_sector_t)req->sector;
    6.41      ring_req->device = di->xd_device;
    6.42  
    6.43 -    s = 0;
    6.44      ring_req->nr_segments = 0;
    6.45 -    rq_for_each_bio(bio, req) {
    6.46 -        bio_for_each_segment(bvec, bio, idx) {
    6.47 +    rq_for_each_bio(bio, req)
    6.48 +    {
    6.49 +        bio_for_each_segment(bvec, bio, idx)
    6.50 +        {
    6.51 +            if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST )
    6.52 +                BUG();
    6.53              buffer_ma = page_to_phys(bvec->bv_page);
    6.54 -            if (unlikely((buffer_ma & ((1<<9)-1)) != 0))
    6.55 -                BUG();
    6.56 -
    6.57              fsect = bvec->bv_offset >> 9;
    6.58              lsect = fsect + (bvec->bv_len >> 9) - 1;
    6.59 -            if (unlikely(lsect > 7))
    6.60 -                BUG();
    6.61 -
    6.62              ring_req->frame_and_sects[ring_req->nr_segments++] =
    6.63                  buffer_ma | (fsect << 3) | lsect;
    6.64 -            s += bvec->bv_len >> 9;
    6.65          }
    6.66      }
    6.67  
    6.68      blk_ring.req_prod_pvt++;
    6.69      
    6.70      /* Keep a private copy so we can reissue requests when recovering. */
    6.71 -    translate_req_to_pfn( &rec_ring[id], ring_req);
    6.72 +    translate_req_to_pfn(&rec_ring[id], ring_req);
    6.73  
    6.74      return 0;
    6.75  }
    6.76 @@ -439,14 +420,14 @@ static irqreturn_t blkif_int(int irq, vo
    6.77  
    6.78      for ( i = blk_ring.rsp_cons; i != rp; i++ )
    6.79      {
    6.80 -	unsigned long id;
    6.81 +        unsigned long id;
    6.82  
    6.83          bret = RING_GET_RESPONSE(BLKIF_RING, &blk_ring, i);
    6.84 -	id = bret->id;
    6.85 -	req = (struct request *)rec_ring[id].id;
    6.86 -	blkif_completion( &rec_ring[id] );
    6.87 +        id = bret->id;
    6.88 +        req = (struct request *)rec_ring[id].id;
    6.89 +        blkif_completion( &rec_ring[id] );
    6.90  
    6.91 -	ADD_ID_TO_FREELIST(id); /* overwrites req */
    6.92 +        ADD_ID_TO_FREELIST(id); /* overwrites req */
    6.93  
    6.94          switch ( bret->operation )
    6.95          {
    6.96 @@ -455,7 +436,7 @@ static irqreturn_t blkif_int(int irq, vo
    6.97              if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
    6.98                  DPRINTK("Bad return from blkdev data request: %x\n",
    6.99                          bret->status);
   6.100 -	    
   6.101 +     
   6.102              if ( unlikely(end_that_request_first
   6.103                            (req, 
   6.104                             (bret->status == BLKIF_RSP_OKAY),
   6.105 @@ -528,7 +509,7 @@ static void kick_pending_request_queues(
   6.106      /* We kick pending request queues if the ring is reasonably empty. */
   6.107      if ( (nr_pending != 0) && 
   6.108           (RING_PENDING_REQUESTS(BLKIF_RING, &blk_ring) < 
   6.109 -            (RING_SIZE(BLKIF_RING, &blk_ring) >> 1)) )
   6.110 +          (RING_SIZE(BLKIF_RING, &blk_ring) >> 1)) )
   6.111      {
   6.112          /* Attempt to drain the queue, but bail if the ring becomes full. */
   6.113          while ( (nr_pending != 0) && !RING_FULL(BLKIF_RING, &blk_ring) )
   6.114 @@ -651,7 +632,7 @@ int blkif_ioctl(struct inode *inode, str
   6.115          if (!argument) return -EINVAL;
   6.116  
   6.117          /* We don't have real geometry info, but let's at least return
   6.118 -	   values consistent with the size of the device */
   6.119 +           values consistent with the size of the device */
   6.120  
   6.121          heads = 0xff;
   6.122          sectors = 0x3f; 
   6.123 @@ -669,7 +650,7 @@ int blkif_ioctl(struct inode *inode, str
   6.124          if (!argument) return -EINVAL;
   6.125  
   6.126          /* We don't have real geometry info, but let's at least return
   6.127 -	   values consistent with the size of the device */
   6.128 +           values consistent with the size of the device */
   6.129  
   6.130          heads = 0xff;
   6.131          sectors = 0x3f; 
   6.132 @@ -826,13 +807,13 @@ static int blkif_queue_request(unsigned 
   6.133               (sg_next_sect == sector_number) )
   6.134          {
   6.135              req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, 
   6.136 -                    blk_ring.req_prod_pvt - 1);
   6.137 +                                   blk_ring.req_prod_pvt - 1);
   6.138              bh = (struct buffer_head *)id;
   6.139 -	    
   6.140 +     
   6.141              bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
   6.142 -	    
   6.143 +     
   6.144  
   6.145 -	    rec_ring[req->id].id = id;
   6.146 +            rec_ring[req->id].id = id;
   6.147  
   6.148              req->frame_and_sects[req->nr_segments] = 
   6.149                  buffer_ma | (fsect<<3) | lsect;
   6.150 @@ -985,16 +966,16 @@ static void blkif_int(int irq, void *dev
   6.151  
   6.152      for ( i = blk_ring.rsp_cons; i != rp; i++ )
   6.153      {
   6.154 -	unsigned long id;
   6.155 +        unsigned long id;
   6.156          blkif_response_t *bret;
   6.157          
   6.158          bret = RING_GET_RESPONSE(BLKIF_RING, &blk_ring, i);
   6.159 -	id = bret->id;
   6.160 -	bh = (struct buffer_head *)rec_ring[id].id; 
   6.161 +        id = bret->id;
   6.162 +        bh = (struct buffer_head *)rec_ring[id].id; 
   6.163  
   6.164 -	blkif_completion( &rec_ring[id] );
   6.165 +        blkif_completion( &rec_ring[id] );
   6.166  
   6.167 -	ADD_ID_TO_FREELIST(id);
   6.168 +        ADD_ID_TO_FREELIST(id);
   6.169  
   6.170          switch ( bret->operation )
   6.171          {
   6.172 @@ -1019,7 +1000,7 @@ static void blkif_int(int irq, void *dev
   6.173              BUG();
   6.174          }
   6.175  
   6.176 -	}
   6.177 +    }
   6.178      blk_ring.rsp_cons = i;
   6.179      
   6.180      kick_pending_request_queues();
   6.181 @@ -1168,7 +1149,7 @@ static void blkif_recover(void)
   6.182          if ( rec_ring[i].id >= PAGE_OFFSET )
   6.183          {
   6.184              req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, 
   6.185 -                    blk_ring.req_prod_pvt);
   6.186 +                                   blk_ring.req_prod_pvt);
   6.187              translate_req_to_mfn(req, &rec_ring[i]);
   6.188              blk_ring.req_prod_pvt++;
   6.189          }
   6.190 @@ -1178,7 +1159,7 @@ static void blkif_recover(void)
   6.191      for ( i = 0; i < blk_ring.req_prod_pvt; i++ ) 
   6.192      {
   6.193          req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, i);
   6.194 -        rec_ring[i].id = req->id;		
   6.195 +        rec_ring[i].id = req->id;  
   6.196          req->id = i;
   6.197          translate_req_to_pfn(&rec_ring[i], req);
   6.198      }
   6.199 @@ -1241,9 +1222,8 @@ static void blkif_connect(blkif_fe_inter
   6.200  
   6.201  static void unexpected(blkif_fe_interface_status_t *status)
   6.202  {
   6.203 -    DPRINTK(" Unexpected blkif status %s in state %s\n", 
   6.204 -            blkif_status_name[status->status],
   6.205 -            blkif_state_name[blkif_state]);
   6.206 +    DPRINTK(" Unexpected blkif status %u in state %u\n", 
   6.207 +            status->status, blkif_state);
   6.208  }
   6.209  
   6.210  static void blkif_status(blkif_fe_interface_status_t *status)
   6.211 @@ -1303,7 +1283,7 @@ static void blkif_status(blkif_fe_interf
   6.212          }
   6.213          break;
   6.214  
   6.215 -   case BLKIF_INTERFACE_STATUS_CHANGED:
   6.216 +    case BLKIF_INTERFACE_STATUS_CHANGED:
   6.217          switch ( blkif_state )
   6.218          {
   6.219          case BLKIF_STATE_CLOSED:
   6.220 @@ -1314,7 +1294,7 @@ static void blkif_status(blkif_fe_interf
   6.221              vbd_update();
   6.222              break;
   6.223          }
   6.224 -       break;
   6.225 +        break;
   6.226  
   6.227      default:
   6.228          WPRINTK(" Invalid blkif status: %d\n", status->status);
   6.229 @@ -1382,7 +1362,7 @@ int __init xlblk_init(void)
   6.230  
   6.231      rec_ring_free = 0;
   6.232      for ( i = 0; i < RING_SIZE(BLKIF_RING, &blk_ring); i++ )
   6.233 -	rec_ring[i].id = i+1;
   6.234 +        rec_ring[i].id = i+1;
   6.235      rec_ring[RING_SIZE(BLKIF_RING, &blk_ring)-1].id = 0x0fffffff;
   6.236  
   6.237      (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
   6.238 @@ -1411,13 +1391,13 @@ void blkif_completion(blkif_request_t *r
   6.239      switch ( req->operation )
   6.240      {
   6.241      case BLKIF_OP_READ:
   6.242 -	for ( i = 0; i < req->nr_segments; i++ )
   6.243 -	{
   6.244 -	    unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
   6.245 -	    unsigned long mfn = phys_to_machine_mapping[pfn];
   6.246 -	    xen_machphys_update(mfn, pfn);
   6.247 -	}
   6.248 -	break;
   6.249 +        for ( i = 0; i < req->nr_segments; i++ )
   6.250 +        {
   6.251 +            unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
   6.252 +            unsigned long mfn = phys_to_machine_mapping[pfn];
   6.253 +            xen_machphys_update(mfn, pfn);
   6.254 +        }
   6.255 +        break;
   6.256      }
   6.257      
   6.258  }
     7.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/block.h	Tue Jan 11 09:28:54 2005 +0000
     7.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blkfront/block.h	Thu Jan 13 10:05:14 2005 +0000
     7.3 @@ -64,12 +64,12 @@
     7.4  #endif
     7.5  
     7.6  struct xlbd_type_info {
     7.7 -	int partn_shift;
     7.8 -	int partn_per_major;
     7.9 -	int devs_per_major;
    7.10 -	int hardsect_size;
    7.11 -	int max_sectors;
    7.12 -	char *name;
    7.13 +    int partn_shift;
    7.14 +    int partn_per_major;
    7.15 +    int devs_per_major;
    7.16 +    int hardsect_size;
    7.17 +    int max_sectors;
    7.18 +    char *name;
    7.19  };
    7.20  
    7.21  /*
    7.22 @@ -78,19 +78,19 @@ struct xlbd_type_info {
    7.23   * putting all kinds of interesting stuff here :-)
    7.24   */
    7.25  struct xlbd_major_info {
    7.26 -	int major;
    7.27 -	int index;
    7.28 -	int usage;
    7.29 -	struct xlbd_type_info *type;
    7.30 +    int major;
    7.31 +    int index;
    7.32 +    int usage;
    7.33 +    struct xlbd_type_info *type;
    7.34  };
    7.35  
    7.36  struct xlbd_disk_info {
    7.37 -	int xd_device;
    7.38 -	struct xlbd_major_info *mi;
    7.39 +    int xd_device;
    7.40 +    struct xlbd_major_info *mi;
    7.41  };
    7.42  
    7.43  typedef struct xen_block {
    7.44 -	int usage;
    7.45 +    int usage;
    7.46  } xen_block_t;
    7.47  
    7.48  extern struct request_queue *xlbd_blk_queue;
    7.49 @@ -99,7 +99,7 @@ extern spinlock_t blkif_io_lock;
    7.50  extern int blkif_open(struct inode *inode, struct file *filep);
    7.51  extern int blkif_release(struct inode *inode, struct file *filep);
    7.52  extern int blkif_ioctl(struct inode *inode, struct file *filep,
    7.53 -                           unsigned command, unsigned long argument);
    7.54 +                       unsigned command, unsigned long argument);
    7.55  extern int blkif_check(dev_t dev);
    7.56  extern int blkif_revalidate(dev_t dev);
    7.57  extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
     8.1 --- a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/vbd.c	Tue Jan 11 09:28:54 2005 +0000
     8.2 +++ b/linux-2.6.10-xen-sparse/drivers/xen/blkfront/vbd.c	Thu Jan 13 10:05:14 2005 +0000
     8.3 @@ -43,39 +43,37 @@
     8.4  #define NUM_VBD_MAJORS 1
     8.5  
     8.6  static struct xlbd_type_info xlbd_ide_type = {
     8.7 -	.partn_shift = 6,
     8.8 -	.partn_per_major = 2,
     8.9 -	// XXXcl todo blksize_size[major]  = 1024;
    8.10 -	.hardsect_size = 512,
    8.11 -	.max_sectors = 128,  /* 'hwif->rqsize' if we knew it */
    8.12 -	// XXXcl todo read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
    8.13 -	.name = "hd",
    8.14 +    .partn_shift = 6,
    8.15 +    .partn_per_major = 2,
    8.16 +    // XXXcl todo blksize_size[major]  = 1024;
    8.17 +    .hardsect_size = 512,
    8.18 +    .max_sectors = 128,  /* 'hwif->rqsize' if we knew it */
    8.19 +    // XXXcl todo read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
    8.20 +    .name = "hd",
    8.21  };
    8.22  
    8.23  static struct xlbd_type_info xlbd_scsi_type = {
    8.24 -	.partn_shift = 4,
    8.25 -	.partn_per_major = 16,
    8.26 -	// XXXcl todo blksize_size[major]  = 1024; /* XXX 512; */
    8.27 -	.hardsect_size = 512,
    8.28 -	.max_sectors = 128*8, /* XXX 128; */
    8.29 -	// XXXcl todo read_ahead[major]    = 0; /* XXX 8; -- guessing */
    8.30 -	.name = "sd",
    8.31 +    .partn_shift = 4,
    8.32 +    .partn_per_major = 16,
    8.33 +    // XXXcl todo blksize_size[major]  = 1024; /* XXX 512; */
    8.34 +    .hardsect_size = 512,
    8.35 +    .max_sectors = 128*8, /* XXX 128; */
    8.36 +    // XXXcl todo read_ahead[major]    = 0; /* XXX 8; -- guessing */
    8.37 +    .name = "sd",
    8.38  };
    8.39  
    8.40  static struct xlbd_type_info xlbd_vbd_type = {
    8.41 -	.partn_shift = 4,
    8.42 -	.partn_per_major = 16,
    8.43 -	// XXXcl todo blksize_size[major]  = 512;
    8.44 -	.hardsect_size = 512,
    8.45 -	.max_sectors = 128,
    8.46 -	// XXXcl todo read_ahead[major]    = 8;
    8.47 -	.name = "xvd",
    8.48 +    .partn_shift = 4,
    8.49 +    .partn_per_major = 16,
    8.50 +    // XXXcl todo blksize_size[major]  = 512;
    8.51 +    .hardsect_size = 512,
    8.52 +    .max_sectors = 128,
    8.53 +    // XXXcl todo read_ahead[major]    = 8;
    8.54 +    .name = "xvd",
    8.55  };
    8.56  
    8.57 -/* XXXcl handle cciss after finding out why it's "hacked" in */
    8.58 -
    8.59  static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
    8.60 -					 NUM_VBD_MAJORS];
    8.61 +                                         NUM_VBD_MAJORS];
    8.62  
    8.63  /* Information about our VBDs. */
    8.64  #define MAX_VBDS 64
    8.65 @@ -84,15 +82,15 @@ static vdisk_t *vbd_info;
    8.66  
    8.67  struct request_queue *xlbd_blk_queue = NULL;
    8.68  
    8.69 -#define MAJOR_XEN(dev)	((dev)>>8)
    8.70 -#define MINOR_XEN(dev)	((dev) & 0xff)
    8.71 +#define MAJOR_XEN(dev) ((dev)>>8)
    8.72 +#define MINOR_XEN(dev) ((dev) & 0xff)
    8.73  
    8.74  static struct block_device_operations xlvbd_block_fops = 
    8.75  {
    8.76 -	.owner		= THIS_MODULE,
    8.77 -	.open		= blkif_open,
    8.78 -	.release	= blkif_release,
    8.79 -	.ioctl		= blkif_ioctl,
    8.80 +    .owner  = THIS_MODULE,
    8.81 +    .open  = blkif_open,
    8.82 +    .release = blkif_release,
    8.83 +    .ioctl  = blkif_ioctl,
    8.84  #if 0
    8.85      check_media_change: blkif_check,
    8.86      revalidate:         blkif_revalidate,
    8.87 @@ -122,7 +120,7 @@ static int xlvbd_get_vbd_info(vdisk_t *d
    8.88      }
    8.89  
    8.90      if ( (nr = rsp.status) > MAX_VBDS )
    8.91 -         nr = MAX_VBDS;
    8.92 +        nr = MAX_VBDS;
    8.93      memcpy(disk_info, buf, nr * sizeof(vdisk_t));
    8.94  
    8.95      free_page((unsigned long)buf);
    8.96 @@ -132,153 +130,154 @@ static int xlvbd_get_vbd_info(vdisk_t *d
    8.97  
    8.98  static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor)
    8.99  {
   8.100 -	int mi_idx, new_major;
   8.101 -	int xd_major = MAJOR_XEN(xd_device); 
   8.102 -	int xd_minor = MINOR_XEN(xd_device);
   8.103 -
   8.104 -	*minor = xd_minor;
   8.105 -
   8.106 -	switch (xd_major) {
   8.107 -	case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
   8.108 -	case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
   8.109 -	case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
   8.110 -	case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
   8.111 -	case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
   8.112 -	case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
   8.113 -	case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
   8.114 -	case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
   8.115 -	case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
   8.116 -	case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
   8.117 -	case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
   8.118 -	case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
   8.119 -		mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
   8.120 -		new_major = SCSI_DISK1_MAJOR + xd_major - SCSI_DISK1_MAJOR;
   8.121 -		break;
   8.122 -	case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
   8.123 -	default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
   8.124 -	}
   8.125 -
   8.126 -	if (major_info[mi_idx])
   8.127 -		return major_info[mi_idx];
   8.128 +    int mi_idx, new_major;
   8.129 +    int xd_major = MAJOR_XEN(xd_device); 
   8.130 +    int xd_minor = MINOR_XEN(xd_device);
   8.131  
   8.132 -	major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
   8.133 -	if (major_info[mi_idx] == NULL)
   8.134 -		return NULL;
   8.135 -
   8.136 -	memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
   8.137 +    *minor = xd_minor;
   8.138  
   8.139 -	switch (mi_idx) {
   8.140 -	case 0 ... (NUM_IDE_MAJORS - 1):
   8.141 -		major_info[mi_idx]->type = &xlbd_ide_type;
   8.142 -		major_info[mi_idx]->index = mi_idx;
   8.143 -		break;
   8.144 -	case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
   8.145 -		major_info[mi_idx]->type = &xlbd_scsi_type;
   8.146 -		major_info[mi_idx]->index = mi_idx - NUM_IDE_MAJORS;
   8.147 -		break;
   8.148 -	case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
   8.149 -		(NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
   8.150 -		major_info[mi_idx]->type = &xlbd_vbd_type;
   8.151 -		major_info[mi_idx]->index = mi_idx -
   8.152 -			(NUM_IDE_MAJORS + NUM_SCSI_MAJORS);
   8.153 -		break;
   8.154 -	}
   8.155 -	major_info[mi_idx]->major = new_major;
   8.156 +    switch (xd_major) {
   8.157 +    case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
   8.158 +    case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
   8.159 +    case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
   8.160 +    case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
   8.161 +    case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
   8.162 +    case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
   8.163 +    case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
   8.164 +    case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
   8.165 +    case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
   8.166 +    case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
   8.167 +    case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
   8.168 +    case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
   8.169 +        mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
   8.170 +        new_major = SCSI_DISK1_MAJOR + xd_major - SCSI_DISK1_MAJOR;
   8.171 +        break;
   8.172 +    case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
   8.173 +    default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
   8.174 +    }
   8.175  
   8.176 -	if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
   8.177 -		printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
   8.178 -		    major_info[mi_idx]->major, major_info[mi_idx]->type->name);
   8.179 -		goto out;
   8.180 -	}
   8.181 +    if (major_info[mi_idx])
   8.182 +        return major_info[mi_idx];
   8.183  
   8.184 -	devfs_mk_dir(major_info[mi_idx]->type->name);
   8.185 +    major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
   8.186 +    if (major_info[mi_idx] == NULL)
   8.187 +        return NULL;
   8.188  
   8.189 -	return major_info[mi_idx];
   8.190 +    memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
   8.191 +
   8.192 +    switch (mi_idx) {
   8.193 +    case 0 ... (NUM_IDE_MAJORS - 1):
   8.194 +        major_info[mi_idx]->type = &xlbd_ide_type;
   8.195 +        major_info[mi_idx]->index = mi_idx;
   8.196 +        break;
   8.197 +    case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
   8.198 +        major_info[mi_idx]->type = &xlbd_scsi_type;
   8.199 +        major_info[mi_idx]->index = mi_idx - NUM_IDE_MAJORS;
   8.200 +        break;
   8.201 +        case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
   8.202 +            (NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
   8.203 +                major_info[mi_idx]->type = &xlbd_vbd_type;
   8.204 +        major_info[mi_idx]->index = mi_idx -
   8.205 +            (NUM_IDE_MAJORS + NUM_SCSI_MAJORS);
   8.206 +        break;
   8.207 +    }
   8.208 +    major_info[mi_idx]->major = new_major;
   8.209 +
   8.210 +    if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
   8.211 +        printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
   8.212 +               major_info[mi_idx]->major, major_info[mi_idx]->type->name);
   8.213 +        goto out;
   8.214 +    }
   8.215 +
   8.216 +    devfs_mk_dir(major_info[mi_idx]->type->name);
   8.217 +
   8.218 +    return major_info[mi_idx];
   8.219  
   8.220   out:
   8.221 -	kfree(major_info[mi_idx]);
   8.222 -	major_info[mi_idx] = NULL;
   8.223 -	return NULL;
   8.224 +    kfree(major_info[mi_idx]);
   8.225 +    major_info[mi_idx] = NULL;
   8.226 +    return NULL;
   8.227  }
   8.228  
   8.229  static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi,
   8.230 -					 int xd_minor, vdisk_t *xd)
   8.231 +                                         int xd_minor, vdisk_t *xd)
   8.232  {
   8.233 -	struct gendisk *gd;
   8.234 -	struct xlbd_disk_info *di;
   8.235 -	int device, partno;
   8.236 -
   8.237 -	device = MKDEV(mi->major, xd_minor);
   8.238 -	gd = get_gendisk(device, &partno);
   8.239 -	if (gd)
   8.240 -		return gd;
   8.241 -
   8.242 -	di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
   8.243 -	if (di == NULL)
   8.244 -		return NULL;
   8.245 -	di->mi = mi;
   8.246 -	di->xd_device = xd->device;
   8.247 -
   8.248 -	/* Construct an appropriate gendisk structure. */
   8.249 -	gd = alloc_disk(1);
   8.250 -	if (gd == NULL)
   8.251 -		goto out;
   8.252 -
   8.253 -	gd->major = mi->major;
   8.254 -	gd->first_minor = xd_minor;
   8.255 -	gd->fops = &xlvbd_block_fops;
   8.256 -	gd->private_data = di;
   8.257 -	sprintf(gd->disk_name, "%s%c%d", mi->type->name,
   8.258 -	    'a' + mi->index * mi->type->partn_per_major +
   8.259 -		(xd_minor >> mi->type->partn_shift),
   8.260 -	    xd_minor & ((1 << mi->type->partn_shift) - 1));
   8.261 -	/*  sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */
   8.262 -
   8.263 -	set_capacity(gd, xd->capacity);
   8.264 +    struct gendisk *gd;
   8.265 +    struct xlbd_disk_info *di;
   8.266 +    int device, partno;
   8.267  
   8.268 -	if (xlbd_blk_queue == NULL) {
   8.269 -		xlbd_blk_queue = blk_init_queue(do_blkif_request,
   8.270 -						&blkif_io_lock);
   8.271 -		if (xlbd_blk_queue == NULL)
   8.272 -			goto out;
   8.273 -		elevator_init(xlbd_blk_queue, "noop");
   8.274 -
   8.275 -		/*
   8.276 -		 * Turn off barking 'headactive' mode. We dequeue
   8.277 -		 * buffer heads as soon as we pass them to back-end
   8.278 -		 * driver.
   8.279 -		 */
   8.280 -		blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */
   8.281 -
   8.282 -		blk_queue_hardsect_size(xlbd_blk_queue,
   8.283 -					mi->type->hardsect_size);
   8.284 -		blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */
   8.285 +    device = MKDEV(mi->major, xd_minor);
   8.286 +    gd = get_gendisk(device, &partno);
   8.287 +    if ( gd != NULL )
   8.288 +        return gd;
   8.289  
   8.290 -		/* XXXcl: set mask to PAGE_SIZE for now, to improve either use 
   8.291 -		   - blk_queue_merge_bvec to merge requests with adjacent ma's
   8.292 -		   - the tags infrastructure
   8.293 -		   - the dma infrastructure
   8.294 -		*/
   8.295 -		blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
   8.296 +    di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
   8.297 +    if ( di == NULL )
   8.298 +        return NULL;
   8.299 +    di->mi = mi;
   8.300 +    di->xd_device = xd->device;
   8.301  
   8.302 -		blk_queue_max_phys_segments(xlbd_blk_queue,
   8.303 -                    BLKIF_MAX_SEGMENTS_PER_REQUEST);
   8.304 -		blk_queue_max_hw_segments(xlbd_blk_queue,
   8.305 -                    BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */
   8.306 -
   8.307 +    /* Construct an appropriate gendisk structure. */
   8.308 +    gd = alloc_disk(1);
   8.309 +    if ( gd == NULL )
   8.310 +        goto out;
   8.311  
   8.312 -	}
   8.313 -	gd->queue = xlbd_blk_queue;
   8.314 +    gd->major = mi->major;
   8.315 +    gd->first_minor = xd_minor;
   8.316 +    gd->fops = &xlvbd_block_fops;
   8.317 +    gd->private_data = di;
   8.318 +    sprintf(gd->disk_name, "%s%c%d", mi->type->name,
   8.319 +            'a' + mi->index * mi->type->partn_per_major +
   8.320 +            (xd_minor >> mi->type->partn_shift),
   8.321 +            xd_minor & ((1 << mi->type->partn_shift) - 1));
   8.322  
   8.323 -	add_disk(gd);
   8.324 +    set_capacity(gd, xd->capacity);
   8.325  
   8.326 -	return gd;
   8.327 +    if ( xlbd_blk_queue == NULL )
   8.328 +    {
   8.329 +        xlbd_blk_queue = blk_init_queue(do_blkif_request,
   8.330 +                                        &blkif_io_lock);
   8.331 +        if ( xlbd_blk_queue == NULL )
   8.332 +            goto out;
   8.333 +        elevator_init(xlbd_blk_queue, "noop");
   8.334 +
   8.335 +        /*
   8.336 +         * Turn off barking 'headactive' mode. We dequeue
   8.337 +         * buffer heads as soon as we pass them to back-end
   8.338 +         * driver.
   8.339 +         */
   8.340 +        blk_queue_headactive(xlbd_blk_queue, 0);
   8.341 +
   8.342 +        /* Hard sector size and max sectors impersonate the equiv. hardware. */
   8.343 +        blk_queue_hardsect_size(
   8.344 +            xlbd_blk_queue, mi->type->hardsect_size);
   8.345 +        blk_queue_max_sectors(
   8.346 +            xlbd_blk_queue, mi->type->max_sectors);
   8.347 +
   8.348 +        /* Each segment in a request is up to an aligned page in size. */
   8.349 +        blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
   8.350 +        blk_queue_max_segment_size(xlbd_blk_queue, PAGE_SIZE);
   8.351 +
   8.352 +        /* Ensure a merged request will fit in a single I/O ring slot. */
   8.353 +        blk_queue_max_phys_segments(
   8.354 +            xlbd_blk_queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
   8.355 +        blk_queue_max_hw_segments(
   8.356 +            xlbd_blk_queue, BLKIF_MAX_SEGMENTS_PER_REQUEST);
   8.357 +
   8.358 +        /* Make sure buffer addresses are sector-aligned. */
   8.359 +        blk_queue_dma_alignment(xlbd_blk_queue, 511);
   8.360 +    }
   8.361 +    gd->queue = xlbd_blk_queue;
   8.362 +
   8.363 +    add_disk(gd);
   8.364 +
   8.365 +    return gd;
   8.366  
   8.367   out:
   8.368 -	if (gd)
   8.369 -		del_gendisk(gd);
   8.370 -	kfree(di);
   8.371 -	return NULL;
   8.372 +    if ( gd != NULL )
   8.373 +        del_gendisk(gd);
   8.374 +    kfree(di);
   8.375 +    return NULL;
   8.376  }
   8.377  
   8.378  /*
   8.379 @@ -294,62 +293,62 @@ static struct gendisk *xlvbd_get_gendisk
   8.380   */
   8.381  static int xlvbd_init_device(vdisk_t *xd)
   8.382  {
   8.383 -	struct block_device *bd;
   8.384 -	struct gendisk *gd;
   8.385 -	struct xlbd_major_info *mi;
   8.386 -	int device;
   8.387 -	int minor;
   8.388 -
   8.389 -	int err = -ENOMEM;
   8.390 -
   8.391 -	mi = xlbd_get_major_info(xd->device, &minor);
   8.392 -	if (mi == NULL)
   8.393 -		return -EPERM;
   8.394 -
   8.395 -	device = MKDEV(mi->major, minor);
   8.396 -
   8.397 -	if ((bd = bdget(device)) == NULL)
   8.398 -		return -EPERM;
   8.399 -
   8.400 -	/*
   8.401 -	 * Update of partition info, and check of usage count, is protected
   8.402 -	 * by the per-block-device semaphore.
   8.403 -	 */
   8.404 -	down(&bd->bd_sem);
   8.405 +    struct block_device *bd;
   8.406 +    struct gendisk *gd;
   8.407 +    struct xlbd_major_info *mi;
   8.408 +    int device;
   8.409 +    int minor;
   8.410  
   8.411 -	gd = xlvbd_get_gendisk(mi, minor, xd);
   8.412 -	if (mi == NULL) {
   8.413 -		err = -EPERM;
   8.414 -		goto out;
   8.415 -	}
   8.416 -
   8.417 -	if (VDISK_READONLY(xd->info))
   8.418 -		set_disk_ro(gd, 1); 
   8.419 +    int err = -ENOMEM;
   8.420  
   8.421 -	/* Some final fix-ups depending on the device type */
   8.422 -	switch (VDISK_TYPE(xd->info)) { 
   8.423 -	case VDISK_TYPE_CDROM:
   8.424 -		gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD; 
   8.425 -		/* FALLTHROUGH */
   8.426 -	case VDISK_TYPE_FLOPPY: 
   8.427 -	case VDISK_TYPE_TAPE:
   8.428 -		gd->flags |= GENHD_FL_REMOVABLE; 
   8.429 -		break; 
   8.430 +    mi = xlbd_get_major_info(xd->device, &minor);
   8.431 +    if (mi == NULL)
   8.432 +        return -EPERM;
   8.433  
   8.434 -	case VDISK_TYPE_DISK:
   8.435 -		break; 
   8.436 +    device = MKDEV(mi->major, minor);
   8.437  
   8.438 -	default:
   8.439 -		printk(KERN_ALERT "XenLinux: unknown device type %d\n", 
   8.440 -		    VDISK_TYPE(xd->info)); 
   8.441 -		break; 
   8.442 -	}
   8.443 +    if ((bd = bdget(device)) == NULL)
   8.444 +        return -EPERM;
   8.445  
   8.446 -	err = 0;
   8.447 +    /*
   8.448 +     * Update of partition info, and check of usage count, is protected
   8.449 +     * by the per-block-device semaphore.
   8.450 +     */
   8.451 +    down(&bd->bd_sem);
   8.452 +
   8.453 +    gd = xlvbd_get_gendisk(mi, minor, xd);
   8.454 +    if (mi == NULL) {
   8.455 +        err = -EPERM;
   8.456 +        goto out;
   8.457 +    }
   8.458 +
   8.459 +    if (VDISK_READONLY(xd->info))
   8.460 +        set_disk_ro(gd, 1); 
   8.461 +
   8.462 +    /* Some final fix-ups depending on the device type */
   8.463 +    switch (VDISK_TYPE(xd->info)) { 
   8.464 +    case VDISK_TYPE_CDROM:
   8.465 +        gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD; 
   8.466 +        /* FALLTHROUGH */
   8.467 +    case VDISK_TYPE_FLOPPY: 
   8.468 +    case VDISK_TYPE_TAPE:
   8.469 +        gd->flags |= GENHD_FL_REMOVABLE; 
   8.470 +        break; 
   8.471 +
   8.472 +    case VDISK_TYPE_DISK:
   8.473 +        break; 
   8.474 +
   8.475 +    default:
   8.476 +        printk(KERN_ALERT "XenLinux: unknown device type %d\n", 
   8.477 +               VDISK_TYPE(xd->info)); 
   8.478 +        break; 
   8.479 +    }
   8.480 +
   8.481 +    err = 0;
   8.482   out:
   8.483 -	up(&bd->bd_sem);
   8.484 -	bdput(bd);    
   8.485 -	return err;
   8.486 +    up(&bd->bd_sem);
   8.487 +    bdput(bd);    
   8.488 +    return err;
   8.489  }
   8.490  
   8.491  #if 0
   8.492 @@ -393,7 +392,7 @@ static int xlvbd_remove_device(int devic
   8.493      {
   8.494          /* 1: The VBD is mapped to a partition rather than a whole unit. */
   8.495          invalidate_device(device, 1);
   8.496 -	gd->part[minor].start_sect = 0;
   8.497 +        gd->part[minor].start_sect = 0;
   8.498          gd->part[minor].nr_sects   = 0;
   8.499          gd->sizes[minor]           = 0;
   8.500  
   8.501 @@ -531,31 +530,31 @@ void xlvbd_update_vbds(void)
   8.502   */
   8.503  int xlvbd_init(void)
   8.504  {
   8.505 -	int i;
   8.506 -
   8.507 -	/*
   8.508 -	 * If compiled as a module, we don't support unloading yet. We
   8.509 -	 * therefore permanently increment the reference count to
   8.510 -	 * disallow it.
   8.511 -	 */
   8.512 -	/* MOD_INC_USE_COUNT; */
   8.513 -
   8.514 -	memset(major_info, 0, sizeof(major_info));
   8.515 -
   8.516 -	for (i = 0; i < sizeof(major_info) / sizeof(major_info[0]); i++) {
   8.517 -	}
   8.518 +    int i;
   8.519  
   8.520 -	vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
   8.521 -	nr_vbds  = xlvbd_get_vbd_info(vbd_info);
   8.522 +    /*
   8.523 +     * If compiled as a module, we don't support unloading yet. We
   8.524 +     * therefore permanently increment the reference count to
   8.525 +     * disallow it.
   8.526 +     */
   8.527 +    /* MOD_INC_USE_COUNT; */
   8.528  
   8.529 -	if (nr_vbds < 0) {
   8.530 -		kfree(vbd_info);
   8.531 -		vbd_info = NULL;
   8.532 -		nr_vbds  = 0;
   8.533 -	} else {
   8.534 -		for (i = 0; i < nr_vbds; i++)
   8.535 -			xlvbd_init_device(&vbd_info[i]);
   8.536 -	}
   8.537 +    memset(major_info, 0, sizeof(major_info));
   8.538  
   8.539 -	return 0;
   8.540 +    for (i = 0; i < sizeof(major_info) / sizeof(major_info[0]); i++) {
   8.541 +    }
   8.542 +
   8.543 +    vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
   8.544 +    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
   8.545 +
   8.546 +    if (nr_vbds < 0) {
   8.547 +        kfree(vbd_info);
   8.548 +        vbd_info = NULL;
   8.549 +        nr_vbds  = 0;
   8.550 +    } else {
   8.551 +        for (i = 0; i < nr_vbds; i++)
   8.552 +            xlvbd_init_device(&vbd_info[i]);
   8.553 +    }
   8.554 +
   8.555 +    return 0;
   8.556  }
     9.1 --- a/tools/Makefile	Tue Jan 11 09:28:54 2005 +0000
     9.2 +++ b/tools/Makefile	Thu Jan 13 10:05:14 2005 +0000
     9.3 @@ -9,6 +9,7 @@ all:
     9.4  	$(MAKE) -C python
     9.5  	$(MAKE) -C xfrd
     9.6  	$(MAKE) -C xcs
     9.7 +	$(MAKE) -C ioemu
     9.8  
     9.9  install: 
    9.10  ifneq ($(dist),yes)
    9.11 @@ -23,6 +24,7 @@ endif
    9.12  	$(MAKE) -C xfrd install
    9.13  	$(MAKE) -C sv install
    9.14  	$(MAKE) -C xcs install
    9.15 +	$(MAKE) -C ioemu install
    9.16  
    9.17  dist: $(TARGET)
    9.18  	$(MAKE) prefix=`pwd`/../../install dist=yes install
    9.19 @@ -37,4 +39,4 @@ clean:
    9.20  	$(MAKE) -C python clean
    9.21  	$(MAKE) -C xfrd clean
    9.22  	$(MAKE) -C xcs clean
    9.23 -
    9.24 +	$(MAKE) -C ioemu clean
    10.1 --- a/tools/ioemu/Makefile	Tue Jan 11 09:28:54 2005 +0000
    10.2 +++ b/tools/ioemu/Makefile	Thu Jan 13 10:05:14 2005 +0000
    10.3 @@ -1,12 +1,12 @@
    10.4  # Order is important!
    10.5  SUBDIRS=gui memory iodev
    10.6  
    10.7 -all: $(SUBDIRS)
    10.8 +.PHONY: all clean install
    10.9  
   10.10 -$(SUBDIRS):
   10.11 -	$(MAKE) -C $@
   10.12 -
   10.13 -.PHONY: all $(SUBDIRS)
   10.14 +all:
   10.15 +	@for subdir in $(SUBDIRS); do 				\
   10.16 +		$(MAKE) -C $$subdir $(MAKEDEFS) $@ || exit -1; 	\
   10.17 +	done
   10.18  
   10.19  clean:
   10.20  	@for subdir in $(SUBDIRS); do 				\
    11.1 --- a/tools/ioemu/gui/Makefile	Tue Jan 11 09:28:54 2005 +0000
    11.2 +++ b/tools/ioemu/gui/Makefile	Thu Jan 13 10:05:14 2005 +0000
    11.3 @@ -8,3 +8,5 @@ libgui.a: $(OBJS)
    11.4  	$(AR) $(ARFLAGS) $@ $(OBJS)
    11.5  
    11.6  include $(TOPDIR)/mk/helix.mk
    11.7 +
    11.8 +install:: all
    12.1 --- a/tools/ioemu/iodev/Makefile	Tue Jan 11 09:28:54 2005 +0000
    12.2 +++ b/tools/ioemu/iodev/Makefile	Thu Jan 13 10:05:14 2005 +0000
    12.3 @@ -11,5 +11,5 @@ device-model: $(OBJS) $(BXLIBS)
    12.4  
    12.5  include $(TOPDIR)/mk/helix.mk
    12.6  
    12.7 -install::
    12.8 +install:: all
    12.9  	install device-model $(prefix)/usr/sbin
    13.1 --- a/tools/ioemu/memory/Makefile	Tue Jan 11 09:28:54 2005 +0000
    13.2 +++ b/tools/ioemu/memory/Makefile	Thu Jan 13 10:05:14 2005 +0000
    13.3 @@ -9,3 +9,4 @@ libmemory.a: $(OBJS)
    13.4  
    13.5  include $(TOPDIR)/mk/helix.mk
    13.6  
    13.7 +install:: all
    14.1 --- a/tools/ioemu/memory/misc_mem.cc	Tue Jan 11 09:28:54 2005 +0000
    14.2 +++ b/tools/ioemu/memory/misc_mem.cc	Thu Jan 13 10:05:14 2005 +0000
    14.3 @@ -80,7 +80,7 @@ BX_MEM_C::alloc_vector_aligned (size_t b
    14.4    actual_vector = new Bit8u [bytes+test_mask];
    14.5    // round address forward to nearest multiple of alignment.  Alignment 
    14.6    // MUST BE a power of two for this to work.
    14.7 -  Bit64u masked = ((Bit64u)(actual_vector + test_mask)) & ~test_mask;
    14.8 +  unsigned long masked = ((unsigned long)(actual_vector + test_mask)) & ~test_mask;
    14.9    vector = (Bit8u *)masked;
   14.10    // sanity check: no lost bits during pointer conversion
   14.11    BX_ASSERT (sizeof(masked) >= sizeof(vector));
    15.1 --- a/tools/libxc/xc.h	Tue Jan 11 09:28:54 2005 +0000
    15.2 +++ b/tools/libxc/xc.h	Thu Jan 13 10:05:14 2005 +0000
    15.3 @@ -10,7 +10,6 @@
    15.4  #define __XC_H__
    15.5  
    15.6  #include <stdint.h>
    15.7 -#include "linux_boot_params.h"
    15.8  
    15.9  typedef uint8_t            u8;
   15.10  typedef uint16_t           u16;
   15.11 @@ -25,7 +24,6 @@ typedef int64_t            s64;
   15.12  #include <xen/dom0_ops.h>
   15.13  #include <xen/event_channel.h>
   15.14  #include <xen/sched_ctl.h>
   15.15 -#include <xen/io/domain_controller.h>
   15.16  
   15.17  /* Obtain or relinquish a handle on the 'xc' library. */
   15.18  int xc_interface_open(void);
   15.19 @@ -107,6 +105,7 @@ xc_plan9_build (int xc_handle,
   15.20  		unsigned int control_evtchn, 
   15.21  		unsigned long flags);
   15.22  
   15.23 +struct mem_map;
   15.24  int xc_vmx_build(int xc_handle,
   15.25                   u32 domid,
   15.26                   const char *image_name,
    16.1 --- a/tools/libxc/xc_linux_save.c	Tue Jan 11 09:28:54 2005 +0000
    16.2 +++ b/tools/libxc/xc_linux_save.c	Thu Jan 13 10:05:14 2005 +0000
    16.3 @@ -10,6 +10,7 @@
    16.4  #include <sys/time.h>
    16.5  #include "xc_private.h"
    16.6  #include <xen/linux/suspend.h>
    16.7 +#include <xen/io/domain_controller.h>
    16.8  #include <time.h>
    16.9  
   16.10  #define BATCH_SIZE 1024   /* 1024 pages (4MB) at a time */
    17.1 --- a/tools/misc/netfix	Tue Jan 11 09:28:54 2005 +0000
    17.2 +++ b/tools/misc/netfix	Thu Jan 13 10:05:14 2005 +0000
    17.3 @@ -10,7 +10,7 @@
    17.4  from getopt import getopt
    17.5  
    17.6  # Default install path for Xen binary packages.
    17.7 -sys.path.append('/lib/python')
    17.8 +sys.path = [ '/usr/lib/python' ] + sys.path
    17.9  from xen.util.Brctl import *
   17.10  
   17.11  short_options = 'hvqni:b:c'
    18.1 --- a/tools/misc/xend	Tue Jan 11 09:28:54 2005 +0000
    18.2 +++ b/tools/misc/xend	Thu Jan 13 10:05:14 2005 +0000
    18.3 @@ -29,7 +29,7 @@ XCS_EXEC = "/usr/sbin/xcs"
    18.4  XCS_LOGFILE = "/var/log/xcs.log"
    18.5  
    18.6  # Default install path for Xen binary packages.
    18.7 -sys.path.append('/lib/python')
    18.8 +sys.path = [ '/usr/lib/python' ] + sys.path
    18.9  from xen.xend.server import SrvDaemon
   18.10  
   18.11  class CheckError(ValueError):
    19.1 --- a/tools/misc/xensv	Tue Jan 11 09:28:54 2005 +0000
    19.2 +++ b/tools/misc/xensv	Thu Jan 13 10:05:14 2005 +0000
    19.3 @@ -23,7 +23,7 @@ import sys
    19.4  import re
    19.5  
    19.6  # Default install path for Xen binary packages.
    19.7 -sys.path.append('/lib/python')
    19.8 +sys.path = [ '/usr/lib/python' ] + sys.path
    19.9  from xen.xend.server.params import XEND_PID_FILE
   19.10  
   19.11  class CheckError(ValueError):
    20.1 --- a/tools/misc/xm	Tue Jan 11 09:28:54 2005 +0000
    20.2 +++ b/tools/misc/xm	Thu Jan 13 10:05:14 2005 +0000
    20.3 @@ -3,7 +3,7 @@
    20.4  import sys
    20.5  
    20.6  # Default install path for Xen binary packages.
    20.7 -sys.path.append('/lib/python')
    20.8 +sys.path = [ '/usr/lib/python' ] + sys.path
    20.9  from xen.xm import main
   20.10  
   20.11  main.main(sys.argv)
    21.1 --- a/tools/python/Makefile	Tue Jan 11 09:28:54 2005 +0000
    21.2 +++ b/tools/python/Makefile	Thu Jan 13 10:05:14 2005 +0000
    21.3 @@ -3,13 +3,7 @@ all:
    21.4  	python setup.py build
    21.5  
    21.6  install: all
    21.7 -	if [ "$(prefix)" = "" ]; then                   \
    21.8 -	    python setup.py install;                    \
    21.9 -	elif [ "$(dist)" = "yes" ]; then                \
   21.10 -	    python setup.py install --home="$(prefix)"; \
   21.11 -	else                                            \
   21.12 -	    python setup.py install --root="$(prefix)"; \
   21.13 -	fi
   21.14 +	python setup.py install --home="$(prefix)/usr"
   21.15  
   21.16  clean:
   21.17  	rm -rf build *.pyc *.pyo *.o *.a *~
    22.1 --- a/xen/arch/x86/memory.c	Tue Jan 11 09:28:54 2005 +0000
    22.2 +++ b/xen/arch/x86/memory.c	Thu Jan 13 10:05:14 2005 +0000
    22.3 @@ -937,12 +937,41 @@ int get_page_type(struct pfn_info *page,
    22.4  }
    22.5  
    22.6  
    22.7 +int new_guest_cr3(unsigned long pfn)
    22.8 +{
    22.9 +    struct exec_domain *ed = current;
   22.10 +    struct domain *d = ed->domain;
   22.11 +    int okay, cpu = smp_processor_id();
   22.12 +    unsigned long old_base_pfn;
   22.13 +    
   22.14 +    okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, d);
   22.15 +    if ( likely(okay) )
   22.16 +    {
   22.17 +        invalidate_shadow_ldt(ed);
   22.18 +
   22.19 +        percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
   22.20 +        old_base_pfn = pagetable_val(ed->mm.pagetable) >> PAGE_SHIFT;
   22.21 +        ed->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
   22.22 +
   22.23 +        shadow_mk_pagetable(&ed->mm);
   22.24 +
   22.25 +        write_ptbase(&ed->mm);
   22.26 +
   22.27 +        put_page_and_type(&frame_table[old_base_pfn]);
   22.28 +    }
   22.29 +    else
   22.30 +    {
   22.31 +        MEM_LOG("Error while installing new baseptr %08lx", ptr);
   22.32 +    }
   22.33 +
   22.34 +    return okay;
   22.35 +}
   22.36 +
   22.37  static int do_extended_command(unsigned long ptr, unsigned long val)
   22.38  {
   22.39      int okay = 1, cpu = smp_processor_id();
   22.40      unsigned int cmd = val & MMUEXT_CMD_MASK;
   22.41      unsigned long pfn = ptr >> PAGE_SHIFT;
   22.42 -    unsigned long old_base_pfn;
   22.43      struct pfn_info *page = &frame_table[pfn];
   22.44      struct exec_domain *ed = current;
   22.45      struct domain *d = ed->domain, *nd, *e;
   22.46 @@ -1003,25 +1032,7 @@ static int do_extended_command(unsigned 
   22.47          break;
   22.48  
   22.49      case MMUEXT_NEW_BASEPTR:
   22.50 -        okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, d);
   22.51 -        if ( likely(okay) )
   22.52 -        {
   22.53 -            invalidate_shadow_ldt(ed);
   22.54 -
   22.55 -            percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB;
   22.56 -            old_base_pfn = pagetable_val(ed->mm.pagetable) >> PAGE_SHIFT;
   22.57 -            ed->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
   22.58 -
   22.59 -            shadow_mk_pagetable(&ed->mm);
   22.60 -
   22.61 -            write_ptbase(&ed->mm);
   22.62 -
   22.63 -            put_page_and_type(&frame_table[old_base_pfn]);
   22.64 -        }
   22.65 -        else
   22.66 -        {
   22.67 -            MEM_LOG("Error while installing new baseptr %08lx", ptr);
   22.68 -        }
   22.69 +        okay = new_guest_cr3(pfn);
   22.70          break;
   22.71          
   22.72      case MMUEXT_TLB_FLUSH:
    23.1 --- a/xen/arch/x86/traps.c	Tue Jan 11 09:28:54 2005 +0000
    23.2 +++ b/xen/arch/x86/traps.c	Thu Jan 13 10:05:14 2005 +0000
    23.3 @@ -383,14 +383,30 @@ asmlinkage void do_machine_check(struct 
    23.4      fatal_trap(TRAP_machine_check, regs);
    23.5  }
    23.6  
    23.7 +void propagate_page_fault(unsigned long addr, u16 error_code)
    23.8 +{
    23.9 +    trap_info_t *ti;
   23.10 +    struct exec_domain *ed = current;
   23.11 +    struct trap_bounce *tb = &ed->thread.trap_bounce;
   23.12 +
   23.13 +    ti = ed->thread.traps + 14;
   23.14 +    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
   23.15 +    tb->cr2        = addr;
   23.16 +    tb->error_code = error_code;
   23.17 +    tb->cs         = ti->cs;
   23.18 +    tb->eip        = ti->address;
   23.19 +    if ( TI_GET_IF(ti) )
   23.20 +        ed->vcpu_info->evtchn_upcall_mask = 1;
   23.21 +
   23.22 +    ed->mm.guest_cr2 = addr;
   23.23 +}
   23.24 +
   23.25  asmlinkage int do_page_fault(struct xen_regs *regs)
   23.26  {
   23.27 -    trap_info_t *ti;
   23.28      unsigned long off, addr, fixup;
   23.29      struct exec_domain *ed = current;
   23.30      struct domain *d = ed->domain;
   23.31      extern int map_ldt_shadow_page(unsigned int);
   23.32 -    struct trap_bounce *tb = &ed->thread.trap_bounce;
   23.33      int cpu = ed->processor;
   23.34      int ret;
   23.35  
   23.36 @@ -447,14 +463,7 @@ asmlinkage int do_page_fault(struct xen_
   23.37      if ( !GUEST_FAULT(regs) )
   23.38          goto xen_fault;
   23.39  
   23.40 -    ti = ed->thread.traps + 14;
   23.41 -    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
   23.42 -    tb->cr2        = addr;
   23.43 -    tb->error_code = regs->error_code;
   23.44 -    tb->cs         = ti->cs;
   23.45 -    tb->eip        = ti->address;
   23.46 -    if ( TI_GET_IF(ti) )
   23.47 -        ed->vcpu_info->evtchn_upcall_mask = 1;
   23.48 +    propagate_page_fault(addr, regs->error_code);
   23.49      return 0; 
   23.50  
   23.51   xen_fault:
   23.52 @@ -497,45 +506,126 @@ asmlinkage int do_page_fault(struct xen_
   23.53  
   23.54  static int emulate_privileged_op(struct xen_regs *regs)
   23.55  {
   23.56 -    u16 opcode;
   23.57 +    extern long do_fpu_taskswitch(void);
   23.58 +    extern void *decode_reg(struct xen_regs *regs, u8 b);
   23.59  
   23.60 -    if ( get_user(opcode, (u16 *)regs->eip) || ((opcode & 0xff) != 0x0f) )
   23.61 -        return 0;
   23.62 +    struct exec_domain *ed = current;
   23.63 +    unsigned long *reg, eip = regs->eip;
   23.64 +    u8 opcode;
   23.65  
   23.66 -    switch ( opcode >> 8 )
   23.67 +    if ( get_user(opcode, (u8 *)eip) )
   23.68 +        goto page_fault;
   23.69 +    eip += 1;
   23.70 +    if ( (opcode & 0xff) != 0x0f )
   23.71 +        goto fail;
   23.72 +
   23.73 +    if ( get_user(opcode, (u8 *)eip) )
   23.74 +        goto page_fault;
   23.75 +    eip += 1;
   23.76 +
   23.77 +    switch ( opcode )
   23.78      {
   23.79 +    case 0x06: /* CLTS */
   23.80 +        (void)do_fpu_taskswitch();
   23.81 +        break;
   23.82 +
   23.83      case 0x09: /* WBINVD */
   23.84 -        if ( !IS_CAPABLE_PHYSDEV(current->domain) )
   23.85 +        if ( !IS_CAPABLE_PHYSDEV(ed->domain) )
   23.86          {
   23.87              DPRINTK("Non-physdev domain attempted WBINVD.\n");
   23.88 -            return 0;
   23.89 +            goto fail;
   23.90          }
   23.91          wbinvd();
   23.92 -        regs->eip += 2;
   23.93 -        return 1;
   23.94 -        
   23.95 +        break;
   23.96 +
   23.97 +    case 0x20: /* MOV CR?,<reg> */
   23.98 +        if ( get_user(opcode, (u8 *)eip) )
   23.99 +            goto page_fault;
  23.100 +        eip += 1;
  23.101 +        if ( (opcode & 0xc0) != 0xc0 )
  23.102 +            goto fail;
  23.103 +        reg = decode_reg(regs, opcode);
  23.104 +        switch ( (opcode >> 3) & 7 )
  23.105 +        {
  23.106 +        case 0: /* Read CR0 */
  23.107 +            *reg = 
  23.108 +                (read_cr0() & ~X86_CR0_TS) | 
  23.109 +                (test_bit(EDF_GUEST_STTS, &ed->ed_flags) ? X86_CR0_TS : 0);
  23.110 +            break;
  23.111 +
  23.112 +        case 2: /* Read CR2 */
  23.113 +            *reg = ed->mm.guest_cr2;
  23.114 +            break;
  23.115 +            
  23.116 +        case 3: /* Read CR3 */
  23.117 +            *reg = pagetable_val(ed->mm.pagetable);
  23.118 +            break;
  23.119 +
  23.120 +        default:
  23.121 +            goto fail;
  23.122 +        }
  23.123 +        break;
  23.124 +
  23.125 +    case 0x22: /* MOV <reg>,CR? */
  23.126 +        if ( get_user(opcode, (u8 *)eip) )
  23.127 +            goto page_fault;
  23.128 +        eip += 1;
  23.129 +        if ( (opcode & 0xc0) != 0xc0 )
  23.130 +            goto fail;
  23.131 +        reg = decode_reg(regs, opcode);
  23.132 +        switch ( (opcode >> 3) & 7 )
  23.133 +        {
  23.134 +        case 0: /* Write CR0 */
  23.135 +            if ( *reg & X86_CR0_TS ) /* XXX ignore all but TS bit */
  23.136 +                (void)do_fpu_taskswitch;
  23.137 +            break;
  23.138 +
  23.139 +        case 2: /* Write CR2 */
  23.140 +            ed->mm.guest_cr2 = *reg;
  23.141 +            break;
  23.142 +            
  23.143 +        case 3: /* Write CR3 */
  23.144 +            LOCK_BIGLOCK(ed->domain);
  23.145 +            (void)new_guest_cr3(*reg);
  23.146 +            UNLOCK_BIGLOCK(ed->domain);
  23.147 +            break;
  23.148 +
  23.149 +        default:
  23.150 +            goto fail;
  23.151 +        }
  23.152 +        break;
  23.153 +
  23.154      case 0x30: /* WRMSR */
  23.155 -        if ( !IS_PRIV(current->domain) )
  23.156 +        if ( !IS_PRIV(ed->domain) )
  23.157          {
  23.158              DPRINTK("Non-priv domain attempted WRMSR.\n");
  23.159 -            return 0;
  23.160 +            goto fail;
  23.161          }
  23.162          wrmsr(regs->ecx, regs->eax, regs->edx);
  23.163 -        regs->eip += 2;
  23.164 -        return 1;
  23.165 +        break;
  23.166  
  23.167      case 0x32: /* RDMSR */
  23.168 -        if ( !IS_PRIV(current->domain) )
  23.169 +        if ( !IS_PRIV(ed->domain) )
  23.170          {
  23.171              DPRINTK("Non-priv domain attempted RDMSR.\n");
  23.172 -            return 0;
  23.173 +            goto fail;
  23.174          }
  23.175          rdmsr(regs->ecx, regs->eax, regs->edx);
  23.176 -        regs->eip += 2;
  23.177 -        return 1;
  23.178 +        break;
  23.179 +
  23.180 +    default:
  23.181 +        goto fail;
  23.182      }
  23.183  
  23.184 +    regs->eip = eip;
  23.185 +    return EXCRET_fault_fixed;
  23.186 +
  23.187 + fail:
  23.188      return 0;
  23.189 +
  23.190 + page_fault:
  23.191 +    propagate_page_fault(eip, 0);
  23.192 +    return EXCRET_fault_fixed;
  23.193  }
  23.194  
  23.195  asmlinkage int do_general_protection(struct xen_regs *regs)
    24.1 --- a/xen/arch/x86/x86_32/seg_fixup.c	Tue Jan 11 09:28:54 2005 +0000
    24.2 +++ b/xen/arch/x86/x86_32/seg_fixup.c	Thu Jan 13 10:05:14 2005 +0000
    24.3 @@ -75,7 +75,7 @@ static unsigned char insn_decode[256] = 
    24.4      X, X, X, X, X, X, X, X,
    24.5      X, X, X, X, X, X, X, X,
    24.6      /* 0xA0 - 0xAF */
    24.7 -    O|1, O|4, O|1, O|4, X, X, X, X,
    24.8 +    O|4, O|4, O|4, O|4, X, X, X, X,
    24.9      X, X, X, X, X, X, X, X,
   24.10      /* 0xB0 - 0xBF */
   24.11      X, X, X, X, X, X, X, X,
   24.12 @@ -182,7 +182,7 @@ int fixup_seg(u16 seg, unsigned long off
   24.13          table = (unsigned long *)LDT_VIRT_START(d);
   24.14          if ( idx >= d->mm.ldt_ents )
   24.15          {
   24.16 -            DPRINTK("Segment %04x out of LDT range (%d)\n",
   24.17 +            DPRINTK("Segment %04x out of LDT range (%ld)\n",
   24.18                      seg, d->mm.ldt_ents);
   24.19              goto fail;
   24.20          }
   24.21 @@ -231,16 +231,9 @@ int fixup_seg(u16 seg, unsigned long off
   24.22      }
   24.23      else
   24.24      {
   24.25 -        /*
   24.26 -         * Expands-up: All the way to Xen space? Assume 4GB if so.
   24.27 -         * NB: we compare offset with limit-15, instead of the "real"
   24.28 -         * comparison of offset+15 (worst case) with limit,
   24.29 -         * to avoid possible unsigned int overflow of offset+15.
   24.30 -         * limit-15 will not underflow here because we don't allow expand-up
   24.31 -         * segments with maxlimit.
   24.32 -         */
   24.33 +        /* Expands-up: All the way to Xen space? Assume 4GB if so. */
   24.34          if ( ((PAGE_OFFSET - (base + limit)) < PAGE_SIZE) &&
   24.35 -             ((offset) > (limit-15)) )
   24.36 +             (offset > limit) )
   24.37          {
   24.38              /* Flip to expands-down. */
   24.39              limit = -(base & PAGE_MASK);
   24.40 @@ -248,8 +241,8 @@ int fixup_seg(u16 seg, unsigned long off
   24.41          }
   24.42      }
   24.43  
   24.44 -    DPRINTK("None of the above! (%08lx:%08lx, %d, %08lx, %08lx, %08lx)\n", 
   24.45 -            a, b, positive_access, base, limit, base+limit);
   24.46 +    DPRINTK("None of the above! (%08lx:%08lx, %08lx, %08lx, %08lx)\n", 
   24.47 +            a, b, base, limit, base+limit);
   24.48  
   24.49   fail:
   24.50      return 0;
   24.51 @@ -285,9 +278,7 @@ void *decode_reg(struct xen_regs *regs, 
   24.52  
   24.53  /*
   24.54   * Called from the general-protection fault handler to attempt to decode
   24.55 - * and emulate an instruction that depends on 4GB segments. At this point
   24.56 - * we assume that the instruction itself is paged into memory (the CPU
   24.57 - * must have triggered this in order to decode the instruction itself).
   24.58 + * and emulate an instruction that depends on 4GB segments.
   24.59   */
   24.60  int gpf_emulate_4gb(struct xen_regs *regs)
   24.61  {
   24.62 @@ -312,7 +303,7 @@ int gpf_emulate_4gb(struct xen_regs *reg
   24.63  
   24.64      if ( !linearise_address((u16)regs->cs, regs->eip, (unsigned long *)&eip) )
   24.65      {
   24.66 -        DPRINTK("Cannot linearise %04x:%08lx\n", regs->cs, regs->eip);
   24.67 +        DPRINTK("Cannot linearise %04x:%08x\n", regs->cs, regs->eip);
   24.68          goto fail;
   24.69      }
   24.70  
   24.71 @@ -322,7 +313,7 @@ int gpf_emulate_4gb(struct xen_regs *reg
   24.72          if ( get_user(b, pb) )
   24.73          {
   24.74              DPRINTK("Fault while accessing byte %d of instruction\n", pb-eip);
   24.75 -            goto fail;
   24.76 +            goto page_fault;
   24.77          }
   24.78  
   24.79          if ( (pb - eip) >= 15 )
   24.80 @@ -340,25 +331,25 @@ int gpf_emulate_4gb(struct xen_regs *reg
   24.81          case 0xf0: /* LOCK */
   24.82          case 0xf2: /* REPNE/REPNZ */
   24.83          case 0xf3: /* REP/REPE/REPZ */
   24.84 -            continue;
   24.85 +            break;
   24.86          case 0x2e: /* CS override */
   24.87              pseg = &regs->cs;
   24.88 -            continue;
   24.89 +            break;
   24.90          case 0x3e: /* DS override */
   24.91              pseg = &regs->ds;
   24.92 -            continue;
   24.93 +            break;
   24.94          case 0x26: /* ES override */
   24.95              pseg = &regs->es;
   24.96 -            continue;
   24.97 +            break;
   24.98          case 0x64: /* FS override */
   24.99              pseg = &regs->fs;
  24.100 -            continue;
  24.101 +            break;
  24.102          case 0x65: /* GS override */
  24.103              pseg = &regs->gs;
  24.104 -            continue;
  24.105 +            break;
  24.106          case 0x36: /* SS override */
  24.107              pseg = &regs->ss;
  24.108 -            continue;
  24.109 +            break;
  24.110          default: /* Not a prefix byte */
  24.111              goto done_prefix;
  24.112          }
  24.113 @@ -375,17 +366,17 @@ int gpf_emulate_4gb(struct xen_regs *reg
  24.114      
  24.115      if ( !(decode & HAS_MODRM) )
  24.116      {
  24.117 -        switch ( decode & 7 )
  24.118 +        if ( (decode & 7) != 4 )
  24.119 +            goto fail;
  24.120 +
  24.121 +        if ( get_user(offset, (u32 *)pb) )
  24.122          {
  24.123 -        case 1:
  24.124 -            offset = (long)(*(char *)pb);
  24.125 -            goto skip_modrm;
  24.126 -        case 4:
  24.127 -            offset = *(long *)pb;
  24.128 -            goto skip_modrm;
  24.129 -        default:
  24.130 -            goto fail;
  24.131 +            DPRINTK("Fault while extracting <disp8>.\n");
  24.132 +            goto page_fault;
  24.133          }
  24.134 +        pb += 4;
  24.135 +
  24.136 +        goto skip_modrm;
  24.137      }
  24.138  
  24.139      /*
  24.140 @@ -395,7 +386,7 @@ int gpf_emulate_4gb(struct xen_regs *reg
  24.141      if ( get_user(modrm, pb) )
  24.142      {
  24.143          DPRINTK("Fault while extracting modrm byte\n");
  24.144 -        goto fail;
  24.145 +        goto page_fault;
  24.146      }
  24.147  
  24.148      pb++;
  24.149 @@ -427,7 +418,7 @@ int gpf_emulate_4gb(struct xen_regs *reg
  24.150              if ( get_user(disp32, (u32 *)pb) )
  24.151              {
  24.152                  DPRINTK("Fault while extracting <disp8>.\n");
  24.153 -                goto fail;
  24.154 +                goto page_fault;
  24.155              }
  24.156              pb += 4;
  24.157          }
  24.158 @@ -439,7 +430,7 @@ int gpf_emulate_4gb(struct xen_regs *reg
  24.159          if ( get_user(disp8, pb) )
  24.160          {
  24.161              DPRINTK("Fault while extracting <disp8>.\n");
  24.162 -            goto fail;
  24.163 +            goto page_fault;
  24.164          }
  24.165          pb++;
  24.166          disp32 = (disp8 & 0x80) ? (disp8 | ~0xff) : disp8;;
  24.167 @@ -451,7 +442,7 @@ int gpf_emulate_4gb(struct xen_regs *reg
  24.168          if ( get_user(disp32, (u32 *)pb) )
  24.169          {
  24.170              DPRINTK("Fault while extracting <disp8>.\n");
  24.171 -            goto fail;
  24.172 +            goto page_fault;
  24.173          }
  24.174          pb += 4;
  24.175          break;
  24.176 @@ -485,14 +476,18 @@ int gpf_emulate_4gb(struct xen_regs *reg
  24.177              d->vcpu_info->evtchn_upcall_mask = 1;
  24.178      }
  24.179  
  24.180 -    return 1;
  24.181 +    return EXCRET_fault_fixed;
  24.182  
  24.183   fixme:
  24.184      DPRINTK("Undecodable instruction %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x "
  24.185 -            "caused GPF(0) at %04x:%08lx\n",
  24.186 +            "caused GPF(0) at %04x:%08x\n",
  24.187              eip[0], eip[1], eip[2], eip[3],
  24.188              eip[4], eip[5], eip[6], eip[7],
  24.189              regs->cs, regs->eip);
  24.190   fail:
  24.191      return 0;
  24.192 +
  24.193 + page_fault:
  24.194 +    propagate_page_fault((unsigned long)pb, 4);
  24.195 +    return EXCRET_fault_fixed;
  24.196  }
    25.1 --- a/xen/common/string.c	Tue Jan 11 09:28:54 2005 +0000
    25.2 +++ b/xen/common/string.c	Thu Jan 13 10:05:14 2005 +0000
    25.3 @@ -391,6 +391,7 @@ char * bcopy(const char * src, char * de
    25.4  }
    25.5  #endif
    25.6  
    25.7 +#ifndef __HAVE_ARCH_MEMCPY
    25.8  /**
    25.9   * memcpy - Copy one area of memory to another
   25.10   * @dest: Where to copy to
   25.11 @@ -400,7 +401,6 @@ char * bcopy(const char * src, char * de
   25.12   * You should not use this function to access IO space, use memcpy_toio()
   25.13   * or memcpy_fromio() instead.
   25.14   */
   25.15 -#undef memcpy
   25.16  void * memcpy(void * dest,const void *src,size_t count)
   25.17  {
   25.18  	char *tmp = (char *) dest, *s = (char *) src;
   25.19 @@ -410,6 +410,7 @@ void * memcpy(void * dest,const void *sr
   25.20  
   25.21  	return dest;
   25.22  }
   25.23 +#endif
   25.24  
   25.25  #ifndef __HAVE_ARCH_MEMMOVE
   25.26  /**
    26.1 --- a/xen/include/asm-x86/mm.h	Tue Jan 11 09:28:54 2005 +0000
    26.2 +++ b/xen/include/asm-x86/mm.h	Thu Jan 13 10:05:14 2005 +0000
    26.3 @@ -278,6 +278,8 @@ extern ptwr_info_t ptwr_info[];
    26.4  void ptwr_flush(const int);
    26.5  int ptwr_do_page_fault(unsigned long);
    26.6  
    26.7 +int new_guest_cr3(unsigned long pfn);
    26.8 +
    26.9  #define __cleanup_writable_pagetable(_what)                                 \
   26.10  do {                                                                        \
   26.11      int cpu = smp_processor_id();                                           \
   26.12 @@ -303,4 +305,6 @@ void audit_domains(void);
   26.13  #define audit_domains()  ((void)0)
   26.14  #endif
   26.15  
   26.16 +void propagate_page_fault(unsigned long addr, u16 error_code);
   26.17 +
   26.18  #endif /* __ASM_X86_MM_H__ */
    27.1 --- a/xen/include/asm-x86/processor.h	Tue Jan 11 09:28:54 2005 +0000
    27.2 +++ b/xen/include/asm-x86/processor.h	Thu Jan 13 10:05:14 2005 +0000
    27.3 @@ -485,6 +485,9 @@ struct mm_struct {
    27.4      unsigned long min_pfn;		/* min host physical */
    27.5      unsigned long max_pfn;		/* max host physical */
    27.6  
    27.7 +    /* Virtual CR2 value. Can be read/written by guest. */
    27.8 +    unsigned long guest_cr2;
    27.9 +
   27.10      /* shadow mode status and controls */
   27.11      unsigned int shadow_mode;  /* flags to control shadow table operation */
   27.12      pagetable_t  shadow_table;
    28.1 --- a/xen/include/asm-x86/x86_32/string.h	Tue Jan 11 09:28:54 2005 +0000
    28.2 +++ b/xen/include/asm-x86/x86_32/string.h	Thu Jan 13 10:05:14 2005 +0000
    28.3 @@ -206,7 +206,7 @@ return (to);
    28.4   * This looks horribly ugly, but the compiler can optimize it totally,
    28.5   * as the count is constant.
    28.6   */
    28.7 -static inline void * __constant_memcpy(void * to, const void * from, size_t n)
    28.8 +static always_inline void * __constant_memcpy(void * to, const void * from, size_t n)
    28.9  {
   28.10  	switch (n) {
   28.11  		case 0:
   28.12 @@ -272,12 +272,13 @@ static inline void * __constant_memcpy(v
   28.13  }
   28.14  
   28.15  #define __HAVE_ARCH_MEMCPY
   28.16 -
   28.17 -#define memcpy(t, f, n) \
   28.18 -(__builtin_constant_p(n) ? \
   28.19 - __constant_memcpy((t),(f),(n)) : \
   28.20 - __memcpy((t),(f),(n)))
   28.21 -
   28.22 +static always_inline __attribute_used__
   28.23 +void *memcpy(void *t, const void *f, size_t n) /* yields the helper's result, matching the void * generic memcpy */
   28.24 +{	/* Use the constant-count variant when n is a compile-time constant. */
   28.25 +	return (__builtin_constant_p(n) ?
   28.26 +	 __constant_memcpy((t),(f),(n)) :
   28.27 +	 __memcpy((t),(f),(n)));
   28.28 +}
   28.29  
   28.30  /*
   28.31   * struct_cpy(x,y), copy structure *x into (matching structure) *y.
   28.32 @@ -410,7 +411,7 @@ return __res;
   28.33   * This looks horribly ugly, but the compiler can optimize it totally,
   28.34   * as we by now know that both pattern and count is constant..
   28.35   */
   28.36 -static inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
   28.37 +static always_inline void * __constant_c_and_count_memset(void * s, unsigned long pattern, size_t count)
   28.38  {
   28.39  	switch (count) {
   28.40  		case 0: