ia64/xen-unstable

changeset 1337:b4a9837c89b1

bitkeeper revision 1.885 (4094465dmX2TefiJicy4taNzc0jiaw)

manual merge
author iap10@labyrinth.cl.cam.ac.uk
date Sun May 02 00:52:45 2004 +0000 (2004-05-02)
parents 75286d976ac6 60e0bd84bf4d
children 343bab45b371
files .rootkeys tools/examples/xc_dom_create.py tools/xc/lib/xc.h tools/xc/lib/xc_linux_build.c tools/xc/lib/xc_netbsd_build.c tools/xc/py/Xc.c tools/xend/lib/blkif.py tools/xend/lib/console.py tools/xend/lib/domain_controller.h tools/xend/lib/main.py tools/xend/lib/manager.py tools/xend/lib/utils.c tools/xend/setup.py xen/arch/i386/pdb-stub.c xen/common/debug-linux.c xen/common/event_channel.c xen/common/kernel.c xen/common/physdev.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h xenolinux-2.4.26-sparse/include/asm-xen/io.h xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h xenolinux-2.4.26-sparse/mm/vmalloc.c
line diff
     1.1 --- a/.rootkeys	Sun May 02 00:27:58 2004 +0000
     1.2 +++ b/.rootkeys	Sun May 02 00:52:45 2004 +0000
     1.3 @@ -102,6 +102,7 @@ 3fbd4bd6GtGwZGxYUJPOheYIR7bPaA tools/xen
     1.4  4055ee44Bu6oP7U0WxxXypbUt4dNPQ tools/xenctl/setup.py
     1.5  40431ac64Hj4ixUnKmlugZKhXPFE_Q tools/xend/Makefile
     1.6  4055ad95Se-FqttgxollqOAAHB94zA tools/xend/lib/__init__.py
     1.7 +4092738fMRGC9fFBcPRCWaJaj9U3ag tools/xend/lib/blkif.py
     1.8  4055ad97wMLUj0BZT0e_T0EwQN0Bvw tools/xend/lib/console.py
     1.9  4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/domain_controller.h
    1.10  4054a301VEag2GwrBrFBna5U1BGlLA tools/xend/lib/main.py
    1.11 @@ -665,8 +666,8 @@ 4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux
    1.12  4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
    1.13  40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
    1.14  4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
    1.15 -4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
    1.16 -4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
    1.17 +4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h
    1.18 +4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
    1.19  4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
    1.20  3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.26-sparse/arch/xen/drivers/block/Makefile
    1.21  3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
     2.1 --- a/tools/examples/xc_dom_create.py	Sun May 02 00:27:58 2004 +0000
     2.2 +++ b/tools/examples/xc_dom_create.py	Sun May 02 00:52:45 2004 +0000
     2.3 @@ -1,7 +1,7 @@
     2.4  #!/usr/bin/env python
     2.5  
     2.6  import string, sys, os, time, socket, getopt, signal, syslog
     2.7 -import Xc, xenctl.utils, xenctl.console_client
     2.8 +import Xc, xenctl.utils, xenctl.console_client, re
     2.9  
    2.10  config_dir  = '/etc/xc/'
    2.11  config_file = xc_config_file = config_dir + 'defaults'
    2.12 @@ -195,6 +195,15 @@ output('VM cmdline         : "%s"' % cmd
    2.13  if dryrun:
    2.14      sys.exit(1)
    2.15  
    2.16 +##### HACK HACK HACK
    2.17 +##### Until everyone moves to the new I/O world, and a more robust domain
    2.18 +##### controller (xend), we use this little trick to discover whether we
    2.19 +##### are in a testing environment for new I/O stuff.
    2.20 +new_io_world = True
    2.21 +for line in os.popen('cat /proc/interrupts').readlines():
    2.22 +    if re.search('blkdev', line):
    2.23 +        new_io_world = False
    2.24 +
    2.25  ##### Code beyond this point is actually used to manage the mechanics of
    2.26  ##### starting (and watching if necessary) guest virtual machines.
    2.27  
    2.28 @@ -228,19 +237,23 @@ def make_domain():
    2.29  
    2.30      cmsg = 'new_control_interface(dom='+str(id)+', console_port='+str(console_port)+')'
    2.31  
    2.32 -    xend_response = xenctl.utils.xend_control_message(cmsg)
    2.33 +    cons_response = xenctl.utils.xend_control_message(cmsg)
    2.34  
    2.35 -    if not xend_response['success']:
    2.36 +    if not cons_response['success']:
    2.37  	print "Error creating initial event channel"
    2.38 -	print "Error type: " + xend_response['error_type']
    2.39 -	if xend_response['error_type'] == 'exception':
    2.40 -	    print "Exception type: " + xend_response['exception_type']
    2.41 -	    print "Exception value: " + xend_response['exception_value']
    2.42 +	print "Error type: " + cons_response['error_type']
    2.43 +	if cons_response['error_type'] == 'exception':
    2.44 +	    print "Exception type: " + cons_response['exception_type']
    2.45 +	    print "Exception value: " + cons_response['exception_value']
    2.46  	xc.domain_destroy ( dom=id )
    2.47  	sys.exit()
    2.48  
    2.49 +    # will the domain have IO privileges?
    2.50 +    if pci_device_list != []: io_priv = True
    2.51 +    else:                     io_priv = False
    2.52 +
    2.53      if restore:
    2.54 -        ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1 )' % builder_fn)
    2.55 +        ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1, io_priv=%d )' % (builder_fn, io_priv))
    2.56          if ret < 0:
    2.57              print "Error restoring domain"
    2.58              print "Return code = " + str(ret)
    2.59 @@ -248,7 +261,7 @@ def make_domain():
    2.60              sys.exit()
    2.61      else:
    2.62  
    2.63 -        ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn)
    2.64 +        ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"], io_priv=%d )' % (builder_fn, io_priv) )
    2.65          if ret < 0:
    2.66              print "Error building Linux guest OS: "
    2.67              print "Return code = " + str(ret)
    2.68 @@ -259,6 +272,18 @@ def make_domain():
    2.69  
    2.70      # set the expertise level appropriately
    2.71      xenctl.utils.VBD_EXPERT_MODE = vbd_expert
    2.72 +
    2.73 +    if new_io_world:
    2.74 +        cmsg = 'new_block_interface(dom='+str(id)+')'
    2.75 +        xend_response = xenctl.utils.xend_control_message(cmsg)
    2.76 +        if not xend_response['success']:
    2.77 +            print "Error creating block interface"
    2.78 +            print "Error type: " + xend_response['error_type']
    2.79 +            if xend_response['error_type'] == 'exception':
    2.80 +                print "Exception type: " + xend_response['exception_type']
    2.81 +                print "Exception val:  " + xend_response['exception_value']
    2.82 +            xc.domain_destroy ( dom=id )
    2.83 +            sys.exit()
    2.84      
    2.85      for ( uname, virt_name, rw ) in vbd_list:
    2.86  	virt_dev = xenctl.utils.blkdev_name_to_number( virt_name )
    2.87 @@ -269,42 +294,70 @@ def make_domain():
    2.88  	    xc.domain_destroy ( dom=id )
    2.89  	    sys.exit()
    2.90  
    2.91 -        # check that setting up this VBD won't violate the sharing
    2.92 -        # allowed by the current VBD expertise level
    2.93 -        if xenctl.utils.vd_extents_validate(segments, rw=='w' or rw=='rw') < 0:
    2.94 -            xc.domain_destroy( dom = id )
    2.95 -            sys.exit()
    2.96 +        if new_io_world:
    2.97 +            if len(segments) > 1:
    2.98 +                print "New I/O world cannot deal with multi-extent vdisks"
    2.99 +                xc.domain_destroy ( dom=id )
   2.100 +                sys.exit()
   2.101 +            seg = segments[0]
   2.102 +            cmsg = 'new_block_device(dom=' + str(id) + \
   2.103 +                   ',handle=0,vdev=' + str(virt_dev) + \
   2.104 +                   ',pdev=' + str(seg['device']) + \
   2.105 +                   ',start_sect=' + str(seg['start_sector']) + \
   2.106 +                   ',nr_sect=' + str(seg['nr_sectors']) + \
   2.107 +                   ',readonly=' + str(not re.match('w',rw)) + ')'
   2.108 +            xend_response = xenctl.utils.xend_control_message(cmsg)
   2.109 +            if not xend_response['success']:
   2.110 +                print "Error creating virtual block device"
   2.111 +                print "Error type: " + xend_response['error_type']
   2.112 +                if xend_response['error_type'] == 'exception':
   2.113 +                    print "Exception type: " + xend_response['exception_type']
   2.114 +                    print "Exception val:  " + xend_response['exception_value']
   2.115 +                xc.domain_destroy ( dom=id )
   2.116 +                sys.exit()
   2.117 +        else:
   2.118 +            # check that setting up this VBD won't violate the sharing
   2.119 +            # allowed by the current VBD expertise level
   2.120 +            if xenctl.utils.vd_extents_validate(segments,
   2.121 +                                                rw=='w' or rw=='rw') < 0:
   2.122 +                xc.domain_destroy( dom = id )
   2.123 +                sys.exit()
   2.124              
   2.125 -	if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' or rw=='rw' ):
   2.126 -	    print "Error creating VBD vbd=%d writeable=%d\n" % (virt_dev,rw)
   2.127 -	    xc.domain_destroy ( dom=id )
   2.128 -	    sys.exit()
   2.129 +            if xc.vbd_create( dom=id, vbd=virt_dev,
   2.130 +                              writeable= rw=='w' or rw=='rw' ):
   2.131 +                print "Error creating VBD %d (writeable=%d)\n" % (virt_dev,rw)
   2.132 +                xc.domain_destroy ( dom=id )
   2.133 +                sys.exit()
   2.134  	
   2.135 -        if xc.vbd_setextents( dom=id,
   2.136 -                              vbd=virt_dev,
   2.137 -                              extents=segments):
   2.138 -            print "Error populating VBD vbd=%d\n" % virt_dev
   2.139 -            xc.domain_destroy ( dom=id )
   2.140 -            sys.exit()
   2.141 +            if xc.vbd_setextents( dom=id,
   2.142 +                                  vbd=virt_dev,
   2.143 +                                  extents=segments):
   2.144 +                print "Error populating VBD vbd=%d\n" % virt_dev
   2.145 +                xc.domain_destroy ( dom=id )
   2.146 +                sys.exit()
   2.147  
   2.148 -    # setup virtual firewall rules for all aliases
   2.149 -    for ip in vfr_ipaddr:
   2.150 -	xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
   2.151 +    if not new_io_world:
   2.152 +        # setup virtual firewall rules for all aliases
   2.153 +        for ip in vfr_ipaddr:
   2.154 +            xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
   2.155  
   2.156 -    # check for physical device access
   2.157 -    for (pci_bus, pci_dev, pci_func) in pci_device_list:
   2.158 -        if xc.physdev_pci_access_modify(
   2.159 -            dom=id, bus=pci_bus, dev=pci_dev, func=pci_func, enable=1 ) < 0:
   2.160 -            print "Non-fatal error enabling PCI device access."
   2.161 -        else:
   2.162 -            print "Enabled PCI access (%d:%d:%d)." % (pci_bus,pci_dev,pci_func)
   2.163 +    if new_io_world:
   2.164 +        # check for physical device access
   2.165 +        for (pci_bus, pci_dev, pci_func) in pci_device_list:
   2.166 +            if xc.physdev_pci_access_modify(
   2.167 +                dom=id, bus=pci_bus, dev=pci_dev,
   2.168 +                func=pci_func, enable=1 ) < 0:
   2.169 +                print "Non-fatal error enabling PCI device access."
   2.170 +            else:
   2.171 +                print "Enabled PCI access (%d:%d:%d)." % \
   2.172 +                      (pci_bus,pci_dev,pci_func)
   2.173  
   2.174      if xc.domain_start( dom=id ) < 0:
   2.175          print "Error starting domain"
   2.176          xc.domain_destroy ( dom=id )
   2.177          sys.exit()
   2.178  
   2.179 -    return (id, xend_response['console_port'])
   2.180 +    return (id, cons_response['console_port'])
   2.181  # end of make_domain()
   2.182  
   2.183  def mkpidfile():
     3.1 --- a/tools/xc/lib/xc.h	Sun May 02 00:27:58 2004 +0000
     3.2 +++ b/tools/xc/lib/xc.h	Sun May 02 00:52:45 2004 +0000
     3.3 @@ -64,7 +64,7 @@ int xc_linux_save(int xc_handle,
     3.4                    int verbose);
     3.5  
     3.6  int xc_linux_restore(int xc_handle,
     3.7 -		     u64 domid,
     3.8 +                     u64 domid,
     3.9                       const char *state_file, 
    3.10                       int verbose,
    3.11                       u64 *pdomid);
    3.12 @@ -74,13 +74,15 @@ int xc_linux_build(int xc_handle,
    3.13                     const char *image_name,
    3.14                     const char *ramdisk_name,
    3.15                     const char *cmdline,
    3.16 -                   unsigned int control_evtchn);
    3.17 +                   unsigned int control_evtchn,
    3.18 +                   int io_priv);
    3.19  
    3.20  int xc_netbsd_build(int xc_handle,
    3.21                      u64 domid,
    3.22                      const char *image_name,
    3.23                      const char *cmdline,
    3.24 -                    unsigned int control_evtchn);
    3.25 +                    unsigned int control_evtchn,
    3.26 +                    int io_priv);
    3.27  
    3.28  int xc_bvtsched_global_set(int xc_handle,
    3.29                             unsigned long ctx_allow);
    3.30 @@ -248,15 +250,15 @@ int xc_shadow_control(int xc_handle,
    3.31  
    3.32  int xc_domain_setname(int xc_handle,
    3.33                        u64 domid, 
    3.34 -		      char *name);
    3.35 +                      char *name);
    3.36  
    3.37  int xc_domain_setinitialmem(int xc_handle,
    3.38 -			    u64 domid, 
    3.39 -			    unsigned int initial_memkb);
    3.40 +                            u64 domid, 
    3.41 +                            unsigned int initial_memkb);
    3.42  
    3.43  int xc_domain_setmaxmem(int xc_handle,
    3.44 -			    u64 domid, 
    3.45 -			    unsigned int max_memkb);
    3.46 +                            u64 domid, 
    3.47 +                            unsigned int max_memkb);
    3.48  
    3.49  
    3.50  #endif /* __XC_H__ */
     4.1 --- a/tools/xc/lib/xc_linux_build.c	Sun May 02 00:27:58 2004 +0000
     4.2 +++ b/tools/xc/lib/xc_linux_build.c	Sun May 02 00:52:45 2004 +0000
     4.3 @@ -74,7 +74,8 @@ static int setup_guestos(int xc_handle,
     4.4  			 full_execution_context_t *ctxt,
     4.5                           const char *cmdline,
     4.6                           unsigned long shared_info_frame,
     4.7 -                         unsigned int control_evtchn)
     4.8 +                         unsigned int control_evtchn,
     4.9 +                         int io_priv)
    4.10  {
    4.11      l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    4.12      l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    4.13 @@ -268,7 +269,7 @@ static int setup_guestos(int xc_handle,
    4.14      memset(start_info, 0, sizeof(*start_info));
    4.15      start_info->nr_pages     = nr_pages;
    4.16      start_info->shared_info  = shared_info_frame << PAGE_SHIFT;
    4.17 -    start_info->flags        = 0;
    4.18 +    start_info->flags        = io_priv ? SIF_PRIVILEGED : 0;
    4.19      start_info->pt_base      = vpt_start;
    4.20      start_info->nr_pt_frames = nr_pt_pages;
    4.21      start_info->mfn_list     = vphysmap_start;
    4.22 @@ -381,7 +382,8 @@ int xc_linux_build(int xc_handle,
    4.23                     const char *image_name,
    4.24                     const char *ramdisk_name,
    4.25                     const char *cmdline,
    4.26 -                   unsigned int control_evtchn)
    4.27 +                   unsigned int control_evtchn,
    4.28 +                   int io_priv)
    4.29  {
    4.30      dom0_op_t launch_op, op;
    4.31      int initrd_fd = -1;
    4.32 @@ -446,7 +448,7 @@ int xc_linux_build(int xc_handle,
    4.33                         &vstartinfo_start, &vkern_entry,
    4.34                         ctxt, cmdline,
    4.35                         op.u.getdomaininfo.shared_info_frame,
    4.36 -                       control_evtchn) < 0 )
    4.37 +                       control_evtchn, io_priv) < 0 )
    4.38      {
    4.39          ERROR("Error constructing guest OS");
    4.40          goto error_out;
    4.41 @@ -560,13 +562,13 @@ static int readelfimage_base_and_size(ch
    4.42  
    4.43      if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    4.44      {
    4.45 -	ERROR("ELF program headers extend beyond end of image.");
    4.46 +        ERROR("ELF program headers extend beyond end of image.");
    4.47          return -EINVAL;
    4.48      }
    4.49  
    4.50      if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    4.51      {
    4.52 -	ERROR("ELF section headers extend beyond end of image.");
    4.53 +        ERROR("ELF section headers extend beyond end of image.");
    4.54          return -EINVAL;
    4.55      }
    4.56  
    4.57 @@ -642,7 +644,7 @@ static int loadelfimage(char *elfbase, i
    4.58      {
    4.59          phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
    4.60          if ( !is_loadable_phdr(phdr) )
    4.61 -	    continue;
    4.62 +            continue;
    4.63          
    4.64          for ( done = 0; done < phdr->p_filesz; done += chunksz )
    4.65          {
     5.1 --- a/tools/xc/lib/xc_netbsd_build.c	Sun May 02 00:27:58 2004 +0000
     5.2 +++ b/tools/xc/lib/xc_netbsd_build.c	Sun May 02 00:52:45 2004 +0000
     5.3 @@ -63,7 +63,8 @@ static int setup_guestos(int xc_handle,
     5.4  			 full_execution_context_t *ctxt,
     5.5                           const char *cmdline,
     5.6                           unsigned long shared_info_frame,
     5.7 -                         unsigned int control_evtchn)
     5.8 +                         unsigned int control_evtchn,
     5.9 +                         int io_priv)
    5.10  {
    5.11      l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    5.12      l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    5.13 @@ -175,7 +176,7 @@ static int setup_guestos(int xc_handle,
    5.14      start_info->mod_len     = symtab_len;
    5.15      start_info->nr_pages    = tot_pages;
    5.16      start_info->shared_info = shared_info_frame << PAGE_SHIFT;
    5.17 -    start_info->flags       = 0;
    5.18 +    start_info->flags       = io_priv ? SIF_PRIVILEGED : 0;
    5.19      start_info->domain_controller_evtchn = control_evtchn;
    5.20      strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
    5.21      start_info->cmd_line[MAX_CMDLINE-1] = '\0';
    5.22 @@ -212,7 +213,8 @@ int xc_netbsd_build(int xc_handle,
    5.23                      u64 domid,
    5.24                      const char *image_name,
    5.25                      const char *cmdline,
    5.26 -                    unsigned int control_evtchn)
    5.27 +                    unsigned int control_evtchn,
    5.28 +                    int io_priv)
    5.29  {
    5.30      dom0_op_t launch_op, op;
    5.31      unsigned long load_addr;
    5.32 @@ -269,7 +271,7 @@ int xc_netbsd_build(int xc_handle,
    5.33                         &virt_startinfo_addr,
    5.34                         &load_addr, &st_ctxt, cmdline,
    5.35                         op.u.getdomaininfo.shared_info_frame,
    5.36 -                       control_evtchn) < 0 )
    5.37 +                       control_evtchn, io_priv) < 0 )
    5.38      {
    5.39          ERROR("Error constructing guest OS");
    5.40          goto error_out;
     6.1 --- a/tools/xc/py/Xc.c	Sun May 02 00:27:58 2004 +0000
     6.2 +++ b/tools/xc/py/Xc.c	Sun May 02 00:52:45 2004 +0000
     6.3 @@ -228,18 +228,19 @@ static PyObject *pyxc_linux_build(PyObje
     6.4  
     6.5      u64   dom;
     6.6      char *image, *ramdisk = NULL, *cmdline = "";
     6.7 -    int   control_evtchn;
     6.8 +    int   control_evtchn, io_priv = 0;
     6.9  
    6.10      static char *kwd_list[] = { "dom", "control_evtchn", 
    6.11 -                                "image", "ramdisk", "cmdline", NULL };
    6.12 +                                "image", "ramdisk", "cmdline", "io_priv",
    6.13 +				NULL };
    6.14  
    6.15 -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list, 
    6.16 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list, 
    6.17                                        &dom, &control_evtchn, 
    6.18 -                                      &image, &ramdisk, &cmdline) )
    6.19 +                                      &image, &ramdisk, &cmdline, &io_priv) )
    6.20          return NULL;
    6.21  
    6.22      if ( xc_linux_build(xc->xc_handle, dom, image, 
    6.23 -                        ramdisk, cmdline, control_evtchn) != 0 )
    6.24 +                        ramdisk, cmdline, control_evtchn, io_priv) != 0 )
    6.25          return PyErr_SetFromErrno(xc_error);
    6.26      
    6.27      Py_INCREF(zero);
    6.28 @@ -254,18 +255,19 @@ static PyObject *pyxc_netbsd_build(PyObj
    6.29  
    6.30      u64   dom;
    6.31      char *image, *ramdisk = NULL, *cmdline = "";
    6.32 -    int   control_evtchn;
    6.33 +    int   control_evtchn, io_priv = 0;
    6.34  
    6.35      static char *kwd_list[] = { "dom", "control_evtchn",
    6.36 -                                "image", "ramdisk", "cmdline", NULL };
    6.37 +                                "image", "ramdisk", "cmdline", "io_priv",
    6.38 +				NULL };
    6.39  
    6.40 -    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list, 
    6.41 +    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list, 
    6.42                                        &dom, &control_evtchn,
    6.43 -                                      &image, &ramdisk, &cmdline) )
    6.44 +                                      &image, &ramdisk, &cmdline, &io_priv) )
    6.45          return NULL;
    6.46  
    6.47      if ( xc_netbsd_build(xc->xc_handle, dom, image, 
    6.48 -                         cmdline, control_evtchn) != 0 )
    6.49 +                         cmdline, control_evtchn, io_priv) != 0 )
    6.50          return PyErr_SetFromErrno(xc_error);
    6.51      
    6.52      Py_INCREF(zero);
    6.53 @@ -1160,7 +1162,8 @@ static PyMethodDef pyxc_methods[] = {
    6.54        " dom     [long]:     Identifier of domain to build into.\n"
    6.55        " image   [str]:      Name of kernel image file. May be gzipped.\n"
    6.56        " ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
    6.57 -      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
    6.58 +      " cmdline [str, n/a]: Kernel parameters, if any.\n"
    6.59 +      " io_priv [boolean]:  Does the domain have IO privileges?\n\n"
    6.60        "Returns: [int] 0 on success; -1 on error.\n" },
    6.61  
    6.62      { "netbsd_build", 
    6.63 @@ -1169,7 +1172,8 @@ static PyMethodDef pyxc_methods[] = {
    6.64        "Build a new NetBSD guest OS.\n"
    6.65        " dom     [long]:     Identifier of domain to build into.\n"
    6.66        " image   [str]:      Name of kernel image file. May be gzipped.\n"
    6.67 -      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
    6.68 +      " cmdline [str, n/a]: Kernel parameters, if any.\n"
    6.69 +      " io_priv [boolean]:  Does the domain have IO privileges?\n\n"
    6.70        "Returns: [int] 0 on success; -1 on error.\n" },
    6.71  
    6.72      { "bvtsched_global_set",
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/tools/xend/lib/blkif.py	Sun May 02 00:52:45 2004 +0000
     7.3 @@ -0,0 +1,143 @@
     7.4 +
     7.5 +#################################################################
     7.6 +## xend/blkif.py -- Block-interface management functions for Xend
     7.7 +## Copyright (c) 2004, K A Fraser (University of Cambridge)
     7.8 +#################################################################
     7.9 +
    7.10 +import errno, re, os, select, signal, socket, struct, sys
    7.11 +import xend.main, xend.console, xend.manager, xend.utils, Xc
    7.12 +
    7.13 +CMSG_BLKIF_BE = 1
    7.14 +CMSG_BLKIF_FE = 2
    7.15 +CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED =  0
    7.16 +CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED    = 32
    7.17 +CMSG_BLKIF_FE_INTERFACE_CONNECT        = 33
    7.18 +CMSG_BLKIF_FE_INTERFACE_DISCONNECT     = 34
    7.19 +CMSG_BLKIF_BE_CREATE      = 0
    7.20 +CMSG_BLKIF_BE_DESTROY     = 1
    7.21 +CMSG_BLKIF_BE_CONNECT     = 2
    7.22 +CMSG_BLKIF_BE_DISCONNECT  = 3
    7.23 +CMSG_BLKIF_BE_VBD_CREATE  = 4
    7.24 +CMSG_BLKIF_BE_VBD_DESTROY = 5
    7.25 +CMSG_BLKIF_BE_VBD_GROW    = 6
    7.26 +CMSG_BLKIF_BE_VBD_SHRINK  = 7
    7.27 +
    7.28 +pendmsg = None
    7.29 +pendaddr = None
    7.30 +
    7.31 +def backend_tx_req(msg):
    7.32 +    port = xend.main.dom0_port
    7.33 +    if port.space_to_write_request():
    7.34 +        port.write_request(msg)
    7.35 +        port.notify()
    7.36 +    else:
    7.37 +        xend.blkif.pendmsg = msg
    7.38 +
    7.39 +def backend_rx_req(port, msg):
    7.40 +    port.write_response(msg)
    7.41 +
    7.42 +def backend_rx_rsp(port, msg):
    7.43 +    subtype = (msg.get_header())['subtype']
    7.44 +    print "Received blkif-be response, subtype %d" % subtype
    7.45 +    if subtype == CMSG_BLKIF_BE_CREATE:
    7.46 +        rsp = { 'success': True }
    7.47 +        xend.main.send_management_response(rsp, xend.blkif.pendaddr)
    7.48 +    elif subtype == CMSG_BLKIF_BE_CONNECT:
    7.49 +        (dom,hnd,evtchn,frame,st) = struct.unpack("QIILI", msg.get_payload())
    7.50 +        blkif = interface.list[xend.main.port_from_dom(dom).local_port]
    7.51 +        msg = xend.utils.message(CMSG_BLKIF_FE, \
    7.52 +                                 CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
    7.53 +        msg.append_payload(struct.pack("III",0,2,blkif.evtchn['port2']))
    7.54 +        blkif.ctrlif_tx_req(xend.main.port_list[blkif.key], msg)
    7.55 +    elif subtype == CMSG_BLKIF_BE_VBD_CREATE:
    7.56 +        (dom,hnd,vdev,ro,st) = struct.unpack("QIHII", msg.get_payload())
    7.57 +        blkif = interface.list[xend.main.port_from_dom(dom).local_port]
    7.58 +        (pdev, start_sect, nr_sect, readonly) = blkif.devices[vdev]
    7.59 +        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW, 0)
    7.60 +        msg.append_payload(struct.pack("QIHHHQQI",dom,0,vdev,0, \
    7.61 +                                       pdev,start_sect,nr_sect,0))
    7.62 +        backend_tx_req(msg)
    7.63 +    elif subtype == CMSG_BLKIF_BE_VBD_GROW:
    7.64 +        rsp = { 'success': True }
    7.65 +        xend.main.send_management_response(rsp, xend.blkif.pendaddr)
    7.66 +
    7.67 +def backend_do_work(port):
    7.68 +    global pendmsg
    7.69 +    if pendmsg and port.space_to_write_request():
    7.70 +        port.write_request(pendmsg)
    7.71 +        pendmsg = None
    7.72 +        return True
    7.73 +    return False
    7.74 +
    7.75 +
    7.76 +class interface:
    7.77 +
    7.78 +    # Dictionary of all block-device interfaces.
    7.79 +    list = {}
    7.80 +
    7.81 +
    7.82 +    # NB. 'key' is an opaque value that has no meaning in this class.
    7.83 +    def __init__(self, dom, key):
    7.84 +        self.dom     = dom
    7.85 +        self.key     = key
    7.86 +        self.devices = {}
    7.87 +        self.pendmsg = None
    7.88 +        interface.list[key] = self
    7.89 +        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE, 0)
    7.90 +        msg.append_payload(struct.pack("QII",dom,0,0))
    7.91 +        xend.blkif.pendaddr = xend.main.mgmt_req_addr
    7.92 +        backend_tx_req(msg)
    7.93 +
    7.94 +    # Attach a device to the specified interface
    7.95 +    def attach_device(self, vdev, pdev, start_sect, nr_sect, readonly):
    7.96 +        if self.devices.has_key(vdev):
    7.97 +            return False
    7.98 +        self.devices[vdev] = (pdev, start_sect, nr_sect, readonly)
    7.99 +        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE, 0)
   7.100 +        msg.append_payload(struct.pack("QIHII",self.dom,0,vdev,readonly,0))
   7.101 +        xend.blkif.pendaddr = xend.main.mgmt_req_addr
   7.102 +        backend_tx_req(msg)
   7.103 +        return True
   7.104 +
   7.105 +
   7.106 +    # Completely destroy this interface.
   7.107 +    def destroy(self):
   7.108 +        del interface.list[self.key]
   7.109 +        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY, 0)
   7.110 +        msg.append_payload(struct.pack("QII",self.dom,0,0))
   7.111 +        backend_tx_req(msg)        
   7.112 +
   7.113 +
   7.114 +    # The parameter @port is the control-interface event channel. This method
   7.115 +    # returns True if messages were written to the control interface.
   7.116 +    def ctrlif_transmit_work(self, port):
   7.117 +        if self.pendmsg and port.space_to_write_request():
   7.118 +            port.write_request(self.pendmsg)
   7.119 +            self.pendmsg = None
   7.120 +            return True
   7.121 +        return False
   7.122 +
   7.123 +    def ctrlif_tx_req(self, port, msg):
   7.124 +        if port.space_to_write_request():
   7.125 +            port.write_request(msg)
   7.126 +            port.notify()
   7.127 +        else:
   7.128 +            self.pendmsg = msg
   7.129 +
   7.130 +    def ctrlif_rx_req(self, port, msg):
   7.131 +        port.write_response(msg)
   7.132 +        subtype = (msg.get_header())['subtype']
   7.133 +        if subtype == CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
   7.134 +            msg = xend.utils.message(CMSG_BLKIF_FE, \
   7.135 +                                     CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
   7.136 +            msg.append_payload(struct.pack("III",0,1,0))
   7.137 +            self.ctrlif_tx_req(port, msg)
   7.138 +        elif subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT:
   7.139 +            (hnd,frame) = struct.unpack("IL", msg.get_payload())
   7.140 +            xc = Xc.new()
   7.141 +            self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom)
   7.142 +            msg = xend.utils.message(CMSG_BLKIF_BE, \
   7.143 +                                     CMSG_BLKIF_BE_CONNECT, 0)
   7.144 +            msg.append_payload(struct.pack("QIILI",self.dom,0, \
   7.145 +                                           self.evtchn['port1'],frame,0))
   7.146 +            backend_tx_req(msg)
     8.1 --- a/tools/xend/lib/console.py	Sun May 02 00:27:58 2004 +0000
     8.2 +++ b/tools/xend/lib/console.py	Sun May 02 00:52:45 2004 +0000
     8.3 @@ -5,7 +5,7 @@
     8.4  #############################################################
     8.5  
     8.6  import errno, re, os, select, signal, socket, struct, sys
     8.7 -
     8.8 +import xend.blkif, xend.main, xend.manager, xend.utils, Xc
     8.9  
    8.10  ##
    8.11  ## interface:
    8.12 @@ -16,7 +16,7 @@ import errno, re, os, select, signal, so
    8.13  ##   CONNECTED: sending/receiving console data on TCP port 'self.port'
    8.14  ##
    8.15  ##  A dictionary of all active interfaces, indexed by TCP socket descriptor,
    8.16 -##  is accessible as 'interface.interface_list'.
    8.17 +##  is accessible as 'interface.list_by_fd'.
    8.18  ##
    8.19  ##  NB. When a class instance is to be destroyed you *must* call the 'close'
    8.20  ##  method. Otherwise a stale reference will eb left in the interface list.
    8.21 @@ -30,7 +30,11 @@ class interface:
    8.22  
    8.23  
    8.24      # Dictionary of all active (non-closed) console interfaces.
    8.25 -    interface_list = {}
    8.26 +    list_by_fd = {}
    8.27 +
    8.28 +
    8.29 +    # Dictionary of all console interfaces, closed and open.
    8.30 +    list = {}
    8.31  
    8.32  
    8.33      # NB. 'key' is an opaque value that has no meaning in this class.
    8.34 @@ -38,6 +42,9 @@ class interface:
    8.35          self.status = interface.CLOSED
    8.36          self.port   = port
    8.37          self.key    = key
    8.38 +        self.rbuf   = xend.utils.buffer()
    8.39 +        self.wbuf   = xend.utils.buffer()
    8.40 +        interface.list[key] = self
    8.41  
    8.42  
    8.43      # Is this interface closed (inactive)?
    8.44 @@ -58,14 +65,14 @@ class interface:
    8.45      # Close the interface, if it is not closed already.
    8.46      def close(self):
    8.47          if not self.closed():
    8.48 -            del interface.interface_list[self.sock.fileno()]
    8.49 +            del interface.list_by_fd[self.sock.fileno()]
    8.50              self.sock.close()
    8.51              del self.sock
    8.52              self.status = interface.CLOSED
    8.53  
    8.54  
    8.55      # Move the interface into the 'listening' state. Opens a new listening
    8.56 -    # socket and updates 'interface_list'.
    8.57 +    # socket and updates 'list_by_fd'.
    8.58      def listen(self):
    8.59          # Close old socket (if any), and create a fresh one.
    8.60          self.close()
    8.61 @@ -80,7 +87,7 @@ class interface:
    8.62  
    8.63              # Announce the new status of thsi interface.
    8.64              self.status = interface.LISTENING
    8.65 -            interface.interface_list[self.sock.fileno()] = self
    8.66 +            interface.list_by_fd[self.sock.fileno()] = self
    8.67  
    8.68          except:
    8.69              # In case of trouble ensure we get rid of dangling socket reference
    8.70 @@ -105,7 +112,69 @@ class interface:
    8.71          # Publish the new socket and the new interface state.
    8.72          self.sock = sock
    8.73          self.status = interface.CONNECTED
    8.74 -        interface.interface_list[self.sock.fileno()] = self
    8.75 +        interface.list_by_fd[self.sock.fileno()] = self
    8.76          return 1
    8.77  
    8.78  
     8.79 +    # Completely destroy a console interface.
    8.80 +    def destroy(self):
    8.81 +        self.close()
    8.82 +        del interface.list[self.key]
    8.83 +
    8.84 +
    8.85 +    # Do work triggered by resource availability on a console-interface socket.
    8.86 +    def socket_work(self):
    8.87 +        # If the interface is listening, check for pending connections.
    8.88 +        if self.listening():
    8.89 +            self.connect()
    8.90 +
    8.91 +        # All done if the interface is not connected.
    8.92 +        if not self.connected():
    8.93 +            return
    8.94 +
    8.95 +        # Send as much pending data as possible via the socket.
    8.96 +        while not self.rbuf.empty():
    8.97 +            try:
    8.98 +                bytes = self.sock.send(self.rbuf.peek())
    8.99 +                if bytes > 0:
   8.100 +                    self.rbuf.discard(bytes)
   8.101 +            except socket.error, error:
   8.102 +                pass
   8.103 +
   8.104 +        # Read as much data as is available. Don't worry about
   8.105 +        # overflowing our buffer: it's more important to read the
   8.106 +        # incoming data stream and detect errors or closure of the
   8.107 +        # remote end in a timely manner.
   8.108 +        try:
   8.109 +            while 1:
   8.110 +                data = self.sock.recv(2048)
   8.111 +                # Return of zero means the remote end has disconnected.
   8.112 +                # We therefore return the console interface to listening.
   8.113 +                if not data:
   8.114 +                    self.listen()
   8.115 +                    break
   8.116 +                self.wbuf.write(data)
   8.117 +        except socket.error, error:
   8.118 +            # Assume that most errors mean that the connection is dead.
   8.119 +            # In such cases we return the interface to 'listening' state.
   8.120 +            if error[0] != errno.EAGAIN:
   8.121 +                print "Better return to listening"
   8.122 +                self.listen()
   8.123 +                print "New status: " + str(self.status)
   8.124 +
   8.125 +
   8.126 +    # The parameter @port is the control-interface event channel. This method
   8.127 +    # returns True if messages were written to the control interface.
   8.128 +    def ctrlif_transmit_work(self, port):
   8.129 +        work_done = False
   8.130 +        while not self.wbuf.empty() and port.space_to_write_request():
   8.131 +            msg = xend.utils.message(0, 0, 0)
   8.132 +            msg.append_payload(self.wbuf.read(msg.MAX_PAYLOAD))
   8.133 +            port.write_request(msg)
   8.134 +            work_done = True
   8.135 +        return work_done
   8.136 +
   8.137 +
   8.138 +    def ctrlif_rx_req(self, port, msg):
   8.139 +        self.rbuf.write(msg.get_payload())
   8.140 +        port.write_response(msg)
     9.1 --- a/tools/xend/lib/domain_controller.h	Sun May 02 00:27:58 2004 +0000
     9.2 +++ b/tools/xend/lib/domain_controller.h	Sun May 02 00:52:45 2004 +0000
     9.3 @@ -56,29 +56,113 @@ typedef struct {
     9.4  #define CMSG_BLKIF_BE           1  /* Block-device backend  */
     9.5  #define CMSG_BLKIF_FE           2  /* Block-device frontend */
     9.6  
     9.7 +
     9.8 +/******************************************************************************
     9.9 + * CONSOLE DEFINITIONS
    9.10 + */
    9.11 +
    9.12  /*
    9.13   * Subtypes for console messages.
    9.14   */
    9.15  #define CMSG_CONSOLE_DATA       0
    9.16  
    9.17 -/*
    9.18 - * Subtypes for block-device messages.
    9.19 - */
    9.20 -#define CMSG_BLKIF_BE_CREATE      0  /* Create a new block-device interface. */
    9.21 -#define CMSG_BLKIF_BE_DESTROY     1  /* Destroy a block-device interface.    */
    9.22 -#define CMSG_BLKIF_BE_VBD_CREATE  2  /* Create a new VBD for an interface.   */
    9.23 -#define CMSG_BLKIF_BE_VBD_DESTROY 3  /* Delete a VBD from an interface.      */
    9.24 -#define CMSG_BLKIF_BE_VBD_GROW    4  /* Append an extent to a given VBD.     */
    9.25 -#define CMSG_BLKIF_BE_VBD_SHRINK  5  /* Remove last extent from a given VBD. */
    9.26  
    9.27 -/*
    9.28 - * Message request/response defintions for block-device messages.
    9.29 +/******************************************************************************
    9.30 + * BLOCK-INTERFACE FRONTEND DEFINITIONS
    9.31   */
    9.32  
    9.33 +/* Messages from domain controller to guest. */
    9.34 +#define CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED   0
    9.35 +
    9.36 +/* Messages from guest to domain controller. */
    9.37 +#define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED     32
    9.38 +#define CMSG_BLKIF_FE_INTERFACE_CONNECT         33
    9.39 +#define CMSG_BLKIF_FE_INTERFACE_DISCONNECT      34
    9.40 +
    9.41 +/* These are used by both front-end and back-end drivers. */
    9.42  #define blkif_vdev_t   u16
    9.43  #define blkif_pdev_t   u16
    9.44  #define blkif_sector_t u64
    9.45  
    9.46 +/*
    9.47 + * CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
    9.48 + *  Notify a guest about a status change on one of its block interfaces.
    9.49 + *  If the interface is DESTROYED or DOWN then the interface is disconnected:
    9.50 + *   1. The shared-memory frame is available for reuse.
     9.51 + *   2. Any unacknowledged messages pending on the interface were dropped.
    9.52 + */
    9.53 +#define BLKIF_INTERFACE_STATUS_DESTROYED    0 /* Interface doesn't exist.    */
    9.54 +#define BLKIF_INTERFACE_STATUS_DISCONNECTED 1 /* Exists but is disconnected. */
    9.55 +#define BLKIF_INTERFACE_STATUS_CONNECTED    2 /* Exists and is connected.    */
    9.56 +typedef struct {
    9.57 +    unsigned int handle;
    9.58 +    unsigned int status;
    9.59 +    unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_CONNECTED */
    9.60 +} blkif_fe_interface_status_changed_t;
    9.61 +
    9.62 +/*
    9.63 + * CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
    9.64 + *  Notify the domain controller that the front-end driver is DOWN or UP.
    9.65 + *  When the driver goes DOWN then the controller will send no more
    9.66 + *  status-change notifications. When the driver comes UP then the controller
    9.67 + *  will send a notification for each interface that currently exists.
    9.68 + *  If the driver goes DOWN while interfaces are still UP, the domain
    9.69 + *  will automatically take the interfaces DOWN.
    9.70 + */
    9.71 +#define BLKIF_DRIVER_STATUS_DOWN   0
    9.72 +#define BLKIF_DRIVER_STATUS_UP     1
    9.73 +typedef struct {
    9.74 +    unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
    9.75 +} blkif_fe_driver_status_changed_t;
    9.76 +
    9.77 +/*
    9.78 + * CMSG_BLKIF_FE_INTERFACE_CONNECT:
    9.79 + *  If successful, the domain controller will acknowledge with a
    9.80 + *  STATUS_CONNECTED message.
    9.81 + */
    9.82 +typedef struct {
    9.83 +    unsigned int  handle;
    9.84 +    unsigned long shmem_frame;
    9.85 +} blkif_fe_interface_connect_t;
    9.86 +
    9.87 +/*
    9.88 + * CMSG_BLKIF_FE_INTERFACE_DISCONNECT:
    9.89 + *  If successful, the domain controller will acknowledge with a
    9.90 + *  STATUS_DISCONNECTED message.
    9.91 + */
    9.92 +typedef struct {
    9.93 +    /* IN */
    9.94 +    unsigned int handle;
    9.95 +    /* OUT */
    9.96 +    /*
    9.97 +     * Tells driver how many interfaces it should expect to immediately
    9.98 +     * receive notifications about.
    9.99 +     */
   9.100 +    unsigned int nr_interfaces;
   9.101 +} blkif_fe_interface_disconnect_t;
   9.102 +
   9.103 +
   9.104 +/******************************************************************************
   9.105 + * BLOCK-INTERFACE BACKEND DEFINITIONS
   9.106 + */
   9.107 +
   9.108 +/* Messages from domain controller. */
   9.109 +#define CMSG_BLKIF_BE_CREATE      0  /* Create a new block-device interface. */
   9.110 +#define CMSG_BLKIF_BE_DESTROY     1  /* Destroy a block-device interface.    */
   9.111 +#define CMSG_BLKIF_BE_CONNECT     2  /* Connect i/f to remote driver.        */
   9.112 +#define CMSG_BLKIF_BE_DISCONNECT  3  /* Disconnect i/f from remote driver.   */
   9.113 +#define CMSG_BLKIF_BE_VBD_CREATE  4  /* Create a new VBD for an interface.   */
   9.114 +#define CMSG_BLKIF_BE_VBD_DESTROY 5  /* Delete a VBD from an interface.      */
   9.115 +#define CMSG_BLKIF_BE_VBD_GROW    6  /* Append an extent to a given VBD.     */
   9.116 +#define CMSG_BLKIF_BE_VBD_SHRINK  7  /* Remove last extent from a given VBD. */
   9.117 +
   9.118 +/* Messages to domain controller. */
   9.119 +#define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32
   9.120 +
   9.121 +/*
   9.122 + * Message request/response definitions for block-device messages.
   9.123 + */
   9.124 +
   9.125  typedef struct {
   9.126      blkif_pdev_t   device;
   9.127      blkif_sector_t sector_start;
   9.128 @@ -86,21 +170,66 @@ typedef struct {
   9.129  } blkif_extent_t;
   9.130  
   9.131  /* Non-specific 'okay' return. */
   9.132 -#define BLKIF_STATUS_OKAY                0
   9.133 +#define BLKIF_BE_STATUS_OKAY                0
   9.134  /* Non-specific 'error' return. */
   9.135 -#define BLKIF_STATUS_ERROR               1
   9.136 +#define BLKIF_BE_STATUS_ERROR               1
   9.137  /* The following are specific error returns. */
   9.138 -#define BLKIF_STATUS_INTERFACE_EXISTS    2
   9.139 -#define BLKIF_STATUS_INTERFACE_NOT_FOUND 3
   9.140 +#define BLKIF_BE_STATUS_INTERFACE_EXISTS    2
   9.141 +#define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3
   9.142 +#define BLKIF_BE_STATUS_INTERFACE_CONNECTED 4
   9.143 +#define BLKIF_BE_STATUS_VBD_EXISTS          5
   9.144 +#define BLKIF_BE_STATUS_VBD_NOT_FOUND       6
   9.145 +#define BLKIF_BE_STATUS_OUT_OF_MEMORY       7
   9.146 +#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND    8
   9.147 +#define BLKIF_BE_STATUS_MAPPING_ERROR       9
   9.148  
   9.149  /* This macro can be used to create an array of descriptive error strings. */
   9.150 -#define BLKIF_STATUS_ERRORS {    \
   9.151 -    "Okay",                      \
   9.152 -    "Non-specific error",        \
   9.153 -    "Interface already exists",  \
   9.154 -    "Interface not found" }
   9.155 +#define BLKIF_BE_STATUS_ERRORS {    \
   9.156 +    "Okay",                         \
   9.157 +    "Non-specific error",           \
   9.158 +    "Interface already exists",     \
   9.159 +    "Interface not found",          \
   9.160 +    "Interface is still connected", \
   9.161 +    "VBD already exists",           \
   9.162 +    "VBD not found",                \
   9.163 +    "Out of memory",                \
   9.164 +    "Extent not found for VBD",     \
   9.165 +    "Could not map domain memory" }
   9.166  
   9.167 -/* CMSG_BLKIF_CREATE */
   9.168 +/*
   9.169 + * CMSG_BLKIF_BE_CREATE:
   9.170 + *  When the driver sends a successful response then the interface is fully
   9.171 + *  created. The controller will send a DOWN notification to the front-end
   9.172 + *  driver.
   9.173 + */
   9.174 +typedef struct { 
   9.175 +    /* IN */
   9.176 +    domid_t        domid;             /* Domain attached to new interface.   */
   9.177 +    unsigned int   blkif_handle;      /* Domain-specific interface handle.   */
   9.178 +    /* OUT */
   9.179 +    unsigned int   status;
   9.180 +} blkif_be_create_t; 
   9.181 +
   9.182 +/*
   9.183 + * CMSG_BLKIF_BE_DESTROY:
   9.184 + *  When the driver sends a successful response then the interface is fully
   9.185 + *  torn down. The controller will send a DESTROYED notification to the
   9.186 + *  front-end driver.
   9.187 + */
   9.188 +typedef struct { 
   9.189 +    /* IN */
   9.190 +    domid_t        domid;             /* Identify interface to be destroyed. */
   9.191 +    unsigned int   blkif_handle;      /* ...ditto...                         */
   9.192 +    /* OUT */
   9.193 +    unsigned int   status;
   9.194 +} blkif_be_destroy_t; 
   9.195 +
   9.196 +/*
   9.197 + * CMSG_BLKIF_BE_CONNECT:
   9.198 + *  When the driver sends a successful response then the interface is fully
   9.199 + *  connected. The controller will send a CONNECTED notification to the
   9.200 + *  front-end driver.
   9.201 + */
   9.202  typedef struct { 
   9.203      /* IN */
   9.204      domid_t        domid;             /* Domain attached to new interface.   */
   9.205 @@ -109,18 +238,23 @@ typedef struct {
   9.206      unsigned long  shmem_frame;       /* Page cont. shared comms window.     */
   9.207      /* OUT */
   9.208      unsigned int   status;
   9.209 -} blkif_create_t; 
   9.210 +} blkif_be_connect_t; 
   9.211  
   9.212 -/* CMSG_BLKIF_DESTROY */
   9.213 +/*
   9.214 + * CMSG_BLKIF_BE_DISCONNECT:
   9.215 + *  When the driver sends a successful response then the interface is fully
   9.216 + *  disconnected. The controller will send a DOWN notification to the front-end
   9.217 + *  driver.
   9.218 + */
   9.219  typedef struct { 
   9.220      /* IN */
   9.221 -    domid_t        domid;             /* Identify interface to be destroyed. */
   9.222 -    unsigned int   blkif_handle;      /* ...ditto...                         */
   9.223 +    domid_t        domid;             /* Domain attached to new interface.   */
   9.224 +    unsigned int   blkif_handle;      /* Domain-specific interface handle.   */
   9.225      /* OUT */
   9.226      unsigned int   status;
   9.227 -} blkif_destroy_t; 
   9.228 +} blkif_be_disconnect_t; 
   9.229  
   9.230 -/* CMSG_BLKIF_VBD_CREATE */
   9.231 +/* CMSG_BLKIF_BE_VBD_CREATE */
   9.232  typedef struct { 
   9.233      /* IN */
   9.234      domid_t        domid;             /* Identify blkdev interface.          */
   9.235 @@ -129,9 +263,9 @@ typedef struct {
   9.236      int            readonly;          /* Non-zero -> VBD isn't writeable.    */
   9.237      /* OUT */
   9.238      unsigned int   status;
   9.239 -} blkif_vbd_create_t; 
   9.240 +} blkif_be_vbd_create_t; 
   9.241  
   9.242 -/* CMSG_BLKIF_VBD_DESTROY */
   9.243 +/* CMSG_BLKIF_BE_VBD_DESTROY */
   9.244  typedef struct {
   9.245      /* IN */
   9.246      domid_t        domid;             /* Identify blkdev interface.          */
   9.247 @@ -139,9 +273,9 @@ typedef struct {
   9.248      blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
   9.249      /* OUT */
   9.250      unsigned int   status;
   9.251 -} blkif_vbd_destroy_t; 
   9.252 +} blkif_be_vbd_destroy_t; 
   9.253  
   9.254 -/* CMSG_BLKIF_VBD_GROW */
   9.255 +/* CMSG_BLKIF_BE_VBD_GROW */
   9.256  typedef struct { 
   9.257      /* IN */
   9.258      domid_t        domid;             /* Identify blkdev interface.          */
   9.259 @@ -150,9 +284,9 @@ typedef struct {
   9.260      blkif_extent_t extent;            /* Physical extent to append to VBD.   */
   9.261      /* OUT */
   9.262      unsigned int   status;
   9.263 -} blkif_vbd_grow_t; 
   9.264 +} blkif_be_vbd_grow_t; 
   9.265  
   9.266 -/* CMSG_BLKIF_VBD_SHRINK */
   9.267 +/* CMSG_BLKIF_BE_VBD_SHRINK */
   9.268  typedef struct { 
   9.269      /* IN */
   9.270      domid_t        domid;             /* Identify blkdev interface.          */
   9.271 @@ -160,6 +294,23 @@ typedef struct {
   9.272      blkif_vdev_t   vdevice;           /* Interface-specific id of the VBD.   */
   9.273      /* OUT */
   9.274      unsigned int   status;
   9.275 -} blkif_vbd_shrink_t; 
   9.276 +} blkif_be_vbd_shrink_t; 
   9.277 +
   9.278 +/*
   9.279 + * CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED:
   9.280 + *  Notify the domain controller that the back-end driver is DOWN or UP.
   9.281 + *  If the driver goes DOWN while interfaces are still UP, the domain
   9.282 + *  will automatically send DOWN notifications.
   9.283 + */
   9.284 +typedef struct {
   9.285 +    /* IN */
   9.286 +    unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
   9.287 +    /* OUT */
   9.288 +    /*
   9.289 +     * Tells driver how many interfaces it should expect to immediately
   9.290 +     * receive notifications about.
   9.291 +     */
   9.292 +    unsigned int nr_interfaces;
   9.293 +} blkif_be_driver_status_changed_t;
   9.294  
   9.295  #endif /* __DOMAIN_CONTROLLER_H__ */
    10.1 --- a/tools/xend/lib/main.py	Sun May 02 00:27:58 2004 +0000
    10.2 +++ b/tools/xend/lib/main.py	Sun May 02 00:52:45 2004 +0000
    10.3 @@ -5,7 +5,7 @@
    10.4  ###########################################################
    10.5  
    10.6  import errno, re, os, pwd, select, signal, socket, struct, sys, time
    10.7 -import xend.console, xend.manager, xend.utils, Xc
    10.8 +import xend.blkif, xend.console, xend.manager, xend.utils, Xc
    10.9  
   10.10  
   10.11  # The following parameters could be placed in a configuration file.
   10.12 @@ -16,13 +16,35 @@ CONTROL_DIR  = '/var/run/xend'
   10.13  UNIX_SOCK    = 'management_sock' # relative to CONTROL_DIR
   10.14  
   10.15  
   10.16 +CMSG_CONSOLE  = 0
   10.17 +CMSG_BLKIF_BE = 1
   10.18 +CMSG_BLKIF_FE = 2
   10.19 +
   10.20 +
   10.21 +def port_from_dom(dom):
   10.22 +    global port_list
   10.23 +    for idx, port in port_list.items():
   10.24 +        if port.remote_dom == dom:
   10.25 +            return port
   10.26 +    return None
   10.27 +
   10.28 +
   10.29 +def send_management_response(response, addr):
   10.30 +    try:
   10.31 +        response = str(response)
   10.32 +        print "Mgmt_rsp[%s]: %s" % (addr, response)
   10.33 +        management_interface.sendto(response, addr)
   10.34 +    except socket.error, error:
   10.35 +        pass
   10.36 +
   10.37 +
   10.38  def daemon_loop():
   10.39      # Could we do this more nicely? The xend.manager functions need access
   10.40      # to this global state to do their work.
   10.41 -    global control_list, notifier
   10.42 +    global port_list, notifier, management_interface, mgmt_req_addr, dom0_port
   10.43  
   10.44 -    # List of all control interfaces, indexed by local event-channel port.
   10.45 -    control_list = {}
   10.46 +    # Lists of all interfaces, indexed by local event-channel port.
   10.47 +    port_list = {}
   10.48  
   10.49      xc = Xc.new()
   10.50  
   10.51 @@ -44,6 +66,13 @@ def daemon_loop():
   10.52      # notifications.
   10.53      notifier = xend.utils.notifier()
   10.54  
   10.55 +    # The DOM0 control interface is not set up via the management interface.
   10.56 +    # Note that console messages don't come our way (actually, only driver
   10.57 +    # back-ends should use the DOM0 control interface).
   10.58 +    dom0_port = xend.utils.port(0)
   10.59 +    notifier.bind(dom0_port.local_port)
   10.60 +    port_list[dom0_port.local_port] = dom0_port
   10.61 +
   10.62      ##
   10.63      ## MAIN LOOP
   10.64      ## 
   10.65 @@ -58,10 +87,10 @@ def daemon_loop():
   10.66          waitset = select.poll()
   10.67          waitset.register(management_interface, select.POLLIN)
   10.68          waitset.register(notifier, select.POLLIN)
   10.69 -        for idx, (port, rbuf, wbuf, con_if) in control_list.items():
   10.70 +        for idx, con_if in xend.console.interface.list_by_fd.items():
   10.71              if not con_if.closed():
   10.72                  pflags = select.POLLIN
   10.73 -                if not rbuf.empty() and con_if.connected():
   10.74 +                if not con_if.rbuf.empty() and con_if.connected():
   10.75                      pflags = select.POLLIN | select.POLLOUT
   10.76                  waitset.register(con_if.sock.fileno(), pflags)
   10.77  
   10.78 @@ -72,16 +101,16 @@ def daemon_loop():
   10.79          # These should consist of executable Python statements that call
   10.80          # well-known management functions (e.g., new_control_interface(dom=9)).
   10.81          try:
   10.82 -            data, addr = management_interface.recvfrom(2048)
   10.83 +            data, mgmt_req_addr = management_interface.recvfrom(2048)
   10.84          except socket.error, error:
   10.85              if error[0] != errno.EAGAIN:
   10.86                  raise
   10.87          else:
   10.88 -            if addr:
   10.89 +            if mgmt_req_addr:
   10.90                  # Evaluate the request in an exception-trapping sandbox.
   10.91                  try:
   10.92 -                    print "Mgmt_req[%s]: %s" % (addr, data)
   10.93 -                    response = str(eval('xend.manager.'+data))
   10.94 +                    print "Mgmt_req[%s]: %s" % (mgmt_req_addr, data)
   10.95 +                    response = eval('xend.manager.'+data)
   10.96  
   10.97                  except:
   10.98                      # Catch all exceptions and turn into an error response:
   10.99 @@ -97,69 +126,20 @@ def daemon_loop():
  10.100                      response = str(response)
  10.101  
  10.102                  # Try to send a response to the requester.
  10.103 -                try:
  10.104 -                    print "Mgmt_rsp[%s]: %s" % (addr, response)
  10.105 -                    management_interface.sendto(response, addr)
  10.106 -                except socket.error, error:
  10.107 -                    pass
  10.108 +                if response:
  10.109 +                    send_management_response(response, mgmt_req_addr)
  10.110                  
  10.111          # Do work for every console interface that hit in the poll set.
  10.112          for (fd, events) in fdset:
  10.113 -            if not xend.console.interface.interface_list.has_key(fd):
  10.114 -                continue
  10.115 -            con_if = xend.console.interface.interface_list[fd]
  10.116 -
  10.117 -            # If the interface is listening, check for pending connections.
  10.118 -            if con_if.listening():
  10.119 -                con_if.connect()
  10.120 -
  10.121 -            # All done if the interface is not connected.
  10.122 -            if not con_if.connected():
  10.123 -                continue
  10.124 -            (port, rbuf, wbuf, con_if) = control_list[con_if.key]
  10.125 -
  10.126 -            # Send as much pending data as possible via the socket.
  10.127 -            while not rbuf.empty():
  10.128 -                try:
  10.129 -                    bytes = con_if.sock.send(rbuf.peek())
  10.130 -                    if bytes > 0:
  10.131 -                        rbuf.discard(bytes)
  10.132 -                except socket.error, error:
  10.133 -                    pass
  10.134 -
  10.135 -            # Read as much data as is available. Don't worry about
  10.136 -            # overflowing our buffer: it's more important to read the
  10.137 -            # incoming data stream and detect errors or closure of the
  10.138 -            # remote end in a timely manner.
  10.139 -            try:
  10.140 -                while 1:
  10.141 -                    data = con_if.sock.recv(2048)
  10.142 -                    # Return of zero means the remote end has disconnected.
  10.143 -                    # We therefore return the console interface to listening.
  10.144 -                    if not data:
  10.145 -                        con_if.listen()
  10.146 -                        break
  10.147 -                    wbuf.write(data)
  10.148 -            except socket.error, error:
  10.149 -                # Assume that most errors mean that the connection is dead.
  10.150 -                # In such cases we return the interface to 'listening' state.
  10.151 -                if error[0] != errno.EAGAIN:
  10.152 -                    print "Better return to listening"
  10.153 -                    con_if.listen()
  10.154 -                    print "New status: " + str(con_if.status)
  10.155 -
  10.156 -            # We may now have pending data to send via the relevant
  10.157 -            # inter-domain control interface. If so then we send all we can
  10.158 -            # and notify the remote end.
  10.159 -            work_done = False
  10.160 -            while not wbuf.empty() and port.space_to_write_request():
  10.161 -                msg = xend.utils.message(0, 0, 0)
  10.162 -                msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
  10.163 -                port.write_request(msg)
  10.164 -                work_done = True
  10.165 -            if work_done:
  10.166 -                port.notify()
  10.167 -
  10.168 +            if xend.console.interface.list_by_fd.has_key(fd):
  10.169 +                con_if = xend.console.interface.list_by_fd[fd]
  10.170 +                con_if.socket_work()
  10.171 +                # We may now have pending data to send via the control
  10.172 +                # interface. If so then send all we can and notify the remote.
  10.173 +                port = port_list[con_if.key]
  10.174 +                if con_if.ctrlif_transmit_work(port):
  10.175 +                    port.notify()
  10.176 +                    
  10.177          # Process control-interface notifications from other guest OSes.
  10.178          while 1:            
  10.179              # Grab a notification, if there is one.
  10.180 @@ -168,42 +148,69 @@ def daemon_loop():
  10.181                  break
  10.182              (idx, type) = notification
  10.183  
  10.184 -            if not control_list.has_key(idx):
  10.185 +            if not port_list.has_key(idx):
  10.186                  continue
  10.187  
  10.188 -            (port, rbuf, wbuf, con_if) = control_list[idx]
  10.189 +            port = port_list[idx]
  10.190              work_done = False
  10.191  
  10.192 +            con_if = False
  10.193 +            if xend.console.interface.list.has_key(idx):
  10.194 +                con_if = xend.console.interface.list[idx]
  10.195 +
  10.196 +            blk_if = False
  10.197 +            if xend.blkif.interface.list.has_key(idx):
  10.198 +                blk_if = xend.blkif.interface.list[idx]
  10.199 +
  10.200              # If we pick up a disconnect notification then we do any necessary
  10.201              # cleanup.
  10.202              if type == notifier.EXCEPTION:
  10.203                  ret = xc.evtchn_status(idx)
  10.204                  if ret['status'] == 'unbound':
  10.205                      notifier.unbind(idx)
  10.206 -                    con_if.close()
  10.207 -                    del control_list[idx], port, rbuf, wbuf, con_if
  10.208 +                    del port_list[idx], port
  10.209 +                    if con_if:
  10.210 +                        con_if.destroy()
  10.211 +                        del con_if
  10.212 +                    if blk_if:
  10.213 +                        blk_if.destroy()
  10.214 +                        del blk_if
  10.215                      continue
  10.216  
  10.217 -            # Read incoming requests. Currently assume that request
  10.218 -            # message always containb console data.
  10.219 +            # Process incoming requests.
  10.220              while port.request_to_read():
  10.221                  msg = port.read_request()
  10.222 -                rbuf.write(msg.get_payload())
  10.223 -                port.write_response(msg)
  10.224                  work_done = True
  10.225 +                type = (msg.get_header())['type']
  10.226 +                if type == CMSG_CONSOLE and con_if:
  10.227 +                    con_if.ctrlif_rx_req(port, msg)
  10.228 +                elif type == CMSG_BLKIF_FE and blk_if:
  10.229 +                    blk_if.ctrlif_rx_req(port, msg)
  10.230 +                elif type == CMSG_BLKIF_BE and port == dom0_port:
  10.231 +                    xend.blkif.backend_rx_req(port, msg)
  10.232 +                else:
  10.233 +                    port.write_response(msg)
  10.234  
  10.235 -            # Incoming responses are currently thrown on the floor.
  10.236 +            # Process incoming responses.
  10.237              while port.response_to_read():
  10.238                  msg = port.read_response()
  10.239                  work_done = True
  10.240 +                type = (msg.get_header())['type']
  10.241 +                if type == CMSG_BLKIF_BE and port == dom0_port:
  10.242 +                    xend.blkif.backend_rx_rsp(port, msg)
  10.243  
  10.244 -            # Send as much pending console data as there is room for.
  10.245 -            while not wbuf.empty() and port.space_to_write_request():
  10.246 -                msg = xend.utils.message(0, 0, 0)
  10.247 -                msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
  10.248 -                port.write_request(msg)
  10.249 +            # Send console data.
  10.250 +            if con_if and con_if.ctrlif_transmit_work(port):
  10.251                  work_done = True
  10.252  
  10.253 +            # Send blkif messages.
  10.254 +            if blk_if and blk_if.ctrlif_transmit_work(port):
  10.255 +                work_done = True
  10.256 +
  10.257 +            # Back-end block-device work.
  10.258 +            if port == dom0_port and xend.blkif.backend_do_work(port):
  10.259 +                work_done = True
  10.260 +                
  10.261              # Finally, notify the remote end of any work that we did.
  10.262              if work_done:
  10.263                  port.notify()
    11.1 --- a/tools/xend/lib/manager.py	Sun May 02 00:27:58 2004 +0000
    11.2 +++ b/tools/xend/lib/manager.py	Sun May 02 00:52:45 2004 +0000
    11.3 @@ -4,13 +4,13 @@
    11.4  ## Copyright (c) 2004, K A Fraser (University of Cambridge)
    11.5  #############################################################
    11.6  
    11.7 -import xend.console, xend.main, xend.utils
    11.8 +import xend.blkif, xend.console, xend.main, xend.utils
    11.9  
   11.10  
   11.11  ##
   11.12  ## new_control_interface:
   11.13 -##  Create a new control interface with the specified domain 'dom'.
   11.14 -##  The console port may also be specified; otehrwise a suitable port is
   11.15 +##  Create a new control interface with the specified domain @dom.
   11.16 +##  The console port may also be specified; otherwise a suitable port is
   11.17  ##  automatically allocated.
   11.18  ##
   11.19  def new_control_interface(dom, console_port=-1):
   11.20 @@ -26,9 +26,8 @@ def new_control_interface(dom, console_p
   11.21      con_if = xend.console.interface(console_port, port.local_port)
   11.22      con_if.listen()
   11.23  
   11.24 -    # Add control state to the master list.
   11.25 -    xend.main.control_list[port.local_port] = \
   11.26 -      (port, xend.utils.buffer(), xend.utils.buffer(), con_if)
   11.27 +    # Update the master port list.
   11.28 +    xend.main.port_list[port.local_port] = port
   11.29  
   11.30      # Construct the successful response to be returned to the requester.
   11.31      response = { 'success': True }
   11.32 @@ -36,3 +35,81 @@ def new_control_interface(dom, console_p
   11.33      response['remote_port']  = port.remote_port
   11.34      response['console_port'] = console_port
   11.35      return response
   11.36 +
   11.37 +
   11.38 +##
   11.39 +## new_block_interface:
   11.40 +##  Create a new block interface for the specified domain @dom.
   11.41 +##
   11.42 +def new_block_interface(dom, handle=-1):
   11.43 +    # By default we create an interface with handle zero.
   11.44 +    if handle < 0:
   11.45 +        handle = 0
   11.46 +
   11.47 +    # We only support one interface per domain, which must have handle zero.
   11.48 +    if handle != 0:
   11.49 +        response = { 'success': False }
   11.50 +        response['error_type'] = 'Bad handle %d (only handle 0 ' + \
   11.51 +                                 'is supported)' % handle
   11.52 +        return response
   11.53 +
   11.54 +    # Find local event-channel port associated with the specified domain.
   11.55 +    port = xend.main.port_from_dom(dom)
   11.56 +    if not port:
   11.57 +        response = { 'success': False }
   11.58 +        response['error_type'] = 'Unknown domain %d' % dom
   11.59 +        return response
   11.60 +
   11.61 +    # The interface must not already exist.
   11.62 +    if xend.blkif.interface.list.has_key(port.local_port):
   11.63 +        response = { 'success': False }
   11.64 +        response['error_type'] = 'Interface (dom=%d,handle=%d) already ' + \
   11.65 +                                 'exists' % (dom, handle)
   11.66 +        return response
   11.67 +
   11.68 +    # Create the new interface. Initially no virtual devices are attached.
   11.69 +    xend.blkif.interface(dom, port.local_port)
   11.70 +
   11.71 +    # Response is deferred until back-end driver sends acknowledgement.
   11.72 +    return None
   11.73 +
   11.74 +
   11.75 +##
   11.76 +## new_block_device:
   11.77 +##  Attach a new virtual block device to the specified block interface
   11.78 +##  (@dom, @handle). The new device is identified by @vdev, and maps to
   11.79 +##  the real block extent (@pdev, @start_sect, @nr_sect). If @readonly then
   11.80 +##  write requests to @vdev will be rejected.
   11.81 +##
   11.82 +def new_block_device(dom, handle, vdev, pdev, start_sect, nr_sect, readonly):
   11.83 +    # We only support one interface per domain, which must have handle zero.
   11.84 +    if handle != 0:
   11.85 +        response = { 'success': False }
   11.86 +        response['error_type'] = 'Bad handle %d (only handle 0 ' + \
   11.87 +                                 'is supported)' % handle
   11.88 +        return response
   11.89 +
   11.90 +    # Find local event-channel port associated with the specified domain.
   11.91 +    port = xend.main.port_from_dom(dom)
   11.92 +    if not port:
   11.93 +        response = { 'success': False }
   11.94 +        response['error_type'] = 'Unknown domain %d' % dom
   11.95 +        return response
   11.96 +        
   11.97 +    # The interface must exist.
   11.98 +    if not xend.blkif.interface.list.has_key(port.local_port):
   11.99 +        response = { 'success': False }
  11.100 +        response['error_type'] = 'Interface (dom=%d,handle=%d) does not ' + \
  11.101 +                                 'exists' % (dom, handle)
  11.102 +        return response
  11.103 +
  11.104 +    # The virtual device must not yet exist.
  11.105 +    blkif = xend.blkif.interface.list[port.local_port]
  11.106 +    if not blkif.attach_device(vdev, pdev, start_sect, nr_sect, readonly):
  11.107 +        response = { 'success': False }
  11.108 +        response['error_type'] = 'Vdevice (dom=%d,handle=%d,vdevice=%d) ' + \
  11.109 +                                 'already exists' % (dom, handle, vdev)
  11.110 +        return response
  11.111 +
  11.112 +    # Response is deferred until back-end driver sends acknowledgement.
  11.113 +    return None
    12.1 --- a/tools/xend/lib/utils.c	Sun May 02 00:27:58 2004 +0000
    12.2 +++ b/tools/xend/lib/utils.c	Sun May 02 00:52:45 2004 +0000
    12.3 @@ -22,6 +22,8 @@
    12.4  #include <signal.h>
    12.5  #include <xc.h>
    12.6  
    12.7 +#include <asm-xen/proc_cmd.h>
    12.8 +
    12.9  #include <hypervisor-if.h>
   12.10  #include "domain_controller.h"
   12.11  
   12.12 @@ -684,8 +686,23 @@ static PyObject *xu_port_new(PyObject *s
   12.13          goto fail2;
   12.14      }
   12.15  
   12.16 -    if ( xc_evtchn_bind_interdomain(xup->xc_handle, 
   12.17 -                                    DOMID_SELF, dom, &port1, &port2) != 0 )
   12.18 +    if ( dom == 0ULL )
   12.19 +    {
   12.20 +        /*
   12.21 +         * The control-interface event channel for DOM0 is already set up.
   12.22 +         * We use an ioctl to discover the port at our end of the channel.
   12.23 +         */
   12.24 +        port1 = ioctl(xup->xc_handle, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, NULL);
   12.25 +        port2 = -1; /* We don't need the remote end of the DOM0 link. */
   12.26 +        if ( port1 < 0 )
   12.27 +        {
   12.28 +            PyErr_SetString(port_error, "Could not open channel to DOM0");
   12.29 +            goto fail3;
   12.30 +        }
   12.31 +    }
   12.32 +    else if ( xc_evtchn_bind_interdomain(xup->xc_handle, 
   12.33 +                                         DOMID_SELF, dom, 
   12.34 +                                         &port1, &port2) != 0 )
   12.35      {
   12.36          PyErr_SetString(port_error, "Could not open channel to domain");
   12.37          goto fail3;
   12.38 @@ -744,7 +761,8 @@ static void xu_port_dealloc(PyObject *se
   12.39  {
   12.40      xu_port_object *xup = (xu_port_object *)self;
   12.41      unmap_control_interface(xup->mem_fd, xup->interface);
   12.42 -    (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
   12.43 +    if ( xup->remote_dom != 0ULL )
   12.44 +        (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
   12.45      (void)xc_interface_close(xup->xc_handle);
   12.46      (void)close(xup->mem_fd);
   12.47      PyObject_Del(self);
    13.1 --- a/tools/xend/setup.py	Sun May 02 00:27:58 2004 +0000
    13.2 +++ b/tools/xend/setup.py	Sun May 02 00:52:45 2004 +0000
    13.3 @@ -4,7 +4,8 @@ from distutils.core import setup, Extens
    13.4  utils = Extension("utils",
    13.5                    extra_compile_args   = ["-fno-strict-aliasing"],
    13.6                    include_dirs         = ["../xc/lib",
    13.7 -                                          "../../xen/include/hypervisor-ifs"],
    13.8 +                                          "../../xen/include/hypervisor-ifs",
    13.9 +                                          "../../xenolinux-sparse/include"],
   13.10                    library_dirs         = ["../xc/lib"],
   13.11                    libraries            = ["xc"],
   13.12                    sources              = ["lib/utils.c"])
    14.1 --- a/xen/arch/i386/pdb-stub.c	Sun May 02 00:27:58 2004 +0000
    14.2 +++ b/xen/arch/i386/pdb-stub.c	Sun May 02 00:52:45 2004 +0000
    14.3 @@ -51,6 +51,8 @@ static unsigned char  pdb_xmit_checksum;
    14.4  unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
    14.5  void pdb_linux_get_values(char *buffer, int length, unsigned long address,
    14.6  			  int pid, unsigned long cr3);
    14.7 +void pdb_linux_set_values(char *buffer, int length, unsigned long address,
    14.8 +			  int pid, unsigned long cr3);
    14.9  
   14.10  struct pdb_context
   14.11  {
   14.12 @@ -571,6 +573,12 @@ pdb_process_command (char *ptr, struct p
   14.13  			{
   14.14  			    hex2mem (ptr, (char *)addr, length);
   14.15  			}
   14.16 +			else if (pdb_ctx.process != -1)
   14.17 +			{
   14.18 +			    pdb_linux_set_values(ptr, length, addr,
   14.19 +						 pdb_ctx.process, 
   14.20 +						 pdb_ctx.ptbr);
   14.21 +			}
   14.22  			else
   14.23  			{
   14.24  			    pdb_set_values (ptr, length,
    15.1 --- a/xen/common/debug-linux.c	Sun May 02 00:27:58 2004 +0000
    15.2 +++ b/xen/common/debug-linux.c	Sun May 02 00:52:45 2004 +0000
    15.3 @@ -171,6 +171,44 @@ void pdb_linux_get_values(char *buffer, 
    15.4      }
    15.5  }
    15.6  
    15.7 + 
    15.8 +void pdb_linux_set_value(int pid, unsigned long cr3, unsigned long addr,
    15.9 +			 u_char *value)
   15.10 +{
   15.11 +    unsigned long pgd;
   15.12 +    unsigned long l2tab, page;
   15.13 + 
   15.14 +    /* get the process' pgd */
   15.15 +    pgd = pdb_linux_pid_ptbr(cr3, pid);
   15.16 + 
   15.17 +    /* get the l2 table entry */
   15.18 +    pdb_get_values((u_char *) &l2tab, sizeof(l2tab),
   15.19 +		   cr3, pgd + (addr >> PGDIR_SHIFT) * 4);
   15.20 +    l2tab = (unsigned long)__va(machine_to_phys(cr3, l2tab) & PAGE_MASK);
   15.21 + 
   15.22 +    /* get the page table entry */
   15.23 +    pdb_get_values((u_char *) &page, sizeof(page),
   15.24 +		   cr3, l2tab + ((addr & L1_PAGE_BITS) >> PAGE_SHIFT) * 4);
   15.25 +    page = (unsigned long)__va(machine_to_phys(cr3, page) & PAGE_MASK);
   15.26 + 
   15.27 +    /* set the byte */
   15.28 +    pdb_set_values(value, sizeof(u_char), cr3, page + (addr & ~PAGE_MASK));
   15.29 +}
   15.30 + 
   15.31 +void pdb_linux_set_values(char *buffer, int length, unsigned long address,
   15.32 +			  int pid, unsigned long cr3)
   15.33 +{
   15.34 +    int loop;
   15.35 + 
   15.36 +    /* it's difficult to imagine a more inefficient algorithm */
   15.37 +    for (loop = 0; loop < length; loop++)
   15.38 +    {
   15.39 +        pdb_linux_set_value(pid, cr3, address + loop, &buffer[loop * 2]);
   15.40 +    }
   15.41 +}
   15.42 +
   15.43 +/**********************************************************************/
   15.44 +
   15.45  /*
   15.46   * return 1 if is the virtual address is in the operating system's
   15.47   * address space, else 0 
    16.1 --- a/xen/common/event_channel.c	Sun May 02 00:27:58 2004 +0000
    16.2 +++ b/xen/common/event_channel.c	Sun May 02 00:52:45 2004 +0000
    16.3 @@ -109,15 +109,18 @@ static long evtchn_bind_interdomain(evtc
    16.4          goto out;
    16.5      }
    16.6  
    16.7 +    /* 'Allocate' port1 before searching for a free port2. */
    16.8 +    p1->event_channel[port1].state = ECS_INTERDOMAIN;
    16.9 +
   16.10      if ( (port2 = get_free_port(p2)) < 0 )
   16.11      {
   16.12 +        p1->event_channel[port1].state = ECS_FREE;
   16.13          rc = port2;
   16.14          goto out;
   16.15      }
   16.16  
   16.17      p1->event_channel[port1].u.remote.dom  = p2;
   16.18      p1->event_channel[port1].u.remote.port = (u16)port2;
   16.19 -    p1->event_channel[port1].state         = ECS_INTERDOMAIN;
   16.20  
   16.21      p2->event_channel[port2].u.remote.dom  = p1;
   16.22      p2->event_channel[port2].u.remote.port = (u16)port1;
    17.1 --- a/xen/common/kernel.c	Sun May 02 00:27:58 2004 +0000
    17.2 +++ b/xen/common/kernel.c	Sun May 02 00:52:45 2004 +0000
    17.3 @@ -73,6 +73,9 @@ unsigned char opt_pdb[10] = "none";
    17.4  unsigned int opt_tbuf_size = 1;
    17.5  /* opt_sched: scheduler - default to Borrowed Virtual Time */
    17.6  char opt_sched[10] = "bvt";
    17.7 +/* opt_physdev_dom0_hide: list of PCI slots to hide from dom0
    17.8 + * Should have the format '(%02x:%02x.%1x)(%02x:%02x.%1x)...etc' */
    17.9 +char opt_physdev_dom0_hide[20] = "";
   17.10  
   17.11  static struct {
   17.12      unsigned char *name;
   17.13 @@ -94,6 +97,7 @@ static struct {
   17.14      { "pdb",              OPT_STR,  &opt_pdb },
   17.15      { "tbuf_size",        OPT_UINT, &opt_tbuf_size },
   17.16      { "sched",            OPT_STR,  &opt_sched },
   17.17 +    { "physdev_dom0_hide",OPT_STR,  &opt_physdev_dom0_hide },
   17.18      { NULL,               0,        NULL     }
   17.19  };
   17.20  
    18.1 --- a/xen/common/physdev.c	Sun May 02 00:27:58 2004 +0000
    18.2 +++ b/xen/common/physdev.c	Sun May 02 00:52:45 2004 +0000
    18.3 @@ -115,16 +115,17 @@ static void add_dev_to_task(struct task_
    18.4  
    18.5  /*
    18.6   * physdev_pci_access_modify:
    18.7 - * Allow/disallow access to a specific PCI device. Also allow read access to 
    18.8 - * PCI devices from the device to the root of the device tree. If the given 
    18.9 - * device is a bridge, then the domain should get access to all the devices 
   18.10 - * attached to that bridge (XXX this is unimplemented!).
   18.11 + * Allow/disallow access to a specific PCI device.  Guests should not be
   18.12 + * allowed to see bridge devices as it needlessly complicates things (one
   18.13 + * possible exception to this is the AGP bridge).  If the given device is a
   18.14 + * bridge, then the domain should get access to all the leaf devices below
   18.15 + * that bridge (XXX this is unimplemented!).
   18.16   */
   18.17  int physdev_pci_access_modify(
   18.18      domid_t dom, int bus, int dev, int func, int enable)
   18.19  {
   18.20      struct task_struct *p;
   18.21 -    struct pci_dev *pdev, *rdev, *tdev;
   18.22 +    struct pci_dev *pdev;
   18.23      int rc = 0;
   18.24   
   18.25      if ( !IS_PRIV(current) )
   18.26 @@ -145,7 +146,7 @@ int physdev_pci_access_modify(
   18.27          return -ESRCH;
   18.28  
   18.29      /* Make the domain privileged. */
   18.30 -    set_bit(PF_PRIVILEGED, &p->flags); 
   18.31 +    set_bit(PF_PRIVILEGED, &p->flags);
   18.32  
   18.33      /* Grant write access to the specified device. */
   18.34      if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
   18.35 @@ -155,27 +156,10 @@ int physdev_pci_access_modify(
   18.36          goto out;
   18.37      }
   18.38      add_dev_to_task(p, pdev, ACC_WRITE);
   18.39 +
   18.40      INFO("  add RW %02x:%02x:%02x\n", pdev->bus->number,
   18.41           PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
   18.42  
   18.43 -    /* Grant read access to the root device. */
   18.44 -    if ( (rdev = pci_find_slot(0, PCI_DEVFN(0, 0))) == NULL )
   18.45 -    {
   18.46 -        INFO("  bizarre -- no PCI root dev\n");
   18.47 -        rc = -ENODEV;
   18.48 -        goto out;
   18.49 -    }
   18.50 -    add_dev_to_task(p, rdev, ACC_READ);
   18.51 -    INFO("  add R0 %02x:%02x:%02x\n", 0, 0, 0);
   18.52 -
   18.53 -    /* Grant read access to all devices on the path to the root. */
   18.54 -    for ( tdev = pdev->bus->self; tdev != NULL; tdev = tdev->bus->self )
   18.55 -    {
   18.56 -        add_dev_to_task(p, tdev, ACC_READ);
   18.57 -        INFO("  add RO %02x:%02x:%02x\n", tdev->bus->number,
   18.58 -             PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn));
   18.59 -    }
   18.60 -
   18.61      /* Is the device a bridge or cardbus? */
   18.62      if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL )
   18.63          INFO("XXX can't give access to bridge devices yet\n");
   18.64 @@ -256,8 +240,16 @@ static int do_base_address_access(phys_d
   18.65  
   18.66      if ( len != sizeof(u32) )
   18.67      {
   18.68 -        INFO("Guest attempting sub-dword %s to BASE_ADDRESS %d\n", 
   18.69 +        /* This isn't illegal, but there doesn't seem to be a very good reason
   18.70 +         * to do it for normal devices (bridges are another matter).  Since it
   18.71 +         * would complicate the code below, we don't support this for now. */
   18.72 +
   18.73 +        /* We could set *val to some value but the guest may well be in trouble
   18.74 +         * anyway if this write fails.  Hopefully the printk will give us a
   18.75 +         * clue what went wrong. */
   18.76 +        printk("Guest attempting sub-dword %s to BASE_ADDRESS %d\n", 
   18.77               (acc == ACC_READ) ? "read" : "write", idx);
   18.78 +        
   18.79          return -EPERM;
   18.80      }
   18.81  
   18.82 @@ -420,7 +412,13 @@ static long pci_cfgreg_read(int bus, int
   18.83      phys_dev_t *pdev;
   18.84  
   18.85      if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 )
   18.86 -        return ret;
   18.87 +    {
   18.88 +        /* PCI spec states that reads from non-existent devices should return
   18.89 +         * all 1s.  In this case the domain has no read access, which should
   18.90 +         * also look like the device is non-existent. */
   18.91 +        *val = 0xFFFFFFFF;
   18.92 +        return ret; /* KAF: error return seems to matter on my test machine. */
   18.93 +    }
   18.94  
   18.95      /* Fake out read requests for some registers. */
   18.96      switch ( reg )
   18.97 @@ -608,6 +606,21 @@ long do_physdev_op(physdev_op_t *uop)
   18.98      return ret;
   18.99  }
  18.100  
  18.101 +/* Test if boot params specify this device should NOT be visible to DOM0
  18.102 + * (e.g. so that another domain can control it instead) */
  18.103 +int pcidev_dom0_hidden(struct pci_dev *dev)
  18.104 +{
  18.105 +    extern char opt_physdev_dom0_hide[];
  18.106 +    char cmp[10] = "(.......)";
  18.107 +    
  18.108 +    strncpy(&cmp[1], dev->slot_name, 7);
  18.109 +
  18.110 +    if ( strstr(opt_physdev_dom0_hide, dev->slot_name) == NULL )
  18.111 +        return 0;
  18.112 +    
  18.113 +    return 1;
  18.114 +}
  18.115 +
  18.116  
  18.117  /* Domain 0 has read access to all devices. */
  18.118  void physdev_init_dom0(struct task_struct *p)
  18.119 @@ -619,14 +632,22 @@ void physdev_init_dom0(struct task_struc
  18.120  
  18.121      pci_for_each_dev(dev)
  18.122      {
  18.123 -        /* Skip bridges and other peculiarities for now. */
  18.124 -        if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL )
  18.125 -            continue;
  18.126 -        pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL);
  18.127 -        pdev->dev = dev;
  18.128 -        pdev->flags = ACC_WRITE;
  18.129 -        pdev->state = 0;
  18.130 -        pdev->owner = p;
  18.131 -        list_add(&pdev->node, &p->pcidev_list);
  18.132 -	}    
  18.133 +        if ( !pcidev_dom0_hidden(dev) )
  18.134 +        {            
  18.135 +            /* Skip bridges and other peculiarities for now. */
  18.136 +            if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL )
  18.137 +                continue;
  18.138 +            pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL);
  18.139 +            pdev->dev = dev;
  18.140 +            pdev->flags = ACC_WRITE;
  18.141 +            pdev->state = 0;
  18.142 +            pdev->owner = p;
  18.143 +            list_add(&pdev->node, &p->pcidev_list);
  18.144 +        }
  18.145 +        else
  18.146 +        {
  18.147 +            printk("Hiding PCI device %s from DOM0\n", dev->slot_name);
  18.148 +        }
  18.149 +    }
  18.150  }
  18.151 +
    19.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h	Sun May 02 00:27:58 2004 +0000
    19.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h	Sun May 02 00:52:45 2004 +0000
    19.3 @@ -34,23 +34,37 @@ typedef struct blkif_st {
    19.4      unsigned int     evtchn;
    19.5      int              irq;
    19.6      /* Comms information. */
    19.7 -    blk_ring_t      *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
    19.8 +    blkif_ring_t    *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */
    19.9      BLK_RING_IDX     blk_req_cons;  /* Request consumer. */
   19.10      BLK_RING_IDX     blk_resp_prod; /* Private version of response producer. */
   19.11      /* VBDs attached to this interface. */
   19.12      rb_root_t        vbd_rb;        /* Mapping from 16-bit vdevices to VBDs. */
   19.13      spinlock_t       vbd_lock;      /* Protects VBD mapping. */
   19.14      /* Private fields. */
   19.15 +    enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
   19.16 +    /*
   19.17 +     * DISCONNECT response is deferred until pending requests are ack'ed.
   19.18 +     * We therefore need to store the id from the original request.
   19.19 +     */
   19.20 +    u8               disconnect_rspid;
   19.21      struct blkif_st *hash_next;
   19.22      struct list_head blkdev_list;
   19.23      spinlock_t       blk_ring_lock;
   19.24 +    atomic_t         refcnt;
   19.25  } blkif_t;
   19.26  
   19.27 -void blkif_create(blkif_create_t *create);
   19.28 -void blkif_destroy(blkif_destroy_t *destroy);
   19.29 +void blkif_create(blkif_be_create_t *create);
   19.30 +void blkif_destroy(blkif_be_destroy_t *destroy);
   19.31 +void blkif_connect(blkif_be_connect_t *connect);
   19.32 +int  blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id);
   19.33 +void __blkif_disconnect_complete(blkif_t *blkif);
   19.34  blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
   19.35 -void blkif_get(blkif_t *blkif);
   19.36 -void blkif_put(blkif_t *blkif);
   19.37 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
   19.38 +#define blkif_put(_b)                             \
   19.39 +    do {                                          \
   19.40 +        if ( atomic_dec_and_test(&(_b)->refcnt) ) \
   19.41 +            __blkif_disconnect_complete(_b);      \
   19.42 +    } while (0)
   19.43  
   19.44  /* An entry in a list of xen_extents. */
   19.45  typedef struct _blkif_extent_le { 
   19.46 @@ -60,25 +74,25 @@ typedef struct _blkif_extent_le {
   19.47  
   19.48  typedef struct _vbd { 
   19.49      blkif_vdev_t       vdevice;   /* what the domain refers to this vbd as */
   19.50 -    unsigned char      mode;      /* VBD_MODE_{R,W} */
   19.51 +    unsigned char      readonly;  /* Non-zero -> read-only */
   19.52      unsigned char      type;      /* XD_TYPE_xxx */
   19.53      blkif_extent_le_t *extents;   /* list of xen_extents making up this vbd */
   19.54      rb_node_t          rb;        /* for linking into R-B tree lookup struct */
   19.55  } vbd_t; 
   19.56  
   19.57 -long vbd_create(blkif_vbd_create_t *create_params); 
   19.58 -long vbd_grow(blkif_vbd_grow_t *grow_params); 
   19.59 -long vbd_shrink(blkif_vbd_shrink_t *shrink_params);
   19.60 -long vbd_destroy(blkif_vbd_destroy_t *delete_params); 
   19.61 -
   19.62 -void destroy_all_vbds(struct task_struct *p);
   19.63 +void vbd_create(blkif_be_vbd_create_t *create); 
   19.64 +void vbd_grow(blkif_be_vbd_grow_t *grow); 
   19.65 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink);
   19.66 +void vbd_destroy(blkif_be_vbd_destroy_t *delete); 
   19.67 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds);
   19.68 +void destroy_all_vbds(blkif_t *blkif);
   19.69  
   19.70  typedef struct {
   19.71      blkif_t       *blkif;
   19.72      unsigned long  id;
   19.73      atomic_t       pendcnt;
   19.74      unsigned short operation;
   19.75 -    unsigned short status;
   19.76 +    int            status;
   19.77  } pending_req_t;
   19.78  
   19.79  /* Describes a [partial] disk extent (part of a block io request) */
   19.80 @@ -91,7 +105,10 @@ typedef struct {
   19.81  
   19.82  int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 
   19.83  
   19.84 -int blkif_be_controller_init(void);
   19.85 +void blkif_interface_init(void);
   19.86 +void blkif_ctrlif_init(void);
   19.87 +
   19.88 +void blkif_deschedule(blkif_t *blkif);
   19.89  
   19.90  void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
   19.91  
    20.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c	Sun May 02 00:27:58 2004 +0000
    20.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c	Sun May 02 00:52:45 2004 +0000
    20.3 @@ -10,37 +10,50 @@
    20.4  
    20.5  static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
    20.6  {
    20.7 +    DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype);
    20.8 +    
    20.9      switch ( msg->subtype )
   20.10      {
   20.11      case CMSG_BLKIF_BE_CREATE:
   20.12 -        if ( msg->length != sizeof(blkif_create_t) )
   20.13 +        if ( msg->length != sizeof(blkif_be_create_t) )
   20.14              goto parse_error;
   20.15 -        blkif_create((blkif_create_t *)&msg->msg[0]);
   20.16 +        blkif_create((blkif_be_create_t *)&msg->msg[0]);
   20.17          break;        
   20.18      case CMSG_BLKIF_BE_DESTROY:
   20.19 -        if ( msg->length != sizeof(blkif_destroy_t) )
   20.20 +        if ( msg->length != sizeof(blkif_be_destroy_t) )
   20.21 +            goto parse_error;
   20.22 +        blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]);
   20.23 +        break;        
   20.24 +    case CMSG_BLKIF_BE_CONNECT:
   20.25 +        if ( msg->length != sizeof(blkif_be_connect_t) )
   20.26              goto parse_error;
   20.27 -        blkif_destroy((blkif_destroy_t *)&msg->msg[0]);
   20.28 +        blkif_connect((blkif_be_connect_t *)&msg->msg[0]);
   20.29 +        break;        
   20.30 +    case CMSG_BLKIF_BE_DISCONNECT:
   20.31 +        if ( msg->length != sizeof(blkif_be_disconnect_t) )
   20.32 +            goto parse_error;
   20.33 +        if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) )
   20.34 +            return; /* Sending the response is deferred until later. */
   20.35          break;        
   20.36      case CMSG_BLKIF_BE_VBD_CREATE:
   20.37 -        if ( msg->length != sizeof(blkif_vbd_create_t) )
   20.38 +        if ( msg->length != sizeof(blkif_be_vbd_create_t) )
   20.39              goto parse_error;
   20.40 -        vbd_create((blkif_vbd_create_t *)&msg->msg[0]);
   20.41 +        vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]);
   20.42          break;
   20.43      case CMSG_BLKIF_BE_VBD_DESTROY:
   20.44 -        if ( msg->length != sizeof(blkif_vbd_destroy_t) )
   20.45 +        if ( msg->length != sizeof(blkif_be_vbd_destroy_t) )
   20.46              goto parse_error;
   20.47 -        vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]);
   20.48 +        vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]);
   20.49          break;
   20.50      case CMSG_BLKIF_BE_VBD_GROW:
   20.51 -        if ( msg->length != sizeof(blkif_vbd_grow_t) )
   20.52 +        if ( msg->length != sizeof(blkif_be_vbd_grow_t) )
   20.53              goto parse_error;
   20.54 -        vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]);
   20.55 +        vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]);
   20.56          break;
   20.57      case CMSG_BLKIF_BE_VBD_SHRINK:
   20.58 -        if ( msg->length != sizeof(blkif_vbd_shrink_t) )
   20.59 +        if ( msg->length != sizeof(blkif_be_vbd_shrink_t) )
   20.60              goto parse_error;
   20.61 -        vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]);
   20.62 +        vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]);
   20.63          break;
   20.64      default:
   20.65          goto parse_error;
   20.66 @@ -50,12 +63,24 @@ static void blkif_ctrlif_rx(ctrl_msg_t *
   20.67      return;
   20.68  
   20.69   parse_error:
   20.70 +    DPRINTK("Parse error while reading message subtype %d, len %d\n",
   20.71 +            msg->subtype, msg->length);
   20.72      msg->length = 0;
   20.73      ctrl_if_send_response(msg);
   20.74  }
   20.75  
   20.76 -int blkif_ctrlif_init(void)
   20.77 +void blkif_ctrlif_init(void)
   20.78  {
   20.79 +    ctrl_msg_t                       cmsg;
   20.80 +    blkif_be_driver_status_changed_t st;
   20.81 +
   20.82      (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx);
   20.83 -    return 0;
   20.84 +
   20.85 +    /* Send a driver-UP notification to the domain controller. */
   20.86 +    cmsg.type      = CMSG_BLKIF_BE;
   20.87 +    cmsg.subtype   = CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED;
   20.88 +    cmsg.length    = sizeof(blkif_be_driver_status_changed_t);
   20.89 +    st.status      = BLKIF_DRIVER_STATUS_UP;
   20.90 +    memcpy(cmsg.msg, &st, sizeof(st));
   20.91 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
   20.92  }
    21.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c	Sun May 02 00:27:58 2004 +0000
    21.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c	Sun May 02 00:52:45 2004 +0000
    21.3 @@ -12,85 +12,223 @@
    21.4  #define BLKIF_HASH(_d,_h) \
    21.5      (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1))
    21.6  
    21.7 -static blkif_t *blkif_hash[BLKIF_HASHSZ];
    21.8 +static kmem_cache_t *blkif_cachep;
    21.9 +static blkif_t      *blkif_hash[BLKIF_HASHSZ];
   21.10  
   21.11  blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle)
   21.12  {
   21.13      blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)];
   21.14      while ( (blkif != NULL) && 
   21.15 -            (blkif->domid != domid) && 
   21.16 -            (blkif->handle != handle) )
   21.17 +            ((blkif->domid != domid) || (blkif->handle != handle)) )
   21.18          blkif = blkif->hash_next;
   21.19      return blkif;
   21.20  }
   21.21  
   21.22 -void blkif_create(blkif_create_t *create)
   21.23 +void __blkif_disconnect_complete(blkif_t *blkif)
   21.24 +{
   21.25 +    ctrl_msg_t            cmsg;
   21.26 +    blkif_be_disconnect_t disc;
   21.27 +
   21.28 +    /*
   21.29 +     * These can't be done in __blkif_disconnect() because at that point there
   21.30 +     * may be outstanding requests at the disc whose asynchronous responses
   21.31 +     * must still be notified to the remote driver.
   21.32 +     */
   21.33 +    unbind_evtchn_from_irq(blkif->evtchn);
   21.34 +    vfree(blkif->blk_ring_base);
   21.35 +
   21.36 +    /* Construct the deferred response message. */
   21.37 +    cmsg.type         = CMSG_BLKIF_BE;
   21.38 +    cmsg.subtype      = CMSG_BLKIF_BE_DISCONNECT;
   21.39 +    cmsg.id           = blkif->disconnect_rspid;
   21.40 +    cmsg.length       = sizeof(blkif_be_disconnect_t);
   21.41 +    disc.domid        = blkif->domid;
   21.42 +    disc.blkif_handle = blkif->handle;
   21.43 +    disc.status       = BLKIF_BE_STATUS_OKAY;
   21.44 +    memcpy(cmsg.msg, &disc, sizeof(disc));
   21.45 +
   21.46 +    /*
   21.47 +     * Make sure message is constructed /before/ status change, because
   21.48 +     * after the status change the 'blkif' structure could be deallocated at
   21.49 +     * any time. Also make sure we send the response /after/ status change,
   21.50 +     * as otherwise a subsequent CONNECT request could spuriously fail if
   21.51 +     * another CPU doesn't see the status change yet.
   21.52 +     */
   21.53 +    mb();
   21.54 +    if ( blkif->status != DISCONNECTING )
   21.55 +        BUG();
   21.56 +    blkif->status = DISCONNECTED;
   21.57 +    mb();
   21.58 +
   21.59 +    /* Send the successful response. */
   21.60 +    ctrl_if_send_response(&cmsg);
   21.61 +}
   21.62 +
   21.63 +void blkif_create(blkif_be_create_t *create)
   21.64  {
   21.65      domid_t       domid  = create->domid;
   21.66      unsigned int  handle = create->blkif_handle;
   21.67 -    unsigned int  evtchn = create->evtchn;
   21.68 -    unsigned long shmem_frame = create->shmem_frame;
   21.69      blkif_t     **pblkif, *blkif;
   21.70  
   21.71 -    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
   21.72 -    while ( *pblkif == NULL )
   21.73 +    if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_ATOMIC)) == NULL )
   21.74      {
   21.75 -        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
   21.76 -            goto found_match;
   21.77 -        pblkif = &(*pblkif)->hash_next;
   21.78 +        DPRINTK("Could not create blkif: out of memory\n");
   21.79 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   21.80 +        return;
   21.81      }
   21.82  
   21.83 -    blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
   21.84      memset(blkif, 0, sizeof(*blkif));
   21.85 -    blkif->domid       = domid;
   21.86 -    blkif->handle      = handle;
   21.87 -    blkif->evtchn      = evtchn;
   21.88 -    blkif->irq         = bind_evtchn_to_irq(evtchn);
   21.89 -    blkif->shmem_frame = shmem_frame;
   21.90 -    blkif->shmem_vbase = ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE);
   21.91 +    blkif->domid  = domid;
   21.92 +    blkif->handle = handle;
   21.93 +    blkif->status = DISCONNECTED;
   21.94      spin_lock_init(&blkif->vbd_lock);
   21.95      spin_lock_init(&blkif->blk_ring_lock);
   21.96 +    atomic_set(&blkif->refcnt, 0);
   21.97  
   21.98 -    request_irq(irq, blkif_be_int, 0, "blkif-backend", blkif);
   21.99 +    pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
  21.100 +    while ( *pblkif != NULL )
  21.101 +    {
  21.102 +        if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) )
  21.103 +        {
  21.104 +            DPRINTK("Could not create blkif: already exists\n");
  21.105 +            create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS;
  21.106 +            kmem_cache_free(blkif_cachep, blkif);
  21.107 +            return;
  21.108 +        }
  21.109 +        pblkif = &(*pblkif)->hash_next;
  21.110 +    }
  21.111  
  21.112      blkif->hash_next = *pblkif;
  21.113      *pblkif = blkif;
  21.114  
  21.115 -    create->status = BLKIF_STATUS_OKAY;
  21.116 -    return;
  21.117 -
  21.118 - found_match:
  21.119 -    create->status = BLKIF_STATUS_INTERFACE_EXISTS;
  21.120 -    return;
  21.121 -
  21.122 - evtchn_in_use:
  21.123 -    unbind_evtchn_from_irq(evtchn); /* drop refcnt */
  21.124 -    create->status = BLKIF_STATUS_ERROR;
  21.125 -    return;
  21.126 +    DPRINTK("Successfully created blkif\n");
  21.127 +    create->status = BLKIF_BE_STATUS_OKAY;
  21.128  }
  21.129  
  21.130 -void blkif_destroy(blkif_destroy_t *destroy)
  21.131 +void blkif_destroy(blkif_be_destroy_t *destroy)
  21.132  {
  21.133      domid_t       domid  = destroy->domid;
  21.134      unsigned int  handle = destroy->blkif_handle;
  21.135      blkif_t     **pblkif, *blkif;
  21.136  
  21.137      pblkif = &blkif_hash[BLKIF_HASH(domid, handle)];
  21.138 -    while ( (blkif = *pblkif) == NULL )
  21.139 +    while ( (blkif = *pblkif) != NULL )
  21.140      {
  21.141          if ( (blkif->domid == domid) && (blkif->handle == handle) )
  21.142 -            goto found_match;
  21.143 +        {
  21.144 +            if ( blkif->status != DISCONNECTED )
  21.145 +                goto still_connected;
  21.146 +            goto destroy;
  21.147 +        }
  21.148          pblkif = &blkif->hash_next;
  21.149      }
  21.150  
  21.151 -    destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
  21.152 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  21.153 +    return;
  21.154 +
  21.155 + still_connected:
  21.156 +    destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
  21.157      return;
  21.158  
  21.159 - found_match:
  21.160 -    free_irq(blkif->irq, NULL);
  21.161 -    unbind_evtchn_from_irq(blkif->evtchn);
  21.162 + destroy:
  21.163      *pblkif = blkif->hash_next;
  21.164 +    destroy_all_vbds(blkif);
  21.165      kmem_cache_free(blkif_cachep, blkif);
  21.166 -    destroy->status = BLKIF_STATUS_OKAY;
  21.167 +    destroy->status = BLKIF_BE_STATUS_OKAY;
  21.168  }
  21.169  
  21.170 +void blkif_connect(blkif_be_connect_t *connect)
  21.171 +{
  21.172 +    domid_t       domid  = connect->domid;
  21.173 +    unsigned int  handle = connect->blkif_handle;
  21.174 +    unsigned int  evtchn = connect->evtchn;
  21.175 +    unsigned long shmem_frame = connect->shmem_frame;
  21.176 +    struct vm_struct *vma;
  21.177 +    pgprot_t      prot;
  21.178 +    int           error;
  21.179 +    blkif_t      *blkif;
  21.180 +
  21.181 +    blkif = blkif_find_by_handle(domid, handle);
  21.182 +    if ( unlikely(blkif == NULL) )
  21.183 +    {
  21.184 +        DPRINTK("blkif_connect attempted for non-existent blkif (%llu,%u)\n", 
  21.185 +                connect->domid, connect->blkif_handle); 
  21.186 +        connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  21.187 +        return;
  21.188 +    }
  21.189 +
  21.190 +    if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL )
  21.191 +    {
  21.192 +        connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  21.193 +        return;
  21.194 +    }
  21.195 +
  21.196 +    prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED);
  21.197 +    error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr),
  21.198 +                                    shmem_frame<<PAGE_SHIFT, PAGE_SIZE,
  21.199 +                                    prot, domid);
  21.200 +    if ( error != 0 )
  21.201 +    {
  21.202 +        if ( error == -ENOMEM )
  21.203 +            connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  21.204 +        else if ( error == -EFAULT )
  21.205 +            connect->status = BLKIF_BE_STATUS_MAPPING_ERROR;
  21.206 +        else
  21.207 +            connect->status = BLKIF_BE_STATUS_ERROR;
  21.208 +        vfree(vma->addr);
  21.209 +        return;
  21.210 +    }
  21.211 +
  21.212 +    if ( blkif->status != DISCONNECTED )
  21.213 +    {
  21.214 +        connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED;
  21.215 +        vfree(vma->addr);
  21.216 +        return;
  21.217 +    }
  21.218 +
  21.219 +    blkif->evtchn        = evtchn;
  21.220 +    blkif->irq           = bind_evtchn_to_irq(evtchn);
  21.221 +    blkif->shmem_frame   = shmem_frame;
  21.222 +    blkif->blk_ring_base = (blkif_ring_t *)vma->addr;
  21.223 +    blkif->status        = CONNECTED;
  21.224 +    blkif_get(blkif);
  21.225 +
  21.226 +    request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif);
  21.227 +
  21.228 +    connect->status = BLKIF_BE_STATUS_OKAY;
  21.229 +}
  21.230 +
  21.231 +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id)
  21.232 +{
  21.233 +    domid_t       domid  = disconnect->domid;
  21.234 +    unsigned int  handle = disconnect->blkif_handle;
  21.235 +    blkif_t      *blkif;
  21.236 +
  21.237 +    blkif = blkif_find_by_handle(domid, handle);
  21.238 +    if ( unlikely(blkif == NULL) )
  21.239 +    {
  21.240 +        DPRINTK("blkif_disconnect attempted for non-existent blkif"
  21.241 +                " (%llu,%u)\n", disconnect->domid, disconnect->blkif_handle); 
  21.242 +        disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  21.243 +        return 1; /* Caller will send response error message. */
  21.244 +    }
  21.245 +
  21.246 +    if ( blkif->status == CONNECTED )
  21.247 +    {
  21.248 +        blkif->status = DISCONNECTING;
  21.249 +        blkif->disconnect_rspid = rsp_id;
  21.250 +        wmb(); /* Let other CPUs see the status change. */
  21.251 +        free_irq(blkif->irq, NULL);
  21.252 +        blkif_deschedule(blkif);
  21.253 +        blkif_put(blkif);
  21.254 +    }
  21.255 +
  21.256 +    return 0; /* Caller should not send response message. */
  21.257 +}
  21.258 +
  21.259 +void __init blkif_interface_init(void)
  21.260 +{
  21.261 +    blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
  21.262 +                                     0, 0, NULL, NULL);
  21.263 +    memset(blkif_hash, 0, sizeof(blkif_hash));
  21.264 +}
    22.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c	Sun May 02 00:27:58 2004 +0000
    22.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c	Sun May 02 00:52:45 2004 +0000
    22.3 @@ -24,6 +24,18 @@
    22.4  #define MAX_PENDING_REQS 64
    22.5  #define BATCH_PER_DOMAIN 16
    22.6  
    22.7 +static struct vm_struct *mmap_vma;
    22.8 +#define MMAP_PAGES_PER_SEGMENT \
    22.9 +    ((BLKIF_MAX_SEGMENTS_PER_REQUEST >> (PAGE_SHIFT-9)) + 1)
   22.10 +#define MMAP_PAGES_PER_REQUEST \
   22.11 +    (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * MMAP_PAGES_PER_SEGMENT)
   22.12 +#define MMAP_PAGES             \
   22.13 +    (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
   22.14 +#define MMAP_VADDR(_req,_seg)            \
   22.15 +    ((unsigned long)mmap_vma->addr +     \
   22.16 +     ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
   22.17 +     ((_seg) * MMAP_PAGES_PER_SEGMENT * PAGE_SIZE))
   22.18 +
   22.19  /*
   22.20   * Each outstanding request that we've passed to the lower device layers has a 
   22.21   * 'pending_req' allocated to it. Each buffer_head that completes decrements 
   22.22 @@ -46,22 +58,11 @@ static PEND_RING_IDX pending_prod, pendi
   22.23  
   22.24  static kmem_cache_t *buffer_head_cachep;
   22.25  
   22.26 -static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned;
   22.27 -
   22.28 -static int lock_buffer(blkif_t *blkif,
   22.29 -                       unsigned long buffer,
   22.30 -                       unsigned short size,
   22.31 -                       int writeable_buffer);
   22.32 -static void unlock_buffer(unsigned long buffer,
   22.33 -                          unsigned short size,
   22.34 -                          int writeable_buffer);
   22.35 -
   22.36 -static void io_schedule(unsigned long unused);
   22.37  static int do_block_io_op(blkif_t *blkif, int max_to_do);
   22.38 -static void dispatch_rw_block_io(blkif_t *blkif,
   22.39 -                                 blk_ring_req_entry_t *req);
   22.40 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req);
   22.41 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req);
   22.42  static void make_response(blkif_t *blkif, unsigned long id, 
   22.43 -                          unsigned short op, unsigned long st);
   22.44 +                          unsigned short op, int st);
   22.45  
   22.46  
   22.47  /******************************************************************
   22.48 @@ -95,7 +96,7 @@ static void add_to_blkdev_list_tail(blki
   22.49      unsigned long flags;
   22.50      if ( __on_blkdev_list(blkif) ) return;
   22.51      spin_lock_irqsave(&io_schedule_list_lock, flags);
   22.52 -    if ( !__on_blkdev_list(blkif) )
   22.53 +    if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) )
   22.54      {
   22.55          list_add_tail(&blkif->blkdev_list, &io_schedule_list);
   22.56          blkif_get(blkif);
   22.57 @@ -108,8 +109,6 @@ static void add_to_blkdev_list_tail(blki
   22.58   * SCHEDULER FUNCTIONS
   22.59   */
   22.60  
   22.61 -static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
   22.62 -
   22.63  static void io_schedule(unsigned long unused)
   22.64  {
   22.65      blkif_t          *blkif;
   22.66 @@ -132,6 +131,8 @@ static void io_schedule(unsigned long un
   22.67      run_task_queue(&tq_disk);
   22.68  }
   22.69  
   22.70 +static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0);
   22.71 +
   22.72  static void maybe_trigger_io_schedule(void)
   22.73  {
   22.74      /*
   22.75 @@ -155,28 +156,26 @@ static void maybe_trigger_io_schedule(vo
   22.76  static void end_block_io_op(struct buffer_head *bh, int uptodate)
   22.77  {
   22.78      pending_req_t *pending_req = bh->b_private;
   22.79 +    unsigned long  flags;
   22.80  
   22.81      /* An error fails the entire request. */
   22.82      if ( !uptodate )
   22.83      {
   22.84          DPRINTK("Buffer not up-to-date at end of operation\n");
   22.85 -        pending_req->status = 2;
   22.86 +        pending_req->status = BLKIF_RSP_ERROR;
   22.87      }
   22.88  
   22.89 -    unlock_buffer(virt_to_phys(bh->b_data), 
   22.90 -                  bh->b_size, 
   22.91 -                  (pending_req->operation==READ));
   22.92 -    
   22.93      if ( atomic_dec_and_test(&pending_req->pendcnt) )
   22.94      {
   22.95 +        int pending_idx = pending_req - pending_reqs;
   22.96 +        vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
   22.97 +                          MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
   22.98          make_response(pending_req->blkif, pending_req->id,
   22.99                        pending_req->operation, pending_req->status);
  22.100          blkif_put(pending_req->blkif);
  22.101 -        spin_lock(&pend_prod_lock);
  22.102 -        pending_ring[MASK_PEND_IDX(pending_prod)] = 
  22.103 -            pending_req - pending_reqs;
  22.104 -        pending_prod++;
  22.105 -        spin_unlock(&pend_prod_lock);
  22.106 +        spin_lock_irqsave(&pend_prod_lock, flags);
  22.107 +        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
  22.108 +        spin_unlock_irqrestore(&pend_prod_lock, flags);
  22.109          maybe_trigger_io_schedule();
  22.110      }
  22.111  }
  22.112 @@ -200,45 +199,10 @@ void blkif_be_int(int irq, void *dev_id,
  22.113   * DOWNWARD CALLS -- These interface with the block-device layer proper.
  22.114   */
  22.115  
  22.116 -static int lock_buffer(blkif_t *blkif,
  22.117 -                       unsigned long buffer,
  22.118 -                       unsigned short size,
  22.119 -                       int writeable_buffer)
  22.120 -{
  22.121 -    unsigned long    pfn;
  22.122 -
  22.123 -    for ( pfn = buffer >> PAGE_SHIFT; 
  22.124 -          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
  22.125 -          pfn++ )
  22.126 -    {
  22.127 -    }
  22.128 -
  22.129 -    return 1;
  22.130 -
  22.131 - fail:
  22.132 -    while ( pfn-- > (buffer >> PAGE_SHIFT) )
  22.133 -    {        
  22.134 -    }
  22.135 -    return 0;
  22.136 -}
  22.137 -
  22.138 -static void unlock_buffer(unsigned long buffer,
  22.139 -                          unsigned short size,
  22.140 -                          int writeable_buffer)
  22.141 -{
  22.142 -    unsigned long pfn;
  22.143 -
  22.144 -    for ( pfn = buffer >> PAGE_SHIFT; 
  22.145 -          pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT);
  22.146 -          pfn++ )
  22.147 -    {
  22.148 -    }
  22.149 -}
  22.150 -
  22.151  static int do_block_io_op(blkif_t *blkif, int max_to_do)
  22.152  {
  22.153 -    blk_ring_t *blk_ring = blkif->blk_ring_base;
  22.154 -    blk_ring_req_entry_t *req;
  22.155 +    blkif_ring_t *blk_ring = blkif->blk_ring_base;
  22.156 +    blkif_request_t *req;
  22.157      BLK_RING_IDX i;
  22.158      int more_to_do = 0;
  22.159  
  22.160 @@ -262,11 +226,15 @@ static int do_block_io_op(blkif_t *blkif
  22.161              dispatch_rw_block_io(blkif, req);
  22.162              break;
  22.163  
  22.164 +        case BLKIF_OP_PROBE:
  22.165 +            dispatch_probe(blkif, req);
  22.166 +            break;
  22.167 +
  22.168          default:
  22.169              DPRINTK("error: unknown block io operation [%d]\n",
  22.170                      blk_ring->ring[i].req.operation);
  22.171              make_response(blkif, blk_ring->ring[i].req.id, 
  22.172 -                          blk_ring->ring[i].req.operation, 1);
  22.173 +                          blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR);
  22.174              break;
  22.175          }
  22.176      }
  22.177 @@ -275,24 +243,64 @@ static int do_block_io_op(blkif_t *blkif
  22.178      return more_to_do;
  22.179  }
  22.180  
  22.181 -static void dispatch_rw_block_io(blkif_t *blkif,
  22.182 -                                 blk_ring_req_entry_t *req)
  22.183 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req)
  22.184 +{
  22.185 +    int      i, rc, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
  22.186 +    pgprot_t prot;
  22.187 +
  22.188 +    /* Check that number of segments is sane. */
  22.189 +    if ( unlikely(req->nr_segments == 0) || 
  22.190 +         unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
  22.191 +    {
  22.192 +        DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
  22.193 +        goto bad_descriptor;
  22.194 +    }
  22.195 +
  22.196 +    prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
  22.197 +    for ( i = 0; i < req->nr_segments; i++ )
  22.198 +    {
  22.199 +        if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) )
  22.200 +            goto bad_descriptor;
  22.201 +        rc = direct_remap_area_pages(&init_mm, 
  22.202 +                                     MMAP_VADDR(pending_idx, i),
  22.203 +                                     req->buffer_and_sects[i] & PAGE_MASK, 
  22.204 +                                     PAGE_SIZE, prot, blkif->domid);
  22.205 +        if ( rc != 0 )
  22.206 +            goto bad_descriptor;
  22.207 +    }
  22.208 +
  22.209 +    rc = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 
  22.210 +                   (req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t));
  22.211 +
  22.212 +    vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
  22.213 +                      MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
  22.214 +    make_response(blkif, req->id, req->operation, rc);
  22.215 +    return;
  22.216 +
  22.217 + bad_descriptor:
  22.218 +    vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
  22.219 +                      MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
  22.220 +    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
  22.221 +}
  22.222 +
  22.223 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req)
  22.224  {
  22.225      extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
  22.226      struct buffer_head *bh;
  22.227 -    int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ;
  22.228 +    int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
  22.229      unsigned short nr_sects;
  22.230      unsigned long buffer;
  22.231 -    int i, tot_sects;
  22.232 +    int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
  22.233      pending_req_t *pending_req;
  22.234 +    pgprot_t       prot;
  22.235  
  22.236      /* We map virtual scatter/gather segments to physical segments. */
  22.237      int new_segs, nr_psegs = 0;
  22.238 -    phys_seg_t phys_seg[MAX_BLK_SEGS * 2];
  22.239 +    phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2];
  22.240  
  22.241      /* Check that number of segments is sane. */
  22.242      if ( unlikely(req->nr_segments == 0) || 
  22.243 -         unlikely(req->nr_segments > MAX_BLK_SEGS) )
  22.244 +         unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) )
  22.245      {
  22.246          DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments);
  22.247          goto bad_descriptor;
  22.248 @@ -310,8 +318,11 @@ static void dispatch_rw_block_io(blkif_t
  22.249          nr_sects = req->buffer_and_sects[i] &  0x1FF;
  22.250  
  22.251          if ( unlikely(nr_sects == 0) )
  22.252 +            continue;
  22.253 +
  22.254 +        if ( unlikely(nr_sects > BLKIF_MAX_SECTORS_PER_SEGMENT) )
  22.255          {
  22.256 -            DPRINTK("zero-sized data request\n");
  22.257 +            DPRINTK("Too many sectors in segment\n");
  22.258              goto bad_descriptor;
  22.259          }
  22.260  
  22.261 @@ -333,29 +344,41 @@ static void dispatch_rw_block_io(blkif_t
  22.262          }
  22.263    
  22.264          nr_psegs += new_segs;
  22.265 -        ASSERT(nr_psegs <= MAX_BLK_SEGS*2);
  22.266 +        ASSERT(nr_psegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST*2);
  22.267      }
  22.268  
  22.269 +    /* Nonsensical zero-sized request? */
  22.270 +    if ( unlikely(nr_psegs == 0) )
  22.271 +        goto bad_descriptor;
  22.272 +
  22.273 +    if ( operation == READ )
  22.274 +        prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW);
  22.275 +    else
  22.276 +        prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED);
  22.277 +
  22.278      for ( i = 0; i < nr_psegs; i++ )
  22.279      {
  22.280 -        if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 
  22.281 -                                   phys_seg[i].nr_sects << 9,
  22.282 -                                   operation==READ)) )
  22.283 +        unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) + 
  22.284 +                            (phys_seg[i].nr_sects << 9) + 
  22.285 +                            (PAGE_SIZE - 1)) & PAGE_MASK;
  22.286 +        int rc = direct_remap_area_pages(&init_mm, 
  22.287 +                                         MMAP_VADDR(pending_idx, i),
  22.288 +                                         phys_seg[i].buffer & PAGE_MASK, 
  22.289 +                                         sz, prot, blkif->domid);
  22.290 +        if ( rc != 0 )
  22.291          {
  22.292              DPRINTK("invalid buffer\n");
  22.293 -            while ( i-- > 0 )
  22.294 -                unlock_buffer(phys_seg[i].buffer, 
  22.295 -                              phys_seg[i].nr_sects << 9,
  22.296 -                              operation==READ);
  22.297 +            vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 
  22.298 +                              MMAP_PAGES_PER_REQUEST * PAGE_SIZE);
  22.299              goto bad_descriptor;
  22.300          }
  22.301      }
  22.302  
  22.303 -    pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]];
  22.304 +    pending_req = &pending_reqs[pending_idx];
  22.305      pending_req->blkif     = blkif;
  22.306      pending_req->id        = req->id;
  22.307      pending_req->operation = operation;
  22.308 -    pending_req->status    = 0;
  22.309 +    pending_req->status    = BLKIF_RSP_OKAY;
  22.310      atomic_set(&pending_req->pendcnt, nr_psegs);
  22.311  
  22.312      blkif_get(blkif);
  22.313 @@ -363,38 +386,37 @@ static void dispatch_rw_block_io(blkif_t
  22.314      /* Now we pass each segment down to the real blkdev layer. */
  22.315      for ( i = 0; i < nr_psegs; i++ )
  22.316      {
  22.317 -        bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL);
  22.318 +        bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC);
  22.319          if ( unlikely(bh == NULL) )
  22.320              panic("bh is null\n");
  22.321          memset(bh, 0, sizeof (struct buffer_head));
  22.322 -    
  22.323 +
  22.324 +        init_waitqueue_head(&bh->b_wait);
  22.325          bh->b_size          = phys_seg[i].nr_sects << 9;
  22.326          bh->b_dev           = phys_seg[i].dev;
  22.327 +        bh->b_rdev          = phys_seg[i].dev;
  22.328          bh->b_rsector       = (unsigned long)phys_seg[i].sector_number;
  22.329 -
  22.330 -        /* SMH: we store a 'pseudo-virtual' bogus address in b_data since
  22.331 -           later code will undo this transformation (i.e. +-PAGE_OFFSET). */
  22.332 -        bh->b_data          = phys_to_virt(phys_seg[i].buffer);
  22.333 - 
  22.334 -        /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */
  22.335 -        bh->b_page          = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 
  22.336 +        bh->b_data          = (char *)MMAP_VADDR(pending_idx, i) +
  22.337 +            (phys_seg[i].buffer & ~PAGE_MASK);
  22.338          bh->b_end_io        = end_block_io_op;
  22.339          bh->b_private       = pending_req;
  22.340  
  22.341 -        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock);
  22.342 +        bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 
  22.343 +            (1 << BH_Req) | (1 << BH_Launder);
  22.344          if ( operation == WRITE )
  22.345              bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate);
  22.346  
  22.347          atomic_set(&bh->b_count, 1);
  22.348  
  22.349          /* Dispatch a single request. We'll flush it to disc later. */
  22.350 -        submit_bh(operation, bh);
  22.351 +        generic_make_request(operation, bh);
  22.352      }
  22.353  
  22.354 +    pending_cons++;
  22.355      return;
  22.356  
  22.357   bad_descriptor:
  22.358 -    make_response(blkif, req->id, req->operation, 1);
  22.359 +    make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
  22.360  } 
  22.361  
  22.362  
  22.363 @@ -405,12 +427,13 @@ static void dispatch_rw_block_io(blkif_t
  22.364  
  22.365  
  22.366  static void make_response(blkif_t *blkif, unsigned long id, 
  22.367 -                          unsigned short op, unsigned long st)
  22.368 +                          unsigned short op, int st)
  22.369  {
  22.370 -    blk_ring_resp_entry_t *resp;
  22.371 +    blkif_response_t *resp;
  22.372 +    unsigned long     flags;
  22.373  
  22.374      /* Place on the response ring for the relevant domain. */ 
  22.375 -    spin_lock(&blkif->blk_ring_lock);
  22.376 +    spin_lock_irqsave(&blkif->blk_ring_lock, flags);
  22.377      resp = &blkif->blk_ring_base->
  22.378          ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp;
  22.379      resp->id        = id;
  22.380 @@ -418,85 +441,44 @@ static void make_response(blkif_t *blkif
  22.381      resp->status    = st;
  22.382      wmb();
  22.383      blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod;
  22.384 -    spin_unlock(&blkif->blk_ring_lock);
  22.385 +    spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
  22.386  
  22.387      /* Kick the relevant domain. */
  22.388      notify_via_evtchn(blkif->evtchn);
  22.389  }
  22.390  
  22.391 -static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs)
  22.392 +void blkif_deschedule(blkif_t *blkif)
  22.393  {
  22.394 -#if 0
  22.395 -    unsigned long flags;
  22.396 -    struct task_struct *p;
  22.397 -    blk_ring_t *blk_ring;
  22.398 -    int i;
  22.399 -
  22.400 -    printk("Dumping block queue stats: nr_pending = %d"
  22.401 -           " (prod=0x%08x,cons=0x%08x)\n",
  22.402 -           NR_PENDING_REQS, pending_prod, pending_cons);
  22.403 -
  22.404 -    read_lock_irqsave(&tasklist_lock, flags);
  22.405 -    for_each_domain ( p )
  22.406 -    {
  22.407 -        printk("Domain: %llu\n", blkif->domain);
  22.408 -        blk_ring = blkif->blk_ring_base;
  22.409 -        printk("  req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/"
  22.410 -               "0x%08x on_list=%d\n",
  22.411 -               blk_ring->req_prod, blkif->blk_req_cons,
  22.412 -               blk_ring->resp_prod, blkif->blk_resp_prod,
  22.413 -               __on_blkdev_list(p));
  22.414 -    }
  22.415 -    read_unlock_irqrestore(&tasklist_lock, flags);
  22.416 -
  22.417 -    for ( i = 0; i < MAX_PENDING_REQS; i++ )
  22.418 -    {
  22.419 -        printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n",
  22.420 -               i, pending_reqs[i].domain, pending_reqs[i].id,
  22.421 -               atomic_read(&pending_reqs[i].pendcnt), 
  22.422 -               pending_reqs[i].operation, pending_reqs[i].status);
  22.423 -    }
  22.424 -#endif
  22.425 -}
  22.426 -
  22.427 -void unlink_blkdev_info(blkif_t *blkif)
  22.428 -{
  22.429 -    unsigned long flags;
  22.430 -
  22.431 -    spin_lock_irqsave(&io_schedule_list_lock, flags);
  22.432 -    if ( __on_blkdev_list(blkif) )
  22.433 -    {
  22.434 -        list_del(&blkif->blkdev_list);
  22.435 -        blkif->blkdev_list.next = (void *)0xdeadbeef;
  22.436 -        blkif_put(blkif);
  22.437 -    }
  22.438 -    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
  22.439 +    remove_from_blkdev_list(blkif);
  22.440  }
  22.441  
  22.442  static int __init init_module(void)
  22.443  {
  22.444      int i;
  22.445  
  22.446 +    blkif_interface_init();
  22.447 +
  22.448 +    if ( (mmap_vma = get_vm_area(MMAP_PAGES * PAGE_SIZE, VM_IOREMAP)) == NULL )
  22.449 +    {
  22.450 +        printk(KERN_WARNING "Could not allocate VMA for blkif backend.\n");
  22.451 +        return -ENOMEM;
  22.452 +    }
  22.453 +
  22.454      pending_cons = 0;
  22.455      pending_prod = MAX_PENDING_REQS;
  22.456      memset(pending_reqs, 0, sizeof(pending_reqs));
  22.457      for ( i = 0; i < MAX_PENDING_REQS; i++ )
  22.458          pending_ring[i] = i;
  22.459      
  22.460 -    for ( i = 0; i < NR_CPUS; i++ )
  22.461 -        completed_bhs[i] = NULL;
  22.462 -        
  22.463      spin_lock_init(&io_schedule_list_lock);
  22.464      INIT_LIST_HEAD(&io_schedule_list);
  22.465  
  22.466 -    if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 
  22.467 -                     SA_SHIRQ, "blkif-backend-dbg", &blkif_debug_int) != 0 )
  22.468 -        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
  22.469 -
  22.470      buffer_head_cachep = kmem_cache_create(
  22.471          "buffer_head_cache", sizeof(struct buffer_head),
  22.472          0, SLAB_HWCACHE_ALIGN, NULL, NULL);
  22.473  
  22.474 +    blkif_ctrlif_init();
  22.475 +
  22.476      return 0;
  22.477  }
  22.478  
    23.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c	Sun May 02 00:27:58 2004 +0000
    23.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c	Sun May 02 00:52:45 2004 +0000
    23.3 @@ -8,7 +8,7 @@
    23.4  
    23.5  #include "common.h"
    23.6  
    23.7 -void vbd_create(blkif_vbd_create_t *create) 
    23.8 +void vbd_create(blkif_be_vbd_create_t *create) 
    23.9  {
   23.10      vbd_t       *vbd; 
   23.11      rb_node_t  **rb_p, *rb_parent = NULL;
   23.12 @@ -18,9 +18,9 @@ void vbd_create(blkif_vbd_create_t *crea
   23.13      blkif = blkif_find_by_handle(create->domid, create->blkif_handle);
   23.14      if ( unlikely(blkif == NULL) )
   23.15      {
   23.16 -        DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", 
   23.17 +        DPRINTK("vbd_create attempted for non-existent blkif (%llu,%u)\n", 
   23.18                  create->domid, create->blkif_handle); 
   23.19 -        create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   23.20 +        create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   23.21          return;
   23.22      }
   23.23  
   23.24 @@ -42,49 +42,50 @@ void vbd_create(blkif_vbd_create_t *crea
   23.25          else
   23.26          {
   23.27              DPRINTK("vbd_create attempted for already existing vbd\n");
   23.28 -            create->status = BLKIF_STATUS_VBD_EXISTS;
   23.29 +            create->status = BLKIF_BE_STATUS_VBD_EXISTS;
   23.30              goto out;
   23.31          }
   23.32      }
   23.33  
   23.34 -    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) )
   23.35 +    if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) )
   23.36      {
   23.37          DPRINTK("vbd_create: out of memory\n");
   23.38 -        create->status = BLKIF_STATUS_OUT_OF_MEMORY;
   23.39 +        create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
   23.40          goto out;
   23.41      }
   23.42  
   23.43 -    vbd->vdevice = vdevice; 
   23.44 -    vbd->mode    = create->mode; 
   23.45 -    vbd->type    = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   23.46 -    vbd->extents = NULL; 
   23.47 +    vbd->vdevice  = vdevice; 
   23.48 +    vbd->readonly = create->readonly;
   23.49 +    vbd->type     = VDISK_TYPE_DISK | VDISK_FLAG_VIRT;
   23.50 +    vbd->extents  = NULL; 
   23.51  
   23.52      rb_link_node(&vbd->rb, rb_parent, rb_p);
   23.53      rb_insert_color(&vbd->rb, &blkif->vbd_rb);
   23.54  
   23.55 -    create->status = BLKIF_STATUS_OKAY;
   23.56 +    DPRINTK("Successful creation of vdev=%04x (dom=%llu)\n",
   23.57 +            vdevice, create->domid);
   23.58 +    create->status = BLKIF_BE_STATUS_OKAY;
   23.59  
   23.60   out:
   23.61      spin_unlock(&blkif->vbd_lock);
   23.62 -    blkif_put(blkif);
   23.63  }
   23.64  
   23.65  
   23.66  /* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. */
   23.67 -void vbd_grow(blkif_vbd_grow_t *grow) 
   23.68 +void vbd_grow(blkif_be_vbd_grow_t *grow) 
   23.69  {
   23.70 -    blkif_t          *blkif;
   23.71 -    xen_extent_le_t **px, *x; 
   23.72 -    vbd_t            *vbd = NULL;
   23.73 -    rb_node_t        *rb;
   23.74 -    blkif_vdev_t      vdevice = grow->vdevice;
   23.75 +    blkif_t            *blkif;
   23.76 +    blkif_extent_le_t **px, *x; 
   23.77 +    vbd_t              *vbd = NULL;
   23.78 +    rb_node_t          *rb;
   23.79 +    blkif_vdev_t        vdevice = grow->vdevice;
   23.80  
   23.81      blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle);
   23.82      if ( unlikely(blkif == NULL) )
   23.83      {
   23.84 -        DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", 
   23.85 +        DPRINTK("vbd_grow attempted for non-existent blkif (%llu,%u)\n", 
   23.86                  grow->domid, grow->blkif_handle); 
   23.87 -        grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
   23.88 +        grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
   23.89          return;
   23.90      }
   23.91  
   23.92 @@ -105,49 +106,51 @@ void vbd_grow(blkif_vbd_grow_t *grow)
   23.93      if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
   23.94      {
   23.95          DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n");
   23.96 -        grow->status = BLKIF_STATUS_VBD_NOT_FOUND;
   23.97 +        grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
   23.98          goto out;
   23.99      } 
  23.100  
  23.101 -    if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) )
  23.102 +    if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), 
  23.103 +                               GFP_ATOMIC)) == NULL) )
  23.104      {
  23.105          DPRINTK("vbd_grow: out of memory\n");
  23.106 -        grow->status = BLKIF_STATUS_OUT_OF_MEMORY;
  23.107 +        grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY;
  23.108          goto out;
  23.109      }
  23.110   
  23.111      x->extent.device        = grow->extent.device; 
  23.112      x->extent.sector_start  = grow->extent.sector_start; 
  23.113      x->extent.sector_length = grow->extent.sector_length; 
  23.114 -    x->next                 = (xen_extent_le_t *)NULL; 
  23.115 +    x->next                 = (blkif_extent_le_t *)NULL; 
  23.116  
  23.117      for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 
  23.118          continue;
  23.119  
  23.120      *px = x;
  23.121  
  23.122 -    grow->status = BLKIF_STATUS_OKAY;
  23.123 +    DPRINTK("Successful grow of vdev=%04x (dom=%llu)\n",
  23.124 +            vdevice, grow->domid);
  23.125 +    grow->status = BLKIF_BE_STATUS_OKAY;
  23.126  
  23.127   out:
  23.128      spin_unlock(&blkif->vbd_lock);
  23.129 -    blkif_put(blkif);
  23.130  }
  23.131  
  23.132  
  23.133 -void vbd_shrink(blkif_vbd_shrink_t *shrink)
  23.134 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink)
  23.135  {
  23.136 -    blkif_t          *blkif;
  23.137 -    xen_extent_le_t **px, *x; 
  23.138 -    vbd_t            *vbd = NULL;
  23.139 -    rb_node_t        *rb;
  23.140 -    blkif_vdev_t      vdevice = shrink->vdevice;
  23.141 +    blkif_t            *blkif;
  23.142 +    blkif_extent_le_t **px, *x; 
  23.143 +    vbd_t              *vbd = NULL;
  23.144 +    rb_node_t          *rb;
  23.145 +    blkif_vdev_t        vdevice = shrink->vdevice;
  23.146  
  23.147      blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle);
  23.148      if ( unlikely(blkif == NULL) )
  23.149      {
  23.150 -        DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n", 
  23.151 +        DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,%u)\n", 
  23.152                  shrink->domid, shrink->blkif_handle); 
  23.153 -        shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
  23.154 +        shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  23.155          return;
  23.156      }
  23.157  
  23.158 @@ -167,13 +170,13 @@ void vbd_shrink(blkif_vbd_shrink_t *shri
  23.159  
  23.160      if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) )
  23.161      {
  23.162 -        shrink->status = BLKIF_STATUS_VBD_NOT_FOUND;
  23.163 +        shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  23.164          goto out;
  23.165      }
  23.166  
  23.167      if ( unlikely(vbd->extents == NULL) )
  23.168      {
  23.169 -        shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND;
  23.170 +        shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND;
  23.171          goto out;
  23.172      }
  23.173  
  23.174 @@ -185,28 +188,27 @@ void vbd_shrink(blkif_vbd_shrink_t *shri
  23.175      *px = x->next;
  23.176      kfree(x);
  23.177  
  23.178 -    shrink->status = BLKIF_STATUS_OKAY;
  23.179 +    shrink->status = BLKIF_BE_STATUS_OKAY;
  23.180  
  23.181   out:
  23.182      spin_unlock(&blkif->vbd_lock);
  23.183 -    blkif_put(blkif);
  23.184  }
  23.185  
  23.186  
  23.187 -void vbd_destroy(blkif_vbd_destroy_t *destroy) 
  23.188 +void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 
  23.189  {
  23.190 -    blkif_t         *blkif;
  23.191 -    vbd_t           *vbd;
  23.192 -    rb_node_t       *rb;
  23.193 -    xen_extent_le_t *x, *t;
  23.194 -    blkif_vdev_t     vdevice = destroy->vdevice;
  23.195 +    blkif_t           *blkif;
  23.196 +    vbd_t             *vbd;
  23.197 +    rb_node_t         *rb;
  23.198 +    blkif_extent_le_t *x, *t;
  23.199 +    blkif_vdev_t       vdevice = destroy->vdevice;
  23.200  
  23.201      blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle);
  23.202      if ( unlikely(blkif == NULL) )
  23.203      {
  23.204 -        DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n", 
  23.205 +        DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,%u)\n", 
  23.206                  destroy->domid, destroy->blkif_handle); 
  23.207 -        destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND;
  23.208 +        destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND;
  23.209          return;
  23.210      }
  23.211  
  23.212 @@ -224,7 +226,7 @@ void vbd_destroy(blkif_vbd_destroy_t *de
  23.213              goto found;
  23.214      }
  23.215  
  23.216 -    destroy->status = BLKIF_STATUS_VBD_NOT_FOUND;
  23.217 +    destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND;
  23.218      goto out;
  23.219  
  23.220   found:
  23.221 @@ -241,7 +243,6 @@ void vbd_destroy(blkif_vbd_destroy_t *de
  23.222      
  23.223   out:
  23.224      spin_unlock(&blkif->vbd_lock);
  23.225 -    blkif_put(blkif);
  23.226  }
  23.227  
  23.228  
  23.229 @@ -249,7 +250,7 @@ void destroy_all_vbds(blkif_t *blkif)
  23.230  {
  23.231      vbd_t *vbd;
  23.232      rb_node_t *rb;
  23.233 -    xen_extent_le_t *x, *t;
  23.234 +    blkif_extent_le_t *x, *t;
  23.235  
  23.236      spin_lock(&blkif->vbd_lock);
  23.237  
  23.238 @@ -273,51 +274,30 @@ void destroy_all_vbds(blkif_t *blkif)
  23.239  }
  23.240  
  23.241  
  23.242 -static int vbd_probe_single(xen_disk_info_t *xdi, 
  23.243 -                            vbd_t *vbd, 
  23.244 -                            struct task_struct *p)
  23.245 +static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd)
  23.246  {
  23.247 -    xen_extent_le_t *x; 
  23.248 -    xen_disk_t cur_disk; 
  23.249 -
  23.250 -    if ( xdi->count == xdi->max )
  23.251 -    {
  23.252 -        DPRINTK("vbd_probe_devices: out of space for probe.\n"); 
  23.253 -        return -ENOMEM; 
  23.254 -    }
  23.255 +    blkif_extent_le_t *x; 
  23.256  
  23.257 -    cur_disk.device = vbd->vdevice; 
  23.258 -    cur_disk.info   = vbd->type;
  23.259 -    if ( !VBD_CAN_WRITE(vbd) )
  23.260 -        cur_disk.info |= XD_FLAG_RO; 
  23.261 -    cur_disk.capacity = 0ULL;
  23.262 +    vbd_info->device = vbd->vdevice; 
  23.263 +    vbd_info->info   = vbd->type;
  23.264 +    if ( vbd->readonly )
  23.265 +        vbd_info->info |= VDISK_FLAG_RO; 
  23.266 +    vbd_info->capacity = 0ULL;
  23.267      for ( x = vbd->extents; x != NULL; x = x->next )
  23.268 -        cur_disk.capacity += x->extent.nr_sectors; 
  23.269 -    cur_disk.domain = p->domain; 
  23.270 +        vbd_info->capacity += x->extent.sector_length; 
  23.271          
  23.272 -    /* Now copy into relevant part of user-space buffer */
  23.273 -    if( copy_to_user(&xdi->disks[xdi->count], 
  23.274 -                     &cur_disk, 
  23.275 -                     sizeof(xen_disk_t)) )
  23.276 -    { 
  23.277 -        DPRINTK("vbd_probe_devices: copy_to_user failed\n");
  23.278 -        return -EFAULT;
  23.279 -    } 
  23.280 -        
  23.281 -    xdi->count++; 
  23.282 -
  23.283      return 0;
  23.284  }
  23.285  
  23.286  
  23.287 -static int vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p)
  23.288 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds)
  23.289  {
  23.290 -    int rc = 0;
  23.291 +    int rc = 0, nr_vbds = 0;
  23.292      rb_node_t *rb;
  23.293  
  23.294 -    spin_lock(&p->vbd_lock);
  23.295 +    spin_lock(&blkif->vbd_lock);
  23.296  
  23.297 -    if ( (rb = p->vbd_rb.rb_node) == NULL )
  23.298 +    if ( (rb = blkif->vbd_rb.rb_node) == NULL )
  23.299          goto out;
  23.300  
  23.301   new_subtree:
  23.302 @@ -328,7 +308,10 @@ static int vbd_probe_devices(xen_disk_in
  23.303      for ( ; ; )
  23.304      {
  23.305          /* STEP 2. Dealt with left subtree. Now process current node. */
  23.306 -        if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 )
  23.307 +        if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds], 
  23.308 +                                    rb_entry(rb, vbd_t, rb))) != 0 )
  23.309 +            goto out;
  23.310 +        if ( ++nr_vbds == max_vbds )
  23.311              goto out;
  23.312  
  23.313          /* STEP 3. Process right subtree, if any. */
  23.314 @@ -355,146 +338,22 @@ static int vbd_probe_devices(xen_disk_in
  23.315      }
  23.316  
  23.317   out:
  23.318 -    spin_unlock(&p->vbd_lock);
  23.319 -    return rc;  
  23.320 -}
  23.321 -
  23.322 -
  23.323 -/*
  23.324 - * Return information about the VBDs available for a given domain, or for all 
  23.325 - * domains; in the general case the 'domain' argument will be 0 which means 
  23.326 - * "information about the caller"; otherwise the 'domain' argument will 
  23.327 - * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of 
  23.328 - * these cases require the caller to be privileged.
  23.329 - */
  23.330 -long vbd_probe(vbd_probe_t *probe) 
  23.331 -{
  23.332 -    struct task_struct *p = NULL; 
  23.333 -    unsigned long flags;
  23.334 -    long ret = 0;  
  23.335 -
  23.336 -    if ( probe->domain != 0 )
  23.337 -    { 
  23.338 -        /* We can only probe for ourselves (unless we're privileged). */
  23.339 -        if( (probe->domain != current->domain) && !IS_PRIV(current) )
  23.340 -            return -EPERM; 
  23.341 -
  23.342 -        if ( (probe->domain != VBD_PROBE_ALL) &&
  23.343 -             ((p = find_domain_by_id(probe->domain)) == NULL) )
  23.344 -        {
  23.345 -            DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 
  23.346 -                    probe->domain); 
  23.347 -            return -EINVAL; 
  23.348 -        }
  23.349 -    }
  23.350 -    else
  23.351 -    { 
  23.352 -        /* Default is to probe for ourselves. */
  23.353 -        p = current; 
  23.354 -        get_task_struct(p); /* to mirror final put_task_struct */
  23.355 -    }
  23.356 -
  23.357 -    if ( probe->domain == VBD_PROBE_ALL )
  23.358 -    { 
  23.359 -        read_lock_irqsave(&tasklist_lock, flags);
  23.360 -        for_each_domain ( p )
  23.361 -        {
  23.362 -            if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
  23.363 -            { 
  23.364 -                read_unlock_irqrestore(&tasklist_lock, flags);
  23.365 -                goto out; 
  23.366 -            }
  23.367 -        }
  23.368 -        read_unlock_irqrestore(&tasklist_lock, flags);
  23.369 -    } 
  23.370 -    else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 )
  23.371 -        goto out; 
  23.372 -
  23.373 - out: 
  23.374 -    if ( ret != 0 )
  23.375 -        DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 
  23.376 -    if ( p != NULL )
  23.377 -        put_task_struct(p); 
  23.378 -    return ret; 
  23.379 +    spin_unlock(&blkif->vbd_lock);
  23.380 +    return (rc == 0) ? nr_vbds : rc;  
  23.381  }
  23.382  
  23.383  
  23.384 -long vbd_info(vbd_info_t *info) 
  23.385 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation)
  23.386  {
  23.387 -    struct task_struct *p; 
  23.388 -    xen_extent_le_t *x; 
  23.389 -    xen_extent_t *extents; 
  23.390 -    vbd_t *vbd = NULL;
  23.391 -    rb_node_t *rb;
  23.392 -    long ret = 0;  
  23.393 -   
  23.394 -    if ( (info->domain != current->domain) && !IS_PRIV(current) )
  23.395 -        return -EPERM; 
  23.396 -
  23.397 -    if ( (p = find_domain_by_id(info->domain)) == NULL )
  23.398 -    {
  23.399 -        DPRINTK("vbd_info attempted for non-existent domain %llu\n", 
  23.400 -                info->domain); 
  23.401 -        return -EINVAL; 
  23.402 -    }
  23.403 -
  23.404 -    spin_lock(&p->vbd_lock);
  23.405 -
  23.406 -    rb = p->vbd_rb.rb_node;
  23.407 -    while ( rb != NULL )
  23.408 -    {
  23.409 -        vbd = rb_entry(rb, vbd_t, rb);
  23.410 -        if ( info->vdevice < vbd->vdevice )
  23.411 -            rb = rb->rb_left;
  23.412 -        else if ( info->vdevice > vbd->vdevice )
  23.413 -            rb = rb->rb_right;
  23.414 -        else
  23.415 -            break;
  23.416 -    }
  23.417 +    blkif_extent_le_t *x; 
  23.418 +    vbd_t             *vbd;
  23.419 +    rb_node_t         *rb;
  23.420 +    blkif_sector_t     sec_off;
  23.421 +    unsigned long      nr_secs;
  23.422  
  23.423 -    if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) )
  23.424 -    {
  23.425 -        DPRINTK("vbd_info attempted on non-existent VBD.\n"); 
  23.426 -        ret = -EINVAL; 
  23.427 -        goto out; 
  23.428 -    }
  23.429 -
  23.430 -    info->mode     = vbd->mode;
  23.431 -    info->nextents = 0; 
  23.432 +    spin_lock(&blkif->vbd_lock);
  23.433  
  23.434 -    extents = info->extents;
  23.435 -    for ( x = vbd->extents; x != NULL; x = x->next )
  23.436 -    {
  23.437 -        if ( info->nextents == info->maxextents )
  23.438 -            break;
  23.439 -        if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) )
  23.440 -        {
  23.441 -            DPRINTK("vbd_info: copy_to_user failed\n");
  23.442 -            ret = -EFAULT;
  23.443 -            goto out; 
  23.444 -        } 
  23.445 -        extents++;
  23.446 -        info->nextents++;
  23.447 -    }
  23.448 -
  23.449 - out: 
  23.450 -    spin_unlock(&p->vbd_lock);
  23.451 -    put_task_struct(p); 
  23.452 -    return ret; 
  23.453 -}
  23.454 -
  23.455 -
  23.456 -int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation)
  23.457 -{
  23.458 -    xen_extent_le_t *x; 
  23.459 -    vbd_t *vbd;
  23.460 -    rb_node_t *rb;
  23.461 -    xen_sector_t sec_off;
  23.462 -    unsigned long nr_secs;
  23.463 -
  23.464 -    spin_lock(&p->vbd_lock);
  23.465 -
  23.466 -    rb = p->vbd_rb.rb_node;
  23.467 +    rb = blkif->vbd_rb.rb_node;
  23.468      while ( rb != NULL )
  23.469      {
  23.470          vbd = rb_entry(rb, vbd_t, rb);
  23.471 @@ -507,42 +366,41 @@ int vbd_translate(phys_seg_t *pseg, stru
  23.472      }
  23.473  
  23.474      DPRINTK("vbd_translate; domain %llu attempted to access "
  23.475 -            "non-existent VBD.\n", p->domain); 
  23.476 +            "non-existent VBD.\n", blkif->domid);
  23.477  
  23.478 -    spin_unlock(&p->vbd_lock);
  23.479 +    spin_unlock(&blkif->vbd_lock);
  23.480      return -ENODEV; 
  23.481  
  23.482   found:
  23.483  
  23.484 -    if ( ((operation == READ) && !VBD_CAN_READ(vbd)) ||
  23.485 -         ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) )
  23.486 +    if ( (operation == WRITE) && vbd->readonly )
  23.487      {
  23.488 -        spin_unlock(&p->vbd_lock);
  23.489 +        spin_unlock(&blkif->vbd_lock);
  23.490          return -EACCES; 
  23.491      }
  23.492  
  23.493      /*
  23.494 -     * Now iterate through the list of xen_extents, working out which should 
  23.495 +     * Now iterate through the list of blkif_extents, working out which should 
  23.496       * be used to perform the translation.
  23.497       */
  23.498      sec_off = pseg->sector_number; 
  23.499      nr_secs = pseg->nr_sects;
  23.500      for ( x = vbd->extents; x != NULL; x = x->next )
  23.501      { 
  23.502 -        if ( sec_off < x->extent.nr_sectors )
  23.503 +        if ( sec_off < x->extent.sector_length )
  23.504          {
  23.505              pseg->dev = x->extent.device; 
  23.506 -            pseg->sector_number = x->extent.start_sector + sec_off;
  23.507 -            if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) )
  23.508 +            pseg->sector_number = x->extent.sector_start + sec_off;
  23.509 +            if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) )
  23.510                  goto overrun;
  23.511              spin_unlock(&p->vbd_lock);
  23.512              return 1;
  23.513          } 
  23.514 -        sec_off -= x->extent.nr_sectors; 
  23.515 +        sec_off -= x->extent.sector_length; 
  23.516      }
  23.517  
  23.518      DPRINTK("vbd_translate: end of vbd.\n");
  23.519 -    spin_unlock(&p->vbd_lock);
  23.520 +    spin_unlock(&blkif->vbd_lock);
  23.521      return -EACCES; 
  23.522  
  23.523      /*
  23.524 @@ -554,7 +412,7 @@ int vbd_translate(phys_seg_t *pseg, stru
  23.525   overrun:
  23.526  
  23.527      /* Adjust length of first chunk to run to end of first extent. */
  23.528 -    pseg[0].nr_sects = x->extent.nr_sectors - sec_off;
  23.529 +    pseg[0].nr_sects = x->extent.sector_length - sec_off;
  23.530  
  23.531      /* Set second chunk buffer and length to start where first chunk ended. */
  23.532      pseg[1].buffer   = pseg[0].buffer + (pseg[0].nr_sects << 9);
  23.533 @@ -562,7 +420,7 @@ int vbd_translate(phys_seg_t *pseg, stru
  23.534  
  23.535      /* Now move to the next extent. Check it exists and is long enough! */
  23.536      if ( unlikely((x = x->next) == NULL) || 
  23.537 -         unlikely(x->extent.nr_sectors < pseg[1].nr_sects) )
  23.538 +         unlikely(x->extent.sector_length < pseg[1].nr_sects) )
  23.539      {
  23.540          DPRINTK("vbd_translate: multiple overruns or end of vbd.\n");
  23.541          spin_unlock(&p->vbd_lock);
  23.542 @@ -571,8 +429,8 @@ int vbd_translate(phys_seg_t *pseg, stru
  23.543  
  23.544      /* Store the real device and start sector for the second chunk. */
  23.545      pseg[1].dev           = x->extent.device;
  23.546 -    pseg[1].sector_number = x->extent.start_sector;
  23.547 +    pseg[1].sector_number = x->extent.sector_start;
  23.548      
  23.549 -    spin_unlock(&p->vbd_lock);
  23.550 +    spin_unlock(&blkif->vbd_lock);
  23.551      return 2;
  23.552  }
    24.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h	Sun May 02 00:27:58 2004 +0000
    24.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h	Sun May 02 00:52:45 2004 +0000
    24.3 @@ -16,24 +16,27 @@
    24.4  #define BLKIF_OP_WRITE     1
    24.5  #define BLKIF_OP_PROBE     2
    24.6  
    24.7 -/* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */
    24.8 +/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */
    24.9  #define BLKIF_RING_SIZE        64
   24.10  
   24.11  /*
   24.12   * Maximum scatter/gather segments per request.
   24.13 - * This is carefully chosen so that sizeof(blk_ring_t) <= PAGE_SIZE.
   24.14 + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
   24.15   * NB. This could be 12 if the ring indexes weren't stored in the same page.
   24.16   */
   24.17 -#define BLKIF_REQUEST_MAX_SEGMENTS 11
   24.18 +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
   24.19 +
   24.20 +#define BLKIF_MAX_SECTORS_PER_SEGMENT  16
   24.21  
   24.22  typedef struct {
   24.23      unsigned char  operation;        /* BLKIF_OP_???                         */
   24.24 -    unsigned char  nr_segments;      /* number of segments (<= MAX_BLK_SEGS) */
   24.25 +    unsigned char  nr_segments;      /* number of segments                   */
   24.26      blkif_vdev_t   device;           /* only for read/write requests         */
   24.27      unsigned long  id;               /* private guest value, echoed in resp  */
   24.28 -    xen_sector_t   sector_number;    /* start sector idx on disk (r/w only)  */
   24.29 -    /* Least 9 bits is 'nr_sects'. High 23 bits is the address.      */
   24.30 -    unsigned long  buffer_and_sects[MAX_BLK_SEGS];
   24.31 +    blkif_sector_t sector_number;    /* start sector idx on disk (r/w only)  */
   24.32 +    /* Least 9 bits is 'nr_sects'. High 23 bits is the address.       */
   24.33 +    /* We must have '0 <= nr_sects <= BLKIF_MAX_SECTORS_PER_SEGMENT'. */
   24.34 +    unsigned long  buffer_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST];
   24.35  } blkif_request_t;
   24.36  
   24.37  typedef struct {
   24.38 @@ -59,8 +62,8 @@ typedef unsigned int BLKIF_RING_IDX;
   24.39  #define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1))
   24.40  
   24.41  typedef struct {
   24.42 -    BLKIF_RING_IDX req_prod;  /* Request producer. Updated by guest OS. */
   24.43 -    BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen.     */
   24.44 +    BLKIF_RING_IDX req_prod;  /* Request producer. Updated by front-end. */
   24.45 +    BLKIF_RING_IDX resp_prod; /* Response producer. Updated by back-end. */
   24.46      union {
   24.47          blkif_request_t  req;
   24.48          blkif_response_t resp;
   24.49 @@ -103,7 +106,7 @@ typedef struct {
   24.50  typedef struct {
   24.51      blkif_vdev_t   device;       /* Device number (opaque 16 bit value). */
   24.52      unsigned short info;         /* Device type and flags (VDISK_*).     */
   24.53 -    xen_sector_t   capacity;     /* Size in terms of 512-byte sectors.   */
   24.54 +    blkif_sector_t capacity;     /* Size in terms of 512-byte sectors.   */
   24.55  } vdisk_t;
   24.56  
   24.57  #endif /* __SHARED_BLKIF_H__ */
    25.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile	Sun May 02 00:27:58 2004 +0000
    25.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile	Sun May 02 00:52:45 2004 +0000
    25.3 @@ -1,3 +1,3 @@
    25.4  O_TARGET := drv.o
    25.5 -obj-y := block.o vbd.o
    25.6 +obj-y := main.o vbd.o
    25.7  include $(TOPDIR)/Rules.make
    26.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c	Sun May 02 00:27:58 2004 +0000
    26.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    26.3 @@ -1,625 +0,0 @@
    26.4 -/******************************************************************************
    26.5 - * block.c
    26.6 - * 
    26.7 - * Xenolinux virtual block-device driver.
    26.8 - * 
    26.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   26.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   26.11 - */
   26.12 -
   26.13 -#include "block.h"
   26.14 -#include <linux/blk.h>
   26.15 -#include <linux/cdrom.h>
   26.16 -#include <linux/tqueue.h>
   26.17 -#include <linux/sched.h>
   26.18 -#include <scsi/scsi.h>
   26.19 -
   26.20 -#include <linux/interrupt.h>
   26.21 -
   26.22 -typedef unsigned char byte; /* from linux/ide.h */
   26.23 -
   26.24 -#define STATE_ACTIVE    0
   26.25 -#define STATE_SUSPENDED 1
   26.26 -#define STATE_CLOSED    2
   26.27 -static unsigned int state = STATE_SUSPENDED;
   26.28 -
   26.29 -/* Dynamically-mapped IRQs. */
   26.30 -static int xlblk_response_irq, xlblk_update_irq;
   26.31 -
   26.32 -static blk_ring_t *blk_ring;
   26.33 -static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
   26.34 -static BLK_RING_IDX req_prod;  /* Private request producer.         */
   26.35 -
   26.36 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   26.37 -#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
   26.38 -                      (state != STATE_ACTIVE))
   26.39 -
   26.40 -
   26.41 -/*
   26.42 - * Request queues with outstanding work, but ring is currently full.
   26.43 - * We need no special lock here, as we always access this with the
   26.44 - * io_request_lock held. We only need a small maximum list.
   26.45 - */
   26.46 -#define MAX_PENDING 8
   26.47 -static request_queue_t *pending_queues[MAX_PENDING];
   26.48 -static int nr_pending;
   26.49 -
   26.50 -static kdev_t        sg_dev;
   26.51 -static int           sg_operation = -1;
   26.52 -static unsigned long sg_next_sect;
   26.53 -#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   26.54 -
   26.55 -static inline void signal_requests_to_xen(void)
   26.56 -{
   26.57 -    block_io_op_t op; 
   26.58 -
   26.59 -    DISABLE_SCATTERGATHER();
   26.60 -    blk_ring->req_prod = req_prod;
   26.61 -
   26.62 -    op.cmd = BLOCK_IO_OP_SIGNAL; 
   26.63 -    HYPERVISOR_block_io_op(&op);
   26.64 -    return;
   26.65 -}
   26.66 -
   26.67 -
   26.68 -/*
   26.69 - * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
   26.70 - * 
   26.71 - * Schedule a task for keventd to run, which will update the VBDs and perform 
   26.72 - * the corresponding updates to our view of VBD state, so the XenoLinux will 
   26.73 - * respond to changes / additions / deletions to the set of VBDs automatically.
   26.74 - */
   26.75 -static struct tq_struct update_tq;
   26.76 -static void update_vbds_task(void *unused)
   26.77 -{ 
   26.78 -    xlvbd_update_vbds();
   26.79 -}
   26.80 -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
   26.81 -{
   26.82 -    update_tq.routine = update_vbds_task;
   26.83 -    schedule_task(&update_tq);
   26.84 -}
   26.85 -
   26.86 -
   26.87 -int xen_block_open(struct inode *inode, struct file *filep)
   26.88 -{
   26.89 -    short xldev = inode->i_rdev; 
   26.90 -    struct gendisk *gd = get_gendisk(xldev);
   26.91 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   26.92 -    short minor = MINOR(xldev); 
   26.93 -
   26.94 -    if ( gd->part[minor].nr_sects == 0 )
   26.95 -    { 
   26.96 -        /*
   26.97 -         * Device either doesn't exist, or has zero capacity; we use a few
   26.98 -         * cheesy heuristics to return the relevant error code
   26.99 -         */
  26.100 -        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
  26.101 -             ((minor & (gd->max_p - 1)) != 0) )
  26.102 -        { 
  26.103 -            /*
  26.104 -             * We have a real device, but no such partition, or we just have a
  26.105 -             * partition number so guess this is the problem.
  26.106 -             */
  26.107 -            return -ENXIO;     /* no such device or address */
  26.108 -        }
  26.109 -        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  26.110 -        {
  26.111 -            /* This is a removable device => assume that media is missing. */ 
  26.112 -            return -ENOMEDIUM; /* media not present (this is a guess) */
  26.113 -        } 
  26.114 -        else
  26.115 -        { 
  26.116 -            /* Just go for the general 'no such device' error. */
  26.117 -            return -ENODEV;    /* no such device */
  26.118 -        }
  26.119 -    }
  26.120 -    
  26.121 -    /* Update of usage count is protected by per-device semaphore. */
  26.122 -    disk->usage++;
  26.123 -
  26.124 -    return 0;
  26.125 -}
  26.126 -
  26.127 -
  26.128 -int xen_block_release(struct inode *inode, struct file *filep)
  26.129 -{
  26.130 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  26.131 -
  26.132 -    /*
  26.133 -     * When usage drops to zero it may allow more VBD updates to occur.
  26.134 -     * Update of usage count is protected by a per-device semaphore.
  26.135 -     */
  26.136 -    if ( --disk->usage == 0 )
  26.137 -    {
  26.138 -        update_tq.routine = update_vbds_task;
  26.139 -        schedule_task(&update_tq);
  26.140 -    }
  26.141 -
  26.142 -    return 0;
  26.143 -}
  26.144 -
  26.145 -
  26.146 -int xen_block_ioctl(struct inode *inode, struct file *filep,
  26.147 -                          unsigned command, unsigned long argument)
  26.148 -{
  26.149 -    kdev_t dev = inode->i_rdev;
  26.150 -    struct hd_geometry *geo = (struct hd_geometry *)argument;
  26.151 -    struct gendisk *gd;     
  26.152 -    struct hd_struct *part; 
  26.153 -    int i;
  26.154 -
  26.155 -    /* NB. No need to check permissions. That is done for us. */
  26.156 -    
  26.157 -    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  26.158 -                  command, (long) argument, dev); 
  26.159 -  
  26.160 -    gd = get_gendisk(dev);
  26.161 -    part = &gd->part[MINOR(dev)]; 
  26.162 -
  26.163 -    switch ( command )
  26.164 -    {
  26.165 -    case BLKGETSIZE:
  26.166 -        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  26.167 -        return put_user(part->nr_sects, (unsigned long *) argument);
  26.168 -
  26.169 -    case BLKGETSIZE64:
  26.170 -        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  26.171 -                      (u64)part->nr_sects * 512);
  26.172 -        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  26.173 -
  26.174 -    case BLKRRPART:                               /* re-read partition table */
  26.175 -        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  26.176 -        return xen_block_revalidate(dev);
  26.177 -
  26.178 -    case BLKSSZGET:
  26.179 -        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  26.180 -
  26.181 -    case BLKBSZGET:                                        /* get block size */
  26.182 -        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  26.183 -        break;
  26.184 -
  26.185 -    case BLKBSZSET:                                        /* set block size */
  26.186 -        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  26.187 -        break;
  26.188 -
  26.189 -    case BLKRASET:                                         /* set read-ahead */
  26.190 -        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  26.191 -        break;
  26.192 -
  26.193 -    case BLKRAGET:                                         /* get read-ahead */
  26.194 -        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
  26.195 -        break;
  26.196 -
  26.197 -    case HDIO_GETGEO:
  26.198 -        /* note: these values are complete garbage */
  26.199 -        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  26.200 -        if (!argument) return -EINVAL;
  26.201 -        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  26.202 -        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  26.203 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  26.204 -        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  26.205 -        return 0;
  26.206 -
  26.207 -    case HDIO_GETGEO_BIG: 
  26.208 -        /* note: these values are complete garbage */
  26.209 -        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  26.210 -        if (!argument) return -EINVAL;
  26.211 -        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  26.212 -        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  26.213 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  26.214 -        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  26.215 -        return 0;
  26.216 -
  26.217 -    case CDROMMULTISESSION:
  26.218 -        DPRINTK("FIXME: support multisession CDs later\n");
  26.219 -        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  26.220 -            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  26.221 -        return 0;
  26.222 -
  26.223 -    case SCSI_IOCTL_GET_BUS_NUMBER:
  26.224 -        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
  26.225 -        return -ENOSYS;
  26.226 -
  26.227 -    default:
  26.228 -        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
  26.229 -        return -ENOSYS;
  26.230 -    }
  26.231 -    
  26.232 -    return 0;
  26.233 -}
  26.234 -
  26.235 -/* check media change: should probably do something here in some cases :-) */
  26.236 -int xen_block_check(kdev_t dev)
  26.237 -{
  26.238 -    DPRINTK("xen_block_check\n");
  26.239 -    return 0;
  26.240 -}
  26.241 -
  26.242 -int xen_block_revalidate(kdev_t dev)
  26.243 -{
  26.244 -    struct block_device *bd;
  26.245 -    struct gendisk *gd;
  26.246 -    xl_disk_t *disk;
  26.247 -    unsigned long capacity;
  26.248 -    int i, rc = 0;
  26.249 -    
  26.250 -    if ( (bd = bdget(dev)) == NULL )
  26.251 -        return -EINVAL;
  26.252 -
  26.253 -    /*
  26.254 -     * Update of partition info, and check of usage count, is protected
  26.255 -     * by the per-block-device semaphore.
  26.256 -     */
  26.257 -    down(&bd->bd_sem);
  26.258 -
  26.259 -    if ( ((gd = get_gendisk(dev)) == NULL) ||
  26.260 -         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  26.261 -         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  26.262 -    {
  26.263 -        rc = -EINVAL;
  26.264 -        goto out;
  26.265 -    }
  26.266 -
  26.267 -    if ( disk->usage > 1 )
  26.268 -    {
  26.269 -        rc = -EBUSY;
  26.270 -        goto out;
  26.271 -    }
  26.272 -
  26.273 -    /* Only reread partition table if VBDs aren't mapped to partitions. */
  26.274 -    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  26.275 -    {
  26.276 -        for ( i = gd->max_p - 1; i >= 0; i-- )
  26.277 -        {
  26.278 -            invalidate_device(dev+i, 1);
  26.279 -            gd->part[MINOR(dev+i)].start_sect = 0;
  26.280 -            gd->part[MINOR(dev+i)].nr_sects   = 0;
  26.281 -            gd->sizes[MINOR(dev+i)]           = 0;
  26.282 -        }
  26.283 -
  26.284 -        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  26.285 -    }
  26.286 -
  26.287 - out:
  26.288 -    up(&bd->bd_sem);
  26.289 -    bdput(bd);
  26.290 -    return rc;
  26.291 -}
  26.292 -
  26.293 -
  26.294 -/*
  26.295 - * hypervisor_request
  26.296 - *
  26.297 - * request block io 
  26.298 - * 
  26.299 - * id: for guest use only.
  26.300 - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
  26.301 - * buffer: buffer to read/write into. this should be a
  26.302 - *   virtual address in the guest os.
  26.303 - */
  26.304 -static int hypervisor_request(unsigned long   id,
  26.305 -                              int             operation,
  26.306 -                              char *          buffer,
  26.307 -                              unsigned long   sector_number,
  26.308 -                              unsigned short  nr_sectors,
  26.309 -                              kdev_t          device)
  26.310 -{
  26.311 -    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
  26.312 -    struct gendisk *gd;
  26.313 -    blk_ring_req_entry_t *req;
  26.314 -    struct buffer_head *bh;
  26.315 -
  26.316 -    if ( unlikely(nr_sectors >= (1<<9)) )
  26.317 -        BUG();
  26.318 -    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
  26.319 -        BUG();
  26.320 -
  26.321 -    if ( unlikely(state == STATE_CLOSED) )
  26.322 -        return 1;
  26.323 -
  26.324 -    switch ( operation )
  26.325 -    {
  26.326 -
  26.327 -    case XEN_BLOCK_READ:
  26.328 -    case XEN_BLOCK_WRITE:
  26.329 -        gd = get_gendisk(device); 
  26.330 -
  26.331 -        /*
  26.332 -         * Update the sector_number we'll pass down as appropriate; note that
  26.333 -         * we could sanity check that resulting sector will be in this
  26.334 -         * partition, but this will happen in xen anyhow.
  26.335 -         */
  26.336 -        sector_number += gd->part[MINOR(device)].start_sect;
  26.337 -
  26.338 -        /*
  26.339 -         * If this unit doesn't consist of virtual (i.e., Xen-specified)
  26.340 -         * partitions then we clear the partn bits from the device number.
  26.341 -         */
  26.342 -        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
  26.343 -               GENHD_FL_VIRT_PARTNS) )
  26.344 -            device &= ~(gd->max_p - 1);
  26.345 -
  26.346 -        if ( (sg_operation == operation) &&
  26.347 -             (sg_dev == device) &&
  26.348 -             (sg_next_sect == sector_number) )
  26.349 -        {
  26.350 -            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
  26.351 -            bh = (struct buffer_head *)id;
  26.352 -            bh->b_reqnext = (struct buffer_head *)req->id;
  26.353 -            req->id = id;
  26.354 -            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
  26.355 -            if ( ++req->nr_segments < MAX_BLK_SEGS )
  26.356 -                sg_next_sect += nr_sectors;
  26.357 -            else
  26.358 -                DISABLE_SCATTERGATHER();
  26.359 -            return 0;
  26.360 -        }
  26.361 -        else if ( RING_PLUGGED )
  26.362 -        {
  26.363 -            return 1;
  26.364 -        }
  26.365 -        else
  26.366 -        {
  26.367 -            sg_operation = operation;
  26.368 -            sg_dev       = device;
  26.369 -            sg_next_sect = sector_number + nr_sectors;
  26.370 -        }
  26.371 -        break;
  26.372 -
  26.373 -    default:
  26.374 -        panic("unknown op %d\n", operation);
  26.375 -    }
  26.376 -
  26.377 -    /* Fill out a communications ring structure. */
  26.378 -    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
  26.379 -    req->id            = id;
  26.380 -    req->operation     = operation;
  26.381 -    req->sector_number = (xen_sector_t)sector_number;
  26.382 -    req->device        = device; 
  26.383 -    req->nr_segments   = 1;
  26.384 -    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
  26.385 -    req_prod++;
  26.386 -
  26.387 -    return 0;
  26.388 -}
  26.389 -
  26.390 -
  26.391 -/*
  26.392 - * do_xlblk_request
  26.393 - *  read a block; request is in a request queue
  26.394 - */
  26.395 -void do_xlblk_request(request_queue_t *rq)
  26.396 -{
  26.397 -    struct request *req;
  26.398 -    struct buffer_head *bh, *next_bh;
  26.399 -    int rw, nsect, full, queued = 0;
  26.400 -
  26.401 -    DPRINTK("xlblk.c::do_xlblk_request\n"); 
  26.402 -
  26.403 -    while ( !rq->plugged && !list_empty(&rq->queue_head))
  26.404 -    {
  26.405 -        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
  26.406 -            goto out;
  26.407 -  
  26.408 -        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
  26.409 -                req, req->cmd, req->sector,
  26.410 -                req->current_nr_sectors, req->nr_sectors, req->bh);
  26.411 -
  26.412 -        rw = req->cmd;
  26.413 -        if ( rw == READA )
  26.414 -            rw = READ;
  26.415 -        if ( unlikely((rw != READ) && (rw != WRITE)) )
  26.416 -            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
  26.417 -
  26.418 -        req->errors = 0;
  26.419 -
  26.420 -        bh = req->bh;
  26.421 -        while ( bh != NULL )
  26.422 -        {
  26.423 -            next_bh = bh->b_reqnext;
  26.424 -            bh->b_reqnext = NULL;
  26.425 -
  26.426 -            full = hypervisor_request(
  26.427 -                (unsigned long)bh,
  26.428 -                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
  26.429 -                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
  26.430 -
  26.431 -            if ( full )
  26.432 -            { 
  26.433 -                bh->b_reqnext = next_bh;
  26.434 -                pending_queues[nr_pending++] = rq;
  26.435 -                if ( unlikely(nr_pending >= MAX_PENDING) )
  26.436 -                    BUG();
  26.437 -                goto out; 
  26.438 -            }
  26.439 -
  26.440 -            queued++;
  26.441 -
  26.442 -            /* Dequeue the buffer head from the request. */
  26.443 -            nsect = bh->b_size >> 9;
  26.444 -            bh = req->bh = next_bh;
  26.445 -            
  26.446 -            if ( bh != NULL )
  26.447 -            {
  26.448 -                /* There's another buffer head to do. Update the request. */
  26.449 -                req->hard_sector += nsect;
  26.450 -                req->hard_nr_sectors -= nsect;
  26.451 -                req->sector = req->hard_sector;
  26.452 -                req->nr_sectors = req->hard_nr_sectors;
  26.453 -                req->current_nr_sectors = bh->b_size >> 9;
  26.454 -                req->buffer = bh->b_data;
  26.455 -            }
  26.456 -            else
  26.457 -            {
  26.458 -                /* That was the last buffer head. Finalise the request. */
  26.459 -                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
  26.460 -                    BUG();
  26.461 -                blkdev_dequeue_request(req);
  26.462 -                end_that_request_last(req);
  26.463 -            }
  26.464 -        }
  26.465 -    }
  26.466 -
  26.467 - out:
  26.468 -    if ( queued != 0 ) signal_requests_to_xen();
  26.469 -}
  26.470 -
  26.471 -
  26.472 -static void kick_pending_request_queues(void)
  26.473 -{
  26.474 -    /* We kick pending request queues if the ring is reasonably empty. */
  26.475 -    if ( (nr_pending != 0) && 
  26.476 -         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  26.477 -    {
  26.478 -        /* Attempt to drain the queue, but bail if the ring becomes full. */
  26.479 -        while ( (nr_pending != 0) && !RING_PLUGGED )
  26.480 -            do_xlblk_request(pending_queues[--nr_pending]);
  26.481 -    }
  26.482 -}
  26.483 -
  26.484 -
  26.485 -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
  26.486 -{
  26.487 -    BLK_RING_IDX i; 
  26.488 -    unsigned long flags; 
  26.489 -    struct buffer_head *bh, *next_bh;
  26.490 -    
  26.491 -    if ( unlikely(state == STATE_CLOSED) )
  26.492 -        return;
  26.493 -    
  26.494 -    spin_lock_irqsave(&io_request_lock, flags);     
  26.495 -
  26.496 -    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
  26.497 -    {
  26.498 -        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
  26.499 -        switch ( bret->operation )
  26.500 -        {
  26.501 -        case XEN_BLOCK_READ:
  26.502 -        case XEN_BLOCK_WRITE:
  26.503 -            if ( unlikely(bret->status != 0) )
  26.504 -                DPRINTK("Bad return from blkdev data request: %lx\n",
  26.505 -                        bret->status);
  26.506 -            for ( bh = (struct buffer_head *)bret->id; 
  26.507 -                  bh != NULL; 
  26.508 -                  bh = next_bh )
  26.509 -            {
  26.510 -                next_bh = bh->b_reqnext;
  26.511 -                bh->b_reqnext = NULL;
  26.512 -                bh->b_end_io(bh, !bret->status);
  26.513 -            }
  26.514 -            break;
  26.515 -     
  26.516 -        default:
  26.517 -            BUG();
  26.518 -        }
  26.519 -    }
  26.520 -    
  26.521 -    resp_cons = i;
  26.522 -
  26.523 -    kick_pending_request_queues();
  26.524 -
  26.525 -    spin_unlock_irqrestore(&io_request_lock, flags);
  26.526 -}
  26.527 -
  26.528 -
  26.529 -static void reset_xlblk_interface(void)
  26.530 -{
  26.531 -    block_io_op_t op; 
  26.532 -
  26.533 -    nr_pending = 0;
  26.534 -
  26.535 -    op.cmd = BLOCK_IO_OP_RESET;
  26.536 -    if ( HYPERVISOR_block_io_op(&op) != 0 )
  26.537 -        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
  26.538 -
  26.539 -    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
  26.540 -    (void)HYPERVISOR_block_io_op(&op);
  26.541 -
  26.542 -    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
  26.543 -    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
  26.544 -    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  26.545 -
  26.546 -    wmb();
  26.547 -    state = STATE_ACTIVE;
  26.548 -}
  26.549 -
  26.550 -
  26.551 -int __init xlblk_init(void)
  26.552 -{
  26.553 -    int error; 
  26.554 -
  26.555 -    reset_xlblk_interface();
  26.556 -
  26.557 -    xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV);
  26.558 -    xlblk_update_irq   = bind_virq_to_irq(VIRQ_VBD_UPD);
  26.559 -
  26.560 -    error = request_irq(xlblk_response_irq, xlblk_response_int, 
  26.561 -                        SA_SAMPLE_RANDOM, "blkdev", NULL);
  26.562 -    if ( error )
  26.563 -    {
  26.564 -        printk(KERN_ALERT "Could not allocate receive interrupt\n");
  26.565 -        goto fail;
  26.566 -    }
  26.567 -
  26.568 -    error = request_irq(xlblk_update_irq, xlblk_update_int,
  26.569 -                        0, "blkdev", NULL);
  26.570 -
  26.571 -    if ( error )
  26.572 -    {
  26.573 -        printk(KERN_ALERT "Could not allocate block update interrupt\n");
  26.574 -        goto fail;
  26.575 -    }
  26.576 -
  26.577 -    (void)xlvbd_init();
  26.578 -
  26.579 -    return 0;
  26.580 -
  26.581 - fail:
  26.582 -    return error;
  26.583 -}
  26.584 -
  26.585 -
  26.586 -static void __exit xlblk_cleanup(void)
  26.587 -{
  26.588 -    xlvbd_cleanup();
  26.589 -    free_irq(xlblk_response_irq, NULL);
  26.590 -    free_irq(xlblk_update_irq, NULL);
  26.591 -    unbind_virq_from_irq(VIRQ_BLKDEV);
  26.592 -    unbind_virq_from_irq(VIRQ_VBD_UPD);
  26.593 -}
  26.594 -
  26.595 -
  26.596 -#ifdef MODULE
  26.597 -module_init(xlblk_init);
  26.598 -module_exit(xlblk_cleanup);
  26.599 -#endif
  26.600 -
  26.601 -
  26.602 -void blkdev_suspend(void)
  26.603 -{
  26.604 -    state = STATE_SUSPENDED;
  26.605 -    wmb();
  26.606 -
  26.607 -    while ( resp_cons != blk_ring->req_prod )
  26.608 -    {
  26.609 -        barrier();
  26.610 -        current->state = TASK_INTERRUPTIBLE;
  26.611 -        schedule_timeout(1);
  26.612 -    }
  26.613 -
  26.614 -    wmb();
  26.615 -    state = STATE_CLOSED;
  26.616 -    wmb();
  26.617 -
  26.618 -    clear_fixmap(FIX_BLKRING_BASE);
  26.619 -}
  26.620 -
  26.621 -
  26.622 -void blkdev_resume(void)
  26.623 -{
  26.624 -    reset_xlblk_interface();
  26.625 -    spin_lock_irq(&io_request_lock);
  26.626 -    kick_pending_request_queues();
  26.627 -    spin_unlock_irq(&io_request_lock);
  26.628 -}
    27.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h	Sun May 02 00:27:58 2004 +0000
    27.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.3 @@ -1,82 +0,0 @@
    27.4 -/******************************************************************************
    27.5 - * block.h
    27.6 - * 
    27.7 - * Shared definitions between all levels of XenoLinux Virtual block devices.
    27.8 - */
    27.9 -
   27.10 -#ifndef __XEN_DRIVERS_BLOCK_H__
   27.11 -#define __XEN_DRIVERS_BLOCK_H__
   27.12 -
   27.13 -#include <linux/config.h>
   27.14 -#include <linux/module.h>
   27.15 -
   27.16 -#include <linux/kernel.h>
   27.17 -#include <linux/sched.h>
   27.18 -#include <linux/slab.h>
   27.19 -#include <linux/string.h>
   27.20 -#include <linux/errno.h>
   27.21 -
   27.22 -#include <linux/fs.h>
   27.23 -#include <linux/hdreg.h>
   27.24 -#include <linux/blkdev.h>
   27.25 -#include <linux/major.h>
   27.26 -
   27.27 -#include <asm/hypervisor-ifs/hypervisor-if.h>
   27.28 -#include <asm/hypervisor-ifs/vbd.h>
   27.29 -#include <asm/io.h>
   27.30 -#include <asm/atomic.h>
   27.31 -#include <asm/uaccess.h>
   27.32 -
   27.33 -#if 0
   27.34 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   27.35 -#else
   27.36 -#define DPRINTK(_f, _a...) ((void)0)
   27.37 -#endif
   27.38 -
   27.39 -#if 0
   27.40 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   27.41 -#else
   27.42 -#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   27.43 -#endif
   27.44 -
   27.45 -/* Private gendisk->flags[] values. */
   27.46 -#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
   27.47 -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
   27.48 -
   27.49 -/*
   27.50 - * We have one of these per vbd, whether ide, scsi or 'other'.
   27.51 - * They hang in an array off the gendisk structure. We may end up putting
   27.52 - * all kinds of interesting stuff here :-)
   27.53 - */
   27.54 -typedef struct xl_disk {
   27.55 -    int usage;
   27.56 -} xl_disk_t;
   27.57 -
   27.58 -extern int xen_control_msg(int operration, char *buffer, int size);
   27.59 -extern int xen_block_open(struct inode *inode, struct file *filep);
   27.60 -extern int xen_block_release(struct inode *inode, struct file *filep);
   27.61 -extern int xen_block_ioctl(struct inode *inode, struct file *filep,
   27.62 -                                 unsigned command, unsigned long argument);
   27.63 -extern int xen_block_check(kdev_t dev);
   27.64 -extern int xen_block_revalidate(kdev_t dev);
   27.65 -extern void do_xlblk_request (request_queue_t *rq); 
   27.66 -
   27.67 -extern void xlvbd_update_vbds(void);
   27.68 -
   27.69 -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   27.70 -{
   27.71 -    struct gendisk *gd = get_gendisk(xldev);
   27.72 -    
   27.73 -    if ( gd == NULL ) 
   27.74 -        return NULL;
   27.75 -    
   27.76 -    return (xl_disk_t *)gd->real_devices + 
   27.77 -        (MINOR(xldev) >> gd->minor_shift);
   27.78 -}
   27.79 -
   27.80 -
   27.81 -/* Virtual block-device subsystem. */
   27.82 -extern int  xlvbd_init(void);
   27.83 -extern void xlvbd_cleanup(void); 
   27.84 -
   27.85 -#endif /* __XEN_DRIVERS_BLOCK_H__ */
    28.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    28.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h	Sun May 02 00:52:45 2004 +0000
    28.3 @@ -0,0 +1,84 @@
    28.4 +/******************************************************************************
    28.5 + * arch/xen/drivers/blkif/frontend/common.h
    28.6 + * 
    28.7 + * Shared definitions between all levels of XenoLinux Virtual block devices.
    28.8 + */
    28.9 +
   28.10 +#ifndef __XEN_DRIVERS_COMMON_H__
   28.11 +#define __XEN_DRIVERS_COMMON_H__
   28.12 +
   28.13 +#include <linux/config.h>
   28.14 +#include <linux/module.h>
   28.15 +
   28.16 +#include <linux/kernel.h>
   28.17 +#include <linux/sched.h>
   28.18 +#include <linux/slab.h>
   28.19 +#include <linux/string.h>
   28.20 +#include <linux/errno.h>
   28.21 +
   28.22 +#include <linux/fs.h>
   28.23 +#include <linux/hdreg.h>
   28.24 +#include <linux/blkdev.h>
   28.25 +#include <linux/major.h>
   28.26 +
   28.27 +#include <asm/hypervisor-ifs/hypervisor-if.h>
   28.28 +#include <asm/hypervisor-ifs/vbd.h>
   28.29 +#include <asm/io.h>
   28.30 +#include <asm/atomic.h>
   28.31 +#include <asm/uaccess.h>
   28.32 +
   28.33 +#include "../blkif.h"
   28.34 +
   28.35 +#if 0
   28.36 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   28.37 +#else
   28.38 +#define DPRINTK(_f, _a...) ((void)0)
   28.39 +#endif
   28.40 +
   28.41 +#if 0
   28.42 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   28.43 +#else
   28.44 +#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   28.45 +#endif
   28.46 +
   28.47 +/* Private gendisk->flags[] values. */
   28.48 +#define GENHD_FL_XEN        2 /* Is unit a Xen block device?  */
   28.49 +#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
   28.50 +
   28.51 +/*
   28.52 + * We have one of these per vbd, whether ide, scsi or 'other'.
   28.53 + * They hang in an array off the gendisk structure. We may end up putting
   28.54 + * all kinds of interesting stuff here :-)
   28.55 + */
   28.56 +typedef struct xl_disk {
   28.57 +    int usage;
   28.58 +} xl_disk_t;
   28.59 +
   28.60 +extern int blkif_open(struct inode *inode, struct file *filep);
   28.61 +extern int blkif_release(struct inode *inode, struct file *filep);
   28.62 +extern int blkif_ioctl(struct inode *inode, struct file *filep,
   28.63 +                                 unsigned command, unsigned long argument);
   28.64 +extern int blkif_check(kdev_t dev);
   28.65 +extern int blkif_revalidate(kdev_t dev);
   28.66 +extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp);
   28.67 +extern void do_blkif_request (request_queue_t *rq); 
   28.68 +
   28.69 +extern void xlvbd_update_vbds(void);
   28.70 +
   28.71 +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   28.72 +{
   28.73 +    struct gendisk *gd = get_gendisk(xldev);
   28.74 +    
   28.75 +    if ( gd == NULL ) 
   28.76 +        return NULL;
   28.77 +    
   28.78 +    return (xl_disk_t *)gd->real_devices + 
   28.79 +        (MINOR(xldev) >> gd->minor_shift);
   28.80 +}
   28.81 +
   28.82 +
   28.83 +/* Virtual block-device subsystem. */
   28.84 +extern int  xlvbd_init(void);
   28.85 +extern void xlvbd_cleanup(void); 
   28.86 +
   28.87 +#endif /* __XEN_DRIVERS_COMMON_H__ */
    29.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    29.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c	Sun May 02 00:52:45 2004 +0000
    29.3 @@ -0,0 +1,727 @@
    29.4 +/******************************************************************************
    29.5 + * arch/xen/drivers/blkif/frontend/main.c
    29.6 + * 
    29.7 + * Xenolinux virtual block-device driver.
    29.8 + * 
    29.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   29.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   29.11 + */
   29.12 +
   29.13 +#include "common.h"
   29.14 +#include <linux/blk.h>
   29.15 +#include <linux/cdrom.h>
   29.16 +#include <linux/tqueue.h>
   29.17 +#include <linux/sched.h>
   29.18 +#include <linux/interrupt.h>
   29.19 +#include <scsi/scsi.h>
   29.20 +#include <asm/ctrl_if.h>
   29.21 +
   29.22 +typedef unsigned char byte; /* from linux/ide.h */
   29.23 +
   29.24 +#define BLKIF_STATE_CLOSED       0
   29.25 +#define BLKIF_STATE_DISCONNECTED 1
   29.26 +#define BLKIF_STATE_CONNECTED    2
   29.27 +static unsigned int blkif_state = BLKIF_STATE_CLOSED;
   29.28 +static unsigned int blkif_evtchn, blkif_irq;
   29.29 +
   29.30 +static struct tq_struct blkif_statechange_tq;
   29.31 +
   29.32 +static int blkif_control_rsp_valid;
   29.33 +static blkif_response_t blkif_control_rsp;
   29.34 +
   29.35 +static blkif_ring_t *blk_ring;
   29.36 +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
   29.37 +static BLK_RING_IDX req_prod;  /* Private request producer.         */
   29.38 +
   29.39 +/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   29.40 +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
   29.41 +                      (blkif_state != BLKIF_STATE_CONNECTED))
   29.42 +
   29.43 +
   29.44 +/*
   29.45 + * Request queues with outstanding work, but ring is currently full.
   29.46 + * We need no special lock here, as we always access this with the
   29.47 + * io_request_lock held. We only need a small maximum list.
   29.48 + */
   29.49 +#define MAX_PENDING 8
   29.50 +static request_queue_t *pending_queues[MAX_PENDING];
   29.51 +static int nr_pending;
   29.52 +
   29.53 +static kdev_t        sg_dev;
   29.54 +static int           sg_operation = -1;
   29.55 +static unsigned long sg_next_sect;
   29.56 +#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   29.57 +
   29.58 +static inline void flush_requests(void)
   29.59 +{
   29.60 +    DISABLE_SCATTERGATHER();
   29.61 +    blk_ring->req_prod = req_prod;
   29.62 +    notify_via_evtchn(blkif_evtchn);
   29.63 +}
   29.64 +
   29.65 +
   29.66 +/*
   29.67 + * blkif_update_int/update-vbds_task - handle VBD update events.
   29.68 + *  Schedule a task for keventd to run, which will update the VBDs and perform 
   29.69 + *  the corresponding updates to our view of VBD state.
   29.70 + */
   29.71 +static struct tq_struct update_tq;
   29.72 +static void update_vbds_task(void *unused)
   29.73 +{ 
   29.74 +    xlvbd_update_vbds();
   29.75 +}
   29.76 +
   29.77 +
   29.78 +int blkif_open(struct inode *inode, struct file *filep)
   29.79 +{
   29.80 +    short xldev = inode->i_rdev; 
   29.81 +    struct gendisk *gd = get_gendisk(xldev);
   29.82 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   29.83 +    short minor = MINOR(xldev); 
   29.84 +
   29.85 +    if ( gd->part[minor].nr_sects == 0 )
   29.86 +    { 
   29.87 +        /*
   29.88 +         * Device either doesn't exist, or has zero capacity; we use a few
   29.89 +         * cheesy heuristics to return the relevant error code
   29.90 +         */
   29.91 +        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
   29.92 +             ((minor & (gd->max_p - 1)) != 0) )
   29.93 +        { 
   29.94 +            /*
   29.95 +             * We have a real device, but no such partition, or we just have a
   29.96 +             * partition number so guess this is the problem.
   29.97 +             */
   29.98 +            return -ENXIO;     /* no such device or address */
   29.99 +        }
  29.100 +        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  29.101 +        {
  29.102 +            /* This is a removable device => assume that media is missing. */ 
  29.103 +            return -ENOMEDIUM; /* media not present (this is a guess) */
  29.104 +        } 
  29.105 +        else
  29.106 +        { 
  29.107 +            /* Just go for the general 'no such device' error. */
  29.108 +            return -ENODEV;    /* no such device */
  29.109 +        }
  29.110 +    }
  29.111 +    
  29.112 +    /* Update of usage count is protected by per-device semaphore. */
  29.113 +    disk->usage++;
  29.114 +
  29.115 +    return 0;
  29.116 +}
  29.117 +
  29.118 +
  29.119 +int blkif_release(struct inode *inode, struct file *filep)
  29.120 +{
  29.121 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  29.122 +
  29.123 +    /*
  29.124 +     * When usage drops to zero it may allow more VBD updates to occur.
  29.125 +     * Update of usage count is protected by a per-device semaphore.
  29.126 +     */
  29.127 +    if ( --disk->usage == 0 )
  29.128 +    {
  29.129 +#if 0
  29.130 +        update_tq.routine = update_vbds_task;
  29.131 +        schedule_task(&update_tq);
  29.132 +#endif
  29.133 +    }
  29.134 +
  29.135 +    return 0;
  29.136 +}
  29.137 +
  29.138 +
  29.139 +int blkif_ioctl(struct inode *inode, struct file *filep,
  29.140 +                          unsigned command, unsigned long argument)
  29.141 +{
  29.142 +    kdev_t dev = inode->i_rdev;
  29.143 +    struct hd_geometry *geo = (struct hd_geometry *)argument;
  29.144 +    struct gendisk *gd;     
  29.145 +    struct hd_struct *part; 
  29.146 +    int i;
  29.147 +
  29.148 +    /* NB. No need to check permissions. That is done for us. */
  29.149 +    
  29.150 +    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  29.151 +                  command, (long) argument, dev); 
  29.152 +  
  29.153 +    gd = get_gendisk(dev);
  29.154 +    part = &gd->part[MINOR(dev)]; 
  29.155 +
  29.156 +    switch ( command )
  29.157 +    {
  29.158 +    case BLKGETSIZE:
  29.159 +        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  29.160 +        return put_user(part->nr_sects, (unsigned long *) argument);
  29.161 +
  29.162 +    case BLKGETSIZE64:
  29.163 +        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  29.164 +                      (u64)part->nr_sects * 512);
  29.165 +        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  29.166 +
  29.167 +    case BLKRRPART:                               /* re-read partition table */
  29.168 +        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  29.169 +        return blkif_revalidate(dev);
  29.170 +
  29.171 +    case BLKSSZGET:
  29.172 +        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  29.173 +
  29.174 +    case BLKBSZGET:                                        /* get block size */
  29.175 +        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  29.176 +        break;
  29.177 +
  29.178 +    case BLKBSZSET:                                        /* set block size */
  29.179 +        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  29.180 +        break;
  29.181 +
  29.182 +    case BLKRASET:                                         /* set read-ahead */
  29.183 +        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  29.184 +        break;
  29.185 +
  29.186 +    case BLKRAGET:                                         /* get read-ahead */
  29.187 +        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
  29.188 +        break;
  29.189 +
  29.190 +    case HDIO_GETGEO:
  29.191 +        /* note: these values are complete garbage */
  29.192 +        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  29.193 +        if (!argument) return -EINVAL;
  29.194 +        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  29.195 +        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  29.196 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  29.197 +        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  29.198 +        return 0;
  29.199 +
  29.200 +    case HDIO_GETGEO_BIG: 
  29.201 +        /* note: these values are complete garbage */
  29.202 +        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  29.203 +        if (!argument) return -EINVAL;
  29.204 +        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  29.205 +        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  29.206 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  29.207 +        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  29.208 +        return 0;
  29.209 +
  29.210 +    case CDROMMULTISESSION:
  29.211 +        DPRINTK("FIXME: support multisession CDs later\n");
  29.212 +        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  29.213 +            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  29.214 +        return 0;
  29.215 +
  29.216 +    case SCSI_IOCTL_GET_BUS_NUMBER:
  29.217 +        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
  29.218 +        return -ENOSYS;
  29.219 +
  29.220 +    default:
  29.221 +        printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command);
  29.222 +        return -ENOSYS;
  29.223 +    }
  29.224 +    
  29.225 +    return 0;
  29.226 +}
  29.227 +
  29.228 +/* check media change: should probably do something here in some cases :-) */
  29.229 +int blkif_check(kdev_t dev)
  29.230 +{
  29.231 +    DPRINTK("blkif_check\n");
  29.232 +    return 0;
  29.233 +}
  29.234 +
  29.235 +int blkif_revalidate(kdev_t dev)
  29.236 +{
  29.237 +    struct block_device *bd;
  29.238 +    struct gendisk *gd;
  29.239 +    xl_disk_t *disk;
  29.240 +    unsigned long capacity;
  29.241 +    int i, rc = 0;
  29.242 +    
  29.243 +    if ( (bd = bdget(dev)) == NULL )
  29.244 +        return -EINVAL;
  29.245 +
  29.246 +    /*
  29.247 +     * Update of partition info, and check of usage count, is protected
  29.248 +     * by the per-block-device semaphore.
  29.249 +     */
  29.250 +    down(&bd->bd_sem);
  29.251 +
  29.252 +    if ( ((gd = get_gendisk(dev)) == NULL) ||
  29.253 +         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  29.254 +         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  29.255 +    {
  29.256 +        rc = -EINVAL;
  29.257 +        goto out;
  29.258 +    }
  29.259 +
  29.260 +    if ( disk->usage > 1 )
  29.261 +    {
  29.262 +        rc = -EBUSY;
  29.263 +        goto out;
  29.264 +    }
  29.265 +
  29.266 +    /* Only reread partition table if VBDs aren't mapped to partitions. */
  29.267 +    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  29.268 +    {
  29.269 +        for ( i = gd->max_p - 1; i >= 0; i-- )
  29.270 +        {
  29.271 +            invalidate_device(dev+i, 1);
  29.272 +            gd->part[MINOR(dev+i)].start_sect = 0;
  29.273 +            gd->part[MINOR(dev+i)].nr_sects   = 0;
  29.274 +            gd->sizes[MINOR(dev+i)]           = 0;
  29.275 +        }
  29.276 +
  29.277 +        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  29.278 +    }
  29.279 +
  29.280 + out:
  29.281 +    up(&bd->bd_sem);
  29.282 +    bdput(bd);
  29.283 +    return rc;
  29.284 +}
  29.285 +
  29.286 +
  29.287 +/*
  29.288 + * blkif_queue_request
  29.289 + *
  29.290 + * request block io 
  29.291 + * 
  29.292 + * id: for guest use only.
  29.293 + * operation: BLKIF_OP_{READ,WRITE,PROBE}
  29.294 + * buffer: buffer to read/write into. this should be a
  29.295 + *   virtual address in the guest os.
  29.296 + */
  29.297 +static int blkif_queue_request(unsigned long   id,
  29.298 +                               int             operation,
  29.299 +                               char *          buffer,
  29.300 +                               unsigned long   sector_number,
  29.301 +                               unsigned short  nr_sectors,
  29.302 +                               kdev_t          device)
  29.303 +{
  29.304 +    unsigned long       buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
  29.305 +    struct gendisk     *gd;
  29.306 +    blkif_request_t    *req;
  29.307 +    struct buffer_head *bh;
  29.308 +
  29.309 +    if ( unlikely(nr_sectors >= (1<<9)) )
  29.310 +        BUG();
  29.311 +    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
  29.312 +        BUG();
  29.313 +
  29.314 +    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
  29.315 +        return 1;
  29.316 +
  29.317 +    switch ( operation )
  29.318 +    {
  29.319 +
  29.320 +    case BLKIF_OP_READ:
  29.321 +    case BLKIF_OP_WRITE:
  29.322 +        gd = get_gendisk(device); 
  29.323 +
  29.324 +        /*
  29.325 +         * Update the sector_number we'll pass down as appropriate; note that
  29.326 +         * we could sanity check that resulting sector will be in this
  29.327 +         * partition, but this will happen in driver backend anyhow.
  29.328 +         */
  29.329 +        sector_number += gd->part[MINOR(device)].start_sect;
  29.330 +
  29.331 +        /*
  29.332 +         * If this unit doesn't consist of virtual partitions then we clear 
  29.333 +         * the partn bits from the device number.
  29.334 +         */
  29.335 +        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
  29.336 +               GENHD_FL_VIRT_PARTNS) )
  29.337 +            device &= ~(gd->max_p - 1);
  29.338 +
  29.339 +        if ( (sg_operation == operation) &&
  29.340 +             (sg_dev == device) &&
  29.341 +             (sg_next_sect == sector_number) )
  29.342 +        {
  29.343 +            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
  29.344 +            bh = (struct buffer_head *)id;
  29.345 +            bh->b_reqnext = (struct buffer_head *)req->id;
  29.346 +            req->id = id;
  29.347 +            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
  29.348 +            if ( ++req->nr_segments < MAX_BLK_SEGS )
  29.349 +                sg_next_sect += nr_sectors;
  29.350 +            else
  29.351 +                DISABLE_SCATTERGATHER();
  29.352 +            return 0;
  29.353 +        }
  29.354 +        else if ( RING_PLUGGED )
  29.355 +        {
  29.356 +            return 1;
  29.357 +        }
  29.358 +        else
  29.359 +        {
  29.360 +            sg_operation = operation;
  29.361 +            sg_dev       = device;
  29.362 +            sg_next_sect = sector_number + nr_sectors;
  29.363 +        }
  29.364 +        break;
  29.365 +
  29.366 +    default:
  29.367 +        panic("unknown op %d\n", operation);
  29.368 +    }
  29.369 +
  29.370 +    /* Fill out a communications ring structure. */
  29.371 +    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
  29.372 +    req->id            = id;
  29.373 +    req->operation     = operation;
  29.374 +    req->sector_number = (blkif_sector_t)sector_number;
  29.375 +    req->device        = device; 
  29.376 +    req->nr_segments   = 1;
  29.377 +    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
  29.378 +    req_prod++;
  29.379 +
  29.380 +    return 0;
  29.381 +}
  29.382 +
  29.383 +
  29.384 +/*
  29.385 + * do_blkif_request
  29.386 + *  read a block; request is in a request queue
  29.387 + */
  29.388 +void do_blkif_request(request_queue_t *rq)
  29.389 +{
  29.390 +    struct request *req;
  29.391 +    struct buffer_head *bh, *next_bh;
  29.392 +    int rw, nsect, full, queued = 0;
  29.393 +
  29.394 +    DPRINTK("Entered do_blkif_request\n"); 
  29.395 +
  29.396 +    while ( !rq->plugged && !list_empty(&rq->queue_head))
  29.397 +    {
  29.398 +        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
  29.399 +            goto out;
  29.400 +  
  29.401 +        DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
  29.402 +                req, req->cmd, req->sector,
  29.403 +                req->current_nr_sectors, req->nr_sectors, req->bh);
  29.404 +
  29.405 +        rw = req->cmd;
  29.406 +        if ( rw == READA )
  29.407 +            rw = READ;
  29.408 +        if ( unlikely((rw != READ) && (rw != WRITE)) )
  29.409 +            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
  29.410 +
  29.411 +        req->errors = 0;
  29.412 +
  29.413 +        bh = req->bh;
  29.414 +        while ( bh != NULL )
  29.415 +        {
  29.416 +            next_bh = bh->b_reqnext;
  29.417 +            bh->b_reqnext = NULL;
  29.418 +
  29.419 +            full = blkif_queue_request(
  29.420 +                (unsigned long)bh,
  29.421 +                (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, 
  29.422 +                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
  29.423 +
  29.424 +            if ( full )
  29.425 +            { 
  29.426 +                bh->b_reqnext = next_bh;
  29.427 +                pending_queues[nr_pending++] = rq;
  29.428 +                if ( unlikely(nr_pending >= MAX_PENDING) )
  29.429 +                    BUG();
  29.430 +                goto out; 
  29.431 +            }
  29.432 +
  29.433 +            queued++;
  29.434 +
  29.435 +            /* Dequeue the buffer head from the request. */
  29.436 +            nsect = bh->b_size >> 9;
  29.437 +            bh = req->bh = next_bh;
  29.438 +            
  29.439 +            if ( bh != NULL )
  29.440 +            {
  29.441 +                /* There's another buffer head to do. Update the request. */
  29.442 +                req->hard_sector += nsect;
  29.443 +                req->hard_nr_sectors -= nsect;
  29.444 +                req->sector = req->hard_sector;
  29.445 +                req->nr_sectors = req->hard_nr_sectors;
  29.446 +                req->current_nr_sectors = bh->b_size >> 9;
  29.447 +                req->buffer = bh->b_data;
  29.448 +            }
  29.449 +            else
  29.450 +            {
  29.451 +                /* That was the last buffer head. Finalise the request. */
  29.452 +                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
  29.453 +                    BUG();
  29.454 +                blkdev_dequeue_request(req);
  29.455 +                end_that_request_last(req);
  29.456 +            }
  29.457 +        }
  29.458 +    }
  29.459 +
  29.460 + out:
  29.461 +    if ( queued != 0 )
  29.462 +        flush_requests();
  29.463 +}
  29.464 +
  29.465 +
  29.466 +static void kick_pending_request_queues(void)
  29.467 +{
  29.468 +    /* We kick pending request queues if the ring is reasonably empty. */
  29.469 +    if ( (nr_pending != 0) && 
  29.470 +         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  29.471 +    {
  29.472 +        /* Attempt to drain the queue, but bail if the ring becomes full. */
  29.473 +        while ( (nr_pending != 0) && !RING_PLUGGED )
  29.474 +            do_blkif_request(pending_queues[--nr_pending]);
  29.475 +    }
  29.476 +}
  29.477 +
  29.478 +
  29.479 +static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
  29.480 +{
  29.481 +    BLK_RING_IDX i; 
  29.482 +    unsigned long flags; 
  29.483 +    struct buffer_head *bh, *next_bh;
  29.484 +    
  29.485 +    if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) )
  29.486 +        return;
  29.487 +    
  29.488 +    spin_lock_irqsave(&io_request_lock, flags);     
  29.489 +
  29.490 +    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
  29.491 +    {
  29.492 +        blkif_response_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
  29.493 +        switch ( bret->operation )
  29.494 +        {
  29.495 +        case BLKIF_OP_READ:
  29.496 +        case BLKIF_OP_WRITE:
  29.497 +            if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
  29.498 +                DPRINTK("Bad return from blkdev data request: %lx\n",
  29.499 +                        bret->status);
  29.500 +            for ( bh = (struct buffer_head *)bret->id; 
  29.501 +                  bh != NULL; 
  29.502 +                  bh = next_bh )
  29.503 +            {
  29.504 +                next_bh = bh->b_reqnext;
  29.505 +                bh->b_reqnext = NULL;
  29.506 +                bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
  29.507 +            }
  29.508 +            break;
  29.509 +        case BLKIF_OP_PROBE:
  29.510 +            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
  29.511 +            blkif_control_rsp_valid = 1;
  29.512 +            break;
  29.513 +        default:
  29.514 +            BUG();
  29.515 +        }
  29.516 +    }
  29.517 +    
  29.518 +    resp_cons = i;
  29.519 +
  29.520 +    kick_pending_request_queues();
  29.521 +
  29.522 +    spin_unlock_irqrestore(&io_request_lock, flags);
  29.523 +}
  29.524 +
  29.525 +
  29.526 +void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
  29.527 +{
  29.528 +    unsigned long flags;
  29.529 +
  29.530 + retry:
  29.531 +    while ( (req_prod - resp_cons) == BLK_RING_SIZE )
  29.532 +    {
  29.533 +        set_current_state(TASK_INTERRUPTIBLE);
  29.534 +        schedule_timeout(1);
  29.535 +    }
  29.536 +
  29.537 +    spin_lock_irqsave(&io_request_lock, flags);
  29.538 +    if ( (req_prod - resp_cons) == BLK_RING_SIZE )
  29.539 +    {
  29.540 +        spin_unlock_irqrestore(&io_request_lock, flags);
  29.541 +        goto retry;
  29.542 +    }
  29.543 +
  29.544 +    DISABLE_SCATTERGATHER();
  29.545 +    memcpy(&blk_ring->ring[MASK_BLK_IDX(req_prod)].req, req, sizeof(*req));
  29.546 +    req_prod++;
  29.547 +    flush_requests();
  29.548 +
  29.549 +    spin_unlock_irqrestore(&io_request_lock, flags);
  29.550 +
  29.551 +    while ( !blkif_control_rsp_valid )
  29.552 +    {
  29.553 +        set_current_state(TASK_INTERRUPTIBLE);
  29.554 +        schedule_timeout(1);
  29.555 +    }
  29.556 +
  29.557 +    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
  29.558 +    blkif_control_rsp_valid = 0;
  29.559 +}
  29.560 +
  29.561 +
  29.562 +static void blkif_bringup_phase1(void *unused)
  29.563 +{
  29.564 +    ctrl_msg_t                   cmsg;
  29.565 +    blkif_fe_interface_connect_t up;
  29.566 +
  29.567 +    /* Move from CLOSED to DISCONNECTED state. */
  29.568 +    blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
  29.569 +    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  29.570 +    blkif_state  = BLKIF_STATE_DISCONNECTED;
  29.571 +
  29.572 +    /* Construct an interface-CONNECT message for the domain controller. */
  29.573 +    cmsg.type      = CMSG_BLKIF_FE;
  29.574 +    cmsg.subtype   = CMSG_BLKIF_FE_INTERFACE_CONNECT;
  29.575 +    cmsg.length    = sizeof(blkif_fe_interface_connect_t);
  29.576 +    up.handle      = 0;
  29.577 +    up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT;
  29.578 +    memcpy(cmsg.msg, &up, sizeof(up));
  29.579 +
  29.580 +    /* Tell the controller to bring up the interface. */
  29.581 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  29.582 +}
  29.583 +
  29.584 +static void blkif_bringup_phase2(void *unused)
  29.585 +{
  29.586 +    blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
  29.587 +    (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL);
  29.588 +
  29.589 +    /* Probe for discs that are attached to the interface. */
  29.590 +    xlvbd_init();
  29.591 +
  29.592 +    blkif_state = BLKIF_STATE_CONNECTED;
  29.593 +
  29.594 +    /* Kick pending requests. */
  29.595 +    spin_lock_irq(&io_request_lock);
  29.596 +    kick_pending_request_queues();
  29.597 +    spin_unlock_irq(&io_request_lock);
  29.598 +}
  29.599 +
  29.600 +static void blkif_status_change(blkif_fe_interface_status_changed_t *status)
  29.601 +{
  29.602 +    if ( status->handle != 0 )
  29.603 +    {
  29.604 +        printk(KERN_WARNING "Status change on unsupported blkif %d\n",
  29.605 +               status->handle);
  29.606 +        return;
  29.607 +    }
  29.608 +
  29.609 +    switch ( status->status )
  29.610 +    {
  29.611 +    case BLKIF_INTERFACE_STATUS_DESTROYED:
  29.612 +        printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n",
  29.613 +               blkif_state);
  29.614 +        break;
  29.615 +
  29.616 +    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
  29.617 +        if ( blkif_state != BLKIF_STATE_CLOSED )
  29.618 +        {
  29.619 +            printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message"
  29.620 +                   " in state %d\n", blkif_state);
  29.621 +            break;
  29.622 +        }
  29.623 +        blkif_statechange_tq.routine = blkif_bringup_phase1;
  29.624 +        schedule_task(&blkif_statechange_tq);
  29.625 +        break;
  29.626 +
  29.627 +    case BLKIF_INTERFACE_STATUS_CONNECTED:
  29.628 +        if ( blkif_state == BLKIF_STATE_CLOSED )
  29.629 +        {
  29.630 +            printk(KERN_WARNING "Unexpected blkif-CONNECTED message"
  29.631 +                   " in state %d\n", blkif_state);
  29.632 +            break;
  29.633 +        }
  29.634 +        blkif_evtchn = status->evtchn;
  29.635 +        blkif_statechange_tq.routine = blkif_bringup_phase2;
  29.636 +        schedule_task(&blkif_statechange_tq);
  29.637 +        break;
  29.638 +
  29.639 +    default:
  29.640 +        printk(KERN_WARNING "Status change to unknown value %d\n", 
  29.641 +               status->status);
  29.642 +        break;
  29.643 +    }
  29.644 +}
  29.645 +
  29.646 +
  29.647 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
  29.648 +{
  29.649 +    switch ( msg->subtype )
  29.650 +    {
  29.651 +    case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
  29.652 +        if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) )
  29.653 +            goto parse_error;
  29.654 +        blkif_status_change((blkif_fe_interface_status_changed_t *)
  29.655 +                            &msg->msg[0]);
  29.656 +        break;        
  29.657 +#if 0
  29.658 +    case CMSG_BLKIF_FE_VBD_STATUS_CHANGED:
  29.659 +        update_tq.routine = update_vbds_task;
  29.660 +        schedule_task(&update_tq);
  29.661 +        break;
  29.662 +#endif
  29.663 +    default:
  29.664 +        goto parse_error;
  29.665 +    }
  29.666 +
  29.667 +    ctrl_if_send_response(msg);
  29.668 +    return;
  29.669 +
  29.670 + parse_error:
  29.671 +    msg->length = 0;
  29.672 +    ctrl_if_send_response(msg);
  29.673 +}
  29.674 +
  29.675 +
  29.676 +int __init xlblk_init(void)
  29.677 +{
  29.678 +    ctrl_msg_t                       cmsg;
  29.679 +    blkif_fe_driver_status_changed_t st;
  29.680 +
  29.681 +    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx);
  29.682 +
  29.683 +    /* Send a driver-UP notification to the domain controller. */
  29.684 +    cmsg.type      = CMSG_BLKIF_FE;
  29.685 +    cmsg.subtype   = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED;
  29.686 +    cmsg.length    = sizeof(blkif_fe_driver_status_changed_t);
  29.687 +    st.status      = BLKIF_DRIVER_STATUS_UP;
  29.688 +    memcpy(cmsg.msg, &st, sizeof(st));
  29.689 +    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
  29.690 +
  29.691 +    /*
  29.692 +     * We should read 'nr_interfaces' from response message and wait
  29.693 +     * for notifications before proceeding. For now we assume that we
  29.694 +     * will be notified of exactly one interface.
  29.695 +     */
  29.696 +    while ( blkif_state != BLKIF_STATE_CONNECTED )
  29.697 +    {
  29.698 +        set_current_state(TASK_INTERRUPTIBLE);
  29.699 +        schedule_timeout(1);
  29.700 +    }
  29.701 +
  29.702 +    return 0;
  29.703 +}
  29.704 +
  29.705 +
  29.706 +static void __exit xlblk_cleanup(void)
  29.707 +{
  29.708 +    /* XXX FIXME */
  29.709 +    BUG();
  29.710 +}
  29.711 +
  29.712 +
  29.713 +#ifdef MODULE
  29.714 +module_init(xlblk_init);
  29.715 +module_exit(xlblk_cleanup);
  29.716 +#endif
  29.717 +
  29.718 +
  29.719 +void blkdev_suspend(void)
  29.720 +{
  29.721 +    /* XXX FIXME */
  29.722 +    BUG();
  29.723 +}
  29.724 +
  29.725 +
  29.726 +void blkdev_resume(void)
  29.727 +{
  29.728 +    /* XXX FIXME */
  29.729 +    BUG();
  29.730 +}
    30.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Sun May 02 00:27:58 2004 +0000
    30.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c	Sun May 02 00:52:45 2004 +0000
    30.3 @@ -1,13 +1,13 @@
    30.4  /******************************************************************************
    30.5 - * vbd.c
    30.6 + * arch/xen/drivers/blkif/frontend/vbd.c
    30.7   * 
    30.8 - * Xenolinux virtual block-device driver (xvd).
    30.9 + * Xenolinux virtual block-device driver.
   30.10   * 
   30.11   * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   30.12   * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   30.13   */
   30.14  
   30.15 -#include "block.h"
   30.16 +#include "common.h"
   30.17  #include <linux/blk.h>
   30.18  
   30.19  /*
   30.20 @@ -43,54 +43,59 @@ static int xlvbd_blksize_size[256];
   30.21  static int xlvbd_hardsect_size[256];
   30.22  static int xlvbd_max_sectors[256];
   30.23  
   30.24 -/* Information from Xen about our VBDs. */
   30.25 +/* Information about our VBDs. */
   30.26  #define MAX_VBDS 64
   30.27  static int nr_vbds;
   30.28 -static xen_disk_t *vbd_info;
   30.29 +static vdisk_t *vbd_info;
   30.30  
   30.31  static struct block_device_operations xlvbd_block_fops = 
   30.32  {
   30.33 -    open:               xen_block_open,
   30.34 -    release:            xen_block_release,
   30.35 -    ioctl:              xen_block_ioctl,
   30.36 -    check_media_change: xen_block_check,
   30.37 -    revalidate:         xen_block_revalidate,
   30.38 +    open:               blkif_open,
   30.39 +    release:            blkif_release,
   30.40 +    ioctl:              blkif_ioctl,
   30.41 +    check_media_change: blkif_check,
   30.42 +    revalidate:         blkif_revalidate,
   30.43  };
   30.44  
   30.45 -static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
   30.46 +static int xlvbd_get_vbd_info(vdisk_t *disk_info)
   30.47  {
   30.48 -    int error;
   30.49 -    block_io_op_t op; 
   30.50 +    vdisk_t         *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
   30.51 +    blkif_request_t  req;
   30.52 +    blkif_response_t rsp;
   30.53 +    int              nr;
   30.54  
   30.55 -    /* Probe for disk information. */
   30.56 -    memset(&op, 0, sizeof(op)); 
   30.57 -    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
   30.58 -    op.u.probe_params.domain    = 0; 
   30.59 -    op.u.probe_params.xdi.max   = MAX_VBDS;
   30.60 -    op.u.probe_params.xdi.disks = disk_info;
   30.61 -    op.u.probe_params.xdi.count = 0;
   30.62 +    memset(&req, 0, sizeof(req));
   30.63 +    req.operation   = BLKIF_OP_PROBE;
   30.64 +    req.nr_segments = 1;
   30.65 +    req.buffer_and_sects[0] = virt_to_machine(buf) | (PAGE_SIZE/512);
   30.66  
   30.67 -    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
   30.68 +    blkif_control_send(&req, &rsp);
   30.69 +
   30.70 +    if ( rsp.status <= 0 )
   30.71      {
   30.72 -        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
   30.73 +        printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
   30.74          return -1;
   30.75      }
   30.76  
   30.77 -    return op.u.probe_params.xdi.count;
   30.78 +    if ( (nr = rsp.status) > MAX_VBDS )
   30.79 +         nr = MAX_VBDS;
   30.80 +    memcpy(disk_info, buf, nr * sizeof(vdisk_t));
   30.81 +
   30.82 +    return nr;
   30.83  }
   30.84  
   30.85  /*
   30.86   * xlvbd_init_device - initialise a VBD device
   30.87 - * @disk:              a xen_disk_t describing the VBD
   30.88 + * @disk:              a vdisk_t describing the VBD
   30.89   *
   30.90 - * Takes a xen_disk_t * that describes a VBD the domain has access to.
   30.91 + * Takes a vdisk_t * that describes a VBD the domain has access to.
   30.92   * Performs appropriate initialisation and registration of the device.
   30.93   *
   30.94   * Care needs to be taken when making re-entrant calls to ensure that
   30.95   * corruption does not occur.  Also, devices that are in use should not have
   30.96   * their details updated.  This is the caller's responsibility.
   30.97   */
   30.98 -static int xlvbd_init_device(xen_disk_t *xd)
   30.99 +static int xlvbd_init_device(vdisk_t *xd)
  30.100  {
  30.101      int device = xd->device;
  30.102      int major  = MAJOR(device); 
  30.103 @@ -181,11 +186,11 @@ static int xlvbd_init_device(xen_disk_t 
  30.104              read_ahead[major]    = 8;
  30.105          }
  30.106  
  30.107 -        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
  30.108 +        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request);
  30.109  
  30.110          /*
  30.111           * Turn off barking 'headactive' mode. We dequeue buffer heads as
  30.112 -         * soon as we pass them down to Xen.
  30.113 +         * soon as we pass them to the back-end driver.
  30.114           */
  30.115          blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
  30.116  
  30.117 @@ -431,12 +436,12 @@ static int xlvbd_remove_device(int devic
  30.118  void xlvbd_update_vbds(void)
  30.119  {
  30.120      int i, j, k, old_nr, new_nr;
  30.121 -    xen_disk_t *old_info, *new_info, *merged_info;
  30.122 +    vdisk_t *old_info, *new_info, *merged_info;
  30.123  
  30.124      old_info = vbd_info;
  30.125      old_nr   = nr_vbds;
  30.126  
  30.127 -    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  30.128 +    new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
  30.129      if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
  30.130      {
  30.131          kfree(new_info);
  30.132 @@ -448,7 +453,7 @@ void xlvbd_update_vbds(void)
  30.133       * old list and new list do not overlap at all, and we cannot yet destroy
  30.134       * VBDs in the old list because the usage counts are busy.
  30.135       */
  30.136 -    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
  30.137 +    merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
  30.138  
  30.139      /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  30.140      i = j = k = 0;
  30.141 @@ -458,13 +463,13 @@ void xlvbd_update_vbds(void)
  30.142          if ( old_info[i].device < new_info[j].device )
  30.143          {
  30.144              if ( xlvbd_remove_device(old_info[i].device) != 0 )
  30.145 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  30.146 +                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  30.147              i++;
  30.148          }
  30.149          else if ( old_info[i].device > new_info[j].device )
  30.150          {
  30.151              if ( xlvbd_init_device(&new_info[j]) == 0 )
  30.152 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  30.153 +                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  30.154              j++;
  30.155          }
  30.156          else
  30.157 @@ -472,9 +477,9 @@ void xlvbd_update_vbds(void)
  30.158              if ( ((old_info[i].capacity == new_info[j].capacity) &&
  30.159                    (old_info[i].info == new_info[j].info)) ||
  30.160                   (xlvbd_remove_device(old_info[i].device) != 0) )
  30.161 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  30.162 +                memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  30.163              else if ( xlvbd_init_device(&new_info[j]) == 0 )
  30.164 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  30.165 +                memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  30.166              i++; j++;
  30.167          }
  30.168      }
  30.169 @@ -482,13 +487,13 @@ void xlvbd_update_vbds(void)
  30.170      for ( ; i < old_nr; i++ )
  30.171      {
  30.172          if ( xlvbd_remove_device(old_info[i].device) != 0 )
  30.173 -            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  30.174 +            memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
  30.175      }
  30.176  
  30.177      for ( ; j < new_nr; j++ )
  30.178      {
  30.179          if ( xlvbd_init_device(&new_info[j]) == 0 )
  30.180 -            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  30.181 +            memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
  30.182      }
  30.183  
  30.184      vbd_info = merged_info;
  30.185 @@ -500,14 +505,14 @@ void xlvbd_update_vbds(void)
  30.186  
  30.187  
  30.188  /*
  30.189 - * Set up all the linux device goop for the virtual block devices (vbd's) that 
  30.190 - * xen tells us about. Note that although from xen's pov VBDs are addressed 
  30.191 - * simply an opaque 16-bit device number, the domain creation tools 
  30.192 + * Set up all the linux device goop for the virtual block devices (vbd's) that
  30.193 + * we know about. Note that although from the backend driver's p.o.v. VBDs are
  30.194 + * addressed simply an opaque 16-bit device number, the domain creation tools 
  30.195   * conventionally allocate these numbers to correspond to those used by 'real' 
  30.196   * linux -- this is just for convenience as it means e.g. that the same 
  30.197 - * /etc/fstab can be used when booting with or without xen.
  30.198 + * /etc/fstab can be used when booting with or without Xen.
  30.199   */
  30.200 -int __init xlvbd_init(void)
  30.201 +int xlvbd_init(void)
  30.202  {
  30.203      int i;
  30.204      
  30.205 @@ -537,7 +542,7 @@ int __init xlvbd_init(void)
  30.206          xlvbd_max_sectors[i]   = 128;
  30.207      }
  30.208  
  30.209 -    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  30.210 +    vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
  30.211      nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  30.212  
  30.213      if ( nr_vbds < 0 )
  30.214 @@ -554,8 +559,3 @@ int __init xlvbd_init(void)
  30.215  
  30.216      return 0;
  30.217  }
  30.218 -
  30.219 -
  30.220 -#ifdef MODULE
  30.221 -module_init(xlvbd_init);
  30.222 -#endif
    31.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Sun May 02 00:27:58 2004 +0000
    31.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c	Sun May 02 00:52:45 2004 +0000
    31.3 @@ -36,7 +36,7 @@ static struct proc_dir_entry *privcmd_in
    31.4  static int privcmd_ioctl(struct inode *inode, struct file *file,
    31.5                           unsigned int cmd, unsigned long data)
    31.6  {
    31.7 -    int ret = 0;
    31.8 +    int ret = -ENOSYS;
    31.9  
   31.10      switch ( cmd )
   31.11      {
   31.12 @@ -108,6 +108,13 @@ static int privcmd_ioctl(struct inode *i
   31.13      }
   31.14      break;
   31.15  
   31.16 +    case IOCTL_PRIVCMD_INITDOMAIN_EVTCHN:
   31.17 +    {
   31.18 +        extern int initdom_ctrlif_domcontroller_port;
   31.19 +        ret = initdom_ctrlif_domcontroller_port;
   31.20 +    }
   31.21 +    break;
   31.22 +    
   31.23      default:
   31.24          ret = -EINVAL;
   31.25      	break;
   31.26 @@ -140,7 +147,7 @@ static int __init init_module(void)
   31.27      {
   31.28          privcmd_intf->owner      = THIS_MODULE;
   31.29          privcmd_intf->nlink      = 1;
   31.30 -	privcmd_intf->proc_fops  = &privcmd_file_ops;
   31.31 +        privcmd_intf->proc_fops  = &privcmd_file_ops;
   31.32      }
   31.33  
   31.34      return 0;
    32.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c	Sun May 02 00:27:58 2004 +0000
    32.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c	Sun May 02 00:52:45 2004 +0000
    32.3 @@ -17,6 +17,13 @@
    32.4  #include <asm/ctrl_if.h>
    32.5  #include <asm/evtchn.h>
    32.6  
    32.7 +/*
    32.8 + * Only used by initial domain which must create its own control-interface
    32.9 + * event channel. This value is picked up by the user-space domain controller
   32.10 + * via an ioctl.
   32.11 + */
   32.12 +int initdom_ctrlif_domcontroller_port = -1;
   32.13 +
   32.14  static int        ctrl_if_evtchn;
   32.15  static int        ctrl_if_irq;
   32.16  static spinlock_t ctrl_if_lock;
   32.17 @@ -276,9 +283,6 @@ void ctrl_if_unregister_receiver(u8 type
   32.18  
   32.19  void ctrl_if_suspend(void)
   32.20  {
   32.21 -    if ( start_info.flags & SIF_INITDOMAIN )
   32.22 -        return;
   32.23 -
   32.24      free_irq(ctrl_if_irq, NULL);
   32.25      unbind_evtchn_from_irq(ctrl_if_evtchn);
   32.26  }
   32.27 @@ -286,7 +290,21 @@ void ctrl_if_suspend(void)
   32.28  void ctrl_if_resume(void)
   32.29  {
   32.30      if ( start_info.flags & SIF_INITDOMAIN )
   32.31 -        return;
   32.32 +    {
   32.33 +        /*
   32.34 +         * The initial domain must create its own domain-controller link.
   32.35 +         * The controller is probably not running at this point, but will
   32.36 +         * pick up its end of the event channel from 
   32.37 +         */
   32.38 +        evtchn_op_t op;
   32.39 +        op.cmd = EVTCHNOP_bind_interdomain;
   32.40 +        op.u.bind_interdomain.dom1 = DOMID_SELF;
   32.41 +        op.u.bind_interdomain.dom2 = DOMID_SELF;
   32.42 +        if ( HYPERVISOR_event_channel_op(&op) != 0 )
   32.43 +            BUG();
   32.44 +        start_info.domain_controller_evtchn = op.u.bind_interdomain.port1;
   32.45 +        initdom_ctrlif_domcontroller_port   = op.u.bind_interdomain.port2;
   32.46 +    }
   32.47  
   32.48      ctrl_if_tx_resp_cons = 0;
   32.49      ctrl_if_rx_req_cons  = 0;
    33.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Sun May 02 00:27:58 2004 +0000
    33.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c	Sun May 02 00:52:45 2004 +0000
    33.3 @@ -5,7 +5,7 @@
    33.4   *
    33.5   * (C) Copyright 1995 1996 Linus Torvalds
    33.6   *
    33.7 - * Modifications for Xenolinux (c) 2003 Keir Fraser
    33.8 + * Modifications for Xenolinux (c) 2003-2004 Keir Fraser
    33.9   */
   33.10  
   33.11  #include <linux/slab.h>
   33.12 @@ -28,21 +28,26 @@
   33.13    __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
   33.14  
   33.15  static inline int direct_remap_area_pte(pte_t *pte, 
   33.16 -                                         unsigned long address, 
   33.17 -                                         unsigned long size,
   33.18 -                                         unsigned long machine_addr, 
   33.19 -                                         pgprot_t prot,
   33.20 -                                         domid_t  domid)
   33.21 +                                        unsigned long address, 
   33.22 +                                        unsigned long size,
   33.23 +                                        unsigned long machine_addr, 
   33.24 +                                        pgprot_t prot,
   33.25 +                                        domid_t  domid)
   33.26  {
   33.27      unsigned long end;
   33.28 -
   33.29 -    mmu_update_t *u, *v;
   33.30 -    u = v = vmalloc(3*PAGE_SIZE); /* plenty */
   33.31 +#define MAX_DIRECTMAP_MMU_QUEUE 64
   33.32 +    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v;
   33.33  
   33.34 -    if (!u) 
   33.35 -	return -ENOMEM;
   33.36 +    address &= ~PMD_MASK;
   33.37 +    end = address + size;
   33.38 +    if (end > PMD_SIZE)
   33.39 +        end = PMD_SIZE;
   33.40 +    if (address >= end)
   33.41 +        BUG();
   33.42  
   33.43 + reset_buffer:
   33.44      /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */
   33.45 +    v = &u[0];
   33.46      if ( domid != 0 )
   33.47      {
   33.48          v[0].val  = (unsigned long)(domid<<16) & ~0xFFFFUL;
   33.49 @@ -56,12 +61,6 @@ static inline int direct_remap_area_pte(
   33.50          v += 2;
   33.51      }
   33.52  
   33.53 -    address &= ~PMD_MASK;
   33.54 -    end = address + size;
   33.55 -    if (end > PMD_SIZE)
   33.56 -        end = PMD_SIZE;
   33.57 -    if (address >= end)
   33.58 -        BUG();
   33.59      do {
   33.60  #if 0  /* thanks to new ioctl mmaping interface this is no longer a bug */
   33.61          if (!pte_none(*pte)) {
   33.62 @@ -71,7 +70,12 @@ static inline int direct_remap_area_pte(
   33.63  #endif
   33.64          v->ptr = virt_to_machine(pte);
   33.65          v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO;
   33.66 -        v++;
   33.67 +        if ( ++v == MAX_DIRECTMAP_MMU_QUEUE )
   33.68 +        {
   33.69 +            if ( HYPERVISOR_mmu_update(u, MAX_DIRECTMAP_MMU_QUEUE) < 0 )
   33.70 +                return -EFAULT;
   33.71 +            goto reset_buffer;
   33.72 +        }
   33.73          address += PAGE_SIZE;
   33.74          machine_addr += PAGE_SIZE;
   33.75          pte++;
   33.76 @@ -84,7 +88,6 @@ static inline int direct_remap_area_pte(
   33.77  	return -EINVAL;
   33.78      }
   33.79  
   33.80 -    vfree(u);
   33.81      return 0;
   33.82  }
   33.83  
   33.84 @@ -96,8 +99,8 @@ static inline int direct_remap_area_pmd(
   33.85                                          pgprot_t prot,
   33.86                                          domid_t  domid)
   33.87  {
   33.88 +    int error = 0;
   33.89      unsigned long end;
   33.90 -    int rc;
   33.91  
   33.92      address &= ~PGDIR_MASK;
   33.93      end = address + size;
   33.94 @@ -111,14 +114,14 @@ static inline int direct_remap_area_pmd(
   33.95          if (!pte)
   33.96              return -ENOMEM;
   33.97  
   33.98 -        if ( rc = direct_remap_area_pte(pte, address, end - address, 
   33.99 -                              address + machine_addr, prot, domid) )
  33.100 -	    return rc;
  33.101 -
  33.102 +        error = direct_remap_area_pte(pte, address, end - address, 
  33.103 +                                      address + machine_addr, prot, domid);
  33.104 +        if ( error )
  33.105 +            break;
  33.106          address = (address + PMD_SIZE) & PMD_MASK;
  33.107          pmd++;
  33.108      } while (address && (address < end));
  33.109 -    return 0;
  33.110 +    return error;
  33.111  }
  33.112   
  33.113  int direct_remap_area_pages(struct mm_struct *mm,
    34.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h	Sun May 02 00:27:58 2004 +0000
    34.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h	Sun May 02 00:52:45 2004 +0000
    34.3 @@ -52,7 +52,7 @@ int ctrl_if_send_message_noblock(
    34.4   *     function returns.
    34.5   *  2. If @hnd is NULL then no callback is executed.
    34.6   */
    34.7 -int ctrl_if_send_message(
    34.8 +int ctrl_if_send_message_block(
    34.9      ctrl_msg_t *msg, 
   34.10      ctrl_msg_handler_t hnd, 
   34.11      unsigned long id, 
    35.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/io.h	Sun May 02 00:27:58 2004 +0000
    35.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/io.h	Sun May 02 00:52:45 2004 +0000
    35.3 @@ -159,13 +159,47 @@ extern void iounmap(void *addr);
    35.4  extern void *bt_ioremap(unsigned long offset, unsigned long size);
    35.5  extern void bt_iounmap(void *addr, unsigned long size);
    35.6  
    35.7 +#ifdef CONFIG_XEN_PHYSDEV_ACCESS
    35.8 +
    35.9 +#ifdef CONFIG_HIGHMEM
   35.10 +#error "Highmem is not yet compatible with physical device access"
   35.11 +#endif
   35.12 +
   35.13  /*
   35.14 - * IO bus memory addresses are also 1:1 with the physical address
   35.15 + * The bus translation macros need special care if we are executing device
   35.16 + * accesses to/from other domains' memory. In these cases the virtual address
   35.17 + * is actually a temporary mapping in the 'vmalloc' space. The physical
   35.18 + * address will therefore be >max_low_pfn, and will not have a valid entry
   35.19 + * in the phys_to_mach mapping table.
   35.20   */
   35.21 +static inline unsigned long phys_to_bus(unsigned long phys)
   35.22 +{
   35.23 +    extern unsigned long max_pfn;
   35.24 +    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
   35.25 +    void *addr;
   35.26 +    unsigned long bus;
   35.27 +    if ( (phys >> PAGE_SHIFT) < max_pfn )
   35.28 +        return phys_to_machine(phys);
   35.29 +    addr = phys_to_virt(phys);
   35.30 +    pgd = pgd_offset_k(   (unsigned long)addr);
   35.31 +    pmd = pmd_offset(pgd, (unsigned long)addr);
   35.32 +    pte = pte_offset(pmd, (unsigned long)addr);
   35.33 +    bus = (pte->pte_low & PAGE_MASK) | (phys & ~PAGE_MASK);
   35.34 +    return bus;
   35.35 +}
   35.36 +
   35.37 +#define virt_to_bus(_x) phys_to_bus(virt_to_phys(_x))
   35.38 +#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
   35.39 +#define page_to_bus(_x) phys_to_bus(page_to_phys(_x))
   35.40 +
   35.41 +#else
   35.42 +
   35.43  #define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x))
   35.44  #define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x))
   35.45  #define page_to_bus(_x) phys_to_machine(page_to_phys(_x))
   35.46  
   35.47 +#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
   35.48 +
   35.49  /*
   35.50   * readX/writeX() are used to access memory mapped devices. On some
   35.51   * architectures the memory mapped IO stuff needs to be accessed
    36.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h	Sun May 02 00:27:58 2004 +0000
    36.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h	Sun May 02 00:52:45 2004 +0000
    36.3 @@ -47,6 +47,11 @@ static inline pmd_t * pmd_offset(pgd_t *
    36.4  	return (pmd_t *) dir;
    36.5  }
    36.6  
    36.7 +#define pte_same(a, b)		((a).pte_low == (b).pte_low)
    36.8 +#define pte_page(x)		(mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT))))
    36.9 +#define pte_none(x)		(!(x).pte_low)
   36.10 +#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
   36.11 +
   36.12  /*
   36.13   * A note on implementation of this atomic 'get-and-clear' operation.
   36.14   * This is actually very simple because XenoLinux can only run on a single
   36.15 @@ -59,13 +64,9 @@ static inline pmd_t * pmd_offset(pgd_t *
   36.16  static inline pte_t ptep_get_and_clear(pte_t *xp)
   36.17  {
   36.18      pte_t pte = *xp;
   36.19 -    queue_l1_entry_update(xp, 0);
   36.20 +    if ( !pte_none(pte) )
   36.21 +        queue_l1_entry_update(xp, 0);
   36.22      return pte;
   36.23  }
   36.24  
   36.25 -#define pte_same(a, b)		((a).pte_low == (b).pte_low)
   36.26 -#define pte_page(x)		(mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT))))
   36.27 -#define pte_none(x)		(!(x).pte_low)
   36.28 -#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot))
   36.29 -
   36.30  #endif /* _I386_PGTABLE_2LEVEL_H */
    37.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h	Sun May 02 00:27:58 2004 +0000
    37.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h	Sun May 02 00:52:45 2004 +0000
    37.3 @@ -32,10 +32,22 @@ typedef struct privcmd_blkmsg
    37.4      int           buf_size;
    37.5  } privcmd_blkmsg_t;
    37.6  
    37.7 -#define IOCTL_PRIVCMD_HYPERCALL        \
    37.8 +/*
    37.9 + * @cmd: IOCTL_PRIVCMD_HYPERCALL
   37.10 + * @arg: &privcmd_hypercall_t
   37.11 + * Return: Value returned from execution of the specified hypercall.
   37.12 + */
   37.13 +#define IOCTL_PRIVCMD_HYPERCALL         \
   37.14      _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t))
   37.15 -#define IOCTL_PRIVCMD_BLKMSG           \
   37.16 -    _IOC(_IOC_NONE, 'P', 1, sizeof(privcmd_blkmsg_t))
   37.17 +
   37.18 +/*
   37.19 + * @cmd: IOCTL_PRIVCMD_INITDOMAIN_EVTCHN
   37.20 + * @arg: n/a
   37.21 + * Return: Port associated with domain-controller end of control event channel
   37.22 + *         for the initial domain.
   37.23 + */
   37.24 +#define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \
   37.25 +    _IOC(_IOC_NONE, 'P', 1, 0)
   37.26  #define IOCTL_PRIVCMD_MMAP             \
   37.27      _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
   37.28  
    38.1 --- a/xenolinux-2.4.26-sparse/mm/vmalloc.c	Sun May 02 00:27:58 2004 +0000
    38.2 +++ b/xenolinux-2.4.26-sparse/mm/vmalloc.c	Sun May 02 00:52:45 2004 +0000
    38.3 @@ -45,6 +45,10 @@ static inline void free_area_pte(pmd_t *
    38.4  			continue;
    38.5  		if (pte_present(page)) {
    38.6  			struct page *ptpage = pte_page(page);
    38.7 +#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
    38.8 +			if (pte_io(page))
    38.9 +				continue;
   38.10 +#endif
   38.11  			if (VALID_PAGE(ptpage) && (!PageReserved(ptpage)))
   38.12  				__free_page(ptpage);
   38.13  			continue;
   38.14 @@ -250,11 +254,6 @@ void __vfree(void * addr, int free_area_
   38.15  	for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
   38.16  		if (tmp->addr == addr) {
   38.17  			*p = tmp->next;
   38.18 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST
   38.19 -			if (tmp->flags & VM_IOREMAP)
   38.20 -				zap_page_range(&init_mm, VMALLOC_VMADDR(tmp->addr), tmp->size);
   38.21 -			else
   38.22 -#endif
   38.23  			if (free_area_pages)
   38.24  			    vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
   38.25  			write_unlock(&vmlist_lock);