changeset 1337:b4a9837c89b1
bitkeeper revision 1.885 (4094465dmX2TefiJicy4taNzc0jiaw)
manual merge
--- a/.rootkeys  Sun May 02 00:27:58 2004 +0000
+++ b/.rootkeys  Sun May 02 00:52:45 2004 +0000
@@ -102,6 +102,7 @@ 3fbd4bd6GtGwZGxYUJPOheYIR7bPaA tools/xen
 4055ee44Bu6oP7U0WxxXypbUt4dNPQ tools/xenctl/setup.py
 40431ac64Hj4ixUnKmlugZKhXPFE_Q tools/xend/Makefile
 4055ad95Se-FqttgxollqOAAHB94zA tools/xend/lib/__init__.py
+4092738fMRGC9fFBcPRCWaJaj9U3ag tools/xend/lib/blkif.py
 4055ad97wMLUj0BZT0e_T0EwQN0Bvw tools/xend/lib/console.py
 4048c0ddsF0WrU7HUzTvg1MJoCIfWg tools/xend/lib/domain_controller.h
 4054a301VEag2GwrBrFBna5U1BGlLA tools/xend/lib/main.py
@@ -665,8 +666,8 @@ 4087cf0dkVF3I19gpT1cNubeJgQr7g xenolinux
 4087cf0dlv1Dw4MAbeRStPPG8IvPPg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c
 40880cc6hHg6s2cPHbqPNQxENefjoQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h
 4075806dI5kfeMD5RV-DA0PYoThx_w xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile
-4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c
-4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h
+4075806d4-j7vN0Mn0bklI1cRUX1vQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h
+4075806d3fJqqDC1pYYPTZPc575iKg xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c
 4075806dibjCcfuXv6CINMhxWTw3jQ xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c
 3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.26-sparse/arch/xen/drivers/block/Makefile
 3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.26-sparse/arch/xen/drivers/block/block.c
--- a/tools/examples/xc_dom_create.py  Sun May 02 00:27:58 2004 +0000
+++ b/tools/examples/xc_dom_create.py  Sun May 02 00:52:45 2004 +0000
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 import string, sys, os, time, socket, getopt, signal, syslog
-import Xc, xenctl.utils, xenctl.console_client
+import Xc, xenctl.utils, xenctl.console_client, re
 
 config_dir = '/etc/xc/'
 config_file = xc_config_file = config_dir + 'defaults'
@@ -195,6 +195,15 @@ output('VM cmdline : "%s"' % cmd
 if dryrun:
     sys.exit(1)
 
+##### HACK HACK HACK
+##### Until everyone moves to the new I/O world, and a more robust domain
+##### controller (xend), we use this little trick to discover whether we
+##### are in a testing environment for new I/O stuff.
+new_io_world = True
+for line in os.popen('cat /proc/interrupts').readlines():
+    if re.search('blkdev', line):
+        new_io_world = False
+
 ##### Code beyond this point is actually used to manage the mechanics of
 ##### starting (and watching if necessary) guest virtual machines.
 
@@ -228,19 +237,23 @@ def make_domain():
 
     cmsg = 'new_control_interface(dom='+str(id)+', console_port='+str(console_port)+')'
 
-    xend_response = xenctl.utils.xend_control_message(cmsg)
+    cons_response = xenctl.utils.xend_control_message(cmsg)
 
-    if not xend_response['success']:
+    if not cons_response['success']:
         print "Error creating initial event channel"
-        print "Error type: " + xend_response['error_type']
-        if xend_response['error_type'] == 'exception':
-            print "Exception type: " + xend_response['exception_type']
-            print "Exception value: " + xend_response['exception_value']
+        print "Error type: " + cons_response['error_type']
+        if cons_response['error_type'] == 'exception':
+            print "Exception type: " + cons_response['exception_type']
+            print "Exception value: " + cons_response['exception_value']
         xc.domain_destroy ( dom=id )
         sys.exit()
 
+    # will the domain have IO privileges?
+    if pci_device_list != []: io_priv = True
+    else: io_priv = False
+
     if restore:
-        ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1 )' % builder_fn)
+        ret = eval('xc.%s_restore ( dom=id, state_file=state_file, progress=1, io_priv=%d )' % (builder_fn, io_priv))
         if ret < 0:
             print "Error restoring domain"
             print "Return code = " + str(ret)
@@ -248,7 +261,7 @@ def make_domain():
             sys.exit()
     else:
 
-        ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=xend_response["remote_port"] )' % builder_fn)
+        ret = eval('xc.%s_build ( dom=id, image=image, ramdisk=ramdisk, cmdline=cmdline, control_evtchn=cons_response["remote_port"], io_priv=%d )' % (builder_fn, io_priv) )
         if ret < 0:
             print "Error building Linux guest OS: "
             print "Return code = " + str(ret)
@@ -259,6 +272,18 @@ def make_domain():
 
     # set the expertise level appropriately
     xenctl.utils.VBD_EXPERT_MODE = vbd_expert
+
+    if new_io_world:
+        cmsg = 'new_block_interface(dom='+str(id)+')'
+        xend_response = xenctl.utils.xend_control_message(cmsg)
+        if not xend_response['success']:
+            print "Error creating block interface"
+            print "Error type: " + xend_response['error_type']
+            if xend_response['error_type'] == 'exception':
+                print "Exception type: " + xend_response['exception_type']
+                print "Exception val: " + xend_response['exception_value']
+            xc.domain_destroy ( dom=id )
+            sys.exit()
 
     for ( uname, virt_name, rw ) in vbd_list:
         virt_dev = xenctl.utils.blkdev_name_to_number( virt_name )
@@ -269,42 +294,70 @@ def make_domain():
             xc.domain_destroy ( dom=id )
             sys.exit()
 
-        # check that setting up this VBD won't violate the sharing
-        # allowed by the current VBD expertise level
-        if xenctl.utils.vd_extents_validate(segments, rw=='w' or rw=='rw') < 0:
-            xc.domain_destroy( dom = id )
-            sys.exit()
+        if new_io_world:
+            if len(segments) > 1:
+                print "New I/O world cannot deal with multi-extent vdisks"
+                xc.domain_destroy ( dom=id )
+                sys.exit()
+            seg = segments[0]
+            cmsg = 'new_block_device(dom=' + str(id) + \
+                   ',handle=0,vdev=' + str(virt_dev) + \
+                   ',pdev=' + str(seg['device']) + \
+                   ',start_sect=' + str(seg['start_sector']) + \
+                   ',nr_sect=' + str(seg['nr_sectors']) + \
+                   ',readonly=' + str(not re.match('w',rw)) + ')'
+            xend_response = xenctl.utils.xend_control_message(cmsg)
+            if not xend_response['success']:
+                print "Error creating virtual block device"
+                print "Error type: " + xend_response['error_type']
+                if xend_response['error_type'] == 'exception':
+                    print "Exception type: " + xend_response['exception_type']
+                    print "Exception val: " + xend_response['exception_value']
+                xc.domain_destroy ( dom=id )
+                sys.exit()
+        else:
+            # check that setting up this VBD won't violate the sharing
+            # allowed by the current VBD expertise level
+            if xenctl.utils.vd_extents_validate(segments,
+                                                rw=='w' or rw=='rw') < 0:
+                xc.domain_destroy( dom = id )
+                sys.exit()
 
-        if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' or rw=='rw' ):
-            print "Error creating VBD vbd=%d writeable=%d\n" % (virt_dev,rw)
-            xc.domain_destroy ( dom=id )
-            sys.exit()
+            if xc.vbd_create( dom=id, vbd=virt_dev,
+                              writeable= rw=='w' or rw=='rw' ):
+                print "Error creating VBD %d (writeable=%s)\n" % (virt_dev,rw)
+                xc.domain_destroy ( dom=id )
+                sys.exit()
 
-        if xc.vbd_setextents( dom=id,
-                              vbd=virt_dev,
-                              extents=segments):
-            print "Error populating VBD vbd=%d\n" % virt_dev
-            xc.domain_destroy ( dom=id )
-            sys.exit()
+            if xc.vbd_setextents( dom=id,
+                                  vbd=virt_dev,
+                                  extents=segments):
+                print "Error populating VBD vbd=%d\n" % virt_dev
+                xc.domain_destroy ( dom=id )
+                sys.exit()
 
-    # setup virtual firewall rules for all aliases
-    for ip in vfr_ipaddr:
-        xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
+    if not new_io_world:
+        # setup virtual firewall rules for all aliases
+        for ip in vfr_ipaddr:
+            xenctl.utils.setup_vfr_rules_for_vif( id, 0, ip )
 
-    # check for physical device access
-    for (pci_bus, pci_dev, pci_func) in pci_device_list:
-        if xc.physdev_pci_access_modify(
-            dom=id, bus=pci_bus, dev=pci_dev, func=pci_func, enable=1 ) < 0:
-            print "Non-fatal error enabling PCI device access."
-        else:
-            print "Enabled PCI access (%d:%d:%d)." % (pci_bus,pci_dev,pci_func)
+    if new_io_world:
+        # check for physical device access
+        for (pci_bus, pci_dev, pci_func) in pci_device_list:
+            if xc.physdev_pci_access_modify(
+                dom=id, bus=pci_bus, dev=pci_dev,
+                func=pci_func, enable=1 ) < 0:
+                print "Non-fatal error enabling PCI device access."
+            else:
+                print "Enabled PCI access (%d:%d:%d)." % \
+                      (pci_bus,pci_dev,pci_func)
 
     if xc.domain_start( dom=id ) < 0:
         print "Error starting domain"
         xc.domain_destroy ( dom=id )
         sys.exit()
 
-    return (id, xend_response['console_port'])
+    return (id, cons_response['console_port'])
 # end of make_domain()
 
 def mkpidfile():
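The 'new_block_device(...)' string assembled in the hunk above is shipped to xend over its management socket, where it is evaluated as a call into xend.manager (see the main.py and manager.py hunks later in this changeset). For a single-extent read-write vdisk the request is therefore equivalent to the sketch below; all the concrete values (domain id, device numbers, extent size) are hypothetical:

    import xend.manager

    # Hypothetical values: dom 7, guest device 0x801, backed by a 100MB
    # extent starting at sector 0 of physical device 0x301.
    xend.manager.new_block_device(dom=7, handle=0, vdev=0x801, pdev=0x301,
                                  start_sect=0, nr_sect=204800, readonly=0)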
--- a/tools/xc/lib/xc.h  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xc/lib/xc.h  Sun May 02 00:52:45 2004 +0000
@@ -64,7 +64,7 @@ int xc_linux_save(int xc_handle,
                   int verbose);
 
 int xc_linux_restore(int xc_handle,
-                     u64 domid,
+                     u64 domid,
                      const char *state_file,
                      int verbose,
                      u64 *pdomid);
@@ -74,13 +74,15 @@ int xc_linux_build(int xc_handle,
                    const char *image_name,
                    const char *ramdisk_name,
                    const char *cmdline,
-                   unsigned int control_evtchn);
+                   unsigned int control_evtchn,
+                   int io_priv);
 
 int xc_netbsd_build(int xc_handle,
                     u64 domid,
                     const char *image_name,
                     const char *cmdline,
-                    unsigned int control_evtchn);
+                    unsigned int control_evtchn,
+                    int io_priv);
 
 int xc_bvtsched_global_set(int xc_handle,
                            unsigned long ctx_allow);
@@ -248,15 +250,15 @@ int xc_shadow_control(int xc_handle,
 
 int xc_domain_setname(int xc_handle,
                       u64 domid,
-                      char *name);
+                      char *name);
 
 int xc_domain_setinitialmem(int xc_handle,
-                            u64 domid,
-                            unsigned int initial_memkb);
+                            u64 domid,
+                            unsigned int initial_memkb);
 
 int xc_domain_setmaxmem(int xc_handle,
-                        u64 domid,
-                        unsigned int max_memkb);
+                        u64 domid,
+                        unsigned int max_memkb);
 
 
 #endif /* __XC_H__ */
--- a/tools/xc/lib/xc_linux_build.c  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xc/lib/xc_linux_build.c  Sun May 02 00:52:45 2004 +0000
@@ -74,7 +74,8 @@ static int setup_guestos(int xc_handle,
                          full_execution_context_t *ctxt,
                          const char *cmdline,
                          unsigned long shared_info_frame,
-                         unsigned int control_evtchn)
+                         unsigned int control_evtchn,
+                         int io_priv)
 {
     l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
     l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -268,7 +269,7 @@ static int setup_guestos(int xc_handle,
     memset(start_info, 0, sizeof(*start_info));
     start_info->nr_pages = nr_pages;
    start_info->shared_info = shared_info_frame << PAGE_SHIFT;
-    start_info->flags = 0;
+    start_info->flags = io_priv ? SIF_PRIVILEGED : 0;
     start_info->pt_base = vpt_start;
     start_info->nr_pt_frames = nr_pt_pages;
     start_info->mfn_list = vphysmap_start;
@@ -381,7 +382,8 @@ int xc_linux_build(int xc_handle,
                    const char *image_name,
                    const char *ramdisk_name,
                    const char *cmdline,
-                   unsigned int control_evtchn)
+                   unsigned int control_evtchn,
+                   int io_priv)
 {
     dom0_op_t launch_op, op;
     int initrd_fd = -1;
@@ -446,7 +448,7 @@ int xc_linux_build(int xc_handle,
                        &vstartinfo_start, &vkern_entry,
                        ctxt, cmdline,
                        op.u.getdomaininfo.shared_info_frame,
-                       control_evtchn) < 0 )
+                       control_evtchn, io_priv) < 0 )
     {
         ERROR("Error constructing guest OS");
         goto error_out;
@@ -560,13 +562,13 @@ static int readelfimage_base_and_size(ch
 
     if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
     {
-        ERROR("ELF program headers extend beyond end of image.");
+        ERROR("ELF program headers extend beyond end of image.");
         return -EINVAL;
     }
 
     if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
     {
-        ERROR("ELF section headers extend beyond end of image.");
+        ERROR("ELF section headers extend beyond end of image.");
         return -EINVAL;
     }
 
@@ -642,7 +644,7 @@ static int loadelfimage(char *elfbase, i
     {
         phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
         if ( !is_loadable_phdr(phdr) )
-            continue;
+            continue;
 
         for ( done = 0; done < phdr->p_filesz; done += chunksz )
         {
--- a/tools/xc/lib/xc_netbsd_build.c  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xc/lib/xc_netbsd_build.c  Sun May 02 00:52:45 2004 +0000
@@ -63,7 +63,8 @@ static int setup_guestos(int xc_handle,
                          full_execution_context_t *ctxt,
                          const char *cmdline,
                          unsigned long shared_info_frame,
-                         unsigned int control_evtchn)
+                         unsigned int control_evtchn,
+                         int io_priv)
 {
     l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
     l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
@@ -175,7 +176,7 @@ static int setup_guestos(int xc_handle,
     start_info->mod_len = symtab_len;
     start_info->nr_pages = tot_pages;
     start_info->shared_info = shared_info_frame << PAGE_SHIFT;
-    start_info->flags = 0;
+    start_info->flags = io_priv ? SIF_PRIVILEGED : 0;
     start_info->domain_controller_evtchn = control_evtchn;
     strncpy(start_info->cmd_line, cmdline, MAX_CMDLINE);
     start_info->cmd_line[MAX_CMDLINE-1] = '\0';
@@ -212,7 +213,8 @@ int xc_netbsd_build(int xc_handle,
                     u64 domid,
                     const char *image_name,
                     const char *cmdline,
-                    unsigned int control_evtchn)
+                    unsigned int control_evtchn,
+                    int io_priv)
 {
     dom0_op_t launch_op, op;
     unsigned long load_addr;
@@ -269,7 +271,7 @@ int xc_netbsd_build(int xc_handle,
                        &virt_startinfo_addr,
                        &load_addr, &st_ctxt, cmdline,
                        op.u.getdomaininfo.shared_info_frame,
-                       control_evtchn) < 0 )
+                       control_evtchn, io_priv) < 0 )
     {
         ERROR("Error constructing guest OS");
         goto error_out;
--- a/tools/xc/py/Xc.c  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xc/py/Xc.c  Sun May 02 00:52:45 2004 +0000
@@ -228,18 +228,19 @@ static PyObject *pyxc_linux_build(PyObje
 
     u64 dom;
     char *image, *ramdisk = NULL, *cmdline = "";
-    int control_evtchn;
+    int control_evtchn, io_priv = 0;
 
     static char *kwd_list[] = { "dom", "control_evtchn",
-                                "image", "ramdisk", "cmdline", NULL };
+                                "image", "ramdisk", "cmdline", "io_priv",
+                                NULL };
 
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list,
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list,
                                       &dom, &control_evtchn,
-                                      &image, &ramdisk, &cmdline) )
+                                      &image, &ramdisk, &cmdline, &io_priv) )
         return NULL;
 
     if ( xc_linux_build(xc->xc_handle, dom, image,
-                        ramdisk, cmdline, control_evtchn) != 0 )
+                        ramdisk, cmdline, control_evtchn, io_priv) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
     Py_INCREF(zero);
@@ -254,18 +255,19 @@ static PyObject *pyxc_netbsd_build(PyObj
 
     u64 dom;
     char *image, *ramdisk = NULL, *cmdline = "";
-    int control_evtchn;
+    int control_evtchn, io_priv = 0;
 
     static char *kwd_list[] = { "dom", "control_evtchn",
-                                "image", "ramdisk", "cmdline", NULL };
+                                "image", "ramdisk", "cmdline", "io_priv",
+                                NULL };
 
-    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ss", kwd_list,
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "Lis|ssi", kwd_list,
                                       &dom, &control_evtchn,
-                                      &image, &ramdisk, &cmdline) )
+                                      &image, &ramdisk, &cmdline, &io_priv) )
         return NULL;
 
     if ( xc_netbsd_build(xc->xc_handle, dom, image,
-                         cmdline, control_evtchn) != 0 )
+                         cmdline, control_evtchn, io_priv) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
     Py_INCREF(zero);
@@ -1160,7 +1162,8 @@ static PyMethodDef pyxc_methods[] = {
       " dom [long]: Identifier of domain to build into.\n"
       " image [str]: Name of kernel image file. May be gzipped.\n"
       " ramdisk [str, n/a]: Name of ramdisk file, if any.\n"
-      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+      " cmdline [str, n/a]: Kernel parameters, if any.\n"
+      " io_priv [boolean]: Does the domain have IO privileges?\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
     { "netbsd_build",
@@ -1169,7 +1172,8 @@ static PyMethodDef pyxc_methods[] = {
       "Build a new NetBSD guest OS.\n"
       " dom [long]: Identifier of domain to build into.\n"
       " image [str]: Name of kernel image file. May be gzipped.\n"
-      " cmdline [str, n/a]: Kernel parameters, if any.\n\n"
+      " cmdline [str, n/a]: Kernel parameters, if any.\n"
+      " io_priv [boolean]: Does the domain have IO privileges?\n\n"
      "Returns: [int] 0 on success; -1 on error.\n" },
 
     { "bvtsched_global_set",
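With the binding changes above, io_priv becomes an optional keyword argument (defaulting to 0) on the Python-level build calls. A minimal sketch of the resulting usage; the domain id, event-channel port and image path are placeholders, not values from this changeset:

    import Xc

    xc = Xc.new()
    # io_priv=1 makes xc_linux_build() set SIF_PRIVILEGED in
    # start_info->flags (see the xc_linux_build.c hunk above).
    ret = xc.linux_build(dom=7, control_evtchn=3,
                         image='/path/to/vmlinuz-xeno', io_priv=1)
    if ret < 0:
        print "Error building Linux guest OS"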
--- /dev/null  Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xend/lib/blkif.py  Sun May 02 00:52:45 2004 +0000
@@ -0,0 +1,143 @@
+
+#################################################################
+## xend/blkif.py -- Block-interface management functions for Xend
+## Copyright (c) 2004, K A Fraser (University of Cambridge)
+#################################################################
+
+import errno, re, os, select, signal, socket, struct, sys
+import xend.main, xend.console, xend.manager, xend.utils, Xc
+
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED = 0
+CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED = 32
+CMSG_BLKIF_FE_INTERFACE_CONNECT = 33
+CMSG_BLKIF_FE_INTERFACE_DISCONNECT = 34
+CMSG_BLKIF_BE_CREATE = 0
+CMSG_BLKIF_BE_DESTROY = 1
+CMSG_BLKIF_BE_CONNECT = 2
+CMSG_BLKIF_BE_DISCONNECT = 3
+CMSG_BLKIF_BE_VBD_CREATE = 4
+CMSG_BLKIF_BE_VBD_DESTROY = 5
+CMSG_BLKIF_BE_VBD_GROW = 6
+CMSG_BLKIF_BE_VBD_SHRINK = 7
+
+pendmsg = None
+pendaddr = None
+
+def backend_tx_req(msg):
+    port = xend.main.dom0_port
+    if port.space_to_write_request():
+        port.write_request(msg)
+        port.notify()
+    else:
+        xend.blkif.pendmsg = msg
+
+def backend_rx_req(port, msg):
+    port.write_response(msg)
+
+def backend_rx_rsp(port, msg):
+    subtype = (msg.get_header())['subtype']
+    print "Received blkif-be response, subtype %d" % subtype
+    if subtype == CMSG_BLKIF_BE_CREATE:
+        rsp = { 'success': True }
+        xend.main.send_management_response(rsp, xend.blkif.pendaddr)
+    elif subtype == CMSG_BLKIF_BE_CONNECT:
+        (dom,hnd,evtchn,frame,st) = struct.unpack("QIILI", msg.get_payload())
+        blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+        msg = xend.utils.message(CMSG_BLKIF_FE, \
+                                 CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+        msg.append_payload(struct.pack("III",0,2,blkif.evtchn['port2']))
+        blkif.ctrlif_tx_req(xend.main.port_list[blkif.key], msg)
+    elif subtype == CMSG_BLKIF_BE_VBD_CREATE:
+        (dom,hnd,vdev,ro,st) = struct.unpack("QIHII", msg.get_payload())
+        blkif = interface.list[xend.main.port_from_dom(dom).local_port]
+        (pdev, start_sect, nr_sect, readonly) = blkif.devices[vdev]
+        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_GROW, 0)
+        msg.append_payload(struct.pack("QIHHHQQI",dom,0,vdev,0, \
+                                       pdev,start_sect,nr_sect,0))
+        backend_tx_req(msg)
+    elif subtype == CMSG_BLKIF_BE_VBD_GROW:
+        rsp = { 'success': True }
+        xend.main.send_management_response(rsp, xend.blkif.pendaddr)
+
+def backend_do_work(port):
+    global pendmsg
+    if pendmsg and port.space_to_write_request():
+        port.write_request(pendmsg)
+        pendmsg = None
+        return True
+    return False
+
+
+class interface:
+
+    # Dictionary of all block-device interfaces.
+    list = {}
+
+
+    # NB. 'key' is an opaque value that has no meaning in this class.
+    def __init__(self, dom, key):
+        self.dom = dom
+        self.key = key
+        self.devices = {}
+        self.pendmsg = None
+        interface.list[key] = self
+        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_CREATE, 0)
+        msg.append_payload(struct.pack("QII",dom,0,0))
+        xend.blkif.pendaddr = xend.main.mgmt_req_addr
+        backend_tx_req(msg)
+
+    # Attach a device to the specified interface
+    def attach_device(self, vdev, pdev, start_sect, nr_sect, readonly):
+        if self.devices.has_key(vdev):
+            return False
+        self.devices[vdev] = (pdev, start_sect, nr_sect, readonly)
+        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_VBD_CREATE, 0)
+        msg.append_payload(struct.pack("QIHII",self.dom,0,vdev,readonly,0))
+        xend.blkif.pendaddr = xend.main.mgmt_req_addr
+        backend_tx_req(msg)
+        return True
+
+
+    # Completely destroy this interface.
+    def destroy(self):
+        del interface.list[self.key]
+        msg = xend.utils.message(CMSG_BLKIF_BE, CMSG_BLKIF_BE_DESTROY, 0)
+        msg.append_payload(struct.pack("QII",self.dom,0,0))
+        backend_tx_req(msg)
+
+
+    # The parameter @port is the control-interface event channel. This method
+    # returns True if messages were written to the control interface.
+    def ctrlif_transmit_work(self, port):
+        if self.pendmsg and port.space_to_write_request():
+            port.write_request(self.pendmsg)
+            self.pendmsg = None
+            return True
+        return False
+
+    def ctrlif_tx_req(self, port, msg):
+        if port.space_to_write_request():
+            port.write_request(msg)
+            port.notify()
+        else:
+            self.pendmsg = msg
+
+    def ctrlif_rx_req(self, port, msg):
+        port.write_response(msg)
+        subtype = (msg.get_header())['subtype']
+        if subtype == CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
+            msg = xend.utils.message(CMSG_BLKIF_FE, \
+                                     CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED, 0)
+            msg.append_payload(struct.pack("III",0,1,0))
+            self.ctrlif_tx_req(port, msg)
+        elif subtype == CMSG_BLKIF_FE_INTERFACE_CONNECT:
+            (hnd,frame) = struct.unpack("IL", msg.get_payload())
+            xc = Xc.new()
+            self.evtchn = xc.evtchn_bind_interdomain(dom1=0,dom2=self.dom)
+            msg = xend.utils.message(CMSG_BLKIF_BE, \
+                                     CMSG_BLKIF_BE_CONNECT, 0)
+            msg.append_payload(struct.pack("QIILI",self.dom,0, \
+                                           self.evtchn['port1'],frame,0))
+            backend_tx_req(msg)
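The struct.pack format strings used above mirror the C message structures added to tools/xend/lib/domain_controller.h later in this changeset (native alignment; domid_t is a u64 in this tree). A small sketch of the correspondence for the CMSG_BLKIF_BE_CREATE payload, with an illustrative domain id:

    import struct

    # blkif_be_create_t: domid (u64), blkif_handle (u32), status (u32).
    payload = struct.pack("QII", 7, 0, 0)   # dom 7, handle 0, status 0
    (domid, blkif_handle, status) = struct.unpack("QII", payload)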
--- a/tools/xend/lib/console.py  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xend/lib/console.py  Sun May 02 00:52:45 2004 +0000
@@ -5,7 +5,7 @@
 #############################################################
 
 import errno, re, os, select, signal, socket, struct, sys
-
+import xend.blkif, xend.main, xend.manager, xend.utils, Xc
 
 ##
 ## interface:
@@ -16,7 +16,7 @@ import errno, re, os, select, signal, so
 ##  CONNECTED: sending/receiving console data on TCP port 'self.port'
 ##
 ## A dictionary of all active interfaces, indexed by TCP socket descriptor,
-## is accessible as 'interface.interface_list'.
+## is accessible as 'interface.list_by_fd'.
 ##
 ## NB. When a class instance is to be destroyed you *must* call the 'close'
 ## method. Otherwise a stale reference will be left in the interface list.
@@ -30,7 +30,11 @@ class interface:
 
 
     # Dictionary of all active (non-closed) console interfaces.
-    interface_list = {}
+    list_by_fd = {}
+
+
+    # Dictionary of all console interfaces, closed and open.
+    list = {}
 
 
     # NB. 'key' is an opaque value that has no meaning in this class.
@@ -38,6 +42,9 @@ class interface:
         self.status = interface.CLOSED
         self.port = port
         self.key = key
+        self.rbuf = xend.utils.buffer()
+        self.wbuf = xend.utils.buffer()
+        interface.list[key] = self
 
 
     # Is this interface closed (inactive)?
@@ -58,14 +65,14 @@ class interface:
     # Close the interface, if it is not closed already.
     def close(self):
         if not self.closed():
-            del interface.interface_list[self.sock.fileno()]
+            del interface.list_by_fd[self.sock.fileno()]
             self.sock.close()
             del self.sock
             self.status = interface.CLOSED
 
 
     # Move the interface into the 'listening' state. Opens a new listening
-    # socket and updates 'interface_list'.
+    # socket and updates 'list_by_fd'.
     def listen(self):
         # Close old socket (if any), and create a fresh one.
         self.close()
@@ -80,7 +87,7 @@ class interface:
 
             # Announce the new status of this interface.
             self.status = interface.LISTENING
-            interface.interface_list[self.sock.fileno()] = self
+            interface.list_by_fd[self.sock.fileno()] = self
 
         except:
             # In case of trouble ensure we get rid of dangling socket reference
@@ -105,7 +112,69 @@ class interface:
         # Publish the new socket and the new interface state.
         self.sock = sock
         self.status = interface.CONNECTED
-        interface.interface_list[self.sock.fileno()] = self
+        interface.list_by_fd[self.sock.fileno()] = self
         return 1
 
 
+    # Completely destroy a console interface.
+    def destroy(self):
+        self.close()
+        del interface.list[self.key]
+
+
+    # Do work triggered by resource availability on a console-interface socket.
+    def socket_work(self):
+        # If the interface is listening, check for pending connections.
+        if self.listening():
+            self.connect()
+
+        # All done if the interface is not connected.
+        if not self.connected():
+            return
+
+        # Send as much pending data as possible via the socket.
+        while not self.rbuf.empty():
+            try:
+                bytes = self.sock.send(self.rbuf.peek())
+                if bytes > 0:
+                    self.rbuf.discard(bytes)
+            except socket.error, error:
+                pass
+
+        # Read as much data as is available. Don't worry about
+        # overflowing our buffer: it's more important to read the
+        # incoming data stream and detect errors or closure of the
+        # remote end in a timely manner.
+        try:
+            while 1:
+                data = self.sock.recv(2048)
+                # Return of zero means the remote end has disconnected.
+                # We therefore return the console interface to listening.
+                if not data:
+                    self.listen()
+                    break
+                self.wbuf.write(data)
+        except socket.error, error:
+            # Assume that most errors mean that the connection is dead.
+            # In such cases we return the interface to 'listening' state.
+            if error[0] != errno.EAGAIN:
+                print "Better return to listening"
+                self.listen()
+                print "New status: " + str(self.status)
+
+
+    # The parameter @port is the control-interface event channel. This method
+    # returns True if messages were written to the control interface.
+    def ctrlif_transmit_work(self, port):
+        work_done = False
+        while not self.wbuf.empty() and port.space_to_write_request():
+            msg = xend.utils.message(0, 0, 0)
+            msg.append_payload(self.wbuf.read(msg.MAX_PAYLOAD))
+            port.write_request(msg)
+            work_done = True
+        return work_done
+
+
+    def ctrlif_rx_req(self, port, msg):
+        self.rbuf.write(msg.get_payload())
+        port.write_response(msg)
--- a/tools/xend/lib/domain_controller.h  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xend/lib/domain_controller.h  Sun May 02 00:52:45 2004 +0000
@@ -56,29 +56,113 @@ typedef struct {
 #define CMSG_BLKIF_BE 1  /* Block-device backend  */
 #define CMSG_BLKIF_FE 2  /* Block-device frontend */
 
+
+/******************************************************************************
+ * CONSOLE DEFINITIONS
+ */
+
 /*
  * Subtypes for console messages.
  */
 #define CMSG_CONSOLE_DATA 0
 
-/*
- * Subtypes for block-device messages.
- */
-#define CMSG_BLKIF_BE_CREATE      0  /* Create a new block-device interface. */
-#define CMSG_BLKIF_BE_DESTROY     1  /* Destroy a block-device interface.    */
-#define CMSG_BLKIF_BE_VBD_CREATE  2  /* Create a new VBD for an interface.   */
-#define CMSG_BLKIF_BE_VBD_DESTROY 3  /* Delete a VBD from an interface.      */
-#define CMSG_BLKIF_BE_VBD_GROW    4  /* Append an extent to a given VBD.     */
-#define CMSG_BLKIF_BE_VBD_SHRINK  5  /* Remove last extent from a given VBD. */
 
-/*
- * Message request/response defintions for block-device messages.
+/******************************************************************************
+ * BLOCK-INTERFACE FRONTEND DEFINITIONS
  */
 
+/* Messages from domain controller to guest. */
+#define CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED  0
+
+/* Messages from guest to domain controller. */
+#define CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED    32
+#define CMSG_BLKIF_FE_INTERFACE_CONNECT        33
+#define CMSG_BLKIF_FE_INTERFACE_DISCONNECT     34
+
+/* These are used by both front-end and back-end drivers. */
 #define blkif_vdev_t   u16
 #define blkif_pdev_t   u16
 #define blkif_sector_t u64
 
+/*
+ * CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED:
+ *  Notify a guest about a status change on one of its block interfaces.
+ *  If the interface is DESTROYED or DOWN then the interface is disconnected:
+ *   1. The shared-memory frame is available for reuse.
+ *   2. Any unacknowledged messages pending on the interface were dropped.
+ */
+#define BLKIF_INTERFACE_STATUS_DESTROYED    0 /* Interface doesn't exist.    */
+#define BLKIF_INTERFACE_STATUS_DISCONNECTED 1 /* Exists but is disconnected. */
+#define BLKIF_INTERFACE_STATUS_CONNECTED    2 /* Exists and is connected.    */
+typedef struct {
+    unsigned int handle;
+    unsigned int status;
+    unsigned int evtchn; /* status == BLKIF_INTERFACE_STATUS_CONNECTED */
+} blkif_fe_interface_status_changed_t;
+
+/*
+ * CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED:
+ *  Notify the domain controller that the front-end driver is DOWN or UP.
+ *  When the driver goes DOWN then the controller will send no more
+ *  status-change notifications. When the driver comes UP then the controller
+ *  will send a notification for each interface that currently exists.
+ *  If the driver goes DOWN while interfaces are still UP, the domain
+ *  will automatically take the interfaces DOWN.
+ */
+#define BLKIF_DRIVER_STATUS_DOWN 0
+#define BLKIF_DRIVER_STATUS_UP   1
+typedef struct {
+    unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
+} blkif_fe_driver_status_changed_t;
+
+/*
+ * CMSG_BLKIF_FE_INTERFACE_CONNECT:
+ *  If successful, the domain controller will acknowledge with a
+ *  STATUS_CONNECTED message.
+ */
+typedef struct {
+    unsigned int  handle;
+    unsigned long shmem_frame;
+} blkif_fe_interface_connect_t;
+
+/*
+ * CMSG_BLKIF_FE_INTERFACE_DISCONNECT:
+ *  If successful, the domain controller will acknowledge with a
+ *  STATUS_DISCONNECTED message.
+ */
+typedef struct {
+    /* IN */
+    unsigned int handle;
+    /* OUT */
+    /*
+     * Tells driver how many interfaces it should expect to immediately
+     * receive notifications about.
+     */
+    unsigned int nr_interfaces;
+} blkif_fe_interface_disconnect_t;
+
+
+/******************************************************************************
+ * BLOCK-INTERFACE BACKEND DEFINITIONS
+ */
+
+/* Messages from domain controller. */
+#define CMSG_BLKIF_BE_CREATE      0  /* Create a new block-device interface. */
+#define CMSG_BLKIF_BE_DESTROY     1  /* Destroy a block-device interface.    */
+#define CMSG_BLKIF_BE_CONNECT     2  /* Connect i/f to remote driver.        */
+#define CMSG_BLKIF_BE_DISCONNECT  3  /* Disconnect i/f from remote driver.   */
+#define CMSG_BLKIF_BE_VBD_CREATE  4  /* Create a new VBD for an interface.   */
+#define CMSG_BLKIF_BE_VBD_DESTROY 5  /* Delete a VBD from an interface.      */
+#define CMSG_BLKIF_BE_VBD_GROW    6  /* Append an extent to a given VBD.     */
+#define CMSG_BLKIF_BE_VBD_SHRINK  7  /* Remove last extent from a given VBD. */
+
+/* Messages to domain controller. */
+#define CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED 32
+
+/*
+ * Message request/response definitions for block-device messages.
+ */
+
 typedef struct {
     blkif_pdev_t   device;
     blkif_sector_t sector_start;
@@ -86,21 +170,66 @@ typedef struct {
 } blkif_extent_t;
 
 /* Non-specific 'okay' return. */
-#define BLKIF_STATUS_OKAY                0
+#define BLKIF_BE_STATUS_OKAY             0
 /* Non-specific 'error' return. */
-#define BLKIF_STATUS_ERROR               1
+#define BLKIF_BE_STATUS_ERROR            1
 /* The following are specific error returns. */
-#define BLKIF_STATUS_INTERFACE_EXISTS    2
-#define BLKIF_STATUS_INTERFACE_NOT_FOUND 3
+#define BLKIF_BE_STATUS_INTERFACE_EXISTS    2
+#define BLKIF_BE_STATUS_INTERFACE_NOT_FOUND 3
+#define BLKIF_BE_STATUS_INTERFACE_CONNECTED 4
+#define BLKIF_BE_STATUS_VBD_EXISTS          5
+#define BLKIF_BE_STATUS_VBD_NOT_FOUND       6
+#define BLKIF_BE_STATUS_OUT_OF_MEMORY       7
+#define BLKIF_BE_STATUS_EXTENT_NOT_FOUND    8
+#define BLKIF_BE_STATUS_MAPPING_ERROR       9
 
 /* This macro can be used to create an array of descriptive error strings. */
-#define BLKIF_STATUS_ERRORS {   \
-    "Okay",                     \
-    "Non-specific error",       \
-    "Interface already exists", \
-    "Interface not found" }
+#define BLKIF_BE_STATUS_ERRORS {    \
+    "Okay",                         \
+    "Non-specific error",           \
+    "Interface already exists",     \
+    "Interface not found",          \
+    "Interface is still connected", \
+    "VBD already exists",           \
+    "VBD not found",                \
+    "Out of memory",                \
+    "Extent not found for VBD",     \
+    "Could not map domain memory" }
 
-/* CMSG_BLKIF_CREATE */
+/*
+ * CMSG_BLKIF_BE_CREATE:
+ *  When the driver sends a successful response then the interface is fully
+ *  created. The controller will send a DOWN notification to the front-end
+ *  driver.
+ */
+typedef struct {
+    /* IN */
+    domid_t        domid;         /* Domain attached to new interface.   */
+    unsigned int   blkif_handle;  /* Domain-specific interface handle.   */
+    /* OUT */
+    unsigned int   status;
+} blkif_be_create_t;
+
+/*
+ * CMSG_BLKIF_BE_DESTROY:
+ *  When the driver sends a successful response then the interface is fully
+ *  torn down. The controller will send a DESTROYED notification to the
+ *  front-end driver.
+ */
+typedef struct {
+    /* IN */
+    domid_t        domid;         /* Identify interface to be destroyed. */
+    unsigned int   blkif_handle;  /* ...ditto...                         */
+    /* OUT */
+    unsigned int   status;
+} blkif_be_destroy_t;
+
+/*
+ * CMSG_BLKIF_BE_CONNECT:
+ *  When the driver sends a successful response then the interface is fully
+ *  connected. The controller will send a CONNECTED notification to the
+ *  front-end driver.
+ */
 typedef struct {
     /* IN */
     domid_t        domid;         /* Domain attached to new interface.   */
@@ -109,18 +238,23 @@ typedef struct {
     unsigned long  shmem_frame;   /* Page cont. shared comms window.     */
     /* OUT */
     unsigned int   status;
-} blkif_create_t;
+} blkif_be_connect_t;
 
-/* CMSG_BLKIF_DESTROY */
+/*
+ * CMSG_BLKIF_BE_DISCONNECT:
+ *  When the driver sends a successful response then the interface is fully
+ *  disconnected. The controller will send a DOWN notification to the front-end
+ *  driver.
+ */
 typedef struct {
     /* IN */
-    domid_t        domid;         /* Identify interface to be destroyed. */
-    unsigned int   blkif_handle;  /* ...ditto...                         */
+    domid_t        domid;         /* Domain attached to new interface.   */
+    unsigned int   blkif_handle;  /* Domain-specific interface handle.   */
    /* OUT */
     unsigned int   status;
-} blkif_destroy_t;
+} blkif_be_disconnect_t;
 
-/* CMSG_BLKIF_VBD_CREATE */
+/* CMSG_BLKIF_BE_VBD_CREATE */
 typedef struct {
     /* IN */
     domid_t        domid;         /* Identify blkdev interface.          */
@@ -129,9 +263,9 @@ typedef struct {
     int            readonly;      /* Non-zero -> VBD isn't writeable.    */
     /* OUT */
     unsigned int   status;
-} blkif_vbd_create_t;
+} blkif_be_vbd_create_t;
 
-/* CMSG_BLKIF_VBD_DESTROY */
+/* CMSG_BLKIF_BE_VBD_DESTROY */
 typedef struct {
     /* IN */
     domid_t        domid;         /* Identify blkdev interface.          */
@@ -139,9 +273,9 @@ typedef struct {
     blkif_vdev_t   vdevice;       /* Interface-specific id of the VBD.   */
     /* OUT */
     unsigned int   status;
-} blkif_vbd_destroy_t;
+} blkif_be_vbd_destroy_t;
 
-/* CMSG_BLKIF_VBD_GROW */
+/* CMSG_BLKIF_BE_VBD_GROW */
 typedef struct {
     /* IN */
     domid_t        domid;         /* Identify blkdev interface.          */
@@ -150,9 +284,9 @@ typedef struct {
     blkif_extent_t extent;        /* Physical extent to append to VBD.   */
     /* OUT */
     unsigned int   status;
-} blkif_vbd_grow_t;
+} blkif_be_vbd_grow_t;
 
-/* CMSG_BLKIF_VBD_SHRINK */
+/* CMSG_BLKIF_BE_VBD_SHRINK */
 typedef struct {
     /* IN */
     domid_t        domid;         /* Identify blkdev interface.          */
@@ -160,6 +294,23 @@ typedef struct {
     blkif_vdev_t   vdevice;       /* Interface-specific id of the VBD.   */
     /* OUT */
     unsigned int   status;
-} blkif_vbd_shrink_t;
+} blkif_be_vbd_shrink_t;
+
+/*
+ * CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED:
+ *  Notify the domain controller that the back-end driver is DOWN or UP.
+ *  If the driver goes DOWN while interfaces are still UP, the domain
+ *  will automatically send DOWN notifications.
+ */
+typedef struct {
+    /* IN */
+    unsigned int status; /* BLKIF_DRIVER_STATUS_??? */
+    /* OUT */
+    /*
+     * Tells driver how many interfaces it should expect to immediately
+     * receive notifications about.
+     */
+    unsigned int nr_interfaces;
+} blkif_be_driver_status_changed_t;
 
 #endif /* __DOMAIN_CONTROLLER_H__ */
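On xend's Python side (the blkif.py file added above), a CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED payload is built with struct.pack("III", ...) to match blkif_fe_interface_status_changed_t. The real front-end consumer is C code in the xenolinux tree; the inverse decode, sketched in Python with an assumed example payload:

    import struct

    # handle, status, evtchn -- evtchn is only meaningful when status is
    # BLKIF_INTERFACE_STATUS_CONNECTED (2). Values below are illustrative.
    raw = struct.pack("III", 0, 2, 11)
    (handle, status, evtchn) = struct.unpack("III", raw)
    assert status == 2   # CONNECTED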
--- a/tools/xend/lib/main.py  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xend/lib/main.py  Sun May 02 00:52:45 2004 +0000
@@ -5,7 +5,7 @@
 ###########################################################
 
 import errno, re, os, pwd, select, signal, socket, struct, sys, time
-import xend.console, xend.manager, xend.utils, Xc
+import xend.blkif, xend.console, xend.manager, xend.utils, Xc
 
 
 # The following parameters could be placed in a configuration file.
@@ -16,13 +16,35 @@ CONTROL_DIR = '/var/run/xend'
 UNIX_SOCK = 'management_sock' # relative to CONTROL_DIR
 
 
+CMSG_CONSOLE  = 0
+CMSG_BLKIF_BE = 1
+CMSG_BLKIF_FE = 2
+
+
+def port_from_dom(dom):
+    global port_list
+    for idx, port in port_list.items():
+        if port.remote_dom == dom:
+            return port
+    return None
+
+
+def send_management_response(response, addr):
+    try:
+        response = str(response)
+        print "Mgmt_rsp[%s]: %s" % (addr, response)
+        management_interface.sendto(response, addr)
+    except socket.error, error:
+        pass
+
+
 def daemon_loop():
     # Could we do this more nicely? The xend.manager functions need access
     # to this global state to do their work.
-    global control_list, notifier
+    global port_list, notifier, management_interface, mgmt_req_addr, dom0_port
 
-    # List of all control interfaces, indexed by local event-channel port.
-    control_list = {}
+    # Lists of all interfaces, indexed by local event-channel port.
+    port_list = {}
 
     xc = Xc.new()
 
@@ -44,6 +66,13 @@ def daemon_loop():
     # notifications.
     notifier = xend.utils.notifier()
 
+    # The DOM0 control interface is not set up via the management interface.
+    # Note that console messages don't come our way (actually, only driver
+    # back-ends should use the DOM0 control interface).
+    dom0_port = xend.utils.port(0)
+    notifier.bind(dom0_port.local_port)
+    port_list[dom0_port.local_port] = dom0_port
+
     ##
     ## MAIN LOOP
     ##
@@ -58,10 +87,10 @@ def daemon_loop():
         waitset = select.poll()
         waitset.register(management_interface, select.POLLIN)
         waitset.register(notifier, select.POLLIN)
-        for idx, (port, rbuf, wbuf, con_if) in control_list.items():
+        for idx, con_if in xend.console.interface.list_by_fd.items():
             if not con_if.closed():
                 pflags = select.POLLIN
-                if not rbuf.empty() and con_if.connected():
+                if not con_if.rbuf.empty() and con_if.connected():
                     pflags = select.POLLIN | select.POLLOUT
                 waitset.register(con_if.sock.fileno(), pflags)
 
@@ -72,16 +101,16 @@ def daemon_loop():
         # These should consist of executable Python statements that call
        # well-known management functions (e.g., new_control_interface(dom=9)).
         try:
-            data, addr = management_interface.recvfrom(2048)
+            data, mgmt_req_addr = management_interface.recvfrom(2048)
        except socket.error, error:
             if error[0] != errno.EAGAIN:
                 raise
         else:
-            if addr:
+            if mgmt_req_addr:
                 # Evaluate the request in an exception-trapping sandbox.
                 try:
-                    print "Mgmt_req[%s]: %s" % (addr, data)
-                    response = str(eval('xend.manager.'+data))
+                    print "Mgmt_req[%s]: %s" % (mgmt_req_addr, data)
+                    response = eval('xend.manager.'+data)
 
                 except:
                     # Catch all exceptions and turn into an error response:
@@ -97,69 +126,20 @@ def daemon_loop():
                     response = str(response)
 
                 # Try to send a response to the requester.
-                try:
-                    print "Mgmt_rsp[%s]: %s" % (addr, response)
-                    management_interface.sendto(response, addr)
-                except socket.error, error:
-                    pass
+                if response:
+                    send_management_response(response, mgmt_req_addr)
 
         # Do work for every console interface that hit in the poll set.
         for (fd, events) in fdset:
-            if not xend.console.interface.interface_list.has_key(fd):
-                continue
-            con_if = xend.console.interface.interface_list[fd]
-
-            # If the interface is listening, check for pending connections.
-            if con_if.listening():
-                con_if.connect()
-
-            # All done if the interface is not connected.
-            if not con_if.connected():
-                continue
-            (port, rbuf, wbuf, con_if) = control_list[con_if.key]
-
-            # Send as much pending data as possible via the socket.
-            while not rbuf.empty():
-                try:
-                    bytes = con_if.sock.send(rbuf.peek())
-                    if bytes > 0:
-                        rbuf.discard(bytes)
-                except socket.error, error:
-                    pass
-
-            # Read as much data as is available. Don't worry about
-            # overflowing our buffer: it's more important to read the
-            # incoming data stream and detect errors or closure of the
-            # remote end in a timely manner.
-            try:
-                while 1:
-                    data = con_if.sock.recv(2048)
-                    # Return of zero means the remote end has disconnected.
-                    # We therefore return the console interface to listening.
-                    if not data:
-                        con_if.listen()
-                        break
-                    wbuf.write(data)
-            except socket.error, error:
-                # Assume that most errors mean that the connection is dead.
-                # In such cases we return the interface to 'listening' state.
-                if error[0] != errno.EAGAIN:
-                    print "Better return to listening"
-                    con_if.listen()
-                    print "New status: " + str(con_if.status)
-
-            # We may now have pending data to send via the relevant
-            # inter-domain control interface. If so then we send all we can
-            # and notify the remote end.
-            work_done = False
-            while not wbuf.empty() and port.space_to_write_request():
-                msg = xend.utils.message(0, 0, 0)
-                msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
-                port.write_request(msg)
-                work_done = True
-            if work_done:
-                port.notify()
-
+            if xend.console.interface.list_by_fd.has_key(fd):
+                con_if = xend.console.interface.list_by_fd[fd]
+                con_if.socket_work()
+                # We may now have pending data to send via the control
+                # interface. If so then send all we can and notify the remote.
+                port = port_list[con_if.key]
+                if con_if.ctrlif_transmit_work(port):
+                    port.notify()
 
         # Process control-interface notifications from other guest OSes.
         while 1:
            # Grab a notification, if there is one.
@@ -168,42 +148,69 @@ def daemon_loop():
                 break
             (idx, type) = notification
 
-            if not control_list.has_key(idx):
+            if not port_list.has_key(idx):
                 continue
 
-            (port, rbuf, wbuf, con_if) = control_list[idx]
+            port = port_list[idx]
             work_done = False
 
+            con_if = False
+            if xend.console.interface.list.has_key(idx):
+                con_if = xend.console.interface.list[idx]
+
+            blk_if = False
+            if xend.blkif.interface.list.has_key(idx):
+                blk_if = xend.blkif.interface.list[idx]
+
             # If we pick up a disconnect notification then we do any necessary
             # cleanup.
             if type == notifier.EXCEPTION:
                 ret = xc.evtchn_status(idx)
                 if ret['status'] == 'unbound':
                     notifier.unbind(idx)
-                    con_if.close()
-                    del control_list[idx], port, rbuf, wbuf, con_if
+                    del port_list[idx], port
+                    if con_if:
+                        con_if.destroy()
+                        del con_if
+                    if blk_if:
+                        blk_if.destroy()
+                        del blk_if
                 continue
 
-            # Read incoming requests. Currently assume that request
-            # message always containb console data.
+            # Process incoming requests.
             while port.request_to_read():
                 msg = port.read_request()
-                rbuf.write(msg.get_payload())
-                port.write_response(msg)
                 work_done = True
+                type = (msg.get_header())['type']
+                if type == CMSG_CONSOLE and con_if:
+                    con_if.ctrlif_rx_req(port, msg)
+                elif type == CMSG_BLKIF_FE and blk_if:
+                    blk_if.ctrlif_rx_req(port, msg)
+                elif type == CMSG_BLKIF_BE and port == dom0_port:
+                    xend.blkif.backend_rx_req(port, msg)
+                else:
+                    port.write_response(msg)
 
-            # Incoming responses are currently thrown on the floor.
+            # Process incoming responses.
             while port.response_to_read():
                 msg = port.read_response()
                 work_done = True
+                type = (msg.get_header())['type']
+                if type == CMSG_BLKIF_BE and port == dom0_port:
+                    xend.blkif.backend_rx_rsp(port, msg)
 
-            # Send as much pending console data as there is room for.
-            while not wbuf.empty() and port.space_to_write_request():
-                msg = xend.utils.message(0, 0, 0)
-                msg.append_payload(wbuf.read(msg.MAX_PAYLOAD))
-                port.write_request(msg)
+            # Send console data.
+            if con_if and con_if.ctrlif_transmit_work(port):
                 work_done = True
 
+            # Send blkif messages.
+            if blk_if and blk_if.ctrlif_transmit_work(port):
+                work_done = True
+
+            # Back-end block-device work.
+            if port == dom0_port and xend.blkif.backend_do_work(port):
+                work_done = True
+
             # Finally, notify the remote end of any work that we did.
             if work_done:
                 port.notify()
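daemon_loop() treats each datagram arriving on the management socket as a Python expression, evaluates it against xend.manager, and sends the printed form of the response dictionary back to the requester's address. The real tools go through xenctl.utils.xend_control_message(); a minimal raw-socket client sketch, where the client socket path and domain id are assumptions for illustration:

    import socket

    mgmt_sock = '/var/run/xend/management_sock'   # CONTROL_DIR + UNIX_SOCK
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
    sock.bind('/tmp/xend_client_sock')   # daemon replies to our bound address
    sock.sendto('new_control_interface(dom=9)', mgmt_sock)
    response, addr = sock.recvfrom(2048)
    print response   # e.g. "{'success': True, ...}"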
--- a/tools/xend/lib/manager.py  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xend/lib/manager.py  Sun May 02 00:52:45 2004 +0000
@@ -4,13 +4,13 @@
 ## Copyright (c) 2004, K A Fraser (University of Cambridge)
 #############################################################
 
-import xend.console, xend.main, xend.utils
+import xend.blkif, xend.console, xend.main, xend.utils
 
 
 ##
 ## new_control_interface:
-##  Create a new control interface with the specified domain 'dom'.
-##  The console port may also be specified; otehrwise a suitable port is
+##  Create a new control interface with the specified domain @dom.
+##  The console port may also be specified; otherwise a suitable port is
 ##  automatically allocated.
 ##
 def new_control_interface(dom, console_port=-1):
@@ -26,9 +26,8 @@ def new_control_interface(dom, console_p
     con_if = xend.console.interface(console_port, port.local_port)
     con_if.listen()
 
-    # Add control state to the master list.
-    xend.main.control_list[port.local_port] = \
-        (port, xend.utils.buffer(), xend.utils.buffer(), con_if)
+    # Update the master port list.
+    xend.main.port_list[port.local_port] = port
 
     # Construct the successful response to be returned to the requester.
     response = { 'success': True }
@@ -36,3 +35,81 @@ def new_control_interface(dom, console_p
     response['remote_port'] = port.remote_port
     response['console_port'] = console_port
     return response
+
+
+##
+## new_block_interface:
+##  Create a new block interface for the specified domain @dom.
+##
+def new_block_interface(dom, handle=-1):
+    # By default we create an interface with handle zero.
+    if handle < 0:
+        handle = 0
+
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        response['error_type'] = ('Bad handle %d (only handle 0 ' +
+                                  'is supported)') % handle
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+
+    # The interface must not already exist.
+    if xend.blkif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        response['error_type'] = ('Interface (dom=%d,handle=%d) already ' +
+                                  'exists') % (dom, handle)
+        return response
+
+    # Create the new interface. Initially no virtual devices are attached.
+    xend.blkif.interface(dom, port.local_port)
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
+
+
+##
+## new_block_device:
+##  Attach a new virtual block device to the specified block interface
+##  (@dom, @handle). The new device is identified by @vdev, and maps to
+##  the real block extent (@pdev, @start_sect, @nr_sect). If @readonly then
+##  write requests to @vdev will be rejected.
+##
+def new_block_device(dom, handle, vdev, pdev, start_sect, nr_sect, readonly):
+    # We only support one interface per domain, which must have handle zero.
+    if handle != 0:
+        response = { 'success': False }
+        response['error_type'] = ('Bad handle %d (only handle 0 ' +
+                                  'is supported)') % handle
+        return response
+
+    # Find local event-channel port associated with the specified domain.
+    port = xend.main.port_from_dom(dom)
+    if not port:
+        response = { 'success': False }
+        response['error_type'] = 'Unknown domain %d' % dom
+        return response
+
+    # The interface must exist.
+    if not xend.blkif.interface.list.has_key(port.local_port):
+        response = { 'success': False }
+        response['error_type'] = ('Interface (dom=%d,handle=%d) does not ' +
+                                  'exist') % (dom, handle)
+        return response
+
+    # The virtual device must not yet exist.
+    blkif = xend.blkif.interface.list[port.local_port]
+    if not blkif.attach_device(vdev, pdev, start_sect, nr_sect, readonly):
+        response = { 'success': False }
+        response['error_type'] = ('Vdevice (dom=%d,handle=%d,vdevice=%d) ' +
+                                  'already exists') % (dom, handle, vdev)
+        return response
+
+    # Response is deferred until back-end driver sends acknowledgement.
+    return None
--- a/tools/xend/lib/utils.c  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xend/lib/utils.c  Sun May 02 00:52:45 2004 +0000
@@ -22,6 +22,8 @@
 #include <signal.h>
 #include <xc.h>
 
+#include <asm-xen/proc_cmd.h>
+
 #include <hypervisor-if.h>
 #include "domain_controller.h"
 
@@ -684,8 +686,23 @@ static PyObject *xu_port_new(PyObject *s
         goto fail2;
     }
 
-    if ( xc_evtchn_bind_interdomain(xup->xc_handle,
-                                    DOMID_SELF, dom, &port1, &port2) != 0 )
+    if ( dom == 0ULL )
+    {
+        /*
+         * The control-interface event channel for DOM0 is already set up.
+         * We use an ioctl to discover the port at our end of the channel.
+         */
+        port1 = ioctl(xup->xc_handle, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, NULL);
+        port2 = -1; /* We don't need the remote end of the DOM0 link. */
+        if ( port1 < 0 )
+        {
+            PyErr_SetString(port_error, "Could not open channel to DOM0");
+            goto fail3;
+        }
+    }
+    else if ( xc_evtchn_bind_interdomain(xup->xc_handle,
+                                         DOMID_SELF, dom,
+                                         &port1, &port2) != 0 )
     {
         PyErr_SetString(port_error, "Could not open channel to domain");
         goto fail3;
@@ -744,7 +761,8 @@ static void xu_port_dealloc(PyObject *se
 {
     xu_port_object *xup = (xu_port_object *)self;
     unmap_control_interface(xup->mem_fd, xup->interface);
-    (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
+    if ( xup->remote_dom != 0ULL )
+        (void)xc_evtchn_close(xup->xc_handle, DOMID_SELF, xup->local_port);
     (void)xc_interface_close(xup->xc_handle);
     (void)close(xup->mem_fd);
     PyObject_Del(self);
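At the Python level this changes what xend.utils.port(0) means: rather than binding a fresh interdomain channel, it discovers dom0's pre-existing control channel via the new ioctl, and deallocating the object no longer closes that channel. The daemon relies on exactly this in the main.py hunk above; a sketch (the guest domain id is an assumption):

    import xend.utils

    # dom0: local port discovered via IOCTL_PRIVCMD_INITDOMAIN_EVTCHN;
    # the remote end is not tracked (port2 is -1 internally).
    dom0_port = xend.utils.port(0)

    # Any other domain (assumed to exist): ordinary interdomain bind.
    guest_port = xend.utils.port(7)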
--- a/tools/xend/setup.py  Sun May 02 00:27:58 2004 +0000
+++ b/tools/xend/setup.py  Sun May 02 00:52:45 2004 +0000
@@ -4,7 +4,8 @@ from distutils.core import setup, Extens
 utils = Extension("utils",
                   extra_compile_args = ["-fno-strict-aliasing"],
                   include_dirs = ["../xc/lib",
-                                  "../../xen/include/hypervisor-ifs"],
+                                  "../../xen/include/hypervisor-ifs",
+                                  "../../xenolinux-sparse/include"],
                   library_dirs = ["../xc/lib"],
                   libraries = ["xc"],
                   sources = ["lib/utils.c"])
--- a/xen/arch/i386/pdb-stub.c  Sun May 02 00:27:58 2004 +0000
+++ b/xen/arch/i386/pdb-stub.c  Sun May 02 00:52:45 2004 +0000
@@ -51,6 +51,8 @@ static unsigned char pdb_xmit_checksum;
 unsigned long pdb_linux_pid_ptbr (unsigned long cr3, int pid);
 void pdb_linux_get_values(char *buffer, int length, unsigned long address,
                           int pid, unsigned long cr3);
+void pdb_linux_set_values(char *buffer, int length, unsigned long address,
+                          int pid, unsigned long cr3);
 
 struct pdb_context
 {
@@ -571,6 +573,12 @@ pdb_process_command (char *ptr, struct p
     {
         hex2mem (ptr, (char *)addr, length);
     }
+    else if (pdb_ctx.process != -1)
+    {
+        pdb_linux_set_values(ptr, length, addr,
+                             pdb_ctx.process,
+                             pdb_ctx.ptbr);
+    }
     else
     {
         pdb_set_values (ptr, length,
15.1 --- a/xen/common/debug-linux.c Sun May 02 00:27:58 2004 +0000 15.2 +++ b/xen/common/debug-linux.c Sun May 02 00:52:45 2004 +0000 15.3 @@ -171,6 +171,44 @@ void pdb_linux_get_values(char *buffer, 15.4 } 15.5 } 15.6 15.7 + 15.8 +void pdb_linux_set_value(int pid, unsigned long cr3, unsigned long addr, 15.9 + u_char *value) 15.10 +{ 15.11 + unsigned long pgd; 15.12 + unsigned long l2tab, page; 15.13 + 15.14 + /* get the process' pgd */ 15.15 + pgd = pdb_linux_pid_ptbr(cr3, pid); 15.16 + 15.17 + /* get the l2 table entry */ 15.18 + pdb_get_values((u_char *) &l2tab, sizeof(l2tab), 15.19 + cr3, pgd + (addr >> PGDIR_SHIFT) * 4); 15.20 + l2tab = (unsigned long)__va(machine_to_phys(cr3, l2tab) & PAGE_MASK); 15.21 + 15.22 + /* get the page table entry */ 15.23 + pdb_get_values((u_char *) &page, sizeof(page), 15.24 + cr3, l2tab + ((addr & L1_PAGE_BITS) >> PAGE_SHIFT) * 4); 15.25 + page = (unsigned long)__va(machine_to_phys(cr3, page) & PAGE_MASK); 15.26 + 15.27 + /* set the byte */ 15.28 + pdb_set_values(value, sizeof(u_char), cr3, page + (addr & ~PAGE_MASK)); 15.29 +} 15.30 + 15.31 +void pdb_linux_set_values(char *buffer, int length, unsigned long address, 15.32 + int pid, unsigned long cr3) 15.33 +{ 15.34 + int loop; 15.35 + 15.36 + /* it's difficult to imagine a more inefficient algorithm */ 15.37 + for (loop = 0; loop < length; loop++) 15.38 + { 15.39 + pdb_linux_set_value(pid, cr3, address + loop, &buffer[loop * 2]); 15.40 + } 15.41 +} 15.42 + 15.43 +/**********************************************************************/ 15.44 + 15.45 /* 15.46 * return 1 if the virtual address is in the operating system's 15.47 * address space, else 0
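The buffer handed to pdb_linux_set_values comes from the gdb remote protocol's 'M' (write memory) packet, whose payload is hex text, two ASCII characters per byte; that is presumably why the loop strides through buffer at loop * 2. A small decode helper of the kind that would sit in front of pdb_set_values, written here only as an assumption about the encoding:

    /* Sketch, assuming 'buffer' carries gdb-style hex pairs. */
    static u_char hex_nibble(char c)
    {
        if ( (c >= 'a') && (c <= 'f') ) return c - 'a' + 10;
        if ( (c >= 'A') && (c <= 'F') ) return c - 'A' + 10;
        return c - '0';
    }

    static u_char hex_pair_to_byte(const char *p)
    {
        return (u_char)((hex_nibble(p[0]) << 4) | hex_nibble(p[1]));
    }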
16.1 --- a/xen/common/event_channel.c Sun May 02 00:27:58 2004 +0000 16.2 +++ b/xen/common/event_channel.c Sun May 02 00:52:45 2004 +0000 16.3 @@ -109,15 +109,18 @@ static long evtchn_bind_interdomain(evtc 16.4 goto out; 16.5 } 16.6 16.7 + /* 'Allocate' port1 before searching for a free port2. */ 16.8 + p1->event_channel[port1].state = ECS_INTERDOMAIN; 16.9 + 16.10 if ( (port2 = get_free_port(p2)) < 0 ) 16.11 { 16.12 + p1->event_channel[port1].state = ECS_FREE; 16.13 rc = port2; 16.14 goto out; 16.15 } 16.16 16.17 p1->event_channel[port1].u.remote.dom = p2; 16.18 p1->event_channel[port1].u.remote.port = (u16)port2; 16.19 - p1->event_channel[port1].state = ECS_INTERDOMAIN; 16.20 16.21 p2->event_channel[port2].u.remote.dom = p1; 16.22 p2->event_channel[port2].u.remote.port = (u16)port1;
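The reordering above matters when both ends of the channel belong to the same domain (p1 == p2): if port1 were left in ECS_FREE while the second search ran, get_free_port() could hand back the same slot twice. Reserving port1 first, and rolling the reservation back if no second port is available, closes that hole. The pattern in isolation, with hypothetical names:

    /* Reserve-then-rollback (sketch; get_free_slot/table are hypothetical). */
    if ( (slot1 = get_free_slot(table)) < 0 )
        return slot1;
    table[slot1].state = IN_USE;        /* reserve before the second scan */
    if ( (slot2 = get_free_slot(table)) < 0 )
    {
        table[slot1].state = FREE;      /* roll back on failure */
        return slot2;
    }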
17.1 --- a/xen/common/kernel.c Sun May 02 00:27:58 2004 +0000 17.2 +++ b/xen/common/kernel.c Sun May 02 00:52:45 2004 +0000 17.3 @@ -73,6 +73,9 @@ unsigned char opt_pdb[10] = "none"; 17.4 unsigned int opt_tbuf_size = 1; 17.5 /* opt_sched: scheduler - default to Borrowed Virtual Time */ 17.6 char opt_sched[10] = "bvt"; 17.7 +/* opt_physdev_dom0_hide: list of PCI slots to hide from dom0 17.8 + * Should have the format '(%02x:%02x.%1x)(%02x:%02x.%1x)...etc' */ 17.9 +char opt_physdev_dom0_hide[20] = ""; 17.10 17.11 static struct { 17.12 unsigned char *name; 17.13 @@ -94,6 +97,7 @@ static struct { 17.14 { "pdb", OPT_STR, &opt_pdb }, 17.15 { "tbuf_size", OPT_UINT, &opt_tbuf_size }, 17.16 { "sched", OPT_STR, &opt_sched }, 17.17 + { "physdev_dom0_hide",OPT_STR, &opt_physdev_dom0_hide }, 17.18 { NULL, 0, NULL } 17.19 }; 17.20
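With the new option, each slot to hide is listed in the parenthesised bus:device.function form on Xen's command line; note the buffer is only 20 characters, so at most two slots fit. A hypothetical GRUB entry (paths and memory size are illustrative only):

    title Xen / driver-domain test
    kernel /boot/xen.gz dom0_mem=131072 physdev_dom0_hide=(01:02.0)(03:00.1)
    module /boot/xenolinux.gz root=/dev/sda1 ro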
18.1 --- a/xen/common/physdev.c Sun May 02 00:27:58 2004 +0000 18.2 +++ b/xen/common/physdev.c Sun May 02 00:52:45 2004 +0000 18.3 @@ -115,16 +115,17 @@ static void add_dev_to_task(struct task_ 18.4 18.5 /* 18.6 * physdev_pci_access_modify: 18.7 - * Allow/disallow access to a specific PCI device. Also allow read access to 18.8 - * PCI devices from the device to the root of the device tree. If the given 18.9 - * device is a bridge, then the domain should get access to all the devices 18.10 - * attached to that bridge (XXX this is unimplemented!). 18.11 + * Allow/disallow access to a specific PCI device. Guests should not be 18.12 + * allowed to see bridge devices as it needlessly complicates things (one 18.13 + * possible exception to this is the AGP bridge). If the given device is a 18.14 + * bridge, then the domain should get access to all the leaf devices below 18.15 + * that bridge (XXX this is unimplemented!). 18.16 */ 18.17 int physdev_pci_access_modify( 18.18 domid_t dom, int bus, int dev, int func, int enable) 18.19 { 18.20 struct task_struct *p; 18.21 - struct pci_dev *pdev, *rdev, *tdev; 18.22 + struct pci_dev *pdev; 18.23 int rc = 0; 18.24 18.25 if ( !IS_PRIV(current) ) 18.26 @@ -145,7 +146,7 @@ int physdev_pci_access_modify( 18.27 return -ESRCH; 18.28 18.29 /* Make the domain privileged. */ 18.30 - set_bit(PF_PRIVILEGED, &p->flags); 18.31 + set_bit(PF_PRIVILEGED, &p->flags); 18.32 18.33 /* Grant write access to the specified device. */ 18.34 if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL ) 18.35 @@ -155,27 +156,10 @@ int physdev_pci_access_modify( 18.36 goto out; 18.37 } 18.38 add_dev_to_task(p, pdev, ACC_WRITE); 18.39 + 18.40 INFO(" add RW %02x:%02x:%02x\n", pdev->bus->number, 18.41 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); 18.42 18.43 - /* Grant read access to the root device. */ 18.44 - if ( (rdev = pci_find_slot(0, PCI_DEVFN(0, 0))) == NULL ) 18.45 - { 18.46 - INFO(" bizarre -- no PCI root dev\n"); 18.47 - rc = -ENODEV; 18.48 - goto out; 18.49 - } 18.50 - add_dev_to_task(p, rdev, ACC_READ); 18.51 - INFO(" add R0 %02x:%02x:%02x\n", 0, 0, 0); 18.52 - 18.53 - /* Grant read access to all devices on the path to the root. */ 18.54 - for ( tdev = pdev->bus->self; tdev != NULL; tdev = tdev->bus->self ) 18.55 - { 18.56 - add_dev_to_task(p, tdev, ACC_READ); 18.57 - INFO(" add RO %02x:%02x:%02x\n", tdev->bus->number, 18.58 - PCI_SLOT(tdev->devfn), PCI_FUNC(tdev->devfn)); 18.59 - } 18.60 - 18.61 /* Is the device a bridge or cardbus? */ 18.62 if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL ) 18.63 INFO("XXX can't give access to bridge devices yet\n"); 18.64 @@ -256,8 +240,16 @@ static int do_base_address_access(phys_d 18.65 18.66 if ( len != sizeof(u32) ) 18.67 { 18.68 - INFO("Guest attempting sub-dword %s to BASE_ADDRESS %d\n", 18.69 + /* This isn't illegal, but there doesn't seem to be a very good reason 18.70 + * to do it for normal devices (bridges are another matter). Since it 18.71 + * would complicate the code below, we don't support this for now. */ 18.72 + 18.73 + /* We could set *val to some value but the guest may well be in trouble 18.74 + * anyway if this write fails. Hopefully the printk will give us a 18.75 + * clue what went wrong. */ 18.76 + printk("Guest attempting sub-dword %s to BASE_ADDRESS %d\n", 18.77 (acc == ACC_READ) ? 
"read" : "write", idx); 18.78 + 18.79 return -EPERM; 18.80 } 18.81 18.82 @@ -420,7 +412,13 @@ static long pci_cfgreg_read(int bus, int 18.83 phys_dev_t *pdev; 18.84 18.85 if ( (ret = check_dev_acc(current, bus, dev, func, &pdev)) != 0 ) 18.86 - return ret; 18.87 + { 18.88 + /* PCI spec states that reads from non-existent devices should return 18.89 + * all 1s. In this case the domain has no read access, which should 18.90 + * also look like the device is non-existent. */ 18.91 + *val = 0xFFFFFFFF; 18.92 + return ret; /* KAF: error return seems to matter on my test machine. */ 18.93 + } 18.94 18.95 /* Fake out read requests for some registers. */ 18.96 switch ( reg ) 18.97 @@ -608,6 +606,21 @@ long do_physdev_op(physdev_op_t *uop) 18.98 return ret; 18.99 } 18.100 18.101 +/* Test if boot params specify this device should NOT be visible to DOM0 18.102 + * (e.g. so that another domain can control it instead) */ 18.103 +int pcidev_dom0_hidden(struct pci_dev *dev) 18.104 +{ 18.105 + extern char opt_physdev_dom0_hide[]; 18.106 + char cmp[10] = "(.......)"; 18.107 + 18.108 + strncpy(&cmp[1], dev->slot_name, 7); 18.109 + 18.110 + if ( strstr(opt_physdev_dom0_hide, dev->slot_name) == NULL ) 18.111 + return 0; 18.112 + 18.113 + return 1; 18.114 +} 18.115 + 18.116 18.117 /* Domain 0 has read access to all devices. */ 18.118 void physdev_init_dom0(struct task_struct *p) 18.119 @@ -619,14 +632,22 @@ void physdev_init_dom0(struct task_struc 18.120 18.121 pci_for_each_dev(dev) 18.122 { 18.123 - /* Skip bridges and other peculiarities for now. */ 18.124 - if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL ) 18.125 - continue; 18.126 - pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL); 18.127 - pdev->dev = dev; 18.128 - pdev->flags = ACC_WRITE; 18.129 - pdev->state = 0; 18.130 - pdev->owner = p; 18.131 - list_add(&pdev->node, &p->pcidev_list); 18.132 - } 18.133 + if ( !pcidev_dom0_hidden(dev) ) 18.134 + { 18.135 + /* Skip bridges and other peculiarities for now. */ 18.136 + if ( dev->hdr_type != PCI_HEADER_TYPE_NORMAL ) 18.137 + continue; 18.138 + pdev = kmalloc(sizeof(phys_dev_t), GFP_KERNEL); 18.139 + pdev->dev = dev; 18.140 + pdev->flags = ACC_WRITE; 18.141 + pdev->state = 0; 18.142 + pdev->owner = p; 18.143 + list_add(&pdev->node, &p->pcidev_list); 18.144 + } 18.145 + else 18.146 + { 18.147 + printk("Hiding PCI device %s from DOM0\n", dev->slot_name); 18.148 + } 18.149 + } 18.150 } 18.151 +
19.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h Sun May 02 00:27:58 2004 +0000 19.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/common.h Sun May 02 00:52:45 2004 +0000 19.3 @@ -34,23 +34,37 @@ typedef struct blkif_st { 19.4 unsigned int evtchn; 19.5 int irq; 19.6 /* Comms information. */ 19.7 - blk_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ 19.8 + blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ 19.9 BLK_RING_IDX blk_req_cons; /* Request consumer. */ 19.10 BLK_RING_IDX blk_resp_prod; /* Private version of response producer. */ 19.11 /* VBDs attached to this interface. */ 19.12 rb_root_t vbd_rb; /* Mapping from 16-bit vdevices to VBDs. */ 19.13 spinlock_t vbd_lock; /* Protects VBD mapping. */ 19.14 /* Private fields. */ 19.15 + enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; 19.16 + /* 19.17 + * DISCONNECT response is deferred until pending requests are ack'ed. 19.18 + * We therefore need to store the id from the original request. 19.19 + */ 19.20 + u8 disconnect_rspid; 19.21 struct blkif_st *hash_next; 19.22 struct list_head blkdev_list; 19.23 spinlock_t blk_ring_lock; 19.24 + atomic_t refcnt; 19.25 } blkif_t; 19.26 19.27 -void blkif_create(blkif_create_t *create); 19.28 -void blkif_destroy(blkif_destroy_t *destroy); 19.29 +void blkif_create(blkif_be_create_t *create); 19.30 +void blkif_destroy(blkif_be_destroy_t *destroy); 19.31 +void blkif_connect(blkif_be_connect_t *connect); 19.32 +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id); 19.33 +void __blkif_disconnect_complete(blkif_t *blkif); 19.34 blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); 19.35 -void blkif_get(blkif_t *blkif); 19.36 -void blkif_put(blkif_t *blkif); 19.37 +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) 19.38 +#define blkif_put(_b) \ 19.39 + do { \ 19.40 + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ 19.41 + __blkif_disconnect_complete(_b); \ 19.42 + } while (0) 19.43 19.44 /* An entry in a list of xen_extents. 
*/ 19.45 typedef struct _blkif_extent_le { 19.46 @@ -60,25 +74,25 @@ typedef struct _blkif_extent_le { 19.47 19.48 typedef struct _vbd { 19.49 blkif_vdev_t vdevice; /* what the domain refers to this vbd as */ 19.50 - unsigned char mode; /* VBD_MODE_{R,W} */ 19.51 + unsigned char readonly; /* Non-zero -> read-only */ 19.52 unsigned char type; /* XD_TYPE_xxx */ 19.53 blkif_extent_le_t *extents; /* list of xen_extents making up this vbd */ 19.54 rb_node_t rb; /* for linking into R-B tree lookup struct */ 19.55 } vbd_t; 19.56 19.57 -long vbd_create(blkif_vbd_create_t *create_params); 19.58 -long vbd_grow(blkif_vbd_grow_t *grow_params); 19.59 -long vbd_shrink(blkif_vbd_shrink_t *shrink_params); 19.60 -long vbd_destroy(blkif_vbd_destroy_t *delete_params); 19.61 - 19.62 -void destroy_all_vbds(struct task_struct *p); 19.63 +void vbd_create(blkif_be_vbd_create_t *create); 19.64 +void vbd_grow(blkif_be_vbd_grow_t *grow); 19.65 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink); 19.66 +void vbd_destroy(blkif_be_vbd_destroy_t *delete); 19.67 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds); 19.68 +void destroy_all_vbds(blkif_t *blkif); 19.69 19.70 typedef struct { 19.71 blkif_t *blkif; 19.72 unsigned long id; 19.73 atomic_t pendcnt; 19.74 unsigned short operation; 19.75 - unsigned short status; 19.76 + int status; 19.77 } pending_req_t; 19.78 19.79 /* Describes a [partial] disk extent (part of a block io request) */ 19.80 @@ -91,7 +105,10 @@ typedef struct { 19.81 19.82 int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation); 19.83 19.84 -int blkif_be_controller_init(void); 19.85 +void blkif_interface_init(void); 19.86 +void blkif_ctrlif_init(void); 19.87 + 19.88 +void blkif_deschedule(blkif_t *blkif); 19.89 19.90 void blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); 19.91
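The new get/put macros turn interface teardown into a reference-counting problem: every in-flight request pins the blkif, the disconnect path drops its base reference, and whichever blkif_put() reaches zero while the interface is DISCONNECTING fires __blkif_disconnect_complete(). The discipline in miniature (a sketch, not code from this changeset):

    /* Each request holds a reference for its whole lifetime. */
    blkif_get(blkif);     /* taken when the request is accepted            */
    /* ... asynchronous block I/O runs and eventually completes ...        */
    blkif_put(blkif);     /* dropped on completion; if this is the last
                           * reference during DISCONNECTING, the deferred
                           * DISCONNECT response is sent from here.        */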
20.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c Sun May 02 00:27:58 2004 +0000 20.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/control.c Sun May 02 00:52:45 2004 +0000 20.3 @@ -10,37 +10,50 @@ 20.4 20.5 static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) 20.6 { 20.7 + DPRINTK("Received blkif backend message, subtype=%d\n", msg->subtype); 20.8 + 20.9 switch ( msg->subtype ) 20.10 { 20.11 case CMSG_BLKIF_BE_CREATE: 20.12 - if ( msg->length != sizeof(blkif_create_t) ) 20.13 + if ( msg->length != sizeof(blkif_be_create_t) ) 20.14 goto parse_error; 20.15 - blkif_create((blkif_create_t *)&msg->msg[0]); 20.16 + blkif_create((blkif_be_create_t *)&msg->msg[0]); 20.17 break; 20.18 case CMSG_BLKIF_BE_DESTROY: 20.19 - if ( msg->length != sizeof(blkif_destroy_t) ) 20.20 + if ( msg->length != sizeof(blkif_be_destroy_t) ) 20.21 + goto parse_error; 20.22 + blkif_destroy((blkif_be_destroy_t *)&msg->msg[0]); 20.23 + break; 20.24 + case CMSG_BLKIF_BE_CONNECT: 20.25 + if ( msg->length != sizeof(blkif_be_connect_t) ) 20.26 goto parse_error; 20.27 - blkif_destroy((blkif_destroy_t *)&msg->msg[0]); 20.28 + blkif_connect((blkif_be_connect_t *)&msg->msg[0]); 20.29 + break; 20.30 + case CMSG_BLKIF_BE_DISCONNECT: 20.31 + if ( msg->length != sizeof(blkif_be_disconnect_t) ) 20.32 + goto parse_error; 20.33 + if ( !blkif_disconnect((blkif_be_disconnect_t *)&msg->msg[0],msg->id) ) 20.34 + return; /* Sending the response is deferred until later. */ 20.35 break; 20.36 case CMSG_BLKIF_BE_VBD_CREATE: 20.37 - if ( msg->length != sizeof(blkif_vbd_create_t) ) 20.38 + if ( msg->length != sizeof(blkif_be_vbd_create_t) ) 20.39 goto parse_error; 20.40 - vbd_create((blkif_vbd_create_t *)&msg->msg[0]); 20.41 + vbd_create((blkif_be_vbd_create_t *)&msg->msg[0]); 20.42 break; 20.43 case CMSG_BLKIF_BE_VBD_DESTROY: 20.44 - if ( msg->length != sizeof(blkif_vbd_destroy_t) ) 20.45 + if ( msg->length != sizeof(blkif_be_vbd_destroy_t) ) 20.46 goto parse_error; 20.47 - vbd_destroy((blkif_vbd_destroy_t *)&msg->msg[0]); 20.48 + vbd_destroy((blkif_be_vbd_destroy_t *)&msg->msg[0]); 20.49 break; 20.50 case CMSG_BLKIF_BE_VBD_GROW: 20.51 - if ( msg->length != sizeof(blkif_vbd_grow_t) ) 20.52 + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) 20.53 goto parse_error; 20.54 - vbd_grow((blkif_vbd_grow_t *)&msg->msg[0]); 20.55 + vbd_grow((blkif_be_vbd_grow_t *)&msg->msg[0]); 20.56 break; 20.57 case CMSG_BLKIF_BE_VBD_SHRINK: 20.58 - if ( msg->length != sizeof(blkif_vbd_shrink_t) ) 20.59 + if ( msg->length != sizeof(blkif_be_vbd_shrink_t) ) 20.60 goto parse_error; 20.61 - vbd_shrink((blkif_vbd_shrink_t *)&msg->msg[0]); 20.62 + vbd_shrink((blkif_be_vbd_shrink_t *)&msg->msg[0]); 20.63 break; 20.64 default: 20.65 goto parse_error; 20.66 @@ -50,12 +63,24 @@ static void blkif_ctrlif_rx(ctrl_msg_t * 20.67 return; 20.68 20.69 parse_error: 20.70 + DPRINTK("Parse error while reading message subtype %d, len %d\n", 20.71 + msg->subtype, msg->length); 20.72 msg->length = 0; 20.73 ctrl_if_send_response(msg); 20.74 } 20.75 20.76 -int blkif_ctrlif_init(void) 20.77 +void blkif_ctrlif_init(void) 20.78 { 20.79 + ctrl_msg_t cmsg; 20.80 + blkif_be_driver_status_changed_t st; 20.81 + 20.82 (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx); 20.83 - return 0; 20.84 + 20.85 + /* Send a driver-UP notification to the domain controller. 
*/ 20.86 + cmsg.type = CMSG_BLKIF_BE; 20.87 + cmsg.subtype = CMSG_BLKIF_BE_DRIVER_STATUS_CHANGED; 20.88 + cmsg.length = sizeof(blkif_be_driver_status_changed_t); 20.89 + st.status = BLKIF_DRIVER_STATUS_UP; 20.90 + memcpy(cmsg.msg, &st, sizeof(st)); 20.91 + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 20.92 }
21.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c Sun May 02 00:27:58 2004 +0000 21.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/interface.c Sun May 02 00:52:45 2004 +0000 21.3 @@ -12,85 +12,223 @@ 21.4 #define BLKIF_HASH(_d,_h) \ 21.5 (((int)(_d)^(int)((_d)>>32)^(int)(_h))&(BLKIF_HASHSZ-1)) 21.6 21.7 -static blkif_t *blkif_hash[BLKIF_HASHSZ]; 21.8 +static kmem_cache_t *blkif_cachep; 21.9 +static blkif_t *blkif_hash[BLKIF_HASHSZ]; 21.10 21.11 blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) 21.12 { 21.13 blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; 21.14 while ( (blkif != NULL) && 21.15 - (blkif->domid != domid) && 21.16 - (blkif->handle != handle) ) 21.17 + ((blkif->domid != domid) || (blkif->handle != handle)) ) 21.18 blkif = blkif->hash_next; 21.19 return blkif; 21.20 } 21.21 21.22 -void blkif_create(blkif_create_t *create) 21.23 +void __blkif_disconnect_complete(blkif_t *blkif) 21.24 +{ 21.25 + ctrl_msg_t cmsg; 21.26 + blkif_be_disconnect_t disc; 21.27 + 21.28 + /* 21.29 + * These can't be done in __blkif_disconnect() because at that point there 21.30 + * may be outstanding requests at the disc whose asynchronous responses 21.31 + * must still be notified to the remote driver. 21.32 + */ 21.33 + unbind_evtchn_from_irq(blkif->evtchn); 21.34 + vfree(blkif->blk_ring_base); 21.35 + 21.36 + /* Construct the deferred response message. */ 21.37 + cmsg.type = CMSG_BLKIF_BE; 21.38 + cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; 21.39 + cmsg.id = blkif->disconnect_rspid; 21.40 + cmsg.length = sizeof(blkif_be_disconnect_t); 21.41 + disc.domid = blkif->domid; 21.42 + disc.blkif_handle = blkif->handle; 21.43 + disc.status = BLKIF_BE_STATUS_OKAY; 21.44 + memcpy(cmsg.msg, &disc, sizeof(disc)); 21.45 + 21.46 + /* 21.47 + * Make sure message is constructed /before/ status change, because 21.48 + * after the status change the 'blkif' structure could be deallocated at 21.49 + * any time. Also make sure we send the response /after/ status change, 21.50 + * as otherwise a subsequent CONNECT request could spuriously fail if 21.51 + * another CPU doesn't see the status change yet. 21.52 + */ 21.53 + mb(); 21.54 + if ( blkif->status != DISCONNECTING ) 21.55 + BUG(); 21.56 + blkif->status = DISCONNECTED; 21.57 + mb(); 21.58 + 21.59 + /* Send the successful response. 
*/ 21.60 + ctrl_if_send_response(&cmsg); 21.61 +} 21.62 + 21.63 +void blkif_create(blkif_be_create_t *create) 21.64 { 21.65 domid_t domid = create->domid; 21.66 unsigned int handle = create->blkif_handle; 21.67 - unsigned int evtchn = create->evtchn; 21.68 - unsigned long shmem_frame = create->shmem_frame; 21.69 blkif_t **pblkif, *blkif; 21.70 21.71 - pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 21.72 - while ( *pblkif == NULL ) 21.73 + if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_ATOMIC)) == NULL ) 21.74 { 21.75 - if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) 21.76 - goto found_match; 21.77 - pblkif = &(*pblkif)->hash_next; 21.78 + DPRINTK("Could not create blkif: out of memory\n"); 21.79 + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 21.80 + return; 21.81 } 21.82 21.83 - blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); 21.84 memset(blkif, 0, sizeof(*blkif)); 21.85 - blkif->domid = domid; 21.86 - blkif->handle = handle; 21.87 - blkif->evtchn = evtchn; 21.88 - blkif->irq = bind_evtchn_to_irq(evtchn); 21.89 - blkif->shmem_frame = shmem_frame; 21.90 - blkif->shmem_vbase = ioremap(shmem_frame<<PAGE_SHIFT, PAGE_SIZE); 21.91 + blkif->domid = domid; 21.92 + blkif->handle = handle; 21.93 + blkif->status = DISCONNECTED; 21.94 spin_lock_init(&blkif->vbd_lock); 21.95 spin_lock_init(&blkif->blk_ring_lock); 21.96 + atomic_set(&blkif->refcnt, 0); 21.97 21.98 - request_irq(irq, blkif_be_int, 0, "blkif-backend", blkif); 21.99 + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 21.100 + while ( *pblkif != NULL ) 21.101 + { 21.102 + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) 21.103 + { 21.104 + DPRINTK("Could not create blkif: already exists\n"); 21.105 + create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; 21.106 + kmem_cache_free(blkif_cachep, blkif); 21.107 + return; 21.108 + } 21.109 + pblkif = &(*pblkif)->hash_next; 21.110 + } 21.111 21.112 blkif->hash_next = *pblkif; 21.113 *pblkif = blkif; 21.114 21.115 - create->status = BLKIF_STATUS_OKAY; 21.116 - return; 21.117 - 21.118 - found_match: 21.119 - create->status = BLKIF_STATUS_INTERFACE_EXISTS; 21.120 - return; 21.121 - 21.122 - evtchn_in_use: 21.123 - unbind_evtchn_from_irq(evtchn); /* drop refcnt */ 21.124 - create->status = BLKIF_STATUS_ERROR; 21.125 - return; 21.126 + DPRINTK("Successfully created blkif\n"); 21.127 + create->status = BLKIF_BE_STATUS_OKAY; 21.128 } 21.129 21.130 -void blkif_destroy(blkif_destroy_t *destroy) 21.131 +void blkif_destroy(blkif_be_destroy_t *destroy) 21.132 { 21.133 domid_t domid = destroy->domid; 21.134 unsigned int handle = destroy->blkif_handle; 21.135 blkif_t **pblkif, *blkif; 21.136 21.137 pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; 21.138 - while ( (blkif = *pblkif) == NULL ) 21.139 + while ( (blkif = *pblkif) != NULL ) 21.140 { 21.141 if ( (blkif->domid == domid) && (blkif->handle == handle) ) 21.142 - goto found_match; 21.143 + { 21.144 + if ( blkif->status != DISCONNECTED ) 21.145 + goto still_connected; 21.146 + goto destroy; 21.147 + } 21.148 pblkif = &blkif->hash_next; 21.149 } 21.150 21.151 - destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; 21.152 + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 21.153 + return; 21.154 + 21.155 + still_connected: 21.156 + destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; 21.157 return; 21.158 21.159 - found_match: 21.160 - free_irq(blkif->irq, NULL); 21.161 - unbind_evtchn_from_irq(blkif->evtchn); 21.162 + destroy: 21.163 *pblkif = blkif->hash_next; 21.164 + destroy_all_vbds(blkif); 21.165 
kmem_cache_free(blkif_cachep, blkif); 21.166 - destroy->status = BLKIF_STATUS_OKAY; 21.167 + destroy->status = BLKIF_BE_STATUS_OKAY; 21.168 } 21.169 21.170 +void blkif_connect(blkif_be_connect_t *connect) 21.171 +{ 21.172 + domid_t domid = connect->domid; 21.173 + unsigned int handle = connect->blkif_handle; 21.174 + unsigned int evtchn = connect->evtchn; 21.175 + unsigned long shmem_frame = connect->shmem_frame; 21.176 + struct vm_struct *vma; 21.177 + pgprot_t prot; 21.178 + int error; 21.179 + blkif_t *blkif; 21.180 + 21.181 + blkif = blkif_find_by_handle(domid, handle); 21.182 + if ( unlikely(blkif == NULL) ) 21.183 + { 21.184 + DPRINTK("blkif_connect attempted for non-existent blkif (%llu,%u)\n", 21.185 + connect->domid, connect->blkif_handle); 21.186 + connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 21.187 + return; 21.188 + } 21.189 + 21.190 + if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) 21.191 + { 21.192 + connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 21.193 + return; 21.194 + } 21.195 + 21.196 + prot = __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED); 21.197 + error = direct_remap_area_pages(&init_mm, VMALLOC_VMADDR(vma->addr), 21.198 + shmem_frame<<PAGE_SHIFT, PAGE_SIZE, 21.199 + prot, domid); 21.200 + if ( error != 0 ) 21.201 + { 21.202 + if ( error == -ENOMEM ) 21.203 + connect->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 21.204 + else if ( error == -EFAULT ) 21.205 + connect->status = BLKIF_BE_STATUS_MAPPING_ERROR; 21.206 + else 21.207 + connect->status = BLKIF_BE_STATUS_ERROR; 21.208 + vfree(vma->addr); 21.209 + return; 21.210 + } 21.211 + 21.212 + if ( blkif->status != DISCONNECTED ) 21.213 + { 21.214 + connect->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; 21.215 + vfree(vma->addr); 21.216 + return; 21.217 + } 21.218 + 21.219 + blkif->evtchn = evtchn; 21.220 + blkif->irq = bind_evtchn_to_irq(evtchn); 21.221 + blkif->shmem_frame = shmem_frame; 21.222 + blkif->blk_ring_base = (blkif_ring_t *)vma->addr; 21.223 + blkif->status = CONNECTED; 21.224 + blkif_get(blkif); 21.225 + 21.226 + request_irq(blkif->irq, blkif_be_int, 0, "blkif-backend", blkif); 21.227 + 21.228 + connect->status = BLKIF_BE_STATUS_OKAY; 21.229 +} 21.230 + 21.231 +int blkif_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) 21.232 +{ 21.233 + domid_t domid = disconnect->domid; 21.234 + unsigned int handle = disconnect->blkif_handle; 21.235 + blkif_t *blkif; 21.236 + 21.237 + blkif = blkif_find_by_handle(domid, handle); 21.238 + if ( unlikely(blkif == NULL) ) 21.239 + { 21.240 + DPRINTK("blkif_disconnect attempted for non-existent blkif" 21.241 + " (%llu,%u)\n", disconnect->domid, disconnect->blkif_handle); 21.242 + disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 21.243 + return 1; /* Caller will send response error message. */ 21.244 + } 21.245 + 21.246 + if ( blkif->status == CONNECTED ) 21.247 + { 21.248 + blkif->status = DISCONNECTING; 21.249 + blkif->disconnect_rspid = rsp_id; 21.250 + wmb(); /* Let other CPUs see the status change. */ 21.251 + free_irq(blkif->irq, NULL); 21.252 + blkif_deschedule(blkif); 21.253 + blkif_put(blkif); 21.254 + } 21.255 + 21.256 + return 0; /* Caller should not send response message. */ 21.257 +} 21.258 + 21.259 +void __init blkif_interface_init(void) 21.260 +{ 21.261 + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 21.262 + 0, 0, NULL, NULL); 21.263 + memset(blkif_hash, 0, sizeof(blkif_hash)); 21.264 +}
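Taken together, the new handlers implement a small per-interface state machine; the mb() pair in __blkif_disconnect_complete() orders message construction, the status change, and the response so that a racing CONNECT on another CPU cannot observe the reply before DISCONNECTED is visible. The life cycle, summarised as a comment:

    /*
     * blkif life cycle as implemented above (sketch):
     *
     *   CREATE  -> DISCONNECTED
     *   CONNECT:    DISCONNECTED  -> CONNECTED
     *   DISCONNECT: CONNECTED     -> DISCONNECTING
     *   last blkif_put(): DISCONNECTING -> DISCONNECTED  [deferred response]
     *
     * DESTROY succeeds only from DISCONNECTED; a CONNECT against any other
     * state fails with BLKIF_BE_STATUS_INTERFACE_CONNECTED.
     */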
22.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c Sun May 02 00:27:58 2004 +0000 22.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/main.c Sun May 02 00:52:45 2004 +0000 22.3 @@ -24,6 +24,18 @@ 22.4 #define MAX_PENDING_REQS 64 22.5 #define BATCH_PER_DOMAIN 16 22.6 22.7 +static struct vm_struct *mmap_vma; 22.8 +#define MMAP_PAGES_PER_SEGMENT \ 22.9 + ((BLKIF_MAX_SEGMENTS_PER_REQUEST >> (PAGE_SHIFT-9)) + 1) 22.10 +#define MMAP_PAGES_PER_REQUEST \ 22.11 + (2 * BLKIF_MAX_SEGMENTS_PER_REQUEST * MMAP_PAGES_PER_SEGMENT) 22.12 +#define MMAP_PAGES \ 22.13 + (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST) 22.14 +#define MMAP_VADDR(_req,_seg) \ 22.15 + ((unsigned long)mmap_vma->addr + \ 22.16 + ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ 22.17 + ((_seg) * MMAP_PAGES_PER_SEGMENT * PAGE_SIZE)) 22.18 + 22.19 /* 22.20 * Each outstanding request that we've passed to the lower device layers has a 22.21 * 'pending_req' allocated to it. Each buffer_head that completes decrements 22.22 @@ -46,22 +58,11 @@ static PEND_RING_IDX pending_prod, pendi 22.23 22.24 static kmem_cache_t *buffer_head_cachep; 22.25 22.26 -static struct buffer_head *completed_bhs[NR_CPUS] __cacheline_aligned; 22.27 - 22.28 -static int lock_buffer(blkif_t *blkif, 22.29 - unsigned long buffer, 22.30 - unsigned short size, 22.31 - int writeable_buffer); 22.32 -static void unlock_buffer(unsigned long buffer, 22.33 - unsigned short size, 22.34 - int writeable_buffer); 22.35 - 22.36 -static void io_schedule(unsigned long unused); 22.37 static int do_block_io_op(blkif_t *blkif, int max_to_do); 22.38 -static void dispatch_rw_block_io(blkif_t *blkif, 22.39 - blk_ring_req_entry_t *req); 22.40 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req); 22.41 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req); 22.42 static void make_response(blkif_t *blkif, unsigned long id, 22.43 - unsigned short op, unsigned long st); 22.44 + unsigned short op, int st); 22.45 22.46 22.47 /****************************************************************** 22.48 @@ -95,7 +96,7 @@ static void add_to_blkdev_list_tail(blki 22.49 unsigned long flags; 22.50 if ( __on_blkdev_list(blkif) ) return; 22.51 spin_lock_irqsave(&io_schedule_list_lock, flags); 22.52 - if ( !__on_blkdev_list(blkif) ) 22.53 + if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) 22.54 { 22.55 list_add_tail(&blkif->blkdev_list, &io_schedule_list); 22.56 blkif_get(blkif); 22.57 @@ -108,8 +109,6 @@ static void add_to_blkdev_list_tail(blki 22.58 * SCHEDULER FUNCTIONS 22.59 */ 22.60 22.61 -static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0); 22.62 - 22.63 static void io_schedule(unsigned long unused) 22.64 { 22.65 blkif_t *blkif; 22.66 @@ -132,6 +131,8 @@ static void io_schedule(unsigned long un 22.67 run_task_queue(&tq_disk); 22.68 } 22.69 22.70 +static DECLARE_TASKLET(io_schedule_tasklet, io_schedule, 0); 22.71 + 22.72 static void maybe_trigger_io_schedule(void) 22.73 { 22.74 /* 22.75 @@ -155,28 +156,26 @@ static void maybe_trigger_io_schedule(vo 22.76 static void end_block_io_op(struct buffer_head *bh, int uptodate) 22.77 { 22.78 pending_req_t *pending_req = bh->b_private; 22.79 + unsigned long flags; 22.80 22.81 /* An error fails the entire request. 
*/ 22.82 if ( !uptodate ) 22.83 { 22.84 DPRINTK("Buffer not up-to-date at end of operation\n"); 22.85 - pending_req->status = 2; 22.86 + pending_req->status = BLKIF_RSP_ERROR; 22.87 } 22.88 22.89 - unlock_buffer(virt_to_phys(bh->b_data), 22.90 - bh->b_size, 22.91 - (pending_req->operation==READ)); 22.92 - 22.93 if ( atomic_dec_and_test(&pending_req->pendcnt) ) 22.94 { 22.95 + int pending_idx = pending_req - pending_reqs; 22.96 + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 22.97 + MMAP_PAGES_PER_REQUEST * PAGE_SIZE); 22.98 make_response(pending_req->blkif, pending_req->id, 22.99 pending_req->operation, pending_req->status); 22.100 blkif_put(pending_req->blkif); 22.101 - spin_lock(&pend_prod_lock); 22.102 - pending_ring[MASK_PEND_IDX(pending_prod)] = 22.103 - pending_req - pending_reqs; 22.104 - pending_prod++; 22.105 - spin_unlock(&pend_prod_lock); 22.106 + spin_lock_irqsave(&pend_prod_lock, flags); 22.107 + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; 22.108 + spin_unlock_irqrestore(&pend_prod_lock, flags); 22.109 maybe_trigger_io_schedule(); 22.110 } 22.111 } 22.112 @@ -200,45 +199,10 @@ void blkif_be_int(int irq, void *dev_id, 22.113 * DOWNWARD CALLS -- These interface with the block-device layer proper. 22.114 */ 22.115 22.116 -static int lock_buffer(blkif_t *blkif, 22.117 - unsigned long buffer, 22.118 - unsigned short size, 22.119 - int writeable_buffer) 22.120 -{ 22.121 - unsigned long pfn; 22.122 - 22.123 - for ( pfn = buffer >> PAGE_SHIFT; 22.124 - pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT); 22.125 - pfn++ ) 22.126 - { 22.127 - } 22.128 - 22.129 - return 1; 22.130 - 22.131 - fail: 22.132 - while ( pfn-- > (buffer >> PAGE_SHIFT) ) 22.133 - { 22.134 - } 22.135 - return 0; 22.136 -} 22.137 - 22.138 -static void unlock_buffer(unsigned long buffer, 22.139 - unsigned short size, 22.140 - int writeable_buffer) 22.141 -{ 22.142 - unsigned long pfn; 22.143 - 22.144 - for ( pfn = buffer >> PAGE_SHIFT; 22.145 - pfn < ((buffer + size + PAGE_SIZE - 1) >> PAGE_SHIFT); 22.146 - pfn++ ) 22.147 - { 22.148 - } 22.149 -} 22.150 - 22.151 static int do_block_io_op(blkif_t *blkif, int max_to_do) 22.152 { 22.153 - blk_ring_t *blk_ring = blkif->blk_ring_base; 22.154 - blk_ring_req_entry_t *req; 22.155 + blkif_ring_t *blk_ring = blkif->blk_ring_base; 22.156 + blkif_request_t *req; 22.157 BLK_RING_IDX i; 22.158 int more_to_do = 0; 22.159 22.160 @@ -262,11 +226,15 @@ static int do_block_io_op(blkif_t *blkif 22.161 dispatch_rw_block_io(blkif, req); 22.162 break; 22.163 22.164 + case BLKIF_OP_PROBE: 22.165 + dispatch_probe(blkif, req); 22.166 + break; 22.167 + 22.168 default: 22.169 DPRINTK("error: unknown block io operation [%d]\n", 22.170 blk_ring->ring[i].req.operation); 22.171 make_response(blkif, blk_ring->ring[i].req.id, 22.172 - blk_ring->ring[i].req.operation, 1); 22.173 + blk_ring->ring[i].req.operation, BLKIF_RSP_ERROR); 22.174 break; 22.175 } 22.176 } 22.177 @@ -275,24 +243,64 @@ static int do_block_io_op(blkif_t *blkif 22.178 return more_to_do; 22.179 } 22.180 22.181 -static void dispatch_rw_block_io(blkif_t *blkif, 22.182 - blk_ring_req_entry_t *req) 22.183 +static void dispatch_probe(blkif_t *blkif, blkif_request_t *req) 22.184 +{ 22.185 + int i, rc, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; 22.186 + pgprot_t prot; 22.187 + 22.188 + /* Check that number of segments is sane. 
*/ 22.189 + if ( unlikely(req->nr_segments == 0) || 22.190 + unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) 22.191 + { 22.192 + DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments); 22.193 + goto bad_descriptor; 22.194 + } 22.195 + 22.196 + prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW); 22.197 + for ( i = 0; i < req->nr_segments; i++ ) 22.198 + { 22.199 + if ( (req->buffer_and_sects[i] & ~PAGE_MASK) != (PAGE_SIZE / 512) ) 22.200 + goto bad_descriptor; 22.201 + rc = direct_remap_area_pages(&init_mm, 22.202 + MMAP_VADDR(pending_idx, i), 22.203 + req->buffer_and_sects[i] & PAGE_MASK, 22.204 + PAGE_SIZE, prot, blkif->domid); 22.205 + if ( rc != 0 ) 22.206 + goto bad_descriptor; 22.207 + } 22.208 + 22.209 + rc = vbd_probe(blkif, (vdisk_t *)MMAP_VADDR(pending_idx, 0), 22.210 + (req->nr_segments * PAGE_SIZE) / sizeof(vdisk_t)); 22.211 + 22.212 + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 22.213 + MMAP_PAGES_PER_REQUEST * PAGE_SIZE); 22.214 + make_response(blkif, req->id, req->operation, rc); 22.215 + return; 22.216 + 22.217 + bad_descriptor: 22.218 + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 22.219 + MMAP_PAGES_PER_REQUEST * PAGE_SIZE); 22.220 + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); 22.221 +} 22.222 + 22.223 +static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req) 22.224 { 22.225 extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 22.226 struct buffer_head *bh; 22.227 - int operation = (req->operation == XEN_BLOCK_WRITE) ? WRITE : READ; 22.228 + int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ; 22.229 unsigned short nr_sects; 22.230 unsigned long buffer; 22.231 - int i, tot_sects; 22.232 + int i, tot_sects, pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; 22.233 pending_req_t *pending_req; 22.234 + pgprot_t prot; 22.235 22.236 /* We map virtual scatter/gather segments to physical segments. */ 22.237 int new_segs, nr_psegs = 0; 22.238 - phys_seg_t phys_seg[MAX_BLK_SEGS * 2]; 22.239 + phys_seg_t phys_seg[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2]; 22.240 22.241 /* Check that number of segments is sane. */ 22.242 if ( unlikely(req->nr_segments == 0) || 22.243 - unlikely(req->nr_segments > MAX_BLK_SEGS) ) 22.244 + unlikely(req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) 22.245 { 22.246 DPRINTK("Bad number of segments in request (%d)\n", req->nr_segments); 22.247 goto bad_descriptor; 22.248 @@ -310,8 +318,11 @@ static void dispatch_rw_block_io(blkif_t 22.249 nr_sects = req->buffer_and_sects[i] & 0x1FF; 22.250 22.251 if ( unlikely(nr_sects == 0) ) 22.252 + continue; 22.253 + 22.254 + if ( unlikely(nr_sects > BLKIF_MAX_SECTORS_PER_SEGMENT) ) 22.255 { 22.256 - DPRINTK("zero-sized data request\n"); 22.257 + DPRINTK("Too many sectors in segment\n"); 22.258 goto bad_descriptor; 22.259 } 22.260 22.261 @@ -333,29 +344,41 @@ static void dispatch_rw_block_io(blkif_t 22.262 } 22.263 22.264 nr_psegs += new_segs; 22.265 - ASSERT(nr_psegs <= MAX_BLK_SEGS*2); 22.266 + ASSERT(nr_psegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST*2); 22.267 } 22.268 22.269 + /* Nonsensical zero-sized request? 
*/ 22.270 + if ( unlikely(nr_psegs == 0) ) 22.271 + goto bad_descriptor; 22.272 + 22.273 + if ( operation == READ ) 22.274 + prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_RW); 22.275 + else 22.276 + prot = __pgprot(_PAGE_PRESENT|_PAGE_DIRTY|_PAGE_ACCESSED); 22.277 + 22.278 for ( i = 0; i < nr_psegs; i++ ) 22.279 { 22.280 - if ( unlikely(!lock_buffer(blkif, phys_seg[i].buffer, 22.281 - phys_seg[i].nr_sects << 9, 22.282 - operation==READ)) ) 22.283 + unsigned long sz = ((phys_seg[i].buffer & ~PAGE_MASK) + 22.284 + (phys_seg[i].nr_sects << 9) + 22.285 + (PAGE_SIZE - 1)) & PAGE_MASK; 22.286 + int rc = direct_remap_area_pages(&init_mm, 22.287 + MMAP_VADDR(pending_idx, i), 22.288 + phys_seg[i].buffer & PAGE_MASK, 22.289 + sz, prot, blkif->domid); 22.290 + if ( rc != 0 ) 22.291 { 22.292 DPRINTK("invalid buffer\n"); 22.293 - while ( i-- > 0 ) 22.294 - unlock_buffer(phys_seg[i].buffer, 22.295 - phys_seg[i].nr_sects << 9, 22.296 - operation==READ); 22.297 + vmfree_area_pages(MMAP_VADDR(pending_idx, 0), 22.298 + MMAP_PAGES_PER_REQUEST * PAGE_SIZE); 22.299 goto bad_descriptor; 22.300 } 22.301 } 22.302 22.303 - pending_req = &pending_reqs[pending_ring[MASK_PEND_IDX(pending_cons++)]]; 22.304 + pending_req = &pending_reqs[pending_idx]; 22.305 pending_req->blkif = blkif; 22.306 pending_req->id = req->id; 22.307 pending_req->operation = operation; 22.308 - pending_req->status = 0; 22.309 + pending_req->status = BLKIF_RSP_OKAY; 22.310 atomic_set(&pending_req->pendcnt, nr_psegs); 22.311 22.312 blkif_get(blkif); 22.313 @@ -363,38 +386,37 @@ static void dispatch_rw_block_io(blkif_t 22.314 /* Now we pass each segment down to the real blkdev layer. */ 22.315 for ( i = 0; i < nr_psegs; i++ ) 22.316 { 22.317 - bh = kmem_cache_alloc(buffer_head_cachep, GFP_KERNEL); 22.318 + bh = kmem_cache_alloc(buffer_head_cachep, GFP_ATOMIC); 22.319 if ( unlikely(bh == NULL) ) 22.320 panic("bh is null\n"); 22.321 memset(bh, 0, sizeof (struct buffer_head)); 22.322 - 22.323 + 22.324 + init_waitqueue_head(&bh->b_wait); 22.325 bh->b_size = phys_seg[i].nr_sects << 9; 22.326 bh->b_dev = phys_seg[i].dev; 22.327 + bh->b_rdev = phys_seg[i].dev; 22.328 bh->b_rsector = (unsigned long)phys_seg[i].sector_number; 22.329 - 22.330 - /* SMH: we store a 'pseudo-virtual' bogus address in b_data since 22.331 - later code will undo this transformation (i.e. +-PAGE_OFFSET). */ 22.332 - bh->b_data = phys_to_virt(phys_seg[i].buffer); 22.333 - 22.334 - /* SMH: bh_phys() uses the below field as a 'cheap' virt_to_phys */ 22.335 - bh->b_page = &mem_map[phys_seg[i].buffer>>PAGE_SHIFT]; 22.336 + bh->b_data = (char *)MMAP_VADDR(pending_idx, i) + 22.337 + (phys_seg[i].buffer & ~PAGE_MASK); 22.338 bh->b_end_io = end_block_io_op; 22.339 bh->b_private = pending_req; 22.340 22.341 - bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock); 22.342 + bh->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | 22.343 + (1 << BH_Req) | (1 << BH_Launder); 22.344 if ( operation == WRITE ) 22.345 bh->b_state |= (1 << BH_JBD) | (1 << BH_Req) | (1 << BH_Uptodate); 22.346 22.347 atomic_set(&bh->b_count, 1); 22.348 22.349 /* Dispatch a single request. We'll flush it to disc later. 
*/ 22.350 - submit_bh(operation, bh); 22.351 + generic_make_request(operation, bh); 22.352 } 22.353 22.354 + pending_cons++; 22.355 return; 22.356 22.357 bad_descriptor: 22.358 - make_response(blkif, req->id, req->operation, 1); 22.359 + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); 22.360 } 22.361 22.362 22.363 @@ -405,12 +427,13 @@ static void dispatch_rw_block_io(blkif_t 22.364 22.365 22.366 static void make_response(blkif_t *blkif, unsigned long id, 22.367 - unsigned short op, unsigned long st) 22.368 + unsigned short op, int st) 22.369 { 22.370 - blk_ring_resp_entry_t *resp; 22.371 + blkif_response_t *resp; 22.372 + unsigned long flags; 22.373 22.374 /* Place on the response ring for the relevant domain. */ 22.375 - spin_lock(&blkif->blk_ring_lock); 22.376 + spin_lock_irqsave(&blkif->blk_ring_lock, flags); 22.377 resp = &blkif->blk_ring_base-> 22.378 ring[MASK_BLK_IDX(blkif->blk_resp_prod)].resp; 22.379 resp->id = id; 22.380 @@ -418,85 +441,44 @@ static void make_response(blkif_t *blkif 22.381 resp->status = st; 22.382 wmb(); 22.383 blkif->blk_ring_base->resp_prod = ++blkif->blk_resp_prod; 22.384 - spin_unlock(&blkif->blk_ring_lock); 22.385 + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); 22.386 22.387 /* Kick the relevant domain. */ 22.388 notify_via_evtchn(blkif->evtchn); 22.389 } 22.390 22.391 -static void blkif_debug_int(int irq, void *unused, struct pt_regs *regs) 22.392 +void blkif_deschedule(blkif_t *blkif) 22.393 { 22.394 -#if 0 22.395 - unsigned long flags; 22.396 - struct task_struct *p; 22.397 - blk_ring_t *blk_ring; 22.398 - int i; 22.399 - 22.400 - printk("Dumping block queue stats: nr_pending = %d" 22.401 - " (prod=0x%08x,cons=0x%08x)\n", 22.402 - NR_PENDING_REQS, pending_prod, pending_cons); 22.403 - 22.404 - read_lock_irqsave(&tasklist_lock, flags); 22.405 - for_each_domain ( p ) 22.406 - { 22.407 - printk("Domain: %llu\n", blkif->domain); 22.408 - blk_ring = blkif->blk_ring_base; 22.409 - printk(" req_prod:0x%08x, req_cons:0x%08x resp_prod:0x%08x/" 22.410 - "0x%08x on_list=%d\n", 22.411 - blk_ring->req_prod, blkif->blk_req_cons, 22.412 - blk_ring->resp_prod, blkif->blk_resp_prod, 22.413 - __on_blkdev_list(p)); 22.414 - } 22.415 - read_unlock_irqrestore(&tasklist_lock, flags); 22.416 - 22.417 - for ( i = 0; i < MAX_PENDING_REQS; i++ ) 22.418 - { 22.419 - printk("Pend%d: dom=%p, id=%08lx, cnt=%d, op=%d, status=%d\n", 22.420 - i, pending_reqs[i].domain, pending_reqs[i].id, 22.421 - atomic_read(&pending_reqs[i].pendcnt), 22.422 - pending_reqs[i].operation, pending_reqs[i].status); 22.423 - } 22.424 -#endif 22.425 -} 22.426 - 22.427 -void unlink_blkdev_info(blkif_t *blkif) 22.428 -{ 22.429 - unsigned long flags; 22.430 - 22.431 - spin_lock_irqsave(&io_schedule_list_lock, flags); 22.432 - if ( __on_blkdev_list(blkif) ) 22.433 - { 22.434 - list_del(&blkif->blkdev_list); 22.435 - blkif->blkdev_list.next = (void *)0xdeadbeef; 22.436 - blkif_put(blkif); 22.437 - } 22.438 - spin_unlock_irqrestore(&io_schedule_list_lock, flags); 22.439 + remove_from_blkdev_list(blkif); 22.440 } 22.441 22.442 static int __init init_module(void) 22.443 { 22.444 int i; 22.445 22.446 + blkif_interface_init(); 22.447 + 22.448 + if ( (mmap_vma = get_vm_area(MMAP_PAGES * PAGE_SIZE, VM_IOREMAP)) == NULL ) 22.449 + { 22.450 + printk(KERN_WARNING "Could not allocate VMA for blkif backend.\n"); 22.451 + return -ENOMEM; 22.452 + } 22.453 + 22.454 pending_cons = 0; 22.455 pending_prod = MAX_PENDING_REQS; 22.456 memset(pending_reqs, 0, sizeof(pending_reqs)); 22.457 for ( i = 0; i < 
MAX_PENDING_REQS; i++ ) 22.458 pending_ring[i] = i; 22.459 22.460 - for ( i = 0; i < NR_CPUS; i++ ) 22.461 - completed_bhs[i] = NULL; 22.462 - 22.463 spin_lock_init(&io_schedule_list_lock); 22.464 INIT_LIST_HEAD(&io_schedule_list); 22.465 22.466 - if ( request_irq(bind_virq_to_irq(VIRQ_DEBUG), blkif_debug_int, 22.467 - SA_SHIRQ, "blkif-backend-dbg", &blkif_debug_int) != 0 ) 22.468 - printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n"); 22.469 - 22.470 buffer_head_cachep = kmem_cache_create( 22.471 "buffer_head_cache", sizeof(struct buffer_head), 22.472 0, SLAB_HWCACHE_ALIGN, NULL, NULL); 22.473 22.474 + blkif_ctrlif_init(); 22.475 + 22.476 return 0; 22.477 } 22.478
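The MMAP_* macros carve one fixed vmalloc arena into a slot per pending request, so foreign pages can be mapped and unmapped without allocating virtual address space on the I/O path. The arithmetic, worked through for 4KB pages (PAGE_SHIFT = 12) with the constants from blkif.h below; note the first macro shifts the segment count, exactly as written:

    /* MMAP_PAGES_PER_SEGMENT = (11 >> (12 - 9)) + 1 = (11 >> 3) + 1 =    2
     * MMAP_PAGES_PER_REQUEST = 2 * 11 * 2                           =   44
     * MMAP_PAGES             = 64 * 44                              = 2816
     * i.e. roughly 11MB of reserved virtual address space, and
     * MMAP_VADDR(req, seg) selects page (req * 44 + seg * 2) of the arena. */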
23.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c Sun May 02 00:27:58 2004 +0000 23.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/backend/vbd.c Sun May 02 00:52:45 2004 +0000 23.3 @@ -8,7 +8,7 @@ 23.4 23.5 #include "common.h" 23.6 23.7 -void vbd_create(blkif_vbd_create_t *create) 23.8 +void vbd_create(blkif_be_vbd_create_t *create) 23.9 { 23.10 vbd_t *vbd; 23.11 rb_node_t **rb_p, *rb_parent = NULL; 23.12 @@ -18,9 +18,9 @@ void vbd_create(blkif_vbd_create_t *crea 23.13 blkif = blkif_find_by_handle(create->domid, create->blkif_handle); 23.14 if ( unlikely(blkif == NULL) ) 23.15 { 23.16 - DPRINTK("vbd_create attempted for non-existent blkif (%llu,&u)\n", 23.17 + DPRINTK("vbd_create attempted for non-existent blkif (%llu,%u)\n", 23.18 create->domid, create->blkif_handle); 23.19 - create->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; 23.20 + create->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 23.21 return; 23.22 } 23.23 23.24 @@ -42,49 +42,50 @@ void vbd_create(blkif_vbd_create_t *crea 23.25 else 23.26 { 23.27 DPRINTK("vbd_create attempted for already existing vbd\n"); 23.28 - create->status = BLKIF_STATUS_VBD_EXISTS; 23.29 + create->status = BLKIF_BE_STATUS_VBD_EXISTS; 23.30 goto out; 23.31 } 23.32 } 23.33 23.34 - if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_KERNEL)) == NULL) ) 23.35 + if ( unlikely((vbd = kmalloc(sizeof(vbd_t), GFP_ATOMIC)) == NULL) ) 23.36 { 23.37 DPRINTK("vbd_create: out of memory\n"); 23.38 - create->status = BLKIF_STATUS_OUT_OF_MEMORY; 23.39 + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 23.40 goto out; 23.41 } 23.42 23.43 - vbd->vdevice = vdevice; 23.44 - vbd->mode = create->mode; 23.45 - vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; 23.46 - vbd->extents = NULL; 23.47 + vbd->vdevice = vdevice; 23.48 + vbd->readonly = create->readonly; 23.49 + vbd->type = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; 23.50 + vbd->extents = NULL; 23.51 23.52 rb_link_node(&vbd->rb, rb_parent, rb_p); 23.53 rb_insert_color(&vbd->rb, &blkif->vbd_rb); 23.54 23.55 - create->status = BLKIF_STATUS_OKAY; 23.56 + DPRINTK("Successful creation of vdev=%04x (dom=%llu)\n", 23.57 + vdevice, create->domid); 23.58 + create->status = BLKIF_BE_STATUS_OKAY; 23.59 23.60 out: 23.61 spin_unlock(&blkif->vbd_lock); 23.62 - blkif_put(blkif); 23.63 } 23.64 23.65 23.66 /* Grow a VBD by appending a new extent. Fails if the VBD doesn't exist. 
*/ 23.67 -void vbd_grow(blkif_vbd_grow_t *grow) 23.68 +void vbd_grow(blkif_be_vbd_grow_t *grow) 23.69 { 23.70 - blkif_t *blkif; 23.71 - xen_extent_le_t **px, *x; 23.72 - vbd_t *vbd = NULL; 23.73 - rb_node_t *rb; 23.74 - blkif_vdev_t vdevice = grow->vdevice; 23.75 + blkif_t *blkif; 23.76 + blkif_extent_le_t **px, *x; 23.77 + vbd_t *vbd = NULL; 23.78 + rb_node_t *rb; 23.79 + blkif_vdev_t vdevice = grow->vdevice; 23.80 23.81 blkif = blkif_find_by_handle(grow->domid, grow->blkif_handle); 23.82 if ( unlikely(blkif == NULL) ) 23.83 { 23.84 - DPRINTK("vbd_grow attempted for non-existent blkif (%llu,&u)\n", 23.85 + DPRINTK("vbd_grow attempted for non-existent blkif (%llu,%u)\n", 23.86 grow->domid, grow->blkif_handle); 23.87 - grow->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; 23.88 + grow->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 23.89 return; 23.90 } 23.91 23.92 @@ -105,49 +106,51 @@ void vbd_grow(blkif_vbd_grow_t *grow) 23.93 if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) 23.94 { 23.95 DPRINTK("vbd_grow: attempted to append extent to non-existent VBD.\n"); 23.96 - grow->status = BLKIF_STATUS_VBD_NOT_FOUND; 23.97 + grow->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 23.98 goto out; 23.99 } 23.100 23.101 - if ( unlikely((x = kmalloc(sizeof(xen_extent_le_t), GFP_KERNEL)) == NULL) ) 23.102 + if ( unlikely((x = kmalloc(sizeof(blkif_extent_le_t), 23.103 + GFP_ATOMIC)) == NULL) ) 23.104 { 23.105 DPRINTK("vbd_grow: out of memory\n"); 23.106 - grow->status = BLKIF_STATUS_OUT_OF_MEMORY; 23.107 + grow->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; 23.108 goto out; 23.109 } 23.110 23.111 x->extent.device = grow->extent.device; 23.112 x->extent.sector_start = grow->extent.sector_start; 23.113 x->extent.sector_length = grow->extent.sector_length; 23.114 - x->next = (xen_extent_le_t *)NULL; 23.115 + x->next = (blkif_extent_le_t *)NULL; 23.116 23.117 for ( px = &vbd->extents; *px != NULL; px = &(*px)->next ) 23.118 continue; 23.119 23.120 *px = x; 23.121 23.122 - grow->status = BLKIF_STATUS_OKAY; 23.123 + DPRINTK("Successful grow of vdev=%04x (dom=%llu)\n", 23.124 + vdevice, grow->domid); 23.125 + grow->status = BLKIF_BE_STATUS_OKAY; 23.126 23.127 out: 23.128 spin_unlock(&blkif->vbd_lock); 23.129 - blkif_put(blkif); 23.130 } 23.131 23.132 23.133 -void vbd_shrink(blkif_vbd_shrink_t *shrink) 23.134 +void vbd_shrink(blkif_be_vbd_shrink_t *shrink) 23.135 { 23.136 - blkif_t *blkif; 23.137 - xen_extent_le_t **px, *x; 23.138 - vbd_t *vbd = NULL; 23.139 - rb_node_t *rb; 23.140 - blkif_vdev_t vdevice = shrink->vdevice; 23.141 + blkif_t *blkif; 23.142 + blkif_extent_le_t **px, *x; 23.143 + vbd_t *vbd = NULL; 23.144 + rb_node_t *rb; 23.145 + blkif_vdev_t vdevice = shrink->vdevice; 23.146 23.147 blkif = blkif_find_by_handle(shrink->domid, shrink->blkif_handle); 23.148 if ( unlikely(blkif == NULL) ) 23.149 { 23.150 - DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,&u)\n", 23.151 + DPRINTK("vbd_shrink attempted for non-existent blkif (%llu,%u)\n", 23.152 shrink->domid, shrink->blkif_handle); 23.153 - shrink->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; 23.154 + shrink->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 23.155 return; 23.156 } 23.157 23.158 @@ -167,13 +170,13 @@ void vbd_shrink(blkif_vbd_shrink_t *shri 23.159 23.160 if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != vdevice) ) 23.161 { 23.162 - shrink->status = BLKIF_STATUS_VBD_NOT_FOUND; 23.163 + shrink->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 23.164 goto out; 23.165 } 23.166 23.167 if ( unlikely(vbd->extents == NULL) ) 23.168 { 23.169 - 
shrink->status = BLKIF_STATUS_EXTENT_NOT_FOUND; 23.170 + shrink->status = BLKIF_BE_STATUS_EXTENT_NOT_FOUND; 23.171 goto out; 23.172 } 23.173 23.174 @@ -185,28 +188,27 @@ void vbd_shrink(blkif_vbd_shrink_t *shri 23.175 *px = x->next; 23.176 kfree(x); 23.177 23.178 - shrink->status = BLKIF_STATUS_OKAY; 23.179 + shrink->status = BLKIF_BE_STATUS_OKAY; 23.180 23.181 out: 23.182 spin_unlock(&blkif->vbd_lock); 23.183 - blkif_put(blkif); 23.184 } 23.185 23.186 23.187 -void vbd_destroy(blkif_vbd_destroy_t *destroy) 23.188 +void vbd_destroy(blkif_be_vbd_destroy_t *destroy) 23.189 { 23.190 - blkif_t *blkif; 23.191 - vbd_t *vbd; 23.192 - rb_node_t *rb; 23.193 - xen_extent_le_t *x, *t; 23.194 - blkif_vdev_t vdevice = destroy->vdevice; 23.195 + blkif_t *blkif; 23.196 + vbd_t *vbd; 23.197 + rb_node_t *rb; 23.198 + blkif_extent_le_t *x, *t; 23.199 + blkif_vdev_t vdevice = destroy->vdevice; 23.200 23.201 blkif = blkif_find_by_handle(destroy->domid, destroy->blkif_handle); 23.202 if ( unlikely(blkif == NULL) ) 23.203 { 23.204 - DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,&u)\n", 23.205 + DPRINTK("vbd_destroy attempted for non-existent blkif (%llu,%u)\n", 23.206 destroy->domid, destroy->blkif_handle); 23.207 - destroy->status = BLKIF_STATUS_INTERFACE_NOT_FOUND; 23.208 + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; 23.209 return; 23.210 } 23.211 23.212 @@ -224,7 +226,7 @@ void vbd_destroy(blkif_vbd_destroy_t *de 23.213 goto found; 23.214 } 23.215 23.216 - destroy->status = BLKIF_STATUS_VBD_NOT_FOUND; 23.217 + destroy->status = BLKIF_BE_STATUS_VBD_NOT_FOUND; 23.218 goto out; 23.219 23.220 found: 23.221 @@ -241,7 +243,6 @@ void vbd_destroy(blkif_vbd_destroy_t *de 23.222 23.223 out: 23.224 spin_unlock(&blkif->vbd_lock); 23.225 - blkif_put(blkif); 23.226 } 23.227 23.228 23.229 @@ -249,7 +250,7 @@ void destroy_all_vbds(blkif_t *blkif) 23.230 { 23.231 vbd_t *vbd; 23.232 rb_node_t *rb; 23.233 - xen_extent_le_t *x, *t; 23.234 + blkif_extent_le_t *x, *t; 23.235 23.236 spin_lock(&blkif->vbd_lock); 23.237 23.238 @@ -273,51 +274,30 @@ void destroy_all_vbds(blkif_t *blkif) 23.239 } 23.240 23.241 23.242 -static int vbd_probe_single(xen_disk_info_t *xdi, 23.243 - vbd_t *vbd, 23.244 - struct task_struct *p) 23.245 +static int vbd_probe_single(blkif_t *blkif, vdisk_t *vbd_info, vbd_t *vbd) 23.246 { 23.247 - xen_extent_le_t *x; 23.248 - xen_disk_t cur_disk; 23.249 - 23.250 - if ( xdi->count == xdi->max ) 23.251 - { 23.252 - DPRINTK("vbd_probe_devices: out of space for probe.\n"); 23.253 - return -ENOMEM; 23.254 - } 23.255 + blkif_extent_le_t *x; 23.256 23.257 - cur_disk.device = vbd->vdevice; 23.258 - cur_disk.info = vbd->type; 23.259 - if ( !VBD_CAN_WRITE(vbd) ) 23.260 - cur_disk.info |= XD_FLAG_RO; 23.261 - cur_disk.capacity = 0ULL; 23.262 + vbd_info->device = vbd->vdevice; 23.263 + vbd_info->info = vbd->type; 23.264 + if ( vbd->readonly ) 23.265 + vbd_info->info |= VDISK_FLAG_RO; 23.266 + vbd_info->capacity = 0ULL; 23.267 for ( x = vbd->extents; x != NULL; x = x->next ) 23.268 - cur_disk.capacity += x->extent.nr_sectors; 23.269 - cur_disk.domain = p->domain; 23.270 + vbd_info->capacity += x->extent.sector_length; 23.271 23.272 - /* Now copy into relevant part of user-space buffer */ 23.273 - if( copy_to_user(&xdi->disks[xdi->count], 23.274 - &cur_disk, 23.275 - sizeof(xen_disk_t)) ) 23.276 - { 23.277 - DPRINTK("vbd_probe_devices: copy_to_user failed\n"); 23.278 - return -EFAULT; 23.279 - } 23.280 - 23.281 - xdi->count++; 23.282 - 23.283 return 0; 23.284 } 23.285 23.286 23.287 -static int 
vbd_probe_devices(xen_disk_info_t *xdi, struct task_struct *p) 23.288 +int vbd_probe(blkif_t *blkif, vdisk_t *vbd_info, int max_vbds) 23.289 { 23.290 - int rc = 0; 23.291 + int rc = 0, nr_vbds = 0; 23.292 rb_node_t *rb; 23.293 23.294 - spin_lock(&p->vbd_lock); 23.295 + spin_lock(&blkif->vbd_lock); 23.296 23.297 - if ( (rb = p->vbd_rb.rb_node) == NULL ) 23.298 + if ( (rb = blkif->vbd_rb.rb_node) == NULL ) 23.299 goto out; 23.300 23.301 new_subtree: 23.302 @@ -328,7 +308,10 @@ static int vbd_probe_devices(xen_disk_in 23.303 for ( ; ; ) 23.304 { 23.305 /* STEP 2. Dealt with left subtree. Now process current node. */ 23.306 - if ( (rc = vbd_probe_single(xdi, rb_entry(rb, vbd_t, rb), p)) != 0 ) 23.307 + if ( (rc = vbd_probe_single(blkif, &vbd_info[nr_vbds], 23.308 + rb_entry(rb, vbd_t, rb))) != 0 ) 23.309 + goto out; 23.310 + if ( ++nr_vbds == max_vbds ) 23.311 goto out; 23.312 23.313 /* STEP 3. Process right subtree, if any. */ 23.314 @@ -355,146 +338,22 @@ static int vbd_probe_devices(xen_disk_in 23.315 } 23.316 23.317 out: 23.318 - spin_unlock(&p->vbd_lock); 23.319 - return rc; 23.320 -} 23.321 - 23.322 - 23.323 -/* 23.324 - * Return information about the VBDs available for a given domain, or for all 23.325 - * domains; in the general case the 'domain' argument will be 0 which means 23.326 - * "information about the caller"; otherwise the 'domain' argument will 23.327 - * specify either a given domain, or all domains ("VBD_PROBE_ALL") -- both of 23.328 - * these cases require the caller to be privileged. 23.329 - */ 23.330 -long vbd_probe(vbd_probe_t *probe) 23.331 -{ 23.332 - struct task_struct *p = NULL; 23.333 - unsigned long flags; 23.334 - long ret = 0; 23.335 - 23.336 - if ( probe->domain != 0 ) 23.337 - { 23.338 - /* We can only probe for ourselves (unless we're privileged). */ 23.339 - if( (probe->domain != current->domain) && !IS_PRIV(current) ) 23.340 - return -EPERM; 23.341 - 23.342 - if ( (probe->domain != VBD_PROBE_ALL) && 23.343 - ((p = find_domain_by_id(probe->domain)) == NULL) ) 23.344 - { 23.345 - DPRINTK("vbd_probe attempted for non-existent domain %llu\n", 23.346 - probe->domain); 23.347 - return -EINVAL; 23.348 - } 23.349 - } 23.350 - else 23.351 - { 23.352 - /* Default is to probe for ourselves. */ 23.353 - p = current; 23.354 - get_task_struct(p); /* to mirror final put_task_struct */ 23.355 - } 23.356 - 23.357 - if ( probe->domain == VBD_PROBE_ALL ) 23.358 - { 23.359 - read_lock_irqsave(&tasklist_lock, flags); 23.360 - for_each_domain ( p ) 23.361 - { 23.362 - if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) 23.363 - { 23.364 - read_unlock_irqrestore(&tasklist_lock, flags); 23.365 - goto out; 23.366 - } 23.367 - } 23.368 - read_unlock_irqrestore(&tasklist_lock, flags); 23.369 - } 23.370 - else if ( (ret = vbd_probe_devices(&probe->xdi, p)) != 0 ) 23.371 - goto out; 23.372 - 23.373 - out: 23.374 - if ( ret != 0 ) 23.375 - DPRINTK("vbd_probe: err %ld in probing virtual devices\n", ret); 23.376 - if ( p != NULL ) 23.377 - put_task_struct(p); 23.378 - return ret; 23.379 + spin_unlock(&blkif->vbd_lock); 23.380 + return (rc == 0) ? 
nr_vbds : rc; 23.381 } 23.382 23.383 23.384 -long vbd_info(vbd_info_t *info) 23.385 +int vbd_translate(phys_seg_t *pseg, blkif_t *blkif, int operation) 23.386 { 23.387 - struct task_struct *p; 23.388 - xen_extent_le_t *x; 23.389 - xen_extent_t *extents; 23.390 - vbd_t *vbd = NULL; 23.391 - rb_node_t *rb; 23.392 - long ret = 0; 23.393 - 23.394 - if ( (info->domain != current->domain) && !IS_PRIV(current) ) 23.395 - return -EPERM; 23.396 - 23.397 - if ( (p = find_domain_by_id(info->domain)) == NULL ) 23.398 - { 23.399 - DPRINTK("vbd_info attempted for non-existent domain %llu\n", 23.400 - info->domain); 23.401 - return -EINVAL; 23.402 - } 23.403 - 23.404 - spin_lock(&p->vbd_lock); 23.405 - 23.406 - rb = p->vbd_rb.rb_node; 23.407 - while ( rb != NULL ) 23.408 - { 23.409 - vbd = rb_entry(rb, vbd_t, rb); 23.410 - if ( info->vdevice < vbd->vdevice ) 23.411 - rb = rb->rb_left; 23.412 - else if ( info->vdevice > vbd->vdevice ) 23.413 - rb = rb->rb_right; 23.414 - else 23.415 - break; 23.416 - } 23.417 + blkif_extent_le_t *x; 23.418 + vbd_t *vbd; 23.419 + rb_node_t *rb; 23.420 + blkif_sector_t sec_off; 23.421 + unsigned long nr_secs; 23.422 23.423 - if ( unlikely(vbd == NULL) || unlikely(vbd->vdevice != info->vdevice) ) 23.424 - { 23.425 - DPRINTK("vbd_info attempted on non-existent VBD.\n"); 23.426 - ret = -EINVAL; 23.427 - goto out; 23.428 - } 23.429 - 23.430 - info->mode = vbd->mode; 23.431 - info->nextents = 0; 23.432 + spin_lock(&blkif->vbd_lock); 23.433 23.434 - extents = info->extents; 23.435 - for ( x = vbd->extents; x != NULL; x = x->next ) 23.436 - { 23.437 - if ( info->nextents == info->maxextents ) 23.438 - break; 23.439 - if ( copy_to_user(extents, &x->extent, sizeof(xen_extent_t)) ) 23.440 - { 23.441 - DPRINTK("vbd_info: copy_to_user failed\n"); 23.442 - ret = -EFAULT; 23.443 - goto out; 23.444 - } 23.445 - extents++; 23.446 - info->nextents++; 23.447 - } 23.448 - 23.449 - out: 23.450 - spin_unlock(&p->vbd_lock); 23.451 - put_task_struct(p); 23.452 - return ret; 23.453 -} 23.454 - 23.455 - 23.456 -int vbd_translate(phys_seg_t *pseg, struct task_struct *p, int operation) 23.457 -{ 23.458 - xen_extent_le_t *x; 23.459 - vbd_t *vbd; 23.460 - rb_node_t *rb; 23.461 - xen_sector_t sec_off; 23.462 - unsigned long nr_secs; 23.463 - 23.464 - spin_lock(&p->vbd_lock); 23.465 - 23.466 - rb = p->vbd_rb.rb_node; 23.467 + rb = blkif->vbd_rb.rb_node; 23.468 while ( rb != NULL ) 23.469 { 23.470 vbd = rb_entry(rb, vbd_t, rb); 23.471 @@ -507,42 +366,41 @@ int vbd_translate(phys_seg_t *pseg, stru 23.472 } 23.473 23.474 DPRINTK("vbd_translate; domain %llu attempted to access " 23.475 - "non-existent VBD.\n", p->domain); 23.476 + "non-existent VBD.\n", blkif->domid); 23.477 23.478 - spin_unlock(&p->vbd_lock); 23.479 + spin_unlock(&blkif->vbd_lock); 23.480 return -ENODEV; 23.481 23.482 found: 23.483 23.484 - if ( ((operation == READ) && !VBD_CAN_READ(vbd)) || 23.485 - ((operation == WRITE) && !VBD_CAN_WRITE(vbd)) ) 23.486 + if ( (operation == WRITE) && vbd->readonly ) 23.487 { 23.488 - spin_unlock(&p->vbd_lock); 23.489 + spin_unlock(&blkif->vbd_lock); 23.490 return -EACCES; 23.491 } 23.492 23.493 /* 23.494 - * Now iterate through the list of xen_extents, working out which should 23.495 + * Now iterate through the list of blkif_extents, working out which should 23.496 * be used to perform the translation. 
23.497 */ 23.498 sec_off = pseg->sector_number; 23.499 nr_secs = pseg->nr_sects; 23.500 for ( x = vbd->extents; x != NULL; x = x->next ) 23.501 { 23.502 - if ( sec_off < x->extent.nr_sectors ) 23.503 + if ( sec_off < x->extent.sector_length ) 23.504 { 23.505 pseg->dev = x->extent.device; 23.506 - pseg->sector_number = x->extent.start_sector + sec_off; 23.507 - if ( unlikely((sec_off + nr_secs) > x->extent.nr_sectors) ) 23.508 + pseg->sector_number = x->extent.sector_start + sec_off; 23.509 + if ( unlikely((sec_off + nr_secs) > x->extent.sector_length) ) 23.510 goto overrun; 23.511 - spin_unlock(&p->vbd_lock); 23.512 + spin_unlock(&blkif->vbd_lock); 23.513 return 1; 23.514 } 23.515 - sec_off -= x->extent.nr_sectors; 23.516 + sec_off -= x->extent.sector_length; 23.517 } 23.518 23.519 DPRINTK("vbd_translate: end of vbd.\n"); 23.520 - spin_unlock(&p->vbd_lock); 23.521 + spin_unlock(&blkif->vbd_lock); 23.522 return -EACCES; 23.523 23.524 /* 23.525 @@ -554,7 +412,7 @@ int vbd_translate(phys_seg_t *pseg, stru 23.526 overrun: 23.527 23.528 /* Adjust length of first chunk to run to end of first extent. */ 23.529 - pseg[0].nr_sects = x->extent.nr_sectors - sec_off; 23.530 + pseg[0].nr_sects = x->extent.sector_length - sec_off; 23.531 23.532 /* Set second chunk buffer and length to start where first chunk ended. */ 23.533 pseg[1].buffer = pseg[0].buffer + (pseg[0].nr_sects << 9); 23.534 @@ -562,7 +420,7 @@ int vbd_translate(phys_seg_t *pseg, stru 23.535 23.536 /* Now move to the next extent. Check it exists and is long enough! */ 23.537 if ( unlikely((x = x->next) == NULL) || 23.538 - unlikely(x->extent.nr_sectors < pseg[1].nr_sects) ) 23.539 + unlikely(x->extent.sector_length < pseg[1].nr_sects) ) 23.540 { 23.541 DPRINTK("vbd_translate: multiple overruns or end of vbd.\n"); 23.542 - spin_unlock(&p->vbd_lock); 23.543 + spin_unlock(&blkif->vbd_lock); 23.544 @@ -571,8 +429,8 @@ int vbd_translate(phys_seg_t *pseg, stru 23.545 23.546 /* Store the real device and start sector for the second chunk. */ 23.547 pseg[1].dev = x->extent.device; 23.548 - pseg[1].sector_number = x->extent.start_sector; 23.549 + pseg[1].sector_number = x->extent.sector_start; 23.550 23.551 - spin_unlock(&p->vbd_lock); 23.552 + spin_unlock(&blkif->vbd_lock); 23.553 return 2; 23.554 }
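
The loop above is the heart of vbd_translate(): a virtual sector offset is resolved by walking the VBD's ordered extent list, subtracting each extent's length until the offset falls inside one; a request that crosses an extent boundary is split into two phys_segs via the 'overrun' path. A minimal standalone sketch of the walk follows (extent_t and translate() are illustrative stand-ins for the blkif_extent_le_t list, and the two-chunk split is omitted):

    #include <stdio.h>

    /* Illustrative stand-in for the blkif_extent_le_t list. */
    typedef struct extent {
        unsigned long device;        /* real device backing this extent  */
        unsigned long sector_start;  /* first sector on the real device  */
        unsigned long sector_length; /* number of sectors in this extent */
        struct extent *next;
    } extent_t;

    /* Map a virtual sector offset to (device, real sector); returns 0 on
     * success, -1 if the offset runs off the end of the virtual disk. */
    static int translate(extent_t *x, unsigned long sec_off,
                         unsigned long *dev, unsigned long *sec)
    {
        for ( ; x != NULL; x = x->next )
        {
            if ( sec_off < x->sector_length )
            {
                *dev = x->device;
                *sec = x->sector_start + sec_off;
                return 0;
            }
            sec_off -= x->sector_length; /* skip past this extent */
        }
        return -1; /* past end of VBD */
    }

    int main(void)
    {
        extent_t e2 = { 4, 0, 1000, NULL }; /* second extent             */
        extent_t e1 = { 3, 500, 100, &e2 }; /* first 100 virtual sectors */
        unsigned long dev, sec;
        /* Offset 150 falls 50 sectors into the second extent. */
        if ( translate(&e1, 150, &dev, &sec) == 0 )
            printf("dev=%lu sec=%lu\n", dev, sec); /* dev=4 sec=50 */
        return 0;
    }
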
24.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h Sun May 02 00:27:58 2004 +0000 24.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/blkif.h Sun May 02 00:52:45 2004 +0000 24.3 @@ -16,24 +16,27 @@ 24.4 #define BLKIF_OP_WRITE 1 24.5 #define BLKIF_OP_PROBE 2 24.6 24.7 -/* NB. Ring size must be small enough for sizeof(blk_ring_t) <= PAGE_SIZE. */ 24.8 +/* NB. Ring size must be small enough for sizeof(blkif_ring_t) <= PAGE_SIZE. */ 24.9 #define BLKIF_RING_SIZE 64 24.10 24.11 /* 24.12 * Maximum scatter/gather segments per request. 24.13 - * This is carefully chosen so that sizeof(blk_ring_t) <= PAGE_SIZE. 24.14 + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. 24.15 * NB. This could be 12 if the ring indexes weren't stored in the same page. 24.16 */ 24.17 -#define BLKIF_REQUEST_MAX_SEGMENTS 11 24.18 +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 24.19 + 24.20 +#define BLKIF_MAX_SECTORS_PER_SEGMENT 16 24.21 24.22 typedef struct { 24.23 unsigned char operation; /* BLKIF_OP_??? */ 24.24 - unsigned char nr_segments; /* number of segments (<= MAX_BLK_SEGS) */ 24.25 + unsigned char nr_segments; /* number of segments */ 24.26 blkif_vdev_t device; /* only for read/write requests */ 24.27 unsigned long id; /* private guest value, echoed in resp */ 24.28 - xen_sector_t sector_number; /* start sector idx on disk (r/w only) */ 24.29 - /* Least 9 bits is 'nr_sects'. High 23 bits is the address. */ 24.30 - unsigned long buffer_and_sects[MAX_BLK_SEGS]; 24.31 + blkif_sector_t sector_number; /* start sector idx on disk (r/w only) */ 24.32 + /* Least 9 bits is 'nr_sects'. High 23 bits is the address. */ 24.33 + /* We must have '0 <= nr_sects <= BLKIF_MAX_SECTORS_PER_SEGMENT'. */ 24.34 + unsigned long buffer_and_sects[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 24.35 } blkif_request_t; 24.36 24.37 typedef struct { 24.38 @@ -59,8 +62,8 @@ typedef unsigned int BLKIF_RING_IDX; 24.39 #define MASK_BLKIF_IDX(_i) ((_i)&(BLKIF_RING_SIZE-1)) 24.40 24.41 typedef struct { 24.42 - BLKIF_RING_IDX req_prod; /* Request producer. Updated by guest OS. */ 24.43 - BLKIF_RING_IDX resp_prod; /* Response producer. Updated by Xen. */ 24.44 + BLKIF_RING_IDX req_prod; /* Request producer. Updated by front-end. */ 24.45 + BLKIF_RING_IDX resp_prod; /* Response producer. Updated by back-end. */ 24.46 union { 24.47 blkif_request_t req; 24.48 blkif_response_t resp; 24.49 @@ -103,7 +106,7 @@ typedef struct { 24.50 typedef struct { 24.51 blkif_vdev_t device; /* Device number (opaque 16 bit value). */ 24.52 unsigned short info; /* Device type and flags (VDISK_*). */ 24.53 - xen_sector_t capacity; /* Size in terms of 512-byte sectors. */ 24.54 + blkif_sector_t capacity; /* Size in terms of 512-byte sectors. */ 24.55 } vdisk_t; 24.56 24.57 #endif /* __SHARED_BLKIF_H__ */
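
The interesting detail in blkif_request_t is the packed buffer_and_sects[] array: each segment's buffer machine address is 512-byte aligned, so its low 9 bits are reused to carry nr_sects, which the new comment bounds by BLKIF_MAX_SECTORS_PER_SEGMENT (16). A sketch of the packing, using hypothetical pack_seg()/unpack_seg() helpers that are not part of the interface:

    #include <assert.h>
    #include <stdio.h>

    #define SECTOR_SHIFT 9
    #define SECTOR_MASK  ((1UL << SECTOR_SHIFT) - 1)  /* low 9 bits */

    /* Pack a sector-aligned buffer machine address with a sector count. */
    static unsigned long pack_seg(unsigned long buffer_ma,
                                  unsigned long nr_sects)
    {
        assert((buffer_ma & SECTOR_MASK) == 0); /* 512-byte aligned       */
        assert(nr_sects <= 16);                 /* MAX_SECTORS_PER_SEGMENT */
        return buffer_ma | nr_sects;
    }

    static void unpack_seg(unsigned long seg, unsigned long *buffer_ma,
                           unsigned long *nr_sects)
    {
        *buffer_ma = seg & ~SECTOR_MASK;
        *nr_sects  = seg &  SECTOR_MASK;
    }

    int main(void)
    {
        unsigned long ma, nr;
        unpack_seg(pack_seg(0x12345600UL, 8), &ma, &nr);
        printf("ma=%#lx nr=%lu\n", ma, nr);     /* ma=0x12345600 nr=8 */
        return 0;
    }
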
25.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile Sun May 02 00:27:58 2004 +0000 25.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/Makefile Sun May 02 00:52:45 2004 +0000 25.3 @@ -1,3 +1,3 @@ 25.4 O_TARGET := drv.o 25.5 -obj-y := block.o vbd.o 25.6 +obj-y := main.o vbd.o 25.7 include $(TOPDIR)/Rules.make
26.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.c Sun May 02 00:27:58 2004 +0000 26.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 26.3 @@ -1,625 +0,0 @@ 26.4 -/****************************************************************************** 26.5 - * block.c 26.6 - * 26.7 - * Xenolinux virtual block-device driver. 26.8 - * 26.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 26.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 26.11 - */ 26.12 - 26.13 -#include "block.h" 26.14 -#include <linux/blk.h> 26.15 -#include <linux/cdrom.h> 26.16 -#include <linux/tqueue.h> 26.17 -#include <linux/sched.h> 26.18 -#include <scsi/scsi.h> 26.19 - 26.20 -#include <linux/interrupt.h> 26.21 - 26.22 -typedef unsigned char byte; /* from linux/ide.h */ 26.23 - 26.24 -#define STATE_ACTIVE 0 26.25 -#define STATE_SUSPENDED 1 26.26 -#define STATE_CLOSED 2 26.27 -static unsigned int state = STATE_SUSPENDED; 26.28 - 26.29 -/* Dynamically-mapped IRQs. */ 26.30 -static int xlblk_response_irq, xlblk_update_irq; 26.31 - 26.32 -static blk_ring_t *blk_ring; 26.33 -static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ 26.34 -static BLK_RING_IDX req_prod; /* Private request producer. */ 26.35 - 26.36 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */ 26.37 -#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ 26.38 - (state != STATE_ACTIVE)) 26.39 - 26.40 - 26.41 -/* 26.42 - * Request queues with outstanding work, but ring is currently full. 26.43 - * We need no special lock here, as we always access this with the 26.44 - * io_request_lock held. We only need a small maximum list. 26.45 - */ 26.46 -#define MAX_PENDING 8 26.47 -static request_queue_t *pending_queues[MAX_PENDING]; 26.48 -static int nr_pending; 26.49 - 26.50 -static kdev_t sg_dev; 26.51 -static int sg_operation = -1; 26.52 -static unsigned long sg_next_sect; 26.53 -#define DISABLE_SCATTERGATHER() (sg_operation = -1) 26.54 - 26.55 -static inline void signal_requests_to_xen(void) 26.56 -{ 26.57 - block_io_op_t op; 26.58 - 26.59 - DISABLE_SCATTERGATHER(); 26.60 - blk_ring->req_prod = req_prod; 26.61 - 26.62 - op.cmd = BLOCK_IO_OP_SIGNAL; 26.63 - HYPERVISOR_block_io_op(&op); 26.64 - return; 26.65 -} 26.66 - 26.67 - 26.68 -/* 26.69 - * xlblk_update_int/update-vbds_task - handle VBD update events from Xen 26.70 - * 26.71 - * Schedule a task for keventd to run, which will update the VBDs and perform 26.72 - * the corresponding updates to our view of VBD state, so the XenoLinux will 26.73 - * respond to changes / additions / deletions to the set of VBDs automatically. 
26.74 - */ 26.75 -static struct tq_struct update_tq; 26.76 -static void update_vbds_task(void *unused) 26.77 -{ 26.78 - xlvbd_update_vbds(); 26.79 -} 26.80 -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) 26.81 -{ 26.82 - update_tq.routine = update_vbds_task; 26.83 - schedule_task(&update_tq); 26.84 -} 26.85 - 26.86 - 26.87 -int xen_block_open(struct inode *inode, struct file *filep) 26.88 -{ 26.89 - short xldev = inode->i_rdev; 26.90 - struct gendisk *gd = get_gendisk(xldev); 26.91 - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 26.92 - short minor = MINOR(xldev); 26.93 - 26.94 - if ( gd->part[minor].nr_sects == 0 ) 26.95 - { 26.96 - /* 26.97 - * Device either doesn't exist, or has zero capacity; we use a few 26.98 - * cheesy heuristics to return the relevant error code 26.99 - */ 26.100 - if ( (gd->sizes[minor >> gd->minor_shift] != 0) || 26.101 - ((minor & (gd->max_p - 1)) != 0) ) 26.102 - { 26.103 - /* 26.104 - * We have a real device, but no such partition, or we just have a 26.105 - * partition number so guess this is the problem. 26.106 - */ 26.107 - return -ENXIO; /* no such device or address */ 26.108 - } 26.109 - else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) 26.110 - { 26.111 - /* This is a removable device => assume that media is missing. */ 26.112 - return -ENOMEDIUM; /* media not present (this is a guess) */ 26.113 - } 26.114 - else 26.115 - { 26.116 - /* Just go for the general 'no such device' error. */ 26.117 - return -ENODEV; /* no such device */ 26.118 - } 26.119 - } 26.120 - 26.121 - /* Update of usage count is protected by per-device semaphore. */ 26.122 - disk->usage++; 26.123 - 26.124 - return 0; 26.125 -} 26.126 - 26.127 - 26.128 -int xen_block_release(struct inode *inode, struct file *filep) 26.129 -{ 26.130 - xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 26.131 - 26.132 - /* 26.133 - * When usage drops to zero it may allow more VBD updates to occur. 26.134 - * Update of usage count is protected by a per-device semaphore. 26.135 - */ 26.136 - if ( --disk->usage == 0 ) 26.137 - { 26.138 - update_tq.routine = update_vbds_task; 26.139 - schedule_task(&update_tq); 26.140 - } 26.141 - 26.142 - return 0; 26.143 -} 26.144 - 26.145 - 26.146 -int xen_block_ioctl(struct inode *inode, struct file *filep, 26.147 - unsigned command, unsigned long argument) 26.148 -{ 26.149 - kdev_t dev = inode->i_rdev; 26.150 - struct hd_geometry *geo = (struct hd_geometry *)argument; 26.151 - struct gendisk *gd; 26.152 - struct hd_struct *part; 26.153 - int i; 26.154 - 26.155 - /* NB. No need to check permissions. That is done for us. 
*/ 26.156 - 26.157 - DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 26.158 - command, (long) argument, dev); 26.159 - 26.160 - gd = get_gendisk(dev); 26.161 - part = &gd->part[MINOR(dev)]; 26.162 - 26.163 - switch ( command ) 26.164 - { 26.165 - case BLKGETSIZE: 26.166 - DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 26.167 - return put_user(part->nr_sects, (unsigned long *) argument); 26.168 - 26.169 - case BLKGETSIZE64: 26.170 - DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, 26.171 - (u64)part->nr_sects * 512); 26.172 - return put_user((u64)part->nr_sects * 512, (u64 *) argument); 26.173 - 26.174 - case BLKRRPART: /* re-read partition table */ 26.175 - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); 26.176 - return xen_block_revalidate(dev); 26.177 - 26.178 - case BLKSSZGET: 26.179 - return hardsect_size[MAJOR(dev)][MINOR(dev)]; 26.180 - 26.181 - case BLKBSZGET: /* get block size */ 26.182 - DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); 26.183 - break; 26.184 - 26.185 - case BLKBSZSET: /* set block size */ 26.186 - DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); 26.187 - break; 26.188 - 26.189 - case BLKRASET: /* set read-ahead */ 26.190 - DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); 26.191 - break; 26.192 - 26.193 - case BLKRAGET: /* get read-ahead */ 26.194 - DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); 26.195 - break; 26.196 - 26.197 - case HDIO_GETGEO: 26.198 - /* note: these values are complete garbage */ 26.199 - DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); 26.200 - if (!argument) return -EINVAL; 26.201 - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 26.202 - if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; 26.203 - if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; 26.204 - if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; 26.205 - return 0; 26.206 - 26.207 - case HDIO_GETGEO_BIG: 26.208 - /* note: these values are complete garbage */ 26.209 - DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); 26.210 - if (!argument) return -EINVAL; 26.211 - if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 26.212 - if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; 26.213 - if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; 26.214 - if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; 26.215 - return 0; 26.216 - 26.217 - case CDROMMULTISESSION: 26.218 - DPRINTK("FIXME: support multisession CDs later\n"); 26.219 - for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) 26.220 - if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; 26.221 - return 0; 26.222 - 26.223 - case SCSI_IOCTL_GET_BUS_NUMBER: 26.224 - DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev"); 26.225 - return -ENOSYS; 26.226 - 26.227 - default: 26.228 - printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command); 26.229 - return -ENOSYS; 26.230 - } 26.231 - 26.232 - return 0; 26.233 -} 26.234 - 26.235 -/* check media change: should probably do something here in some cases :-) */ 26.236 -int xen_block_check(kdev_t dev) 26.237 -{ 26.238 - DPRINTK("xen_block_check\n"); 26.239 - return 0; 26.240 -} 26.241 - 26.242 -int xen_block_revalidate(kdev_t dev) 26.243 -{ 26.244 - struct block_device *bd; 26.245 - struct gendisk *gd; 26.246 - xl_disk_t *disk; 26.247 - unsigned long capacity; 26.248 - int i, rc = 0; 26.249 - 26.250 - if ( (bd = bdget(dev)) == NULL ) 26.251 - return -EINVAL; 26.252 - 26.253 - /* 26.254 - * Update of partition info, and check of 
usage count, is protected 26.255 - * by the per-block-device semaphore. 26.256 - */ 26.257 - down(&bd->bd_sem); 26.258 - 26.259 - if ( ((gd = get_gendisk(dev)) == NULL) || 26.260 - ((disk = xldev_to_xldisk(dev)) == NULL) || 26.261 - ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) 26.262 - { 26.263 - rc = -EINVAL; 26.264 - goto out; 26.265 - } 26.266 - 26.267 - if ( disk->usage > 1 ) 26.268 - { 26.269 - rc = -EBUSY; 26.270 - goto out; 26.271 - } 26.272 - 26.273 - /* Only reread partition table if VBDs aren't mapped to partitions. */ 26.274 - if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) 26.275 - { 26.276 - for ( i = gd->max_p - 1; i >= 0; i-- ) 26.277 - { 26.278 - invalidate_device(dev+i, 1); 26.279 - gd->part[MINOR(dev+i)].start_sect = 0; 26.280 - gd->part[MINOR(dev+i)].nr_sects = 0; 26.281 - gd->sizes[MINOR(dev+i)] = 0; 26.282 - } 26.283 - 26.284 - grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); 26.285 - } 26.286 - 26.287 - out: 26.288 - up(&bd->bd_sem); 26.289 - bdput(bd); 26.290 - return rc; 26.291 -} 26.292 - 26.293 - 26.294 -/* 26.295 - * hypervisor_request 26.296 - * 26.297 - * request block io 26.298 - * 26.299 - * id: for guest use only. 26.300 - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*} 26.301 - * buffer: buffer to read/write into. this should be a 26.302 - * virtual address in the guest os. 26.303 - */ 26.304 -static int hypervisor_request(unsigned long id, 26.305 - int operation, 26.306 - char * buffer, 26.307 - unsigned long sector_number, 26.308 - unsigned short nr_sectors, 26.309 - kdev_t device) 26.310 -{ 26.311 - unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 26.312 - struct gendisk *gd; 26.313 - blk_ring_req_entry_t *req; 26.314 - struct buffer_head *bh; 26.315 - 26.316 - if ( unlikely(nr_sectors >= (1<<9)) ) 26.317 - BUG(); 26.318 - if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) 26.319 - BUG(); 26.320 - 26.321 - if ( unlikely(state == STATE_CLOSED) ) 26.322 - return 1; 26.323 - 26.324 - switch ( operation ) 26.325 - { 26.326 - 26.327 - case XEN_BLOCK_READ: 26.328 - case XEN_BLOCK_WRITE: 26.329 - gd = get_gendisk(device); 26.330 - 26.331 - /* 26.332 - * Update the sector_number we'll pass down as appropriate; note that 26.333 - * we could sanity check that resulting sector will be in this 26.334 - * partition, but this will happen in xen anyhow. 26.335 - */ 26.336 - sector_number += gd->part[MINOR(device)].start_sect; 26.337 - 26.338 - /* 26.339 - * If this unit doesn't consist of virtual (i.e., Xen-specified) 26.340 - * partitions then we clear the partn bits from the device number. 
26.341 - */ 26.342 - if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 26.343 - GENHD_FL_VIRT_PARTNS) ) 26.344 - device &= ~(gd->max_p - 1); 26.345 - 26.346 - if ( (sg_operation == operation) && 26.347 - (sg_dev == device) && 26.348 - (sg_next_sect == sector_number) ) 26.349 - { 26.350 - req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req; 26.351 - bh = (struct buffer_head *)id; 26.352 - bh->b_reqnext = (struct buffer_head *)req->id; 26.353 - req->id = id; 26.354 - req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors; 26.355 - if ( ++req->nr_segments < MAX_BLK_SEGS ) 26.356 - sg_next_sect += nr_sectors; 26.357 - else 26.358 - DISABLE_SCATTERGATHER(); 26.359 - return 0; 26.360 - } 26.361 - else if ( RING_PLUGGED ) 26.362 - { 26.363 - return 1; 26.364 - } 26.365 - else 26.366 - { 26.367 - sg_operation = operation; 26.368 - sg_dev = device; 26.369 - sg_next_sect = sector_number + nr_sectors; 26.370 - } 26.371 - break; 26.372 - 26.373 - default: 26.374 - panic("unknown op %d\n", operation); 26.375 - } 26.376 - 26.377 - /* Fill out a communications ring structure. */ 26.378 - req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; 26.379 - req->id = id; 26.380 - req->operation = operation; 26.381 - req->sector_number = (xen_sector_t)sector_number; 26.382 - req->device = device; 26.383 - req->nr_segments = 1; 26.384 - req->buffer_and_sects[0] = buffer_ma | nr_sectors; 26.385 - req_prod++; 26.386 - 26.387 - return 0; 26.388 -} 26.389 - 26.390 - 26.391 -/* 26.392 - * do_xlblk_request 26.393 - * read a block; request is in a request queue 26.394 - */ 26.395 -void do_xlblk_request(request_queue_t *rq) 26.396 -{ 26.397 - struct request *req; 26.398 - struct buffer_head *bh, *next_bh; 26.399 - int rw, nsect, full, queued = 0; 26.400 - 26.401 - DPRINTK("xlblk.c::do_xlblk_request\n"); 26.402 - 26.403 - while ( !rq->plugged && !list_empty(&rq->queue_head)) 26.404 - { 26.405 - if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 26.406 - goto out; 26.407 - 26.408 - DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", 26.409 - req, req->cmd, req->sector, 26.410 - req->current_nr_sectors, req->nr_sectors, req->bh); 26.411 - 26.412 - rw = req->cmd; 26.413 - if ( rw == READA ) 26.414 - rw = READ; 26.415 - if ( unlikely((rw != READ) && (rw != WRITE)) ) 26.416 - panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); 26.417 - 26.418 - req->errors = 0; 26.419 - 26.420 - bh = req->bh; 26.421 - while ( bh != NULL ) 26.422 - { 26.423 - next_bh = bh->b_reqnext; 26.424 - bh->b_reqnext = NULL; 26.425 - 26.426 - full = hypervisor_request( 26.427 - (unsigned long)bh, 26.428 - (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 26.429 - bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); 26.430 - 26.431 - if ( full ) 26.432 - { 26.433 - bh->b_reqnext = next_bh; 26.434 - pending_queues[nr_pending++] = rq; 26.435 - if ( unlikely(nr_pending >= MAX_PENDING) ) 26.436 - BUG(); 26.437 - goto out; 26.438 - } 26.439 - 26.440 - queued++; 26.441 - 26.442 - /* Dequeue the buffer head from the request. */ 26.443 - nsect = bh->b_size >> 9; 26.444 - bh = req->bh = next_bh; 26.445 - 26.446 - if ( bh != NULL ) 26.447 - { 26.448 - /* There's another buffer head to do. Update the request. 
*/ 26.449 - req->hard_sector += nsect; 26.450 - req->hard_nr_sectors -= nsect; 26.451 - req->sector = req->hard_sector; 26.452 - req->nr_sectors = req->hard_nr_sectors; 26.453 - req->current_nr_sectors = bh->b_size >> 9; 26.454 - req->buffer = bh->b_data; 26.455 - } 26.456 - else 26.457 - { 26.458 - /* That was the last buffer head. Finalise the request. */ 26.459 - if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) 26.460 - BUG(); 26.461 - blkdev_dequeue_request(req); 26.462 - end_that_request_last(req); 26.463 - } 26.464 - } 26.465 - } 26.466 - 26.467 - out: 26.468 - if ( queued != 0 ) signal_requests_to_xen(); 26.469 -} 26.470 - 26.471 - 26.472 -static void kick_pending_request_queues(void) 26.473 -{ 26.474 - /* We kick pending request queues if the ring is reasonably empty. */ 26.475 - if ( (nr_pending != 0) && 26.476 - ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) ) 26.477 - { 26.478 - /* Attempt to drain the queue, but bail if the ring becomes full. */ 26.479 - while ( (nr_pending != 0) && !RING_PLUGGED ) 26.480 - do_xlblk_request(pending_queues[--nr_pending]); 26.481 - } 26.482 -} 26.483 - 26.484 - 26.485 -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) 26.486 -{ 26.487 - BLK_RING_IDX i; 26.488 - unsigned long flags; 26.489 - struct buffer_head *bh, *next_bh; 26.490 - 26.491 - if ( unlikely(state == STATE_CLOSED) ) 26.492 - return; 26.493 - 26.494 - spin_lock_irqsave(&io_request_lock, flags); 26.495 - 26.496 - for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) 26.497 - { 26.498 - blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; 26.499 - switch ( bret->operation ) 26.500 - { 26.501 - case XEN_BLOCK_READ: 26.502 - case XEN_BLOCK_WRITE: 26.503 - if ( unlikely(bret->status != 0) ) 26.504 - DPRINTK("Bad return from blkdev data request: %lx\n", 26.505 - bret->status); 26.506 - for ( bh = (struct buffer_head *)bret->id; 26.507 - bh != NULL; 26.508 - bh = next_bh ) 26.509 - { 26.510 - next_bh = bh->b_reqnext; 26.511 - bh->b_reqnext = NULL; 26.512 - bh->b_end_io(bh, !bret->status); 26.513 - } 26.514 - break; 26.515 - 26.516 - default: 26.517 - BUG(); 26.518 - } 26.519 - } 26.520 - 26.521 - resp_cons = i; 26.522 - 26.523 - kick_pending_request_queues(); 26.524 - 26.525 - spin_unlock_irqrestore(&io_request_lock, flags); 26.526 -} 26.527 - 26.528 - 26.529 -static void reset_xlblk_interface(void) 26.530 -{ 26.531 - block_io_op_t op; 26.532 - 26.533 - nr_pending = 0; 26.534 - 26.535 - op.cmd = BLOCK_IO_OP_RESET; 26.536 - if ( HYPERVISOR_block_io_op(&op) != 0 ) 26.537 - printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n"); 26.538 - 26.539 - op.cmd = BLOCK_IO_OP_RING_ADDRESS; 26.540 - (void)HYPERVISOR_block_io_op(&op); 26.541 - 26.542 - set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT); 26.543 - blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE); 26.544 - blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; 26.545 - 26.546 - wmb(); 26.547 - state = STATE_ACTIVE; 26.548 -} 26.549 - 26.550 - 26.551 -int __init xlblk_init(void) 26.552 -{ 26.553 - int error; 26.554 - 26.555 - reset_xlblk_interface(); 26.556 - 26.557 - xlblk_response_irq = bind_virq_to_irq(VIRQ_BLKDEV); 26.558 - xlblk_update_irq = bind_virq_to_irq(VIRQ_VBD_UPD); 26.559 - 26.560 - error = request_irq(xlblk_response_irq, xlblk_response_int, 26.561 - SA_SAMPLE_RANDOM, "blkdev", NULL); 26.562 - if ( error ) 26.563 - { 26.564 - printk(KERN_ALERT "Could not allocate receive interrupt\n"); 26.565 - goto fail; 26.566 - } 26.567 - 26.568 - 
error = request_irq(xlblk_update_irq, xlblk_update_int, 26.569 - 0, "blkdev", NULL); 26.570 - 26.571 - if ( error ) 26.572 - { 26.573 - printk(KERN_ALERT "Could not allocate block update interrupt\n"); 26.574 - goto fail; 26.575 - } 26.576 - 26.577 - (void)xlvbd_init(); 26.578 - 26.579 - return 0; 26.580 - 26.581 - fail: 26.582 - return error; 26.583 -} 26.584 - 26.585 - 26.586 -static void __exit xlblk_cleanup(void) 26.587 -{ 26.588 - xlvbd_cleanup(); 26.589 - free_irq(xlblk_response_irq, NULL); 26.590 - free_irq(xlblk_update_irq, NULL); 26.591 - unbind_virq_from_irq(VIRQ_BLKDEV); 26.592 - unbind_virq_from_irq(VIRQ_VBD_UPD); 26.593 -} 26.594 - 26.595 - 26.596 -#ifdef MODULE 26.597 -module_init(xlblk_init); 26.598 -module_exit(xlblk_cleanup); 26.599 -#endif 26.600 - 26.601 - 26.602 -void blkdev_suspend(void) 26.603 -{ 26.604 - state = STATE_SUSPENDED; 26.605 - wmb(); 26.606 - 26.607 - while ( resp_cons != blk_ring->req_prod ) 26.608 - { 26.609 - barrier(); 26.610 - current->state = TASK_INTERRUPTIBLE; 26.611 - schedule_timeout(1); 26.612 - } 26.613 - 26.614 - wmb(); 26.615 - state = STATE_CLOSED; 26.616 - wmb(); 26.617 - 26.618 - clear_fixmap(FIX_BLKRING_BASE); 26.619 -} 26.620 - 26.621 - 26.622 -void blkdev_resume(void) 26.623 -{ 26.624 - reset_xlblk_interface(); 26.625 - spin_lock_irq(&io_request_lock); 26.626 - kick_pending_request_queues(); 26.627 - spin_unlock_irq(&io_request_lock); 26.628 -}
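
Both the driver deleted above and its replacement (frontend/main.c, later in this changeset) drive the shared ring with free-running producer/consumer indices: req_prod and resp_cons only ever increase, a slot is selected by masking with the power-of-two ring size, and the ring counts as full when the indices are a whole ring apart (the RING_PLUGGED test). A minimal sketch of that convention, assuming a 64-entry ring:

    #include <stdio.h>

    #define RING_SIZE 64                        /* must be a power of two */
    #define MASK_IDX(i) ((i) & (RING_SIZE - 1)) /* index -> ring slot     */

    static unsigned int req_prod, resp_cons;    /* count upwards forever  */

    static int ring_full(void)  { return (req_prod - resp_cons) == RING_SIZE; }
    static int ring_empty(void) { return req_prod == resp_cons; }

    int main(void)
    {
        int queued = 0;
        while ( !ring_full() )  { req_prod++; queued++; } /* produce */
        while ( !ring_empty() ) resp_cons++;              /* consume */
        /* Unsigned wraparound keeps req_prod - resp_cons correct even
         * after both indices overflow. */
        printf("queued=%d full=%d empty=%d\n",
               queued, ring_full(), ring_empty());        /* 64 0 1 */
        return 0;
    }
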
27.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/block.h Sun May 02 00:27:58 2004 +0000 27.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 27.3 @@ -1,82 +0,0 @@ 27.4 -/****************************************************************************** 27.5 - * block.h 27.6 - * 27.7 - * Shared definitions between all levels of XenoLinux Virtual block devices. 27.8 - */ 27.9 - 27.10 -#ifndef __XEN_DRIVERS_BLOCK_H__ 27.11 -#define __XEN_DRIVERS_BLOCK_H__ 27.12 - 27.13 -#include <linux/config.h> 27.14 -#include <linux/module.h> 27.15 - 27.16 -#include <linux/kernel.h> 27.17 -#include <linux/sched.h> 27.18 -#include <linux/slab.h> 27.19 -#include <linux/string.h> 27.20 -#include <linux/errno.h> 27.21 - 27.22 -#include <linux/fs.h> 27.23 -#include <linux/hdreg.h> 27.24 -#include <linux/blkdev.h> 27.25 -#include <linux/major.h> 27.26 - 27.27 -#include <asm/hypervisor-ifs/hypervisor-if.h> 27.28 -#include <asm/hypervisor-ifs/vbd.h> 27.29 -#include <asm/io.h> 27.30 -#include <asm/atomic.h> 27.31 -#include <asm/uaccess.h> 27.32 - 27.33 -#if 0 27.34 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) 27.35 -#else 27.36 -#define DPRINTK(_f, _a...) ((void)0) 27.37 -#endif 27.38 - 27.39 -#if 0 27.40 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) 27.41 -#else 27.42 -#define DPRINTK_IOCTL(_f, _a...) ((void)0) 27.43 -#endif 27.44 - 27.45 -/* Private gendisk->flags[] values. */ 27.46 -#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */ 27.47 -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ 27.48 - 27.49 -/* 27.50 - * We have one of these per vbd, whether ide, scsi or 'other'. 27.51 - * They hang in an array off the gendisk structure. We may end up putting 27.52 - * all kinds of interesting stuff here :-) 27.53 - */ 27.54 -typedef struct xl_disk { 27.55 - int usage; 27.56 -} xl_disk_t; 27.57 - 27.58 -extern int xen_control_msg(int operration, char *buffer, int size); 27.59 -extern int xen_block_open(struct inode *inode, struct file *filep); 27.60 -extern int xen_block_release(struct inode *inode, struct file *filep); 27.61 -extern int xen_block_ioctl(struct inode *inode, struct file *filep, 27.62 - unsigned command, unsigned long argument); 27.63 -extern int xen_block_check(kdev_t dev); 27.64 -extern int xen_block_revalidate(kdev_t dev); 27.65 -extern void do_xlblk_request (request_queue_t *rq); 27.66 - 27.67 -extern void xlvbd_update_vbds(void); 27.68 - 27.69 -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) 27.70 -{ 27.71 - struct gendisk *gd = get_gendisk(xldev); 27.72 - 27.73 - if ( gd == NULL ) 27.74 - return NULL; 27.75 - 27.76 - return (xl_disk_t *)gd->real_devices + 27.77 - (MINOR(xldev) >> gd->minor_shift); 27.78 -} 27.79 - 27.80 - 27.81 -/* Virtual block-device subsystem. */ 27.82 -extern int xlvbd_init(void); 27.83 -extern void xlvbd_cleanup(void); 27.84 - 27.85 -#endif /* __XEN_DRIVERS_BLOCK_H__ */
28.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 28.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/common.h Sun May 02 00:52:45 2004 +0000 28.3 @@ -0,0 +1,84 @@ 28.4 +/****************************************************************************** 28.5 + * arch/xen/drivers/blkif/frontend/common.h 28.6 + * 28.7 + * Shared definitions between all levels of XenoLinux Virtual block devices. 28.8 + */ 28.9 + 28.10 +#ifndef __XEN_DRIVERS_COMMON_H__ 28.11 +#define __XEN_DRIVERS_COMMON_H__ 28.12 + 28.13 +#include <linux/config.h> 28.14 +#include <linux/module.h> 28.15 + 28.16 +#include <linux/kernel.h> 28.17 +#include <linux/sched.h> 28.18 +#include <linux/slab.h> 28.19 +#include <linux/string.h> 28.20 +#include <linux/errno.h> 28.21 + 28.22 +#include <linux/fs.h> 28.23 +#include <linux/hdreg.h> 28.24 +#include <linux/blkdev.h> 28.25 +#include <linux/major.h> 28.26 + 28.27 +#include <asm/hypervisor-ifs/hypervisor-if.h> 28.28 +#include <asm/hypervisor-ifs/vbd.h> 28.29 +#include <asm/io.h> 28.30 +#include <asm/atomic.h> 28.31 +#include <asm/uaccess.h> 28.32 + 28.33 +#include "../blkif.h" 28.34 + 28.35 +#if 0 28.36 +#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a ) 28.37 +#else 28.38 +#define DPRINTK(_f, _a...) ((void)0) 28.39 +#endif 28.40 + 28.41 +#if 0 28.42 +#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a ) 28.43 +#else 28.44 +#define DPRINTK_IOCTL(_f, _a...) ((void)0) 28.45 +#endif 28.46 + 28.47 +/* Private gendisk->flags[] values. */ 28.48 +#define GENHD_FL_XEN 2 /* Is unit a Xen block device? */ 28.49 +#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */ 28.50 + 28.51 +/* 28.52 + * We have one of these per vbd, whether ide, scsi or 'other'. 28.53 + * They hang in an array off the gendisk structure. We may end up putting 28.54 + * all kinds of interesting stuff here :-) 28.55 + */ 28.56 +typedef struct xl_disk { 28.57 + int usage; 28.58 +} xl_disk_t; 28.59 + 28.60 +extern int blkif_open(struct inode *inode, struct file *filep); 28.61 +extern int blkif_release(struct inode *inode, struct file *filep); 28.62 +extern int blkif_ioctl(struct inode *inode, struct file *filep, 28.63 + unsigned command, unsigned long argument); 28.64 +extern int blkif_check(kdev_t dev); 28.65 +extern int blkif_revalidate(kdev_t dev); 28.66 +extern void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp); 28.67 +extern void do_blkif_request (request_queue_t *rq); 28.68 + 28.69 +extern void xlvbd_update_vbds(void); 28.70 + 28.71 +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) 28.72 +{ 28.73 + struct gendisk *gd = get_gendisk(xldev); 28.74 + 28.75 + if ( gd == NULL ) 28.76 + return NULL; 28.77 + 28.78 + return (xl_disk_t *)gd->real_devices + 28.79 + (MINOR(xldev) >> gd->minor_shift); 28.80 +} 28.81 + 28.82 + 28.83 +/* Virtual block-device subsystem. */ 28.84 +extern int xlvbd_init(void); 28.85 +extern void xlvbd_cleanup(void); 28.86 + 28.87 +#endif /* __XEN_DRIVERS_COMMON_H__ */
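
xldev_to_xldisk() in the new common.h recovers the per-disk xl_disk_t by shifting away the partition bits of the minor number: each disk owns 2^minor_shift consecutive minors, with the low bits selecting the partition. A worked example, assuming minor_shift == 4 (the real value comes from the gendisk):

    #include <stdio.h>

    #define MINOR_SHIFT 4   /* assumed: 16 minors (partitions) per disk */

    int main(void)
    {
        unsigned int minors[] = { 0, 5, 16, 33 };
        unsigned int i;
        for ( i = 0; i < 4; i++ )
            printf("minor %2u -> disk %u, partition %u\n",
                   minors[i], minors[i] >> MINOR_SHIFT,
                   minors[i] & ((1 << MINOR_SHIFT) - 1));
        /* minor 0 -> disk 0 part 0; minor 5 -> disk 0 part 5;
         * minor 16 -> disk 1 part 0; minor 33 -> disk 2 part 1. */
        return 0;
    }
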
29.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 29.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/main.c Sun May 02 00:52:45 2004 +0000 29.3 @@ -0,0 +1,727 @@ 29.4 +/****************************************************************************** 29.5 + * arch/xen/drivers/blkif/frontend/main.c 29.6 + * 29.7 + * Xenolinux virtual block-device driver. 29.8 + * 29.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 29.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 29.11 + */ 29.12 + 29.13 +#include "common.h" 29.14 +#include <linux/blk.h> 29.15 +#include <linux/cdrom.h> 29.16 +#include <linux/tqueue.h> 29.17 +#include <linux/sched.h> 29.18 +#include <linux/interrupt.h> 29.19 +#include <scsi/scsi.h> 29.20 +#include <asm/ctrl_if.h> 29.21 + 29.22 +typedef unsigned char byte; /* from linux/ide.h */ 29.23 + 29.24 +#define BLKIF_STATE_CLOSED 0 29.25 +#define BLKIF_STATE_DISCONNECTED 1 29.26 +#define BLKIF_STATE_CONNECTED 2 29.27 +static unsigned int blkif_state = BLKIF_STATE_CLOSED; 29.28 +static unsigned int blkif_evtchn, blkif_irq; 29.29 + 29.30 +static struct tq_struct blkif_statechange_tq; 29.31 + 29.32 +static int blkif_control_rsp_valid; 29.33 +static blkif_response_t blkif_control_rsp; 29.34 + 29.35 +static blkif_ring_t *blk_ring; 29.36 +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ 29.37 +static BLK_RING_IDX req_prod; /* Private request producer. */ 29.38 + 29.39 +/* We plug the I/O ring if the driver is suspended or if the ring is full. */ 29.40 +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ 29.41 + (blkif_state != BLKIF_STATE_CONNECTED)) 29.42 + 29.43 + 29.44 +/* 29.45 + * Request queues with outstanding work, but ring is currently full. 29.46 + * We need no special lock here, as we always access this with the 29.47 + * io_request_lock held. We only need a small maximum list. 29.48 + */ 29.49 +#define MAX_PENDING 8 29.50 +static request_queue_t *pending_queues[MAX_PENDING]; 29.51 +static int nr_pending; 29.52 + 29.53 +static kdev_t sg_dev; 29.54 +static int sg_operation = -1; 29.55 +static unsigned long sg_next_sect; 29.56 +#define DISABLE_SCATTERGATHER() (sg_operation = -1) 29.57 + 29.58 +static inline void flush_requests(void) 29.59 +{ 29.60 + DISABLE_SCATTERGATHER(); 29.61 + blk_ring->req_prod = req_prod; 29.62 + notify_via_evtchn(blkif_evtchn); 29.63 +} 29.64 + 29.65 + 29.66 +/* 29.67 + * blkif_update_int/update-vbds_task - handle VBD update events. 29.68 + * Schedule a task for keventd to run, which will update the VBDs and perform 29.69 + * the corresponding updates to our view of VBD state. 29.70 + */ 29.71 +static struct tq_struct update_tq; 29.72 +static void update_vbds_task(void *unused) 29.73 +{ 29.74 + xlvbd_update_vbds(); 29.75 +} 29.76 + 29.77 + 29.78 +int blkif_open(struct inode *inode, struct file *filep) 29.79 +{ 29.80 + short xldev = inode->i_rdev; 29.81 + struct gendisk *gd = get_gendisk(xldev); 29.82 + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 29.83 + short minor = MINOR(xldev); 29.84 + 29.85 + if ( gd->part[minor].nr_sects == 0 ) 29.86 + { 29.87 + /* 29.88 + * Device either doesn't exist, or has zero capacity; we use a few 29.89 + * cheesy heuristics to return the relevant error code 29.90 + */ 29.91 + if ( (gd->sizes[minor >> gd->minor_shift] != 0) || 29.92 + ((minor & (gd->max_p - 1)) != 0) ) 29.93 + { 29.94 + /* 29.95 + * We have a real device, but no such partition, or we just have a 29.96 + * partition number so guess this is the problem. 
29.97 + */ 29.98 + return -ENXIO; /* no such device or address */ 29.99 + } 29.100 + else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE ) 29.101 + { 29.102 + /* This is a removable device => assume that media is missing. */ 29.103 + return -ENOMEDIUM; /* media not present (this is a guess) */ 29.104 + } 29.105 + else 29.106 + { 29.107 + /* Just go for the general 'no such device' error. */ 29.108 + return -ENODEV; /* no such device */ 29.109 + } 29.110 + } 29.111 + 29.112 + /* Update of usage count is protected by per-device semaphore. */ 29.113 + disk->usage++; 29.114 + 29.115 + return 0; 29.116 +} 29.117 + 29.118 + 29.119 +int blkif_release(struct inode *inode, struct file *filep) 29.120 +{ 29.121 + xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); 29.122 + 29.123 + /* 29.124 + * When usage drops to zero it may allow more VBD updates to occur. 29.125 + * Update of usage count is protected by a per-device semaphore. 29.126 + */ 29.127 + if ( --disk->usage == 0 ) 29.128 + { 29.129 +#if 0 29.130 + update_tq.routine = update_vbds_task; 29.131 + schedule_task(&update_tq); 29.132 +#endif 29.133 + } 29.134 + 29.135 + return 0; 29.136 +} 29.137 + 29.138 + 29.139 +int blkif_ioctl(struct inode *inode, struct file *filep, 29.140 + unsigned command, unsigned long argument) 29.141 +{ 29.142 + kdev_t dev = inode->i_rdev; 29.143 + struct hd_geometry *geo = (struct hd_geometry *)argument; 29.144 + struct gendisk *gd; 29.145 + struct hd_struct *part; 29.146 + int i; 29.147 + 29.148 + /* NB. No need to check permissions. That is done for us. */ 29.149 + 29.150 + DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n", 29.151 + command, (long) argument, dev); 29.152 + 29.153 + gd = get_gendisk(dev); 29.154 + part = &gd->part[MINOR(dev)]; 29.155 + 29.156 + switch ( command ) 29.157 + { 29.158 + case BLKGETSIZE: 29.159 + DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 29.160 + return put_user(part->nr_sects, (unsigned long *) argument); 29.161 + 29.162 + case BLKGETSIZE64: 29.163 + DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64, 29.164 + (u64)part->nr_sects * 512); 29.165 + return put_user((u64)part->nr_sects * 512, (u64 *) argument); 29.166 + 29.167 + case BLKRRPART: /* re-read partition table */ 29.168 + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); 29.169 + return blkif_revalidate(dev); 29.170 + 29.171 + case BLKSSZGET: 29.172 + return hardsect_size[MAJOR(dev)][MINOR(dev)]; 29.173 + 29.174 + case BLKBSZGET: /* get block size */ 29.175 + DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET); 29.176 + break; 29.177 + 29.178 + case BLKBSZSET: /* set block size */ 29.179 + DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET); 29.180 + break; 29.181 + 29.182 + case BLKRASET: /* set read-ahead */ 29.183 + DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET); 29.184 + break; 29.185 + 29.186 + case BLKRAGET: /* get read-ahead */ 29.187 + DPRINTK_IOCTL(" BLKRAFET: %x\n", BLKRAGET); 29.188 + break; 29.189 + 29.190 + case HDIO_GETGEO: 29.191 + /* note: these values are complete garbage */ 29.192 + DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO); 29.193 + if (!argument) return -EINVAL; 29.194 + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 29.195 + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; 29.196 + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; 29.197 + if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT; 29.198 + return 0; 29.199 + 29.200 + case HDIO_GETGEO_BIG: 29.201 + /* note: these values are complete garbage */ 29.202 + 
DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG); 29.203 + if (!argument) return -EINVAL; 29.204 + if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT; 29.205 + if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT; 29.206 + if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT; 29.207 + if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT; 29.208 + return 0; 29.209 + 29.210 + case CDROMMULTISESSION: 29.211 + DPRINTK("FIXME: support multisession CDs later\n"); 29.212 + for ( i = 0; i < sizeof(struct cdrom_multisession); i++ ) 29.213 + if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT; 29.214 + return 0; 29.215 + 29.216 + case SCSI_IOCTL_GET_BUS_NUMBER: 29.217 + DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif"); 29.218 + return -ENOSYS; 29.219 + 29.220 + default: 29.221 + printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command); 29.222 + return -ENOSYS; 29.223 + } 29.224 + 29.225 + return 0; 29.226 +} 29.227 + 29.228 +/* check media change: should probably do something here in some cases :-) */ 29.229 +int blkif_check(kdev_t dev) 29.230 +{ 29.231 + DPRINTK("blkif_check\n"); 29.232 + return 0; 29.233 +} 29.234 + 29.235 +int blkif_revalidate(kdev_t dev) 29.236 +{ 29.237 + struct block_device *bd; 29.238 + struct gendisk *gd; 29.239 + xl_disk_t *disk; 29.240 + unsigned long capacity; 29.241 + int i, rc = 0; 29.242 + 29.243 + if ( (bd = bdget(dev)) == NULL ) 29.244 + return -EINVAL; 29.245 + 29.246 + /* 29.247 + * Update of partition info, and check of usage count, is protected 29.248 + * by the per-block-device semaphore. 29.249 + */ 29.250 + down(&bd->bd_sem); 29.251 + 29.252 + if ( ((gd = get_gendisk(dev)) == NULL) || 29.253 + ((disk = xldev_to_xldisk(dev)) == NULL) || 29.254 + ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) ) 29.255 + { 29.256 + rc = -EINVAL; 29.257 + goto out; 29.258 + } 29.259 + 29.260 + if ( disk->usage > 1 ) 29.261 + { 29.262 + rc = -EBUSY; 29.263 + goto out; 29.264 + } 29.265 + 29.266 + /* Only reread partition table if VBDs aren't mapped to partitions. */ 29.267 + if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) ) 29.268 + { 29.269 + for ( i = gd->max_p - 1; i >= 0; i-- ) 29.270 + { 29.271 + invalidate_device(dev+i, 1); 29.272 + gd->part[MINOR(dev+i)].start_sect = 0; 29.273 + gd->part[MINOR(dev+i)].nr_sects = 0; 29.274 + gd->sizes[MINOR(dev+i)] = 0; 29.275 + } 29.276 + 29.277 + grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity); 29.278 + } 29.279 + 29.280 + out: 29.281 + up(&bd->bd_sem); 29.282 + bdput(bd); 29.283 + return rc; 29.284 +} 29.285 + 29.286 + 29.287 +/* 29.288 + * blkif_queue_request 29.289 + * 29.290 + * request block io 29.291 + * 29.292 + * id: for guest use only. 29.293 + * operation: BLKIF_OP_{READ,WRITE,PROBE} 29.294 + * buffer: buffer to read/write into. this should be a 29.295 + * virtual address in the guest os. 
29.296 + */ 29.297 +static int blkif_queue_request(unsigned long id, 29.298 + int operation, 29.299 + char * buffer, 29.300 + unsigned long sector_number, 29.301 + unsigned short nr_sectors, 29.302 + kdev_t device) 29.303 +{ 29.304 + unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 29.305 + struct gendisk *gd; 29.306 + blkif_request_t *req; 29.307 + struct buffer_head *bh; 29.308 + 29.309 + if ( unlikely(nr_sectors >= (1<<9)) ) 29.310 + BUG(); 29.311 + if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) ) 29.312 + BUG(); 29.313 + 29.314 + if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) ) 29.315 + return 1; 29.316 + 29.317 + switch ( operation ) 29.318 + { 29.319 + 29.320 + case BLKIF_OP_READ: 29.321 + case BLKIF_OP_WRITE: 29.322 + gd = get_gendisk(device); 29.323 + 29.324 + /* 29.325 + * Update the sector_number we'll pass down as appropriate; note that 29.326 + * we could sanity check that resulting sector will be in this 29.327 + * partition, but this will happen in driver backend anyhow. 29.328 + */ 29.329 + sector_number += gd->part[MINOR(device)].start_sect; 29.330 + 29.331 + /* 29.332 + * If this unit doesn't consist of virtual partitions then we clear 29.333 + * the partn bits from the device number. 29.334 + */ 29.335 + if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 29.336 + GENHD_FL_VIRT_PARTNS) ) 29.337 + device &= ~(gd->max_p - 1); 29.338 + 29.339 + if ( (sg_operation == operation) && 29.340 + (sg_dev == device) && 29.341 + (sg_next_sect == sector_number) ) 29.342 + { 29.343 + req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req; 29.344 + bh = (struct buffer_head *)id; 29.345 + bh->b_reqnext = (struct buffer_head *)req->id; 29.346 + req->id = id; 29.347 + req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors; 29.348 + if ( ++req->nr_segments < MAX_BLK_SEGS ) 29.349 + sg_next_sect += nr_sectors; 29.350 + else 29.351 + DISABLE_SCATTERGATHER(); 29.352 + return 0; 29.353 + } 29.354 + else if ( RING_PLUGGED ) 29.355 + { 29.356 + return 1; 29.357 + } 29.358 + else 29.359 + { 29.360 + sg_operation = operation; 29.361 + sg_dev = device; 29.362 + sg_next_sect = sector_number + nr_sectors; 29.363 + } 29.364 + break; 29.365 + 29.366 + default: 29.367 + panic("unknown op %d\n", operation); 29.368 + } 29.369 + 29.370 + /* Fill out a communications ring structure. 
*/ 29.371 + req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req; 29.372 + req->id = id; 29.373 + req->operation = operation; 29.374 + req->sector_number = (blkif_sector_t)sector_number; 29.375 + req->device = device; 29.376 + req->nr_segments = 1; 29.377 + req->buffer_and_sects[0] = buffer_ma | nr_sectors; 29.378 + req_prod++; 29.379 + 29.380 + return 0; 29.381 +} 29.382 + 29.383 + 29.384 +/* 29.385 + * do_blkif_request 29.386 + * read a block; request is in a request queue 29.387 + */ 29.388 +void do_blkif_request(request_queue_t *rq) 29.389 +{ 29.390 + struct request *req; 29.391 + struct buffer_head *bh, *next_bh; 29.392 + int rw, nsect, full, queued = 0; 29.393 + 29.394 + DPRINTK("Entered do_blkif_request\n"); 29.395 + 29.396 + while ( !rq->plugged && !list_empty(&rq->queue_head)) 29.397 + { 29.398 + if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 29.399 + goto out; 29.400 + 29.401 + DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n", 29.402 + req, req->cmd, req->sector, 29.403 + req->current_nr_sectors, req->nr_sectors, req->bh); 29.404 + 29.405 + rw = req->cmd; 29.406 + if ( rw == READA ) 29.407 + rw = READ; 29.408 + if ( unlikely((rw != READ) && (rw != WRITE)) ) 29.409 + panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw); 29.410 + 29.411 + req->errors = 0; 29.412 + 29.413 + bh = req->bh; 29.414 + while ( bh != NULL ) 29.415 + { 29.416 + next_bh = bh->b_reqnext; 29.417 + bh->b_reqnext = NULL; 29.418 + 29.419 + full = blkif_queue_request( 29.420 + (unsigned long)bh, 29.421 + (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE, 29.422 + bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev); 29.423 + 29.424 + if ( full ) 29.425 + { 29.426 + bh->b_reqnext = next_bh; 29.427 + pending_queues[nr_pending++] = rq; 29.428 + if ( unlikely(nr_pending >= MAX_PENDING) ) 29.429 + BUG(); 29.430 + goto out; 29.431 + } 29.432 + 29.433 + queued++; 29.434 + 29.435 + /* Dequeue the buffer head from the request. */ 29.436 + nsect = bh->b_size >> 9; 29.437 + bh = req->bh = next_bh; 29.438 + 29.439 + if ( bh != NULL ) 29.440 + { 29.441 + /* There's another buffer head to do. Update the request. */ 29.442 + req->hard_sector += nsect; 29.443 + req->hard_nr_sectors -= nsect; 29.444 + req->sector = req->hard_sector; 29.445 + req->nr_sectors = req->hard_nr_sectors; 29.446 + req->current_nr_sectors = bh->b_size >> 9; 29.447 + req->buffer = bh->b_data; 29.448 + } 29.449 + else 29.450 + { 29.451 + /* That was the last buffer head. Finalise the request. */ 29.452 + if ( unlikely(end_that_request_first(req, 1, "XenBlk")) ) 29.453 + BUG(); 29.454 + blkdev_dequeue_request(req); 29.455 + end_that_request_last(req); 29.456 + } 29.457 + } 29.458 + } 29.459 + 29.460 + out: 29.461 + if ( queued != 0 ) 29.462 + flush_requests(); 29.463 +} 29.464 + 29.465 + 29.466 +static void kick_pending_request_queues(void) 29.467 +{ 29.468 + /* We kick pending request queues if the ring is reasonably empty. */ 29.469 + if ( (nr_pending != 0) && 29.470 + ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) ) 29.471 + { 29.472 + /* Attempt to drain the queue, but bail if the ring becomes full. 
*/ 29.473 + while ( (nr_pending != 0) && !RING_PLUGGED ) 29.474 + do_blkif_request(pending_queues[--nr_pending]); 29.475 + } 29.476 +} 29.477 + 29.478 + 29.479 +static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs) 29.480 +{ 29.481 + BLK_RING_IDX i; 29.482 + unsigned long flags; 29.483 + struct buffer_head *bh, *next_bh; 29.484 + 29.485 + if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ) 29.486 + return; 29.487 + 29.488 + spin_lock_irqsave(&io_request_lock, flags); 29.489 + 29.490 + for ( i = resp_cons; i != blk_ring->resp_prod; i++ ) 29.491 + { 29.492 + blkif_response_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp; 29.493 + switch ( bret->operation ) 29.494 + { 29.495 + case BLKIF_OP_READ: 29.496 + case BLKIF_OP_WRITE: 29.497 + if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) 29.498 + DPRINTK("Bad return from blkdev data request: %lx\n", 29.499 + bret->status); 29.500 + for ( bh = (struct buffer_head *)bret->id; 29.501 + bh != NULL; 29.502 + bh = next_bh ) 29.503 + { 29.504 + next_bh = bh->b_reqnext; 29.505 + bh->b_reqnext = NULL; 29.506 + bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY); 29.507 + } 29.508 + break; 29.509 + case BLKIF_OP_PROBE: 29.510 + memcpy(&blkif_control_rsp, bret, sizeof(*bret)); 29.511 + blkif_control_rsp_valid = 1; 29.512 + break; 29.513 + default: 29.514 + BUG(); 29.515 + } 29.516 + } 29.517 + 29.518 + resp_cons = i; 29.519 + 29.520 + kick_pending_request_queues(); 29.521 + 29.522 + spin_unlock_irqrestore(&io_request_lock, flags); 29.523 +} 29.524 + 29.525 + 29.526 +void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp) 29.527 +{ 29.528 + unsigned long flags; 29.529 + 29.530 + retry: 29.531 + while ( (req_prod - resp_cons) == BLK_RING_SIZE ) 29.532 + { 29.533 + set_current_state(TASK_INTERRUPTIBLE); 29.534 + schedule_timeout(1); 29.535 + } 29.536 + 29.537 + spin_lock_irqsave(&io_request_lock, flags); 29.538 + if ( (req_prod - resp_cons) == BLK_RING_SIZE ) 29.539 + { 29.540 + spin_unlock_irqrestore(&io_request_lock, flags); 29.541 + goto retry; 29.542 + } 29.543 + 29.544 + DISABLE_SCATTERGATHER(); 29.545 + memcpy(&blk_ring->ring[MASK_BLK_IDX(req_prod)].req, req, sizeof(*req)); 29.546 + req_prod++; 29.547 + flush_requests(); 29.548 + 29.549 + spin_unlock_irqrestore(&io_request_lock, flags); 29.550 + 29.551 + while ( !blkif_control_rsp_valid ) 29.552 + { 29.553 + set_current_state(TASK_INTERRUPTIBLE); 29.554 + schedule_timeout(1); 29.555 + } 29.556 + 29.557 + memcpy(rsp, &blkif_control_rsp, sizeof(*rsp)); 29.558 + blkif_control_rsp_valid = 0; 29.559 +} 29.560 + 29.561 + 29.562 +static void blkif_bringup_phase1(void *unused) 29.563 +{ 29.564 + ctrl_msg_t cmsg; 29.565 + blkif_fe_interface_connect_t up; 29.566 + 29.567 + /* Move from CLOSED to DISCONNECTED state. */ 29.568 + blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL); 29.569 + blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0; 29.570 + blkif_state = BLKIF_STATE_DISCONNECTED; 29.571 + 29.572 + /* Construct an interface-CONNECT message for the domain controller. */ 29.573 + cmsg.type = CMSG_BLKIF_FE; 29.574 + cmsg.subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT; 29.575 + cmsg.length = sizeof(blkif_fe_interface_connect_t); 29.576 + up.handle = 0; 29.577 + up.shmem_frame = virt_to_machine(blk_ring) >> PAGE_SHIFT; 29.578 + memcpy(cmsg.msg, &up, sizeof(up)); 29.579 + 29.580 + /* Tell the controller to bring up the interface. 
*/ 29.581 + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 29.582 +} 29.583 + 29.584 +static void blkif_bringup_phase2(void *unused) 29.585 +{ 29.586 + blkif_irq = bind_evtchn_to_irq(blkif_evtchn); 29.587 + (void)request_irq(blkif_irq, blkif_int, 0, "blkif", NULL); 29.588 + 29.589 + /* Probe for discs that are attached to the interface. */ 29.590 + xlvbd_init(); 29.591 + 29.592 + blkif_state = BLKIF_STATE_CONNECTED; 29.593 + 29.594 + /* Kick pending requests. */ 29.595 + spin_lock_irq(&io_request_lock); 29.596 + kick_pending_request_queues(); 29.597 + spin_unlock_irq(&io_request_lock); 29.598 +} 29.599 + 29.600 +static void blkif_status_change(blkif_fe_interface_status_changed_t *status) 29.601 +{ 29.602 + if ( status->handle != 0 ) 29.603 + { 29.604 + printk(KERN_WARNING "Status change on unsupported blkif %d\n", 29.605 + status->handle); 29.606 + return; 29.607 + } 29.608 + 29.609 + switch ( status->status ) 29.610 + { 29.611 + case BLKIF_INTERFACE_STATUS_DESTROYED: 29.612 + printk(KERN_WARNING "Unexpected blkif-DESTROYED message in state %d\n", 29.613 + blkif_state); 29.614 + break; 29.615 + 29.616 + case BLKIF_INTERFACE_STATUS_DISCONNECTED: 29.617 + if ( blkif_state != BLKIF_STATE_CLOSED ) 29.618 + { 29.619 + printk(KERN_WARNING "Unexpected blkif-DISCONNECTED message" 29.620 + " in state %d\n", blkif_state); 29.621 + break; 29.622 + } 29.623 + blkif_statechange_tq.routine = blkif_bringup_phase1; 29.624 + schedule_task(&blkif_statechange_tq); 29.625 + break; 29.626 + 29.627 + case BLKIF_INTERFACE_STATUS_CONNECTED: 29.628 + if ( blkif_state == BLKIF_STATE_CLOSED ) 29.629 + { 29.630 + printk(KERN_WARNING "Unexpected blkif-CONNECTED message" 29.631 + " in state %d\n", blkif_state); 29.632 + break; 29.633 + } 29.634 + blkif_evtchn = status->evtchn; 29.635 + blkif_statechange_tq.routine = blkif_bringup_phase2; 29.636 + schedule_task(&blkif_statechange_tq); 29.637 + break; 29.638 + 29.639 + default: 29.640 + printk(KERN_WARNING "Status change to unknown value %d\n", 29.641 + status->status); 29.642 + break; 29.643 + } 29.644 +} 29.645 + 29.646 + 29.647 +static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) 29.648 +{ 29.649 + switch ( msg->subtype ) 29.650 + { 29.651 + case CMSG_BLKIF_FE_INTERFACE_STATUS_CHANGED: 29.652 + if ( msg->length != sizeof(blkif_fe_interface_status_changed_t) ) 29.653 + goto parse_error; 29.654 + blkif_status_change((blkif_fe_interface_status_changed_t *) 29.655 + &msg->msg[0]); 29.656 + break; 29.657 +#if 0 29.658 + case CMSG_BLKIF_FE_VBD_STATUS_CHANGED: 29.659 + update_tq.routine = update_vbds_task; 29.660 + schedule_task(&update_tq); 29.661 + break; 29.662 +#endif 29.663 + default: 29.664 + goto parse_error; 29.665 + } 29.666 + 29.667 + ctrl_if_send_response(msg); 29.668 + return; 29.669 + 29.670 + parse_error: 29.671 + msg->length = 0; 29.672 + ctrl_if_send_response(msg); 29.673 +} 29.674 + 29.675 + 29.676 +int __init xlblk_init(void) 29.677 +{ 29.678 + ctrl_msg_t cmsg; 29.679 + blkif_fe_driver_status_changed_t st; 29.680 + 29.681 + (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx); 29.682 + 29.683 + /* Send a driver-UP notification to the domain controller. 
*/ 29.684 + cmsg.type = CMSG_BLKIF_FE; 29.685 + cmsg.subtype = CMSG_BLKIF_FE_DRIVER_STATUS_CHANGED; 29.686 + cmsg.length = sizeof(blkif_fe_driver_status_changed_t); 29.687 + st.status = BLKIF_DRIVER_STATUS_UP; 29.688 + memcpy(cmsg.msg, &st, sizeof(st)); 29.689 + ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); 29.690 + 29.691 + /* 29.692 + * We should read 'nr_interfaces' from response message and wait 29.693 + * for notifications before proceeding. For now we assume that we 29.694 + * will be notified of exactly one interface. 29.695 + */ 29.696 + while ( blkif_state != BLKIF_STATE_CONNECTED ) 29.697 + { 29.698 + set_current_state(TASK_INTERRUPTIBLE); 29.699 + schedule_timeout(1); 29.700 + } 29.701 + 29.702 + return 0; 29.703 +} 29.704 + 29.705 + 29.706 +static void __exit xlblk_cleanup(void) 29.707 +{ 29.708 + /* XXX FIXME */ 29.709 + BUG(); 29.710 +} 29.711 + 29.712 + 29.713 +#ifdef MODULE 29.714 +module_init(xlblk_init); 29.715 +module_exit(xlblk_cleanup); 29.716 +#endif 29.717 + 29.718 + 29.719 +void blkdev_suspend(void) 29.720 +{ 29.721 + /* XXX FIXME */ 29.722 + BUG(); 29.723 +} 29.724 + 29.725 + 29.726 +void blkdev_resume(void) 29.727 +{ 29.728 + /* XXX FIXME */ 29.729 + BUG(); 29.730 +}
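
In blkif_queue_request() above, consecutive buffer heads are merged into the previous ring slot whenever they continue the same operation on the same device at exactly the next sector, up to the per-request segment limit (the sg_* variables track the request that is open for merging). A sketch of just that merge test, with illustrative try_merge() naming; the real code also stores the packed buffer address in the ring entry:

    #include <stdio.h>

    #define MAX_SEGMENTS 11           /* BLKIF_MAX_SEGMENTS_PER_REQUEST */

    static int sg_op = -1;            /* -1: no request open for merging */
    static unsigned int sg_dev;
    static unsigned long sg_next_sect;
    static int sg_segments;

    /* Returns 1 if (op,dev,sector) merged into the open request, else
     * starts a new request and returns 0. */
    static int try_merge(int op, unsigned int dev,
                         unsigned long sector, unsigned long nr_sects)
    {
        if ( (sg_op == op) && (sg_dev == dev) && (sg_next_sect == sector) )
        {
            if ( ++sg_segments < MAX_SEGMENTS )
                sg_next_sect += nr_sects;
            else
                sg_op = -1;           /* request full: stop merging */
            return 1;
        }
        sg_op = op; sg_dev = dev;     /* open a new request */
        sg_next_sect = sector + nr_sects;
        sg_segments = 1;
        return 0;
    }

    int main(void)
    {
        /* Three contiguous 8-sector reads on device 1 share one request;
         * the fourth, on device 2, opens a new one. */
        int a = try_merge(0, 1, 0, 8);
        int b = try_merge(0, 1, 8, 8);
        int c = try_merge(0, 1, 16, 8);
        int d = try_merge(0, 2, 0, 8);
        printf("%d%d%d%d\n", a, b, c, d); /* prints 0110 */
        return 0;
    }
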
30.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c Sun May 02 00:27:58 2004 +0000 30.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/blkif/frontend/vbd.c Sun May 02 00:52:45 2004 +0000 30.3 @@ -1,13 +1,13 @@ 30.4 /****************************************************************************** 30.5 - * vbd.c 30.6 + * arch/xen/drivers/blkif/frontend/vbd.c 30.7 * 30.8 - * Xenolinux virtual block-device driver (xvd). 30.9 + * Xenolinux virtual block-device driver. 30.10 * 30.11 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand 30.12 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge 30.13 */ 30.14 30.15 -#include "block.h" 30.16 +#include "common.h" 30.17 #include <linux/blk.h> 30.18 30.19 /* 30.20 @@ -43,54 +43,59 @@ static int xlvbd_blksize_size[256]; 30.21 static int xlvbd_hardsect_size[256]; 30.22 static int xlvbd_max_sectors[256]; 30.23 30.24 -/* Information from Xen about our VBDs. */ 30.25 +/* Information about our VBDs. */ 30.26 #define MAX_VBDS 64 30.27 static int nr_vbds; 30.28 -static xen_disk_t *vbd_info; 30.29 +static vdisk_t *vbd_info; 30.30 30.31 static struct block_device_operations xlvbd_block_fops = 30.32 { 30.33 - open: xen_block_open, 30.34 - release: xen_block_release, 30.35 - ioctl: xen_block_ioctl, 30.36 - check_media_change: xen_block_check, 30.37 - revalidate: xen_block_revalidate, 30.38 + open: blkif_open, 30.39 + release: blkif_release, 30.40 + ioctl: blkif_ioctl, 30.41 + check_media_change: blkif_check, 30.42 + revalidate: blkif_revalidate, 30.43 }; 30.44 30.45 -static int xlvbd_get_vbd_info(xen_disk_t *disk_info) 30.46 +static int xlvbd_get_vbd_info(vdisk_t *disk_info) 30.47 { 30.48 - int error; 30.49 - block_io_op_t op; 30.50 + vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL); 30.51 + blkif_request_t req; 30.52 + blkif_response_t rsp; 30.53 + int nr; 30.54 30.55 - /* Probe for disk information. */ 30.56 - memset(&op, 0, sizeof(op)); 30.57 - op.cmd = BLOCK_IO_OP_VBD_PROBE; 30.58 - op.u.probe_params.domain = 0; 30.59 - op.u.probe_params.xdi.max = MAX_VBDS; 30.60 - op.u.probe_params.xdi.disks = disk_info; 30.61 - op.u.probe_params.xdi.count = 0; 30.62 + memset(&req, 0, sizeof(req)); 30.63 + req.operation = BLKIF_OP_PROBE; 30.64 + req.nr_segments = 1; 30.65 + req.buffer_and_sects[0] = virt_to_machine(buf) | (PAGE_SIZE/512); 30.66 30.67 - if ( (error = HYPERVISOR_block_io_op(&op)) != 0 ) 30.68 + blkif_control_send(&req, &rsp); 30.69 + 30.70 + if ( rsp.status <= 0 ) 30.71 { 30.72 - printk(KERN_ALERT "Could not probe disks (%d)\n", error); 30.73 + printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status); 30.74 return -1; 30.75 } 30.76 30.77 - return op.u.probe_params.xdi.count; 30.78 + if ( (nr = rsp.status) > MAX_VBDS ) 30.79 + nr = MAX_VBDS; 30.80 + memcpy(disk_info, buf, nr * sizeof(vdisk_t)); 30.81 + 30.82 + return nr; 30.83 } 30.84 30.85 /* 30.86 * xlvbd_init_device - initialise a VBD device 30.87 - * @disk: a xen_disk_t describing the VBD 30.88 + * @disk: a vdisk_t describing the VBD 30.89 * 30.90 - * Takes a xen_disk_t * that describes a VBD the domain has access to. 30.91 + * Takes a vdisk_t * that describes a VBD the domain has access to. 30.92 * Performs appropriate initialisation and registration of the device. 30.93 * 30.94 * Care needs to be taken when making re-entrant calls to ensure that 30.95 * corruption does not occur. Also, devices that are in use should not have 30.96 * their details updated. This is the caller's responsibility. 
30.97 */ 30.98 -static int xlvbd_init_device(xen_disk_t *xd) 30.99 +static int xlvbd_init_device(vdisk_t *xd) 30.100 { 30.101 int device = xd->device; 30.102 int major = MAJOR(device); 30.103 @@ -181,11 +186,11 @@ static int xlvbd_init_device(xen_disk_t 30.104 read_ahead[major] = 8; 30.105 } 30.106 30.107 - blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); 30.108 + blk_init_queue(BLK_DEFAULT_QUEUE(major), do_blkif_request); 30.109 30.110 /* 30.111 * Turn off barking 'headactive' mode. We dequeue buffer heads as 30.112 - * soon as we pass them down to Xen. 30.113 + * soon as we pass them to the back-end driver. 30.114 */ 30.115 blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); 30.116 30.117 @@ -431,12 +436,12 @@ static int xlvbd_remove_device(int devic 30.118 void xlvbd_update_vbds(void) 30.119 { 30.120 int i, j, k, old_nr, new_nr; 30.121 - xen_disk_t *old_info, *new_info, *merged_info; 30.122 + vdisk_t *old_info, *new_info, *merged_info; 30.123 30.124 old_info = vbd_info; 30.125 old_nr = nr_vbds; 30.126 30.127 - new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); 30.128 + new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL); 30.129 if ( unlikely((new_nr = xlvbd_get_vbd_info(new_info)) < 0) ) 30.130 { 30.131 kfree(new_info); 30.132 @@ -448,7 +453,7 @@ void xlvbd_update_vbds(void) 30.133 * old list and new list do not overlap at all, and we cannot yet destroy 30.134 * VBDs in the old list because the usage counts are busy. 30.135 */ 30.136 - merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL); 30.137 + merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL); 30.138 30.139 /* @i tracks old list; @j tracks new list; @k tracks merged list. */ 30.140 i = j = k = 0; 30.141 @@ -458,13 +463,13 @@ void xlvbd_update_vbds(void) 30.142 if ( old_info[i].device < new_info[j].device ) 30.143 { 30.144 if ( xlvbd_remove_device(old_info[i].device) != 0 ) 30.145 - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); 30.146 + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); 30.147 i++; 30.148 } 30.149 else if ( old_info[i].device > new_info[j].device ) 30.150 { 30.151 if ( xlvbd_init_device(&new_info[j]) == 0 ) 30.152 - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); 30.153 + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); 30.154 j++; 30.155 } 30.156 else 30.157 @@ -472,9 +477,9 @@ void xlvbd_update_vbds(void) 30.158 if ( ((old_info[i].capacity == new_info[j].capacity) && 30.159 (old_info[i].info == new_info[j].info)) || 30.160 (xlvbd_remove_device(old_info[i].device) != 0) ) 30.161 - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); 30.162 + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); 30.163 else if ( xlvbd_init_device(&new_info[j]) == 0 ) 30.164 - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); 30.165 + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); 30.166 i++; j++; 30.167 } 30.168 } 30.169 @@ -482,13 +487,13 @@ void xlvbd_update_vbds(void) 30.170 for ( ; i < old_nr; i++ ) 30.171 { 30.172 if ( xlvbd_remove_device(old_info[i].device) != 0 ) 30.173 - memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t)); 30.174 + memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t)); 30.175 } 30.176 30.177 for ( ; j < new_nr; j++ ) 30.178 { 30.179 if ( xlvbd_init_device(&new_info[j]) == 0 ) 30.180 - memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t)); 30.181 + memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t)); 30.182 } 30.183 30.184 vbd_info = 
merged_info; 30.185 @@ -500,14 +505,14 @@ void xlvbd_update_vbds(void) 30.186 30.187 30.188 /* 30.189 - * Set up all the linux device goop for the virtual block devices (vbd's) that 30.190 - * xen tells us about. Note that although from xen's pov VBDs are addressed 30.191 - * simply an opaque 16-bit device number, the domain creation tools 30.192 + * Set up all the linux device goop for the virtual block devices (vbd's) that 30.193 + * we know about. Note that although from the backend driver's p.o.v. VBDs are 30.194 + * addressed simply as an opaque 16-bit device number, the domain creation tools 30.195 * conventionally allocate these numbers to correspond to those used by 'real' 30.196 * linux -- this is just for convenience as it means e.g. that the same 30.197 - * /etc/fstab can be used when booting with or without xen. 30.198 + * /etc/fstab can be used when booting with or without Xen. 30.199 */ 30.200 -int __init xlvbd_init(void) 30.201 +int xlvbd_init(void) 30.202 { 30.203 int i; 30.204 30.205 @@ -537,7 +542,7 @@ int __init xlvbd_init(void) 30.206 xlvbd_max_sectors[i] = 128; 30.207 } 30.208 30.209 - vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL); 30.210 + vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL); 30.211 nr_vbds = xlvbd_get_vbd_info(vbd_info); 30.212 30.213 if ( nr_vbds < 0 ) 30.214 @@ -554,8 +559,3 @@ int __init xlvbd_init(void) 30.215 30.216 return 0; 30.217 } 30.218 - 30.219 - 30.220 -#ifdef MODULE 30.221 -module_init(xlvbd_init); 30.222 -#endif
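A detail worth unpacking from xlvbd_get_vbd_info() above: the probe request packs the machine address of the buffer and its length into the single word buffer_and_sects[0]. Because the buffer is page-aligned, its low bits are zero and can carry the segment length in 512-byte sectors (PAGE_SIZE/512 == 8 for 4KB pages). A sketch of the decoding, assuming the count simply occupies the alignment bits left free:

    /* Sketch: splitting one 'buffer_and_sects' entry back into its parts.
     * 'bas' is built exactly as in the probe request above. */
    static void decode_probe_segment(unsigned long bas)
    {
        unsigned long maddr = bas &  PAGE_MASK;  /* machine address of the page   */
        unsigned long sects = bas & ~PAGE_MASK;  /* length in 512-byte sectors: 8 */
        printk(KERN_DEBUG "probe segment: maddr=%08lx sects=%lu\n", maddr, sects);
    }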
31.1 --- a/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c Sun May 02 00:27:58 2004 +0000 31.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/drivers/dom0/core.c Sun May 02 00:52:45 2004 +0000 31.3 @@ -36,7 +36,7 @@ static struct proc_dir_entry *privcmd_in 31.4 static int privcmd_ioctl(struct inode *inode, struct file *file, 31.5 unsigned int cmd, unsigned long data) 31.6 { 31.7 - int ret = 0; 31.8 + int ret = -ENOSYS; 31.9 31.10 switch ( cmd ) 31.11 { 31.12 @@ -108,6 +108,13 @@ static int privcmd_ioctl(struct inode *i 31.13 } 31.14 break; 31.15 31.16 + case IOCTL_PRIVCMD_INITDOMAIN_EVTCHN: 31.17 + { 31.18 + extern int initdom_ctrlif_domcontroller_port; 31.19 + ret = initdom_ctrlif_domcontroller_port; 31.20 + } 31.21 + break; 31.22 + 31.23 default: 31.24 ret = -EINVAL; 31.25 break; 31.26 @@ -140,7 +147,7 @@ static int __init init_module(void) 31.27 { 31.28 privcmd_intf->owner = THIS_MODULE; 31.29 privcmd_intf->nlink = 1; 31.30 - privcmd_intf->proc_fops = &privcmd_file_ops; 31.31 + privcmd_intf->proc_fops = &privcmd_file_ops; 31.32 } 31.33 31.34 return 0;
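From user space, the new ioctl above is how a domain controller retrieves its end of the control event channel. A sketch of that call, assuming the privcmd device node lives at /proc/xen/privcmd (the path is not shown in this changeset):

    /* User-space sketch: fetch the domain-controller port from privcmd. */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <asm-xen/proc_cmd.h>   /* IOCTL_PRIVCMD_INITDOMAIN_EVTCHN */

    int main(void)
    {
        int fd = open("/proc/xen/privcmd", O_RDWR);
        int port;
        if ( fd < 0 )
            return 1;
        /* Returns initdom_ctrlif_domcontroller_port; this is -1 until
         * ctrl_if_resume() has created the channel. */
        port = ioctl(fd, IOCTL_PRIVCMD_INITDOMAIN_EVTCHN, 0);
        printf("domain-controller port: %d\n", port);
        return 0;
    }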
32.1 --- a/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c Sun May 02 00:27:58 2004 +0000 32.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/kernel/ctrl_if.c Sun May 02 00:52:45 2004 +0000 32.3 @@ -17,6 +17,13 @@ 32.4 #include <asm/ctrl_if.h> 32.5 #include <asm/evtchn.h> 32.6 32.7 +/* 32.8 + * Only used by initial domain which must create its own control-interface 32.9 + * event channel. This value is picked up by the user-space domain controller 32.10 + * via an ioctl. 32.11 + */ 32.12 +int initdom_ctrlif_domcontroller_port = -1; 32.13 + 32.14 static int ctrl_if_evtchn; 32.15 static int ctrl_if_irq; 32.16 static spinlock_t ctrl_if_lock; 32.17 @@ -276,9 +283,6 @@ void ctrl_if_unregister_receiver(u8 type 32.18 32.19 void ctrl_if_suspend(void) 32.20 { 32.21 - if ( start_info.flags & SIF_INITDOMAIN ) 32.22 - return; 32.23 - 32.24 free_irq(ctrl_if_irq, NULL); 32.25 unbind_evtchn_from_irq(ctrl_if_evtchn); 32.26 } 32.27 @@ -286,7 +290,21 @@ void ctrl_if_suspend(void) 32.28 void ctrl_if_resume(void) 32.29 { 32.30 if ( start_info.flags & SIF_INITDOMAIN ) 32.31 - return; 32.32 + { 32.33 + /* 32.34 + * The initial domain must create its own domain-controller link. 32.35 + * The controller is probably not running at this point, but will 32.36 + * pick up its end of the event channel from us via a privcmd ioctl. 32.37 + */ 32.38 + evtchn_op_t op; 32.39 + op.cmd = EVTCHNOP_bind_interdomain; 32.40 + op.u.bind_interdomain.dom1 = DOMID_SELF; 32.41 + op.u.bind_interdomain.dom2 = DOMID_SELF; 32.42 + if ( HYPERVISOR_event_channel_op(&op) != 0 ) 32.43 + BUG(); 32.44 + start_info.domain_controller_evtchn = op.u.bind_interdomain.port1; 32.45 + initdom_ctrlif_domcontroller_port = op.u.bind_interdomain.port2; 32.46 + } 32.47 32.48 ctrl_if_tx_resp_cons = 0; 32.49 ctrl_if_rx_req_cons = 0;
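The trick in ctrl_if_resume() above deserves a note: binding DOMID_SELF to DOMID_SELF creates an event channel whose two ports both belong to the initial domain, so the kernel keeps one end (port1, stored as the usual domain_controller_evtchn) and exports the other (port2) to the user-space controller through the new privcmd ioctl. The same pattern in isolation, as a sketch:

    /* Sketch: create an intra-domain event channel and hand back both ports.
     * This mirrors the EVTCHNOP_bind_interdomain call above. */
    static int make_self_channel(int *kernel_port, int *controller_port)
    {
        evtchn_op_t op;
        op.cmd = EVTCHNOP_bind_interdomain;
        op.u.bind_interdomain.dom1 = DOMID_SELF;  /* our end   */
        op.u.bind_interdomain.dom2 = DOMID_SELF;  /* their end */
        if ( HYPERVISOR_event_channel_op(&op) != 0 )
            return -1;
        *kernel_port     = op.u.bind_interdomain.port1;
        *controller_port = op.u.bind_interdomain.port2;
        return 0;
    }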
33.1 --- a/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c Sun May 02 00:27:58 2004 +0000 33.2 +++ b/xenolinux-2.4.26-sparse/arch/xen/mm/ioremap.c Sun May 02 00:52:45 2004 +0000 33.3 @@ -5,7 +5,7 @@ 33.4 * 33.5 * (C) Copyright 1995 1996 Linus Torvalds 33.6 * 33.7 - * Modifications for Xenolinux (c) 2003 Keir Fraser 33.8 + * Modifications for Xenolinux (c) 2003-2004 Keir Fraser 33.9 */ 33.10 33.11 #include <linux/slab.h> 33.12 @@ -28,21 +28,26 @@ 33.13 __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot) 33.14 33.15 static inline int direct_remap_area_pte(pte_t *pte, 33.16 - unsigned long address, 33.17 - unsigned long size, 33.18 - unsigned long machine_addr, 33.19 - pgprot_t prot, 33.20 - domid_t domid) 33.21 + unsigned long address, 33.22 + unsigned long size, 33.23 + unsigned long machine_addr, 33.24 + pgprot_t prot, 33.25 + domid_t domid) 33.26 { 33.27 unsigned long end; 33.28 - 33.29 - mmu_update_t *u, *v; 33.30 - u = v = vmalloc(3*PAGE_SIZE); /* plenty */ 33.31 +#define MAX_DIRECTMAP_MMU_QUEUE 64 33.32 + mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *v; 33.33 33.34 - if (!u) 33.35 - return -ENOMEM; 33.36 + address &= ~PMD_MASK; 33.37 + end = address + size; 33.38 + if (end > PMD_SIZE) 33.39 + end = PMD_SIZE; 33.40 + if (address >= end) 33.41 + BUG(); 33.42 33.43 + reset_buffer: 33.44 /* If not I/O mapping then specify General-Purpose Subject Domain (GPS). */ 33.45 + v = &u[0]; 33.46 if ( domid != 0 ) 33.47 { 33.48 v[0].val = (unsigned long)(domid<<16) & ~0xFFFFUL; 33.49 @@ -56,12 +61,6 @@ static inline int direct_remap_area_pte( 33.50 v += 2; 33.51 } 33.52 33.53 - address &= ~PMD_MASK; 33.54 - end = address + size; 33.55 - if (end > PMD_SIZE) 33.56 - end = PMD_SIZE; 33.57 - if (address >= end) 33.58 - BUG(); 33.59 do { 33.60 #if 0 /* thanks to new ioctl mmaping interface this is no longer a bug */ 33.61 if (!pte_none(*pte)) { 33.62 @@ -71,7 +70,12 @@ static inline int direct_remap_area_pte( 33.63 #endif 33.64 v->ptr = virt_to_machine(pte); 33.65 v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot) | _PAGE_IO; 33.66 - v++; 33.67 + if ( ++v == &u[MAX_DIRECTMAP_MMU_QUEUE] ) 33.68 + { 33.69 + if ( HYPERVISOR_mmu_update(u, MAX_DIRECTMAP_MMU_QUEUE) < 0 ) 33.70 + return -EFAULT; 33.71 + goto reset_buffer; 33.72 + } 33.73 address += PAGE_SIZE; 33.74 machine_addr += PAGE_SIZE; 33.75 pte++; 33.76 @@ -84,7 +88,6 @@ static inline int direct_remap_area_pte( 33.77 return -EINVAL; 33.78 } 33.79 33.80 - vfree(u); 33.81 return 0; 33.82 } 33.83 33.84 @@ -96,8 +99,8 @@ static inline int direct_remap_area_pmd( 33.85 pgprot_t prot, 33.86 domid_t domid) 33.87 { 33.88 + int error = 0; 33.89 unsigned long end; 33.90 - int rc; 33.91 33.92 address &= ~PGDIR_MASK; 33.93 end = address + size; 33.94 @@ -111,14 +114,14 @@ static inline int direct_remap_area_pmd( 33.95 if (!pte) 33.96 return -ENOMEM; 33.97 33.98 - if ( rc = direct_remap_area_pte(pte, address, end - address, 33.99 - address + machine_addr, prot, domid) ) 33.100 - return rc; 33.101 - 33.102 + error = direct_remap_area_pte(pte, address, end - address, 33.103 + address + machine_addr, prot, domid); 33.104 + if ( error ) 33.105 + break; 33.106 address = (address + PMD_SIZE) & PMD_MASK; 33.107 pmd++; 33.108 } while (address && (address < end)); 33.109 - return 0; 33.110 + return error; 33.111 } 33.112 33.113 int direct_remap_area_pages(struct mm_struct *mm,
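The ioremap rewrite above swaps an unbounded vmalloc'd update queue for a fixed 64-entry buffer on the stack, flushed with a single HYPERVISOR_mmu_update() whenever it fills; the goto reset_buffer path re-primes the domain-specifier entries before refilling. The same batch-and-flush pattern in isolation (a sketch using the two-argument hypercall form seen above; the trailing partial-batch flush happens outside the hunks shown):

    /* Sketch: accumulate MMU updates in a fixed buffer, flushing each time
     * it fills, with one final flush for the partial batch at the end. */
    #define BATCH 64
    static int queue_mmu_updates(mmu_update_t *req, int nr)
    {
        mmu_update_t buf[BATCH], *v = buf;
        int i;
        for ( i = 0; i < nr; i++ )
        {
            *v++ = req[i];
            if ( v == &buf[BATCH] )        /* buffer full: flush it */
            {
                if ( HYPERVISOR_mmu_update(buf, BATCH) < 0 )
                    return -EFAULT;
                v = buf;
            }
        }
        if ( (v != buf) && (HYPERVISOR_mmu_update(buf, v - buf) < 0) )
            return -EFAULT;                /* flush the remainder   */
        return 0;
    }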
34.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h Sun May 02 00:27:58 2004 +0000 34.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/ctrl_if.h Sun May 02 00:52:45 2004 +0000 34.3 @@ -52,7 +52,7 @@ int ctrl_if_send_message_noblock( 34.4 * function returns. 34.5 * 2. If @hnd is NULL then no callback is executed. 34.6 */ 34.7 -int ctrl_if_send_message( 34.8 +int ctrl_if_send_message_block( 34.9 ctrl_msg_t *msg, 34.10 ctrl_msg_handler_t hnd, 34.11 unsigned long id,
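The rename above makes the blocking behaviour explicit in the API: ctrl_if_send_message_block() may sleep in the given task state until ring space is free, while the _noblock variant declared just above it in this header presumably returns an error instead of sleeping. A usage sketch, assuming the _noblock form takes the same message/handler/id arguments:

    /* Sketch: pick the send variant according to context. */
    static void send_ctrl_msg(ctrl_msg_t *msg)
    {
        if ( in_interrupt() )
        {
            /* Must not sleep: try once and tolerate failure. */
            (void)ctrl_if_send_message_noblock(msg, NULL, 0);
        }
        else
        {
            /* Safe to sleep until there is space on the ring. */
            ctrl_if_send_message_block(msg, NULL, 0, TASK_UNINTERRUPTIBLE);
        }
    }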
35.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/io.h Sun May 02 00:27:58 2004 +0000 35.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/io.h Sun May 02 00:52:45 2004 +0000 35.3 @@ -159,13 +159,47 @@ extern void iounmap(void *addr); 35.4 extern void *bt_ioremap(unsigned long offset, unsigned long size); 35.5 extern void bt_iounmap(void *addr, unsigned long size); 35.6 35.7 +#ifdef CONFIG_XEN_PHYSDEV_ACCESS 35.8 + 35.9 +#ifdef CONFIG_HIGHMEM 35.10 +#error "Highmem is not yet compatible with physical device access" 35.11 +#endif 35.12 + 35.13 /* 35.14 - * IO bus memory addresses are also 1:1 with the physical address 35.15 + * The bus translation macros need special care if we are executing device 35.16 + * accesses to/from other domains' memory. In these cases the virtual address 35.17 + * is actually a temporary mapping in the 'vmalloc' space. The physical 35.18 + * address will therefore be >max_low_pfn, and will not have a valid entry 35.19 + * in the phys_to_mach mapping table. 35.20 */ 35.21 +static inline unsigned long phys_to_bus(unsigned long phys) 35.22 +{ 35.23 + extern unsigned long max_pfn; 35.24 + pgd_t *pgd; pmd_t *pmd; pte_t *pte; 35.25 + void *addr; 35.26 + unsigned long bus; 35.27 + if ( (phys >> PAGE_SHIFT) < max_pfn ) 35.28 + return phys_to_machine(phys); 35.29 + addr = phys_to_virt(phys); 35.30 + pgd = pgd_offset_k( (unsigned long)addr); 35.31 + pmd = pmd_offset(pgd, (unsigned long)addr); 35.32 + pte = pte_offset(pmd, (unsigned long)addr); 35.33 + bus = (pte->pte_low & PAGE_MASK) | (phys & ~PAGE_MASK); 35.34 + return bus; 35.35 +} 35.36 + 35.37 +#define virt_to_bus(_x) phys_to_bus(virt_to_phys(_x)) 35.38 +#define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) 35.39 +#define page_to_bus(_x) phys_to_bus(page_to_phys(_x)) 35.40 + 35.41 +#else 35.42 + 35.43 #define virt_to_bus(_x) phys_to_machine(virt_to_phys(_x)) 35.44 #define bus_to_virt(_x) phys_to_virt(machine_to_phys(_x)) 35.45 #define page_to_bus(_x) phys_to_machine(page_to_phys(_x)) 35.46 35.47 +#endif /* CONFIG_XEN_PHYSDEV_ACCESS */ 35.48 + 35.49 /* 35.50 * readX/writeX() are used to access memory mapped devices. On some 35.51 * architectures the memory mapped IO stuff needs to be accessed
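The distinction phys_to_bus() draws above: an ordinary kernel page has a pseudo-physical frame below max_pfn, so the phys-to-machine table yields its bus (machine) address directly; a foreign page mapped through the vmalloc area produces a pseudo-physical address at or above max_pfn with no table entry, so the function walks pgd/pmd/pte and reads the machine frame straight out of the pte. A small kernel-context illustration of the fast path (the function name is just for the example):

    /* Sketch: translating a locally owned buffer for device DMA.  The
     * phys-to-machine fast path applies because phys < max_pfn here. */
    static unsigned long dma_address_of_local(void)
    {
        void *buf = kmalloc(512, GFP_KERNEL);    /* ordinary local page       */
        if ( buf == NULL )
            return 0;
        return virt_to_bus(buf);                 /* fast path: phys < max_pfn */
    }
    /* For a foreign page mapped with direct_remap_area_pages(), the same
     * virt_to_bus() call would instead take the page-table walk above. */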
36.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h Sun May 02 00:27:58 2004 +0000 36.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/pgtable-2level.h Sun May 02 00:52:45 2004 +0000 36.3 @@ -47,6 +47,11 @@ static inline pmd_t * pmd_offset(pgd_t * 36.4 return (pmd_t *) dir; 36.5 } 36.6 36.7 +#define pte_same(a, b) ((a).pte_low == (b).pte_low) 36.8 +#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT)))) 36.9 +#define pte_none(x) (!(x).pte_low) 36.10 +#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) 36.11 + 36.12 /* 36.13 * A note on implementation of this atomic 'get-and-clear' operation. 36.14 * This is actually very simple because XenoLinux can only run on a single 36.15 @@ -59,13 +64,9 @@ static inline pmd_t * pmd_offset(pgd_t * 36.16 static inline pte_t ptep_get_and_clear(pte_t *xp) 36.17 { 36.18 pte_t pte = *xp; 36.19 - queue_l1_entry_update(xp, 0); 36.20 + if ( !pte_none(pte) ) 36.21 + queue_l1_entry_update(xp, 0); 36.22 return pte; 36.23 } 36.24 36.25 -#define pte_same(a, b) ((a).pte_low == (b).pte_low) 36.26 -#define pte_page(x) (mem_map+((unsigned long)((pte_val(x) >> PAGE_SHIFT)))) 36.27 -#define pte_none(x) (!(x).pte_low) 36.28 -#define __mk_pte(page_nr,pgprot) __pte(((page_nr) << PAGE_SHIFT) | pgprot_val(pgprot)) 36.29 - 36.30 #endif /* _I386_PGTABLE_2LEVEL_H */
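The reshuffled ptep_get_and_clear() above now skips the update when the pte is already clear: every queue_l1_entry_update() call costs an entry in the hypervisor's update queue, and ultimately part of a batched hypercall, so clearing an already-clear pte would buy nothing. The guard as a standalone idiom:

    /* Sketch: only queue a hypervisor page-table update when the pte
     * actually changes; clearing a clear pte is pure waste. */
    static inline void clear_pte_lazily(pte_t *xp)
    {
        if ( !pte_none(*xp) )
            queue_l1_entry_update(xp, 0);
    }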
37.1 --- a/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h Sun May 02 00:27:58 2004 +0000 37.2 +++ b/xenolinux-2.4.26-sparse/include/asm-xen/proc_cmd.h Sun May 02 00:52:45 2004 +0000 37.3 @@ -32,10 +32,22 @@ typedef struct privcmd_blkmsg 37.4 int buf_size; 37.5 } privcmd_blkmsg_t; 37.6 37.7 -#define IOCTL_PRIVCMD_HYPERCALL \ 37.8 +/* 37.9 + * @cmd: IOCTL_PRIVCMD_HYPERCALL 37.10 + * @arg: &privcmd_hypercall_t 37.11 + * Return: Value returned from execution of the specified hypercall. 37.12 + */ 37.13 +#define IOCTL_PRIVCMD_HYPERCALL \ 37.14 _IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t)) 37.15 -#define IOCTL_PRIVCMD_BLKMSG \ 37.16 - _IOC(_IOC_NONE, 'P', 1, sizeof(privcmd_blkmsg_t)) 37.17 + 37.18 +/* 37.19 + * @cmd: IOCTL_PRIVCMD_INITDOMAIN_EVTCHN 37.20 + * @arg: n/a 37.21 + * Return: Port associated with domain-controller end of control event channel 37.22 + * for the initial domain. 37.23 + */ 37.24 +#define IOCTL_PRIVCMD_INITDOMAIN_EVTCHN \ 37.25 + _IOC(_IOC_NONE, 'P', 1, 0) 37.26 #define IOCTL_PRIVCMD_MMAP \ 37.27 _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t)) 37.28
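For comparison with the event-channel ioctl documented above, IOCTL_PRIVCMD_HYPERCALL takes a &privcmd_hypercall_t and returns the hypercall's own return value. A user-space sketch; the op/arg[5] layout of privcmd_hypercall_t and the use of __HYPERVISOR_xen_version as the hypercall number are assumptions, not shown in this changeset:

    /* User-space sketch: issue a hypercall through privcmd. */
    #include <sys/ioctl.h>
    #include <asm-xen/proc_cmd.h>

    static int do_xen_version(int privcmd_fd)
    {
        privcmd_hypercall_t call;
        call.op     = __HYPERVISOR_xen_version;  /* example hypercall number */
        call.arg[0] = 0;
        /* The ioctl's return value is the hypercall's own return value. */
        return ioctl(privcmd_fd, IOCTL_PRIVCMD_HYPERCALL, (unsigned long)&call);
    }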
38.1 --- a/xenolinux-2.4.26-sparse/mm/vmalloc.c Sun May 02 00:27:58 2004 +0000 38.2 +++ b/xenolinux-2.4.26-sparse/mm/vmalloc.c Sun May 02 00:52:45 2004 +0000 38.3 @@ -45,6 +45,10 @@ static inline void free_area_pte(pmd_t * 38.4 continue; 38.5 if (pte_present(page)) { 38.6 struct page *ptpage = pte_page(page); 38.7 +#if defined(CONFIG_XEN_PRIVILEGED_GUEST) 38.8 + if (pte_io(page)) 38.9 + continue; 38.10 +#endif 38.11 if (VALID_PAGE(ptpage) && (!PageReserved(ptpage))) 38.12 __free_page(ptpage); 38.13 continue; 38.14 @@ -250,11 +254,6 @@ void __vfree(void * addr, int free_area_ 38.15 for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) { 38.16 if (tmp->addr == addr) { 38.17 *p = tmp->next; 38.18 -#ifdef CONFIG_XEN_PRIVILEGED_GUEST 38.19 - if (tmp->flags & VM_IOREMAP) 38.20 - zap_page_range(&init_mm, VMALLOC_VMADDR(tmp->addr), tmp->size); 38.21 - else 38.22 -#endif 38.23 if (free_area_pages) 38.24 vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size); 38.25 write_unlock(&vmlist_lock);
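The new guard in free_area_pte() above keys off the _PAGE_IO bit that direct_remap_area_pte() sets when it maps another domain's (or a device's) machine pages into the vmalloc area: such ptes do not reference local struct pages, so handing them to __free_page() would corrupt the allocator. The test itself is not shown in this changeset; its presumed shape:

    /* Presumed shape of the pte_io() predicate used above: true for ptes
     * that direct_remap_area_pte() tagged with _PAGE_IO. */
    static inline int pte_io(pte_t pte)
    {
        return pte.pte_low & _PAGE_IO;
    }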