ia64/xen-unstable

changeset 1190:f94912902e71

bitkeeper revision 1.804 (40585405PlcQQehq9SazyQeocNUgLg)

Merge scramble.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xeno
author kaf24@scramble.cl.cam.ac.uk
date Wed Mar 17 13:35:01 2004 +0000 (2004-03-17)
parents 17dc27860023 f34a02c2b3f6
children 2d19a1a32eea
files .rootkeys tools/xc/lib/xc_linux_build.c tools/xc/lib/xc_linux_restore.c tools/xc/lib/xc_linux_save.c tools/xc/lib/xc_netbsd_build.c xen/arch/i386/domain_page.c xen/arch/i386/mm.c xen/arch/i386/process.c xen/common/dom0_ops.c xen/common/domain.c xen/common/domain_page.c xen/common/kernel.c xen/common/lib.c xen/include/asm-i386/config.h xen/include/asm-i386/processor.h xen/include/asm-i386/types.h xen/include/asm-x86_64/atomic.h xen/include/asm-x86_64/config.h xen/include/asm-x86_64/current.h xen/include/asm-x86_64/desc.h xen/include/asm-x86_64/io.h xen/include/asm-x86_64/ldt.h xen/include/asm-x86_64/page.h xen/include/asm-x86_64/pci.h xen/include/asm-x86_64/processor.h xen/include/asm-x86_64/types.h xen/include/asm-x86_64/uaccess.h xen/include/hypervisor-ifs/arch-i386/hypervisor-if.h xen/include/hypervisor-ifs/arch-x86_64/hypervisor-if.h xen/include/hypervisor-ifs/dom0_ops.h xen/include/xeno/lib.h xen/include/xeno/types.h xen/net/dev.c xenolinux-2.4.25-sparse/arch/xeno/Makefile xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/Makefile xenolinux-2.4.25-sparse/arch/xeno/drivers/block/Makefile xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.c xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.h xenolinux-2.4.25-sparse/arch/xeno/drivers/block/vbd.c xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_block.c xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_block.h xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_vbd.c xenolinux-2.4.25-sparse/arch/xeno/drivers/console/Makefile xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/Makefile xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/core.c xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/dom0_core.c xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/Makefile xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/evtchn.c xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/xl_evtchn.c xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c 
xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/Makefile xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c
line diff
     1.1 --- a/.rootkeys	Tue Mar 16 12:37:49 2004 +0000
     1.2 +++ b/.rootkeys	Wed Mar 17 13:35:01 2004 +0000
     1.3 @@ -118,6 +118,7 @@ 3e5636e7NRe-LqmOmyagUFWr70bcag xen/arch/
     1.4  3ddb79bcsjinG9k1KcvbVBuas1R2dA xen/arch/i386/apic.c
     1.5  3ddb79bcSC_LvnmFlX-T5iTgaR0SKg xen/arch/i386/boot/boot.S
     1.6  3ddb79bcUrk2EIaM5VsT6wUudH1kkg xen/arch/i386/delay.c
     1.7 +3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/arch/i386/domain_page.c
     1.8  3ddb79bcecupHj56ZbTa3B0FxDowMg xen/arch/i386/entry.S
     1.9  3ddb79bcY5zW7KhvI9gvfuPi3ZumEg xen/arch/i386/extable.c
    1.10  3fe443fdDDb0Sw6NQBCk4GQapayfTA xen/arch/i386/flushtlb.c
    1.11 @@ -155,7 +156,6 @@ 3fa152581E5KhrAtqZef2Sr5NKTz4w xen/commo
    1.12  3ddb79bdLX_P6iB7ILiblRLWvebapg xen/common/dom0_ops.c
    1.13  3e6377e4i0c9GtKN65e99OtRbw3AZw xen/common/dom_mem_ops.c
    1.14  3ddb79bdYO5D8Av12NHqPeSviav7cg xen/common/domain.c
    1.15 -3e32af9aRnYGl4GMOaDKp7JdfhOGhg xen/common/domain_page.c
    1.16  3ddb79bdeyutmaXEfpQvvxj7eQ0fCw xen/common/event.c
    1.17  3fba5b96H0khoxNiKbjdi0inpXV-Pw xen/common/event_channel.c
    1.18  3ddb79bd9drcFPVxd4w2GPOIjLlXpA xen/common/kernel.c
    1.19 @@ -601,18 +601,20 @@ 3e5a4e65TNEycLeXqPSXQJQm_xGecA xenolinux
    1.20  3e6377f5xwPfYZkPHPrDbEq1PRN7uQ xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/Makefile
    1.21  3e6377f8Me8IqtvEhb70XFgOvqQH7A xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/balloon.c
    1.22  3e5a4e65iHEuC5sjFhj42XALYbLVRw xenolinux-2.4.25-sparse/arch/xeno/drivers/block/Makefile
    1.23 -3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_block.c
    1.24 -3e67f822FOPwqHiaRKbrskgWgoNL5g xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_block.h
    1.25 -3e676eb5RXnHzSHgA1BvM0B1aIm4qg xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_vbd.c
    1.26 +3e5a4e65pP5spJErBW69pJxSSdK9RA xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.c
    1.27 +3e67f822FOPwqHiaRKbrskgWgoNL5g xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.h
    1.28 +3e676eb5RXnHzSHgA1BvM0B1aIm4qg xenolinux-2.4.25-sparse/arch/xeno/drivers/block/vbd.c
    1.29  3e5a4e65G3e2s0ghPMgiJ-gBTUJ0uQ xenolinux-2.4.25-sparse/arch/xeno/drivers/console/Makefile
    1.30  3e5a4e651TH-SXHoufurnWjgl5bfOA xenolinux-2.4.25-sparse/arch/xeno/drivers/console/console.c
    1.31  3e5a4e656nfFISThfbyXQOA6HN6YHw xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/Makefile
    1.32 -3e5a4e65BXtftInNHUC2PjDfPhdZZA xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/dom0_core.c
    1.33 +3e5a4e65BXtftInNHUC2PjDfPhdZZA xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/core.c
    1.34  3e5a4e65gfn_ltB8ujHMVFApnTTNRQ xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/vfr.c
    1.35  40420a6ebRqDjufoN1WSJvolEW2Wjw xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/Makefile
    1.36 -40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/xl_evtchn.c
    1.37 +40420a73Wou6JlsZDiu6YwjYomsm7A xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/evtchn.c
    1.38  3e5a4e65gZBRBB6RsSVg1c9iahigAw xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile
    1.39  3e5a4e65ZxKrbFetVB84JhrTyZ1YuQ xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c
    1.40 +405853f2wg7JXZJNltspMwOZJklxgw xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/Makefile
    1.41 +405853f6nbeazrNyEWNHBuoSg2PiPA xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c
    1.42  3e5a4e65lWzkiPXsZdzPt2RNnJGG1g xenolinux-2.4.25-sparse/arch/xeno/kernel/Makefile
    1.43  3e5a4e65_hqfuxtGG8IUy6wRM86Ecg xenolinux-2.4.25-sparse/arch/xeno/kernel/entry.S
    1.44  3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg xenolinux-2.4.25-sparse/arch/xeno/kernel/head.S
     2.1 --- a/tools/xc/lib/xc_linux_build.c	Tue Mar 16 12:37:49 2004 +0000
     2.2 +++ b/tools/xc/lib/xc_linux_build.c	Wed Mar 17 13:35:01 2004 +0000
     2.3 @@ -378,32 +378,32 @@ int xc_linux_build(int xc_handle,
     2.4  
     2.5      /*
     2.6       * Initial register values:
     2.7 -     *  DS,ES,FS,GS = FLAT_RING1_DS
     2.8 -     *       CS:EIP = FLAT_RING1_CS:start_pc
     2.9 -     *       SS:ESP = FLAT_RING1_DS:start_stack
    2.10 +     *  DS,ES,FS,GS = FLAT_GUESTOS_DS
    2.11 +     *       CS:EIP = FLAT_GUESTOS_CS:start_pc
    2.12 +     *       SS:ESP = FLAT_GUESTOS_DS:start_stack
    2.13       *          ESI = start_info
    2.14       *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
    2.15       *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
    2.16       */
    2.17 -    ctxt->i386_ctxt.ds = FLAT_RING1_DS;
    2.18 -    ctxt->i386_ctxt.es = FLAT_RING1_DS;
    2.19 -    ctxt->i386_ctxt.fs = FLAT_RING1_DS;
    2.20 -    ctxt->i386_ctxt.gs = FLAT_RING1_DS;
    2.21 -    ctxt->i386_ctxt.ss = FLAT_RING1_DS;
    2.22 -    ctxt->i386_ctxt.cs = FLAT_RING1_CS;
    2.23 -    ctxt->i386_ctxt.eip = load_addr;
    2.24 -    ctxt->i386_ctxt.esp = virt_startinfo_addr;
    2.25 -    ctxt->i386_ctxt.esi = virt_startinfo_addr;
    2.26 -    ctxt->i386_ctxt.eflags = (1<<9) | (1<<2);
    2.27 +    ctxt->cpu_ctxt.ds = FLAT_GUESTOS_DS;
    2.28 +    ctxt->cpu_ctxt.es = FLAT_GUESTOS_DS;
    2.29 +    ctxt->cpu_ctxt.fs = FLAT_GUESTOS_DS;
    2.30 +    ctxt->cpu_ctxt.gs = FLAT_GUESTOS_DS;
    2.31 +    ctxt->cpu_ctxt.ss = FLAT_GUESTOS_DS;
    2.32 +    ctxt->cpu_ctxt.cs = FLAT_GUESTOS_CS;
    2.33 +    ctxt->cpu_ctxt.eip = load_addr;
    2.34 +    ctxt->cpu_ctxt.esp = virt_startinfo_addr;
    2.35 +    ctxt->cpu_ctxt.esi = virt_startinfo_addr;
    2.36 +    ctxt->cpu_ctxt.eflags = (1<<9) | (1<<2);
    2.37  
    2.38      /* FPU is set up to default initial state. */
    2.39 -    memset(ctxt->i387_ctxt, 0, sizeof(ctxt->i387_ctxt));
    2.40 +    memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
    2.41  
    2.42      /* Virtual IDT is empty at start-of-day. */
    2.43      for ( i = 0; i < 256; i++ )
    2.44      {
    2.45          ctxt->trap_ctxt[i].vector = i;
    2.46 -        ctxt->trap_ctxt[i].cs     = FLAT_RING1_CS;
    2.47 +        ctxt->trap_ctxt[i].cs     = FLAT_GUESTOS_CS;
    2.48      }
    2.49      ctxt->fast_trap_idx = 0;
    2.50  
    2.51 @@ -414,16 +414,16 @@ int xc_linux_build(int xc_handle,
    2.52      ctxt->gdt_ents = 0;
    2.53  
    2.54      /* Ring 1 stack is the initial stack. */
    2.55 -    ctxt->ring1_ss  = FLAT_RING1_DS;
    2.56 -    ctxt->ring1_esp = virt_startinfo_addr;
    2.57 +    ctxt->guestos_ss  = FLAT_GUESTOS_DS;
    2.58 +    ctxt->guestos_esp = virt_startinfo_addr;
    2.59  
    2.60      /* No debugging. */
    2.61      memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
    2.62  
    2.63      /* No callback handlers. */
    2.64 -    ctxt->event_callback_cs     = FLAT_RING1_CS;
    2.65 +    ctxt->event_callback_cs     = FLAT_GUESTOS_CS;
    2.66      ctxt->event_callback_eip    = 0;
    2.67 -    ctxt->failsafe_callback_cs  = FLAT_RING1_CS;
    2.68 +    ctxt->failsafe_callback_cs  = FLAT_GUESTOS_CS;
    2.69      ctxt->failsafe_callback_eip = 0;
    2.70  
    2.71      launch_op.u.builddomain.domain   = (domid_t)domid;
     3.1 --- a/tools/xc/lib/xc_linux_restore.c	Tue Mar 16 12:37:49 2004 +0000
     3.2 +++ b/tools/xc/lib/xc_linux_restore.c	Wed Mar 17 13:35:01 2004 +0000
     3.3 @@ -313,13 +313,13 @@ int xc_linux_restore(int xc_handle,
     3.4      verbose_printf("\b\b\b\b100%%\nMemory reloaded.\n");
     3.5  
     3.6      /* Uncanonicalise the suspend-record frame number and poke resume rec. */
     3.7 -    pfn = ctxt.i386_ctxt.esi;
     3.8 +    pfn = ctxt.cpu_ctxt.esi;
     3.9      if ( (pfn >= nr_pfns) || (pfn_type[pfn] != NONE) )
    3.10      {
    3.11          ERROR("Suspend record frame number is bad");
    3.12          goto out;
    3.13      }
    3.14 -    ctxt.i386_ctxt.esi = mfn = pfn_to_mfn_table[pfn];
    3.15 +    ctxt.cpu_ctxt.esi = mfn = pfn_to_mfn_table[pfn];
    3.16      p_srec = map_pfn_writeable(pm_handle, mfn);
    3.17      p_srec->resume_info.nr_pages    = nr_pfns;
    3.18      p_srec->resume_info.shared_info = shared_info_frame << PAGE_SHIFT;
    3.19 @@ -370,13 +370,13 @@ int xc_linux_restore(int xc_handle,
    3.20  
    3.21      /*
    3.22       * Safety checking of saved context:
    3.23 -     *  1. i386_ctxt is fine, as Xen checks that on context switch.
    3.24 -     *  2. i387_ctxt is fine, as it can't hurt Xen.
    3.25 +     *  1. cpu_ctxt is fine, as Xen checks that on context switch.
    3.26 +     *  2. fpu_ctxt is fine, as it can't hurt Xen.
    3.27       *  3. trap_ctxt needs the code selectors checked.
    3.28       *  4. fast_trap_idx is checked by Xen.
    3.29       *  5. ldt base must be page-aligned, no more than 8192 ents, ...
    3.30       *  6. gdt already done, and further checking is done by Xen.
    3.31 -     *  7. check that ring1_ss is safe.
    3.32 +     *  7. check that guestos_ss is safe.
    3.33       *  8. pt_base is already done.
    3.34       *  9. debugregs are checked by Xen.
    3.35       *  10. callback code selectors need checking.
    3.36 @@ -385,14 +385,14 @@ int xc_linux_restore(int xc_handle,
    3.37      {
    3.38          ctxt.trap_ctxt[i].vector = i;
    3.39          if ( (ctxt.trap_ctxt[i].cs & 3) == 0 )
    3.40 -            ctxt.trap_ctxt[i].cs = FLAT_RING1_CS;
    3.41 +            ctxt.trap_ctxt[i].cs = FLAT_GUESTOS_CS;
    3.42      }
    3.43 -    if ( (ctxt.ring1_ss & 3) == 0 )
    3.44 -        ctxt.ring1_ss = FLAT_RING1_DS;
    3.45 +    if ( (ctxt.guestos_ss & 3) == 0 )
    3.46 +        ctxt.guestos_ss = FLAT_GUESTOS_DS;
    3.47      if ( (ctxt.event_callback_cs & 3) == 0 )
    3.48 -        ctxt.event_callback_cs = FLAT_RING1_CS;
    3.49 +        ctxt.event_callback_cs = FLAT_GUESTOS_CS;
    3.50      if ( (ctxt.failsafe_callback_cs & 3) == 0 )
    3.51 -        ctxt.failsafe_callback_cs = FLAT_RING1_CS;
    3.52 +        ctxt.failsafe_callback_cs = FLAT_GUESTOS_CS;
    3.53      if ( ((ctxt.ldt_base & (PAGE_SIZE - 1)) != 0) ||
    3.54           (ctxt.ldt_ents > 8192) ||
    3.55           (ctxt.ldt_base > HYPERVISOR_VIRT_START) ||
     4.1 --- a/tools/xc/lib/xc_linux_save.c	Tue Mar 16 12:37:49 2004 +0000
     4.2 +++ b/tools/xc/lib/xc_linux_save.c	Wed Mar 17 13:35:01 2004 +0000
     4.3 @@ -182,14 +182,14 @@ int xc_linux_save(int xc_handle,
     4.4          goto out;
     4.5  
     4.6      /* Is the suspend-record MFN actually valid for this domain? */
     4.7 -    if ( !check_pfn_ownership(xc_handle, ctxt.i386_ctxt.esi, domid) )
     4.8 +    if ( !check_pfn_ownership(xc_handle, ctxt.cpu_ctxt.esi, domid) )
     4.9      {
    4.10          ERROR("Invalid state record pointer");
    4.11          goto out;
    4.12      }
    4.13  
    4.14      /* If the suspend-record MFN is okay then grab a copy of it to @srec. */
    4.15 -    p_srec = map_pfn_readonly(pm_handle, ctxt.i386_ctxt.esi);
    4.16 +    p_srec = map_pfn_readonly(pm_handle, ctxt.cpu_ctxt.esi);
    4.17      memcpy(&srec, p_srec, sizeof(srec));
    4.18      unmap_pfn(pm_handle, p_srec);
    4.19  
    4.20 @@ -272,7 +272,7 @@ int xc_linux_save(int xc_handle,
    4.21      }
    4.22  
    4.23      /* Canonicalise the suspend-record frame number. */
    4.24 -    if ( !translate_mfn_to_pfn(&ctxt.i386_ctxt.esi) )
    4.25 +    if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) )
    4.26      {
    4.27          ERROR("State record is not in range of pseudophys map");
    4.28          goto out;
     5.1 --- a/tools/xc/lib/xc_netbsd_build.c	Tue Mar 16 12:37:49 2004 +0000
     5.2 +++ b/tools/xc/lib/xc_netbsd_build.c	Wed Mar 17 13:35:01 2004 +0000
     5.3 @@ -273,32 +273,32 @@ int xc_netbsd_build(int xc_handle,
     5.4  
     5.5      /*
     5.6       * Initial register values:
     5.7 -     *  DS,ES,FS,GS = FLAT_RING1_DS
     5.8 -     *       CS:EIP = FLAT_RING1_CS:start_pc
     5.9 -     *       SS:ESP = FLAT_RING1_DS:start_stack
    5.10 +     *  DS,ES,FS,GS = FLAT_GUESTOS_DS
    5.11 +     *       CS:EIP = FLAT_GUESTOS_CS:start_pc
    5.12 +     *       SS:ESP = FLAT_GUESTOS_DS:start_stack
    5.13       *          ESI = start_info
    5.14       *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
    5.15       *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
    5.16       */
    5.17 -    ctxt->i386_ctxt.ds = FLAT_RING1_DS;
    5.18 -    ctxt->i386_ctxt.es = FLAT_RING1_DS;
    5.19 -    ctxt->i386_ctxt.fs = FLAT_RING1_DS;
    5.20 -    ctxt->i386_ctxt.gs = FLAT_RING1_DS;
    5.21 -    ctxt->i386_ctxt.ss = FLAT_RING1_DS;
    5.22 -    ctxt->i386_ctxt.cs = FLAT_RING1_CS;
    5.23 -    ctxt->i386_ctxt.eip = load_addr;
    5.24 -    ctxt->i386_ctxt.esp = virt_startinfo_addr;
    5.25 -    ctxt->i386_ctxt.esi = virt_startinfo_addr;
    5.26 -    ctxt->i386_ctxt.eflags = (1<<9) | (1<<2);
    5.27 +    ctxt->cpu_ctxt.ds = FLAT_GUESTOS_DS;
    5.28 +    ctxt->cpu_ctxt.es = FLAT_GUESTOS_DS;
    5.29 +    ctxt->cpu_ctxt.fs = FLAT_GUESTOS_DS;
    5.30 +    ctxt->cpu_ctxt.gs = FLAT_GUESTOS_DS;
    5.31 +    ctxt->cpu_ctxt.ss = FLAT_GUESTOS_DS;
    5.32 +    ctxt->cpu_ctxt.cs = FLAT_GUESTOS_CS;
    5.33 +    ctxt->cpu_ctxt.eip = load_addr;
    5.34 +    ctxt->cpu_ctxt.esp = virt_startinfo_addr;
    5.35 +    ctxt->cpu_ctxt.esi = virt_startinfo_addr;
    5.36 +    ctxt->cpu_ctxt.eflags = (1<<9) | (1<<2);
    5.37  
    5.38      /* FPU is set up to default initial state. */
    5.39 -    memset(ctxt->i387_ctxt, 0, sizeof(ctxt->i387_ctxt));
    5.40 +    memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
    5.41  
    5.42      /* Virtual IDT is empty at start-of-day. */
    5.43      for ( i = 0; i < 256; i++ )
    5.44      {
    5.45          ctxt->trap_ctxt[i].vector = i;
    5.46 -        ctxt->trap_ctxt[i].cs     = FLAT_RING1_CS;
    5.47 +        ctxt->trap_ctxt[i].cs     = FLAT_GUESTOS_CS;
    5.48      }
    5.49      ctxt->fast_trap_idx = 0;
    5.50  
    5.51 @@ -309,16 +309,16 @@ int xc_netbsd_build(int xc_handle,
    5.52      ctxt->gdt_ents = 0;
    5.53  
    5.54      /* Ring 1 stack is the initial stack. */
    5.55 -    ctxt->ring1_ss  = FLAT_RING1_DS;
    5.56 -    ctxt->ring1_esp = virt_startinfo_addr;
    5.57 +    ctxt->guestos_ss  = FLAT_GUESTOS_DS;
    5.58 +    ctxt->guestos_esp = virt_startinfo_addr;
    5.59  
    5.60      /* No debugging. */
    5.61      memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
    5.62  
    5.63      /* No callback handlers. */
    5.64 -    ctxt->event_callback_cs     = FLAT_RING1_CS;
    5.65 +    ctxt->event_callback_cs     = FLAT_GUESTOS_CS;
    5.66      ctxt->event_callback_eip    = 0;
    5.67 -    ctxt->failsafe_callback_cs  = FLAT_RING1_CS;
    5.68 +    ctxt->failsafe_callback_cs  = FLAT_GUESTOS_CS;
    5.69      ctxt->failsafe_callback_eip = 0;
    5.70  
    5.71      launch_op.u.builddomain.domain   = (domid_t)domid;
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/xen/arch/i386/domain_page.c	Wed Mar 17 13:35:01 2004 +0000
     6.3 @@ -0,0 +1,79 @@
     6.4 +/******************************************************************************
     6.5 + * domain_page.h
     6.6 + * 
     6.7 + * Allow temporary mapping of domain pages. Based on ideas from the
     6.8 + * Linux PKMAP code -- the copyrights and credits are retained below.
     6.9 + */
    6.10 +
    6.11 +/*
    6.12 + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
    6.13 + *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de *
    6.14 + * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
    6.15 + */
    6.16 +
    6.17 +#include <xeno/config.h>
    6.18 +#include <xeno/sched.h>
    6.19 +#include <xeno/mm.h>
    6.20 +#include <xeno/perfc.h>
    6.21 +#include <asm/domain_page.h>
    6.22 +#include <asm/pgalloc.h>
    6.23 +
    6.24 +unsigned long *mapcache;
    6.25 +static unsigned int map_idx, shadow_map_idx[NR_CPUS];
    6.26 +static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
    6.27 +
    6.28 +/* Use a spare PTE bit to mark entries ready for recycling. */
    6.29 +#define READY_FOR_TLB_FLUSH (1<<10)
    6.30 +
    6.31 +static void flush_all_ready_maps(void)
    6.32 +{
    6.33 +    unsigned long *cache = mapcache;
    6.34 +
    6.35 +    /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
    6.36 +    do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
    6.37 +    while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
    6.38 +
    6.39 +    perfc_incrc(domain_page_tlb_flush);
    6.40 +    local_flush_tlb();
    6.41 +}
    6.42 +
    6.43 +
    6.44 +void *map_domain_mem(unsigned long pa)
    6.45 +{
    6.46 +    unsigned long va;
    6.47 +    unsigned int idx, cpu = smp_processor_id();
    6.48 +    unsigned long *cache = mapcache;
    6.49 +    unsigned long flags;
    6.50 +
    6.51 +    spin_lock_irqsave(&map_lock, flags);
    6.52 +
    6.53 +    /* Has some other CPU caused a wrap? We must flush if so. */
    6.54 +    if ( map_idx < shadow_map_idx[cpu] )
    6.55 +    {
    6.56 +        perfc_incrc(domain_page_tlb_flush);
    6.57 +        local_flush_tlb();
    6.58 +    }
    6.59 +
    6.60 +    for ( ; ; )
    6.61 +    {
    6.62 +        idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
    6.63 +        if ( idx == 0 ) flush_all_ready_maps();
    6.64 +        if ( cache[idx] == 0 ) break;
    6.65 +    }
    6.66 +
    6.67 +    cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
    6.68 +
    6.69 +    spin_unlock_irqrestore(&map_lock, flags);
    6.70 +
    6.71 +    shadow_map_idx[cpu] = idx;
    6.72 +
    6.73 +    va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
    6.74 +    return (void *)va;
    6.75 +}
    6.76 +
    6.77 +void unmap_domain_mem(void *va)
    6.78 +{
    6.79 +    unsigned int idx;
    6.80 +    idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
    6.81 +    mapcache[idx] |= READY_FOR_TLB_FLUSH;
    6.82 +}
     7.1 --- a/xen/arch/i386/mm.c	Tue Mar 16 12:37:49 2004 +0000
     7.2 +++ b/xen/arch/i386/mm.c	Wed Mar 17 13:35:01 2004 +0000
     7.3 @@ -131,8 +131,8 @@ long do_stack_switch(unsigned long ss, u
     7.4      if ( (ss & 3) == 0 )
     7.5          return -EPERM;
     7.6  
     7.7 -    current->thread.ss1  = ss;
     7.8 -    current->thread.esp1 = esp;
     7.9 +    current->thread.guestos_ss = ss;
    7.10 +    current->thread.guestos_sp = esp;
    7.11      t->ss1  = ss;
    7.12      t->esp1 = esp;
    7.13  
     8.1 --- a/xen/arch/i386/process.c	Tue Mar 16 12:37:49 2004 +0000
     8.2 +++ b/xen/arch/i386/process.c	Wed Mar 17 13:35:01 2004 +0000
     8.3 @@ -264,8 +264,8 @@ void switch_to(struct task_struct *prev_
     8.4          SET_FAST_TRAP(&next_p->thread);
     8.5  
     8.6          /* Switch the guest OS ring-1 stack. */
     8.7 -        tss->esp1 = next->esp1;
     8.8 -        tss->ss1  = next->ss1;
     8.9 +        tss->esp1 = next->guestos_sp;
    8.10 +        tss->ss1  = next->guestos_ss;
    8.11  
    8.12          /* Maybe switch the debug registers. */
    8.13          if ( next->debugreg[7] )
     9.1 --- a/xen/common/dom0_ops.c	Tue Mar 16 12:37:49 2004 +0000
     9.2 +++ b/xen/common/dom0_ops.c	Wed Mar 17 13:35:01 2004 +0000
     9.3 @@ -290,23 +290,25 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
     9.4              {
     9.5                  rmb(); /* Ensure that we see saved register state. */
     9.6                  op->u.getdomaininfo.ctxt.flags = 0;
     9.7 -                memcpy(&op->u.getdomaininfo.ctxt.i386_ctxt, 
     9.8 +                memcpy(&op->u.getdomaininfo.ctxt.cpu_ctxt, 
     9.9                         &p->shared_info->execution_context,
    9.10                         sizeof(p->shared_info->execution_context));
    9.11                  if ( test_bit(PF_DONEFPUINIT, &p->flags) )
    9.12                      op->u.getdomaininfo.ctxt.flags |= ECF_I387_VALID;
    9.13 -                memcpy(&op->u.getdomaininfo.ctxt.i387_ctxt,
    9.14 +                memcpy(&op->u.getdomaininfo.ctxt.fpu_ctxt,
    9.15                         &p->thread.i387,
    9.16                         sizeof(p->thread.i387));
    9.17                  memcpy(&op->u.getdomaininfo.ctxt.trap_ctxt,
    9.18                         p->thread.traps,
    9.19                         sizeof(p->thread.traps));
    9.20 +#ifdef ARCH_HAS_FAST_TRAP
    9.21                  if ( (p->thread.fast_trap_desc.a == 0) &&
    9.22                       (p->thread.fast_trap_desc.b == 0) )
    9.23                      op->u.getdomaininfo.ctxt.fast_trap_idx = 0;
    9.24                  else
    9.25                      op->u.getdomaininfo.ctxt.fast_trap_idx = 
    9.26                          p->thread.fast_trap_idx;
    9.27 +#endif
    9.28                  op->u.getdomaininfo.ctxt.ldt_base = p->mm.ldt_base;
    9.29                  op->u.getdomaininfo.ctxt.ldt_ents = p->mm.ldt_ents;
    9.30                  op->u.getdomaininfo.ctxt.gdt_ents = 0;
    9.31 @@ -318,8 +320,8 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    9.32                      op->u.getdomaininfo.ctxt.gdt_ents = 
    9.33                          (GET_GDT_ENTRIES(p) + 1) >> 3;
    9.34                  }
    9.35 -                op->u.getdomaininfo.ctxt.ring1_ss  = p->thread.ss1;
    9.36 -                op->u.getdomaininfo.ctxt.ring1_esp = p->thread.esp1;
    9.37 +                op->u.getdomaininfo.ctxt.guestos_ss  = p->thread.guestos_ss;
    9.38 +                op->u.getdomaininfo.ctxt.guestos_esp = p->thread.guestos_sp;
    9.39                  op->u.getdomaininfo.ctxt.pt_base   = 
    9.40                      pagetable_val(p->mm.pagetable);
    9.41                  memcpy(op->u.getdomaininfo.ctxt.debugreg, 
    9.42 @@ -371,6 +373,12 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    9.43                  case PGT_l2_page_table:
    9.44                      op->u.getpageframeinfo.type = L2TAB;
    9.45                      break;
    9.46 +                case PGT_l3_page_table:
    9.47 +                    op->u.getpageframeinfo.type = L3TAB;
    9.48 +                    break;
    9.49 +                case PGT_l4_page_table:
    9.50 +                    op->u.getpageframeinfo.type = L4TAB;
    9.51 +                    break;
    9.52                  }
    9.53              }
    9.54              
    10.1 --- a/xen/common/domain.c	Tue Mar 16 12:37:49 2004 +0000
    10.2 +++ b/xen/common/domain.c	Wed Mar 17 13:35:01 2004 +0000
    10.3 @@ -19,13 +19,16 @@
    10.4  #include <xeno/vbd.h>
    10.5  #include <asm/i387.h>
    10.6  
    10.7 -/*
    10.8 - * NB. No ring-3 access in initial guestOS pagetables. Note that we allow
    10.9 - * ring-3 privileges in the page directories, so that the guestOS may later
   10.10 - * decide to share a 4MB region with applications.
   10.11 - */
   10.12 +#if !defined(CONFIG_X86_64BITMODE)
   10.13 +/* No ring-3 access in initial page tables. */
   10.14  #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
   10.15 +#else
   10.16 +/* Allow ring-3 access in long mode as guest cannot use ring 1. */
   10.17 +#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
   10.18 +#endif
   10.19  #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
   10.20 +#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
   10.21 +#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
   10.22  
   10.23  /* Both these structures are protected by the tasklist_lock. */
   10.24  rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
   10.25 @@ -426,20 +429,20 @@ void free_all_dom_mem(struct task_struct
   10.26              put_page(page);
   10.27  
   10.28          /*
   10.29 -         * Forcibly invalidate L2 tables at this point to break circular
   10.30 +         * Forcibly invalidate base page tables at this point to break circular
   10.31           * 'linear page table' references. This is okay because MMU structures
   10.32 -         * are not shared across domains and this domain is now dead. Thus L2
   10.33 +         * are not shared across domains and this domain is now dead. Thus base
   10.34           * tables are not in use so a non-zero count means circular reference.
   10.35           */
   10.36          y = page->type_and_flags;
   10.37          do {
   10.38              x = y;
   10.39              if ( likely((x & (PGT_type_mask|PGT_validated)) != 
   10.40 -                        (PGT_l2_page_table|PGT_validated)) )
   10.41 +                        (PGT_base_page_table|PGT_validated)) )
   10.42                  break;
   10.43              y = cmpxchg(&page->type_and_flags, x, x & ~PGT_validated);
   10.44              if ( likely(y == x) )
   10.45 -                free_page_type(page, PGT_l2_page_table);
   10.46 +                free_page_type(page, PGT_base_page_table);
   10.47          }
   10.48          while ( unlikely(y != x) );
   10.49  
   10.50 @@ -504,7 +507,7 @@ void release_task(struct task_struct *p)
   10.51   */
   10.52  int final_setup_guestos(struct task_struct *p, dom0_builddomain_t *builddomain)
   10.53  {
   10.54 -    unsigned long phys_l2tab;
   10.55 +    unsigned long phys_basetab;
   10.56      int i;
   10.57  
   10.58      if ( test_bit(PF_CONSTRUCTED, &p->flags) )
   10.59 @@ -514,16 +517,18 @@ int final_setup_guestos(struct task_stru
   10.60      if ( builddomain->ctxt.flags & ECF_I387_VALID )
   10.61          set_bit(PF_DONEFPUINIT, &p->flags);
   10.62      memcpy(&p->shared_info->execution_context,
   10.63 -           &builddomain->ctxt.i386_ctxt,
   10.64 +           &builddomain->ctxt.cpu_ctxt,
   10.65             sizeof(p->shared_info->execution_context));
   10.66      memcpy(&p->thread.i387,
   10.67 -           &builddomain->ctxt.i387_ctxt,
   10.68 +           &builddomain->ctxt.fpu_ctxt,
   10.69             sizeof(p->thread.i387));
   10.70      memcpy(p->thread.traps,
   10.71             &builddomain->ctxt.trap_ctxt,
   10.72             sizeof(p->thread.traps));
   10.73 +#ifdef ARCH_HAS_FAST_TRAP
   10.74      SET_DEFAULT_FAST_TRAP(&p->thread);
   10.75      (void)set_fast_trap(p, builddomain->ctxt.fast_trap_idx);
   10.76 +#endif
   10.77      p->mm.ldt_base = builddomain->ctxt.ldt_base;
   10.78      p->mm.ldt_ents = builddomain->ctxt.ldt_ents;
   10.79      SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
   10.80 @@ -532,8 +537,8 @@ int final_setup_guestos(struct task_stru
   10.81          (void)set_gdt(p,
   10.82                        builddomain->ctxt.gdt_frames,
   10.83                        builddomain->ctxt.gdt_ents);
   10.84 -    p->thread.ss1  = builddomain->ctxt.ring1_ss;
   10.85 -    p->thread.esp1 = builddomain->ctxt.ring1_esp;
   10.86 +    p->thread.guestos_ss = builddomain->ctxt.guestos_ss;
   10.87 +    p->thread.guestos_sp = builddomain->ctxt.guestos_esp;
   10.88      for ( i = 0; i < 8; i++ )
   10.89          (void)set_debugreg(p, i, builddomain->ctxt.debugreg[i]);
   10.90      p->event_selector    = builddomain->ctxt.event_callback_cs;
   10.91 @@ -541,10 +546,10 @@ int final_setup_guestos(struct task_stru
   10.92      p->failsafe_selector = builddomain->ctxt.failsafe_callback_cs;
   10.93      p->failsafe_address  = builddomain->ctxt.failsafe_callback_eip;
   10.94      
   10.95 -    phys_l2tab = builddomain->ctxt.pt_base;
   10.96 -    p->mm.pagetable = mk_pagetable(phys_l2tab);
   10.97 -    get_page_and_type(&frame_table[phys_l2tab>>PAGE_SHIFT], p, 
   10.98 -                      PGT_l2_page_table);
   10.99 +    phys_basetab = builddomain->ctxt.pt_base;
  10.100 +    p->mm.pagetable = mk_pagetable(phys_basetab);
  10.101 +    get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], p, 
  10.102 +                      PGT_base_page_table);
  10.103  
  10.104      /* Set up the shared info structure. */
  10.105      update_dom_time(p->shared_info);
  10.106 @@ -620,6 +625,7 @@ int setup_guestos(struct task_struct *p,
  10.107      if ( strncmp(data_start, "XenoGues", 8) )
  10.108      {
  10.109          printk("DOM%llu: Invalid guest OS image\n", dom);
  10.110 +        unmap_domain_mem(data_start);
  10.111          return -1;
  10.112      }
  10.113  
  10.114 @@ -628,12 +634,14 @@ int setup_guestos(struct task_struct *p,
  10.115      {
  10.116          printk("DOM%llu: Guest OS load address not page-aligned (%08lx)\n",
  10.117                 dom, virt_load_address);
  10.118 +        unmap_domain_mem(data_start);
  10.119          return -1;
  10.120      }
  10.121  
  10.122      if ( alloc_new_dom_mem(p, params->memory_kb) )
  10.123      {
  10.124          printk("DOM%llu: Not enough memory --- reduce dom0_mem ??\n", dom);
  10.125 +        unmap_domain_mem(data_start);
  10.126          return -ENOMEM;
  10.127      }
  10.128  
  10.129 @@ -650,6 +658,7 @@ int setup_guestos(struct task_struct *p,
  10.130                 dom, data_len>>20,
  10.131                 (params->memory_kb)>>11,
  10.132                 (params->memory_kb)>>10);
  10.133 +        unmap_domain_mem(data_start);
  10.134          free_all_dom_mem(p);
  10.135          return -1;
  10.136      }
  10.137 @@ -664,11 +673,11 @@ int setup_guestos(struct task_struct *p,
  10.138       * We're basically forcing default RPLs to 1, so that our "what privilege
  10.139       * level are we returning to?" logic works.
  10.140       */
  10.141 -    p->failsafe_selector = FLAT_RING1_CS;
  10.142 -    p->event_selector    = FLAT_RING1_CS;
  10.143 -    p->thread.ss1        = FLAT_RING1_DS;
  10.144 +    p->failsafe_selector = FLAT_GUESTOS_CS;
  10.145 +    p->event_selector    = FLAT_GUESTOS_CS;
  10.146 +    p->thread.guestos_ss = FLAT_GUESTOS_DS;
  10.147      for ( i = 0; i < 256; i++ ) 
  10.148 -        p->thread.traps[i].cs = FLAT_RING1_CS;
  10.149 +        p->thread.traps[i].cs = FLAT_GUESTOS_CS;
  10.150  
  10.151      /*
  10.152       * WARNING: The new domain must have its 'processor' field
  10.153 @@ -770,11 +779,11 @@ int setup_guestos(struct task_struct *p,
  10.154  	src++;
  10.155  	if ( (((unsigned long)src) & (PAGE_SIZE-1)) == 0 )
  10.156          {
  10.157 -	    unmap_domain_mem( vsrc-1 );
  10.158 -	    vsrc = map_domain_mem( (unsigned long)src );
  10.159 +	    unmap_domain_mem(vsrc-1);
  10.160 +	    vsrc = map_domain_mem((unsigned long)src);
  10.161          }
  10.162      }
  10.163 -    unmap_domain_mem( vsrc );
  10.164 +    unmap_domain_mem(vsrc);
  10.165      
  10.166      /* Set up start info area. */
  10.167      memset(virt_startinfo_address, 0, sizeof(*virt_startinfo_address));
    11.1 --- a/xen/common/domain_page.c	Tue Mar 16 12:37:49 2004 +0000
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,79 +0,0 @@
    11.4 -/******************************************************************************
    11.5 - * domain_page.h
    11.6 - * 
    11.7 - * Allow temporary mapping of domain pages. Based on ideas from the
    11.8 - * Linux PKMAP code -- the copyrights and credits are retained below.
    11.9 - */
   11.10 -
   11.11 -/*
   11.12 - * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
   11.13 - *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de *
   11.14 - * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
   11.15 - */
   11.16 -
   11.17 -#include <xeno/config.h>
   11.18 -#include <xeno/sched.h>
   11.19 -#include <xeno/mm.h>
   11.20 -#include <xeno/perfc.h>
   11.21 -#include <asm/domain_page.h>
   11.22 -#include <asm/pgalloc.h>
   11.23 -
   11.24 -unsigned long *mapcache;
   11.25 -static unsigned int map_idx, shadow_map_idx[NR_CPUS];
   11.26 -static spinlock_t map_lock = SPIN_LOCK_UNLOCKED;
   11.27 -
   11.28 -/* Use a spare PTE bit to mark entries ready for recycling. */
   11.29 -#define READY_FOR_TLB_FLUSH (1<<10)
   11.30 -
   11.31 -static void flush_all_ready_maps(void)
   11.32 -{
   11.33 -    unsigned long *cache = mapcache;
   11.34 -
   11.35 -    /* A bit skanky -- depends on having an aligned PAGE_SIZE set of PTEs. */
   11.36 -    do { if ( (*cache & READY_FOR_TLB_FLUSH) ) *cache = 0; }
   11.37 -    while ( ((unsigned long)(++cache) & ~PAGE_MASK) != 0 );
   11.38 -
   11.39 -    perfc_incrc(domain_page_tlb_flush);
   11.40 -    local_flush_tlb();
   11.41 -}
   11.42 -
   11.43 -
   11.44 -void *map_domain_mem(unsigned long pa)
   11.45 -{
   11.46 -    unsigned long va;
   11.47 -    unsigned int idx, cpu = smp_processor_id();
   11.48 -    unsigned long *cache = mapcache;
   11.49 -    unsigned long flags;
   11.50 -
   11.51 -    spin_lock_irqsave(&map_lock, flags);
   11.52 -
   11.53 -    /* Has some other CPU caused a wrap? We must flush if so. */
   11.54 -    if ( map_idx < shadow_map_idx[cpu] )
   11.55 -    {
   11.56 -        perfc_incrc(domain_page_tlb_flush);
   11.57 -        local_flush_tlb();
   11.58 -    }
   11.59 -
   11.60 -    for ( ; ; )
   11.61 -    {
   11.62 -        idx = map_idx = (map_idx + 1) & (MAPCACHE_ENTRIES - 1);
   11.63 -        if ( idx == 0 ) flush_all_ready_maps();
   11.64 -        if ( cache[idx] == 0 ) break;
   11.65 -    }
   11.66 -
   11.67 -    cache[idx] = (pa & PAGE_MASK) | __PAGE_HYPERVISOR;
   11.68 -
   11.69 -    spin_unlock_irqrestore(&map_lock, flags);
   11.70 -
   11.71 -    shadow_map_idx[cpu] = idx;
   11.72 -
   11.73 -    va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT) + (pa & ~PAGE_MASK);
   11.74 -    return (void *)va;
   11.75 -}
   11.76 -
   11.77 -void unmap_domain_mem(void *va)
   11.78 -{
   11.79 -    unsigned int idx;
   11.80 -    idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
   11.81 -    mapcache[idx] |= READY_FOR_TLB_FLUSH;
   11.82 -}
    12.1 --- a/xen/common/kernel.c	Tue Mar 16 12:37:49 2004 +0000
    12.2 +++ b/xen/common/kernel.c	Wed Mar 17 13:35:01 2004 +0000
    12.3 @@ -190,16 +190,22 @@ void cmain(unsigned long magic, multiboo
    12.4          for ( ; ; ) ;
    12.5      }
    12.6  
    12.7 +    max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
    12.8 +
    12.9      /* The array of pfn_info structures must fit into the reserved area. */
   12.10 -    if ( sizeof(struct pfn_info) > 24 )
   12.11 +    if ( (sizeof(struct pfn_info) * max_page) > 
   12.12 +         (FRAMETABLE_VIRT_END - FRAMETABLE_VIRT_START) )
   12.13      {
   12.14 -        printk("'struct pfn_info' too large to fit in Xen address space!\n");
   12.15 -        for ( ; ; ) ;
   12.16 +        unsigned long new_max = 
   12.17 +            (FRAMETABLE_VIRT_END - FRAMETABLE_VIRT_START) /
   12.18 +            sizeof(struct pfn_info);
   12.19 +	printk("Truncating available memory to %lu/%luMB\n", 
   12.20 +               new_max >> (20 - PAGE_SHIFT), max_page >> (20 - PAGE_SHIFT));
   12.21 +	max_page = new_max;
   12.22      }
   12.23  
   12.24      set_current(&idle0_task);
   12.25  
   12.26 -    max_page = (mbi->mem_upper+1024) >> (PAGE_SHIFT - 10);
   12.27      init_frametable(max_page);
   12.28      printk("Initialised all memory on a %luMB machine\n",
   12.29             max_page >> (20-PAGE_SHIFT));
    13.1 --- a/xen/common/lib.c	Tue Mar 16 12:37:49 2004 +0000
    13.2 +++ b/xen/common/lib.c	Wed Mar 17 13:35:01 2004 +0000
    13.3 @@ -2,125 +2,6 @@
    13.4  #include <xeno/ctype.h>
    13.5  #include <xeno/lib.h>
    13.6  
    13.7 -#if 0 // jws - now in string.c, string.h, asm/string.h 
    13.8 -int memcmp(const void * cs,const void * ct,size_t count)
    13.9 -{
   13.10 -	const unsigned char *su1, *su2;
   13.11 -	signed char res = 0;
   13.12 -
   13.13 -	for( su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
   13.14 -		if ((res = *su1 - *su2) != 0)
   13.15 -			break;
   13.16 -	return res;
   13.17 -}
   13.18 -
   13.19 -void * memcpy(void * dest,const void *src,size_t count)
   13.20 -{
   13.21 -	char *tmp = (char *) dest, *s = (char *) src;
   13.22 -
   13.23 -	while (count--)
   13.24 -		*tmp++ = *s++;
   13.25 -
   13.26 -	return dest;
   13.27 -}
   13.28 -
   13.29 -int strncmp(const char * cs,const char * ct,size_t count)
   13.30 -{
   13.31 -	register signed char __res = 0;
   13.32 -
   13.33 -	while (count) {
   13.34 -		if ((__res = *cs - *ct++) != 0 || !*cs++)
   13.35 -			break;
   13.36 -		count--;
   13.37 -	}
   13.38 -
   13.39 -	return __res;
   13.40 -}
   13.41 -
   13.42 -int strcmp(const char * cs,const char * ct)
   13.43 -{
   13.44 -        register signed char __res;
   13.45 -
   13.46 -        while (1) {
   13.47 -                if ((__res = *cs - *ct++) != 0 || !*cs++)
   13.48 -                        break;
   13.49 -        }
   13.50 -
   13.51 -        return __res;
   13.52 -}
   13.53 -
   13.54 -char * strcpy(char * dest,const char *src)
   13.55 -{
   13.56 -        char *tmp = dest;
   13.57 -
   13.58 -        while ((*dest++ = *src++) != '\0')
   13.59 -                /* nothing */;
   13.60 -        return tmp;
   13.61 -}
   13.62 -
   13.63 -char * strncpy(char * dest,const char *src,size_t count)
   13.64 -{
   13.65 -        char *tmp = dest;
   13.66 -
   13.67 -        while (count-- && (*dest++ = *src++) != '\0')
   13.68 -                /* nothing */;
   13.69 -
   13.70 -        return tmp;
   13.71 -}
   13.72 -
   13.73 -void * memset(void * s,int c,size_t count)
   13.74 -{
   13.75 -        char *xs = (char *) s;
   13.76 -
   13.77 -        while (count--)
   13.78 -                *xs++ = c;
   13.79 -
   13.80 -        return s;
   13.81 -}
   13.82 -
   13.83 -size_t strnlen(const char * s, size_t count)
   13.84 -{
   13.85 -        const char *sc;
   13.86 -
   13.87 -        for (sc = s; count-- && *sc != '\0'; ++sc)
   13.88 -                /* nothing */;
   13.89 -        return sc - s;
   13.90 -}
   13.91 -
   13.92 -size_t strlen(const char * s)
   13.93 -{
   13.94 -	const char *sc;
   13.95 -
   13.96 -	for (sc = s; *sc != '\0'; ++sc)
   13.97 -		/* nothing */;
   13.98 -	return sc - s;
   13.99 -}
  13.100 -
  13.101 -char * strchr(const char * s, int c)
  13.102 -{
  13.103 -        for(; *s != (char) c; ++s)
  13.104 -                if (*s == '\0')
  13.105 -                        return NULL;
  13.106 -        return (char *) s;
  13.107 -}
  13.108 -
  13.109 -char * strstr(const char * s1,const char * s2)
  13.110 -{
  13.111 -        int l1, l2;
  13.112 -
  13.113 -        l2 = strlen(s2);
  13.114 -        if (!l2)
  13.115 -                return (char *) s1;
  13.116 -        l1 = strlen(s1);
  13.117 -        while (l1 >= l2) {
  13.118 -                l1--;
  13.119 -                if (!memcmp(s1,s2,l2))
  13.120 -                        return (char *) s1;
  13.121 -                s1++;
  13.122 -        }
  13.123 -        return NULL;
  13.124 -}
  13.125 -#endif
  13.126  
  13.127  /* for inc/ctype.h */
  13.128  unsigned char _ctype[] = {
  13.129 @@ -213,6 +94,7 @@ unsigned char *quad_to_str(unsigned long
  13.130  
  13.131  #include <asm/types.h>
  13.132  
  13.133 +#if BITS_PER_LONG == 32
  13.134  
  13.135  /*
  13.136   * Depending on the desired operation, we view a `long long' (aka quad_t) in
  13.137 @@ -526,7 +408,7 @@ u64
  13.138          return (__qdivrem(a, b, (u64 *)0));
  13.139  }
  13.140  
  13.141 -
  13.142 +#endif /* BITS_PER_LONG == 32 */
  13.143  
  13.144  
  13.145  /* HASH/RANDOMISATION FUNCTION
  13.146 @@ -535,8 +417,6 @@ u64
  13.147   * See http://burlteburtle.net/bob/hash/evahash.html 
  13.148   */
  13.149  
  13.150 -typedef unsigned long ub4;
  13.151 -
  13.152  #define mix(a,b,c)                                      \
  13.153      do {                                                \
  13.154          a -= b; a -= c; a ^= (c>>13);                   \
  13.155 @@ -550,9 +430,9 @@ typedef unsigned long ub4;
  13.156          c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
  13.157      } while ( 0 )
  13.158  
  13.159 -unsigned long hash(unsigned char *k, unsigned long len)
  13.160 +u32 hash(unsigned char *k, unsigned long len)
  13.161  {
  13.162 -    unsigned long a, b, c, l;
  13.163 +    u32 a, b, c, l;
  13.164  
  13.165      l = len;
  13.166      a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
  13.167 @@ -560,9 +440,9 @@ unsigned long hash(unsigned char *k, uns
  13.168  
  13.169      while ( l >= 12 )
  13.170      {
  13.171 -        a += (k[0] + ((ub4)k[1]<<8) + ((ub4)k[2]<<16)  + ((ub4)k[3]<<24));
  13.172 -        b += (k[4] + ((ub4)k[5]<<8) + ((ub4)k[6]<<16)  + ((ub4)k[7]<<24));
  13.173 -        c += (k[8] + ((ub4)k[9]<<8) + ((ub4)k[10]<<16) + ((ub4)k[11]<<24));
  13.174 +        a += (k[0] + ((u32)k[1]<<8) + ((u32)k[2]<<16)  + ((u32)k[3]<<24));
  13.175 +        b += (k[4] + ((u32)k[5]<<8) + ((u32)k[6]<<16)  + ((u32)k[7]<<24));
  13.176 +        c += (k[8] + ((u32)k[9]<<8) + ((u32)k[10]<<16) + ((u32)k[11]<<24));
  13.177          mix(a,b,c);
  13.178          k += 12; l -= 12;
  13.179      }
  13.180 @@ -570,17 +450,17 @@ unsigned long hash(unsigned char *k, uns
  13.181      c += len;
  13.182      switch ( l )
  13.183      {
  13.184 -    case 11: c+=((ub4)k[10]<<24);
  13.185 -    case 10: c+=((ub4)k[9]<<16);
  13.186 -    case 9 : c+=((ub4)k[8]<<8);
  13.187 +    case 11: c+=((u32)k[10]<<24);
  13.188 +    case 10: c+=((u32)k[9]<<16);
  13.189 +    case 9 : c+=((u32)k[8]<<8);
  13.190          /* the first byte of c is reserved for the length */
  13.191 -    case 8 : b+=((ub4)k[7]<<24);
  13.192 -    case 7 : b+=((ub4)k[6]<<16);
  13.193 -    case 6 : b+=((ub4)k[5]<<8);
  13.194 +    case 8 : b+=((u32)k[7]<<24);
  13.195 +    case 7 : b+=((u32)k[6]<<16);
  13.196 +    case 6 : b+=((u32)k[5]<<8);
  13.197      case 5 : b+=k[4];
  13.198 -    case 4 : a+=((ub4)k[3]<<24);
  13.199 -    case 3 : a+=((ub4)k[2]<<16);
  13.200 -    case 2 : a+=((ub4)k[1]<<8);
  13.201 +    case 4 : a+=((u32)k[3]<<24);
  13.202 +    case 3 : a+=((u32)k[2]<<16);
  13.203 +    case 2 : a+=((u32)k[1]<<8);
  13.204      case 1 : a+=k[0];
  13.205          /* case 0: nothing left to add */
  13.206      }
    14.1 --- a/xen/include/asm-i386/config.h	Tue Mar 16 12:37:49 2004 +0000
    14.2 +++ b/xen/include/asm-i386/config.h	Wed Mar 17 13:35:01 2004 +0000
    14.3 @@ -118,6 +118,8 @@
    14.4    SYMBOL_NAME_LABEL(name)
    14.5  #endif
    14.6  
    14.7 +#define PGT_base_page_table PGT_l2_page_table
    14.8 +
    14.9  #define barrier() __asm__ __volatile__("": : :"memory")
   14.10  
   14.11  #define __HYPERVISOR_CS 0x0808
    15.1 --- a/xen/include/asm-i386/processor.h	Tue Mar 16 12:37:49 2004 +0000
    15.2 +++ b/xen/include/asm-i386/processor.h	Wed Mar 17 13:35:01 2004 +0000
    15.3 @@ -354,7 +354,7 @@ struct tss_struct {
    15.4  };
    15.5  
    15.6  struct thread_struct {
    15.7 -    unsigned long esp1, ss1;
    15.8 +    unsigned long guestos_sp, guestos_ss;
    15.9  /* Hardware debugging registers */
   15.10      unsigned long	debugreg[8];  /* %%db0-7 debug registers */
   15.11  /* floating point info */
    16.1 --- a/xen/include/asm-i386/types.h	Tue Mar 16 12:37:49 2004 +0000
    16.2 +++ b/xen/include/asm-i386/types.h	Wed Mar 17 13:35:01 2004 +0000
    16.3 @@ -3,6 +3,8 @@
    16.4  
    16.5  typedef unsigned short umode_t;
    16.6  
    16.7 +typedef unsigned int size_t;
    16.8 +
    16.9  /*
   16.10   * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
   16.11   * header files exported to user space
    17.1 --- a/xen/include/asm-x86_64/atomic.h	Tue Mar 16 12:37:49 2004 +0000
    17.2 +++ b/xen/include/asm-x86_64/atomic.h	Wed Mar 17 13:35:01 2004 +0000
    17.3 @@ -1,5 +1,5 @@
    17.4 -#ifndef __ARCH_I386_ATOMIC__
    17.5 -#define __ARCH_I386_ATOMIC__
    17.6 +#ifndef __ARCH_X86_64_ATOMIC__
    17.7 +#define __ARCH_X86_64_ATOMIC__
    17.8  
    17.9  #include <xeno/config.h>
   17.10  
   17.11 @@ -202,4 +202,4 @@ static __inline__ int atomic_add_negativ
   17.12  #define smp_mb__before_atomic_inc()	barrier()
   17.13  #define smp_mb__after_atomic_inc()	barrier()
   17.14  
   17.15 -#endif
   17.16 +#endif /* __ARCH_X86_64_ATOMIC__ */
    18.1 --- a/xen/include/asm-x86_64/config.h	Tue Mar 16 12:37:49 2004 +0000
    18.2 +++ b/xen/include/asm-x86_64/config.h	Wed Mar 17 13:35:01 2004 +0000
    18.3 @@ -2,12 +2,14 @@
    18.4   * config.h
    18.5   * 
    18.6   * A Linux-style configuration list.
    18.7 + * 
    18.8   */
    18.9  
   18.10  #ifndef __XENO_X86_64_CONFIG_H__
   18.11  #define __XENO_X86_64_CONFIG_H__
   18.12  
   18.13 -#define CONFIG_X86 1
   18.14 +#define CONFIG_X86    1
   18.15 +#define CONFIG_X86_64BITMODE 1
   18.16  
   18.17  #define CONFIG_SMP 1
   18.18  #define CONFIG_X86_LOCAL_APIC 1
   18.19 @@ -53,63 +55,200 @@
   18.20  #define __cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES)))
   18.21  #define ____cacheline_aligned __cacheline_aligned
   18.22  
   18.23 -/*
   18.24 - * Virtual addresses beyond this are not modifiable by guest OSes. The
   18.25 - * machine->physical mapping table starts at this address, read-only.
   18.26 - */
   18.27 -#define HYPERVISOR_VIRT_START (0xFFFF800000000000ULL)
   18.28 -                                                                                                
   18.29 +#define PHYSICAL_ADDRESS_BITS 52
    18.30 +#define MAX_PHYSICAL_ADDRESS (1ULL << PHYSICAL_ADDRESS_BITS)
   18.31 +#define VIRTUAL_ADDRESS_BITS 48
   18.32 +#define XEN_PAGE_SIZE 4096
   18.33 +
   18.34 +#define PTE_SIZE 8
   18.35 +#define TOTAL_PTES (512ULL * 512 * 512 * 512)
   18.36 +
   18.37 +/* next PML4 from an _END address */
   18.38 +#define PML4_BITS 39
   18.39 +#define PML4_SPACE (1ULL << PML4_BITS)
   18.40 +
   18.41  /*
   18.42 - * Xen exists in the highest 2GB of address space for RIP-relative
   18.43 - * addressing
   18.44 + * Memory layout
   18.45 + *
   18.46 + *   0x0000000000000000 - 0x00007fffffffffff Guest & user apps (128TB)
   18.47 + *    (Only for 32-bit guests)
   18.48 + *    0x00000000fc000000 - 0x00000000fc3fffff Machine/Physical 32-bit shadow (4MB)
   18.49 + *    0x00000000fc400000 - 0x00000000feffffff IO remap for 32-bit guests (44MB)
   18.50 + *    0x00000000ff000000 - 0x00000000ff3fffff 32-bit PTE shadow (4MB)
   18.51 + *
   18.52 + *   0xffff800000000000	- 0xffff807fffffffff Linear page table (512GB)
   18.53 + *   0xffff808000000000 - 0xffff80ffffffffff Reserved for shadow page table (512GB)
   18.54 + *
   18.55 + *   0xffff810000000000 - 0xffff82ffffffffff Xen PML4 slots 
   18.56 + *    0xffff810000000000 - 0xffff81003fffffff Xen hypervisor virtual space (1GB)
    18.57 + *    0xffff810040000000 - 0xffff81007fffffff Per-domain mappings (1GB)
   18.58 + *    0xffff810080000000 - 0xffff81387fffffff R/O physical map (224GB)
   18.59 + *    0xffff813880000000 - 0xffff81707fffffff R/W physical map (224GB)
   18.60 + *    0xffff817080000000 - 0xffff82c07fffffff Frame table (1344GB) 
   18.61 + *    0xffff82c080000000 - 0xffff82c0bfffffff I/O remap space (1GB)
   18.62 + *    0xffff82c0c0000000 - 0xffff82ffffffffff (253GB)
   18.63 + *
   18.64 + *   0xffff830000000000 - 0xffff87ffffffffff RESERVED (5TB)
   18.65 + *
   18.66 + *   0xffff880000000000 - ...                Physical 1:1 direct mapping (112TB max)
   18.67 + *    0xffff880000000000 - 0xffff880001000000 Low memory DMA region (16M)
   18.68 + *
   18.69 + *   0xfffff80000000000 - 0xffffffffffffffff Reserved for guest (8TB)
   18.70 + * 
   18.71 + * The requirement that we have a 1:1 map of physical memory limits
   18.72 + * the maximum memory size we can support.  With only 48 virtual address
   18.73 + * bits, and the assumption that guests will run users in positive address
   18.74 + * space, a contiguous 1:1 map can only live in the negative address space.
   18.75 + * Since we don't want to bump guests out of the very top of memory and
   18.76 + * force relocation, we can't use this entire space, and Xen has several
    18.77 + * heavy mappings that require PML4 slices.  Just to be safe, we reserve
   18.78 + * 16 PML4s each for Xen and the guest.  224 PML4s give us 112 terabytes
   18.79 + * of addressable memory.  Any high device physical addresses beyond this
   18.80 + * region can be mapped into the IO remap space or some of the reserved 
   18.81 + * 6TB region.
   18.82 + * 
   18.83 + * 112 TB is just 16 TB shy of the maximum physical memory supported
   18.84 + * on Linux 2.6.0, and should be enough for anybody.
   18.85 + *
   18.86 + * There are some additional constraints in the memory layout that require
   18.87 + * several changes from the i386 architecture.
   18.88 + *
   18.89 + * ACPI data and ACPI non-volatile storage must be placed in some region
   18.90 + * of memory below the 4GB mark.  Depending on the BIOS and system, we
   18.91 + * may have this located as low as 1GB.  This means allocating large
   18.92 + * chunks of physically contiguous memory from the direct mapping may not
   18.93 + * be possible. 
   18.94 + *
   18.95 + * The full frame table for 112TB of physical memory currently occupies
   18.96 + * 1344GB space.  This clearly can not be allocated in physically contiguous
   18.97 + * space, so it must be moved to a virtual address.
   18.98 + *
   18.99 + * Both copies of the machine->physical table must also be relocated.  
  18.100 + * (112 TB / 4k) * 8 bytes means that each copy of the physical map requires
  18.101 + * 224GB of space, thus it also must move to VM space.
  18.102 + *
  18.103 + * The physical pages used to allocate the page tables for the direct 1:1
  18.104 + * map may occupy (112TB / 2M) * 8 bytes = 448MB.  This is almost guaranteed
  18.105 + * to fit in contiguous physical memory, but these pages used to be allocated
  18.106 + * in the Xen monitor address space.  This means the Xen address space must
   18.107 + * accommodate up to ~500 MB, which means it also must move out of the
  18.108 + * direct mapped region. 
  18.109 + *
  18.110 + * Since both copies of the MPT, the frame table, and Xen now exist in
  18.111 + * purely virtual space, we have the added advantage of being able to
  18.112 + * map them to local pages on NUMA machines, or use NUMA aware memory
  18.113 + * allocation within Xen itself.
  18.114 + *
  18.115 + * Additionally, the 1:1 page table now exists contiguously in virtual
  18.116 + * space, but may be mapped to physically separated pages, allowing
  18.117 + * each node to contain the page tables for its own local memory.  Setting
  18.118 + * up this mapping presents a bit of a chicken-egg problem, but is possible
  18.119 + * as a future enhancement. 
  18.120 + *
  18.121 + * Zachary Amsden (zamsden@cisco.com)
  18.122 + *
  18.123   */
  18.124 -#define XEN_VIRT_START        (0xFFFFFFFF80000000ULL)
  18.125 -                                                                                                
  18.126 +
  18.127 +/* Guest and user space */
  18.128 +#define NSPACE_VIRT_START	0
  18.129 +#define NSPACE_VIRT_END		(1ULL << (VIRTUAL_ADDRESS_BITS - 1))
  18.130 +
   18.131 +/* Privileged space */
  18.132 +#define ESPACE_VIRT_END		0
  18.133 +#define ESPACE_VIRT_START	(ESPACE_VIRT_END-(1ULL << (VIRTUAL_ADDRESS_BITS-1)))
  18.134 +
  18.135 +/* reservations in e-space */
  18.136 +#define GUEST_RESERVED_PML4S 16
  18.137 +#define XEN_RESERVED_PML4S 16
  18.138 +
  18.139 +#define MAX_MEMORY_SIZE ((1ULL << (VIRTUAL_ADDRESS_BITS-1)) \
  18.140 +			-((GUEST_RESERVED_PML4S + XEN_RESERVED_PML4S) * PML4_SPACE))
  18.141 +#define MAX_MEMORY_FRAMES (MAX_MEMORY_SIZE / XEN_PAGE_SIZE)
  18.142 +
  18.143  /*
  18.144 - * First 4MB are mapped read-only for all. It's for the machine->physical
  18.145 + * Virtual addresses beyond this are not modifiable by guest OSes. 
  18.146 + */
  18.147 +#define HYPERVISOR_VIRT_START ESPACE_VIRT_START
  18.148 +#define HYPERVISOR_VIRT_END   (ESPACE_VIRT_END-(GUEST_RESERVED_PML4S * PML4_SPACE))
  18.149 +
  18.150 +/* First 512GB of virtual address space is used as a linear p.t. mapping. */
  18.151 +#define LINEAR_PT_VIRT_START  (HYPERVISOR_VIRT_START)
  18.152 +#define LINEAR_PT_VIRT_END    (LINEAR_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES))
  18.153 +
  18.154 +/* Reserve some space for a shadow PT mapping */
  18.155 +#define SHADOW_PT_VIRT_START  (LINEAR_PT_VIRT_END)
  18.156 +#define SHADOW_PT_VIRT_END    (SHADOW_PT_VIRT_START + (PTE_SIZE * TOTAL_PTES))
  18.157 +
  18.158 +/* Xen exists in the first 1GB of the next PML4 space */
  18.159 +#define MAX_MONITOR_ADDRESS   (1 * 1024 * 1024 * 1024)
  18.160 +#define MONITOR_VIRT_START    (SHADOW_PT_VIRT_END)
  18.161 +#define MONITOR_VIRT_END      (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS)
  18.162 +
  18.163 +/* Next 1GB of virtual address space used for per-domain mappings (eg. GDT). */
  18.164 +#define PERDOMAIN_VIRT_START  (MONITOR_VIRT_END)
  18.165 +#define PERDOMAIN_VIRT_END    (PERDOMAIN_VIRT_START + (512 * 512 * 4096))
  18.166 +#define GDT_VIRT_START        (PERDOMAIN_VIRT_START)
  18.167 +#define GDT_VIRT_END          (GDT_VIRT_START + (128*1024))
  18.168 +#define LDT_VIRT_START        (GDT_VIRT_END)
  18.169 +#define LDT_VIRT_END          (LDT_VIRT_START + (128*1024))
  18.170 +
  18.171 +/*
  18.172 + * First set of MPTs are mapped read-only for all. It's for the machine->physical
  18.173   * mapping table (MPT table). The following are virtual addresses.
  18.174   */
  18.175 -#define READONLY_MPT_VIRT_START (HYPERVISOR_VIRT_START)
  18.176 -#define READONLY_MPT_VIRT_END   (READONLY_MPT_VIRT_START + (4*1024*1024))
  18.177 -/*
  18.178 - * Next 16MB is fixed monitor space, which is part of a 44MB direct-mapped
  18.179 - * memory region. The following are machine addresses.
  18.180 - */
  18.181 -#define MAX_MONITOR_ADDRESS   (16*1024*1024)
  18.182 -#define MAX_DMA_ADDRESS       (16*1024*1024)
  18.183 -#define MAX_DIRECTMAP_ADDRESS (44*1024*1024)
  18.184 -/* And the virtual addresses for the direct-map region... */
  18.185 -#define DIRECTMAP_VIRT_START  (READONLY_MPT_VIRT_END)
  18.186 -#define DIRECTMAP_VIRT_END    (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
  18.187 -#define MONITOR_VIRT_START    (DIRECTMAP_VIRT_START)
  18.188 -#define MONITOR_VIRT_END      (MONITOR_VIRT_START + MAX_MONITOR_ADDRESS)
  18.189 -#define RDWR_MPT_VIRT_START   (MONITOR_VIRT_END)
  18.190 -#define RDWR_MPT_VIRT_END     (RDWR_MPT_VIRT_START + (4*1024*1024))
  18.191 +#define READONLY_MPT_VIRT_START (PERDOMAIN_VIRT_END)
  18.192 +#define READONLY_MPT_VIRT_END   (READONLY_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES))
  18.193 +
  18.194 +/* R/W machine->physical table */
  18.195 +#define RDWR_MPT_VIRT_START   (READONLY_MPT_VIRT_END)
  18.196 +#define RDWR_MPT_VIRT_END     (RDWR_MPT_VIRT_START + (PTE_SIZE * MAX_MEMORY_FRAMES))
  18.197 +
  18.198 +/* Frame table */
  18.199 +#define FRAMETABLE_ENTRY_SIZE	(48)
  18.200  #define FRAMETABLE_VIRT_START (RDWR_MPT_VIRT_END)
  18.201 -#define FRAMETABLE_VIRT_END   (DIRECTMAP_VIRT_END)
  18.202 -/* Next 4MB of virtual address space is used as a linear p.t. mapping. */
  18.203 -#define LINEAR_PT_VIRT_START  (DIRECTMAP_VIRT_END)
  18.204 -#define LINEAR_PT_VIRT_END    (LINEAR_PT_VIRT_START + (4*1024*1024))
  18.205 -/* Next 4MB of virtual address space used for per-domain mappings (eg. GDT). */
  18.206 -#define PERDOMAIN_VIRT_START  (LINEAR_PT_VIRT_END)
  18.207 -#define PERDOMAIN_VIRT_END    (PERDOMAIN_VIRT_START + (4*1024*1024))
  18.208 -#define GDT_VIRT_START        (PERDOMAIN_VIRT_START)
  18.209 -#define GDT_VIRT_END          (GDT_VIRT_START + (64*1024))
  18.210 -#define LDT_VIRT_START        (GDT_VIRT_END)
  18.211 -#define LDT_VIRT_END          (LDT_VIRT_START + (64*1024))
  18.212 -/* Penultimate 4MB of virtual address space used for domain page mappings. */
  18.213 -#define MAPCACHE_VIRT_START   (PERDOMAIN_VIRT_END)
  18.214 -#define MAPCACHE_VIRT_END     (MAPCACHE_VIRT_START + (4*1024*1024))
  18.215 -/* Final 4MB of virtual address space used for ioremap(). */
  18.216 -#define IOREMAP_VIRT_START    (MAPCACHE_VIRT_END)
  18.217 -#define IOREMAP_VIRT_END      (IOREMAP_VIRT_START + (4*1024*1024))
  18.218 +#define FRAMETABLE_VIRT_END   (FRAMETABLE_VIRT_START + (FRAMETABLE_ENTRY_SIZE * MAX_MEMORY_FRAMES))
  18.219 +
  18.220 +/* Next 1GB of virtual address space used for ioremap(). */
  18.221 +#define IOREMAP_VIRT_START    (FRAMETABLE_VIRT_END)
  18.222 +#define IOREMAP_VIRT_END      (IOREMAP_VIRT_START + (512 * 512 * 4096))
  18.223 +
  18.224 +/* And the virtual addresses for the direct-map region... */
  18.225 +#define DIRECTMAP_VIRT_START  (ESPACE_VIRT_START + (XEN_RESERVED_PML4S * PML4_SPACE))
  18.226 +#define DIRECTMAP_VIRT_END    (DIRECTMAP_VIRT_START + MAX_DIRECTMAP_ADDRESS)
  18.227  
  18.228  /*
  18.229 - * Amount of slack domain memory to leave in system, in megabytes.
  18.230 + * Next is the direct-mapped memory region. The following are machine addresses.
  18.231 + */
  18.232 +#define MAX_DMA_ADDRESS       (16*1024*1024)
  18.233 +#define MAX_DIRECTMAP_ADDRESS MAX_MEMORY_SIZE
  18.234 +
  18.235 +
  18.236 +
  18.237 +/*
  18.238 + * Amount of slack domain memory to leave in system, in kilobytes.
   18.239 + * Prevents a hard out-of-memory crunch for things like network receive.
  18.240   */
  18.241  #define SLACK_DOMAIN_MEM_KILOBYTES 2048
  18.242  
  18.243 +
  18.244 +/*
  18.245 + * These will probably change in the future..
  18.246 + * locations for 32-bit guest compatibility mappings
  18.247 + */
  18.248 +
  18.249 +/* 4M of 32-bit machine-physical shadow in low 4G of VM space */
  18.250 +#define SHADOW_MPT32_VIRT_START (0xfc000000)
  18.251 +#define SHADOW_MPT32_VIRT_END   (SHADOW_MPT32_VIRT_START + (4 * 1024 * 1024))
  18.252 +
  18.253 +/* 44M of I/O remap for 32-bit drivers */
  18.254 +#define IOREMAP_LOW_VIRT_START (SHADOW_MPT32_VIRT_END)
  18.255 +#define IOREMAP_LOW_VIRT_END   (IOREMAP_LOW_VIRT_START + (44 * 1024 * 1024))
  18.256 +
  18.257 +/* 4M of 32-bit page table */
  18.258 +#define SHADOW_PT32_VIRT_START (IOREMAP_LOW_VIRT_END)
  18.259 +#define SHADOW_PT32_VIRT_END   (SHADOW_PT32_VIRT_START + (4 * 1024 * 1024))
  18.260 +
  18.261 +
  18.262  /* Linkage for x86 */
  18.263  #define FASTCALL(x)     x __attribute__((regparm(3)))
  18.264  #define asmlinkage        __attribute__((regparm(0)))
  18.265 @@ -127,6 +266,8 @@
  18.266    SYMBOL_NAME_LABEL(name)
  18.267  #endif
  18.268  
  18.269 +#define PGT_base_page_table PGT_l4_page_table
  18.270 +
  18.271  #define barrier() __asm__ __volatile__("": : :"memory")
  18.272  
  18.273  /*
    19.1 --- a/xen/include/asm-x86_64/current.h	Tue Mar 16 12:37:49 2004 +0000
    19.2 +++ b/xen/include/asm-x86_64/current.h	Wed Mar 17 13:35:01 2004 +0000
    19.3 @@ -26,7 +26,7 @@ static inline void set_current(struct ta
    19.4  static inline execution_context_t *get_execution_context(void)
    19.5  {
    19.6      execution_context_t *execution_context;
    19.7 -    __asm__( "andq %%rsp,%0; addl %2,%0"
    19.8 +    __asm__( "andq %%rsp,%0; addq %2,%0"
    19.9  	    : "=r" (execution_context)
   19.10  	    : "0" (~(STACK_SIZE-1)), "i" (STACK_SIZE-STACK_RESERVED) ); 
   19.11      return execution_context;
   19.12 @@ -42,7 +42,7 @@ static inline unsigned long get_stack_to
   19.13  
   19.14  #define schedule_tail(_p)                                         \
   19.15      __asm__ __volatile__ (                                        \
   19.16 -        "andq %%rsp,%0; addq %2,%0; movl %0,%%rsp; jmp *%1"       \
   19.17 +        "andq %%rsp,%0; addq %2,%0; movq %0,%%rsp; jmp *%1"       \
   19.18          : : "r" (~(STACK_SIZE-1)),                                \
   19.19              "r" (unlikely(is_idle_task((_p))) ?                   \
   19.20                                  continue_cpu_idle_loop :          \
    20.1 --- a/xen/include/asm-x86_64/desc.h	Tue Mar 16 12:37:49 2004 +0000
    20.2 +++ b/xen/include/asm-x86_64/desc.h	Wed Mar 17 13:35:01 2004 +0000
    20.3 @@ -8,7 +8,7 @@
    20.4  #define __FIRST_PER_CPU_ENTRY (FIRST_RESERVED_GDT_ENTRY + 8)
    20.5  
    20.6  #define __CPU_DESC_INDEX(x,field) \
    20.7 -	((x) * sizeof(struct per_cpu_gdt) + offsetof(struct per_cpu_gdt, field) + (FIRST_PER_CPU_ENTRY*8))
    20.8 +	((x) * sizeof(struct per_cpu_gdt) + offsetof(struct per_cpu_gdt, field) + (__FIRST_PER_CPU_ENTRY*8))
    20.9  #define __LDT(n) (((n)<<1) + __FIRST_LDT_ENTRY)
   20.10  
   20.11  #define load_TR(cpu) asm volatile("ltr %w0"::"r" (__CPU_DESC_INDEX(cpu, tss)));
    21.1 --- a/xen/include/asm-x86_64/io.h	Tue Mar 16 12:37:49 2004 +0000
    21.2 +++ b/xen/include/asm-x86_64/io.h	Wed Mar 17 13:35:01 2004 +0000
    21.3 @@ -2,6 +2,7 @@
    21.4  #define _ASM_IO_H
    21.5  
    21.6  #include <xeno/config.h>
    21.7 +#include <asm/page.h>
    21.8  
    21.9  /*
   21.10   * This file contains the definitions for the x86 IO instructions
   21.11 @@ -139,9 +140,12 @@ extern inline void * phys_to_virt(unsign
   21.12  #ifdef CONFIG_DISCONTIGMEM
   21.13  #include <asm/mmzone.h>
   21.14  #else
   21.15 -#define page_to_phys(page)	(((page) - frame_table) << PAGE_SHIFT)
   21.16 +#define page_to_phys(page)      (((page) - frame_table) << PAGE_SHIFT)
   21.17  #endif
   21.18  
    21.19 +#define page_to_pfn(page)       ((unsigned long)((page) - frame_table))
    21.20 +#define page_to_virt(page)      (phys_to_virt(page_to_phys(page)))
   21.21 +
   21.22  extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
   21.23  
   21.24  extern inline void * ioremap (unsigned long offset, unsigned long size)
    22.1 --- a/xen/include/asm-x86_64/ldt.h	Tue Mar 16 12:37:49 2004 +0000
    22.2 +++ b/xen/include/asm-x86_64/ldt.h	Wed Mar 17 13:35:01 2004 +0000
    22.3 @@ -9,7 +9,7 @@ static inline void load_LDT(struct task_
    22.4  
    22.5      if ( (ents = p->mm.ldt_ents) == 0 )
    22.6      {
    22.7 -        __asm__ __volatile__ ( "lldt %%rax" : : "a" (0) );
    22.8 +        __asm__ __volatile__ ( "lldt %w0" : : "r" (0) );
    22.9      }
   22.10      else
   22.11      {
   22.12 @@ -17,17 +17,17 @@ static inline void load_LDT(struct task_
   22.13  	struct ldttss_desc *desc;
   22.14  
   22.15          cpu = smp_processor_id();
   22.16 -        desc = (struct desc_struct *)((char *)GET_GDT_ADDRESS(p) + __CPU_DESC_INDEX(cpu, ldt));
   22.17 +        desc = (struct ldttss_desc *)((char *)GET_GDT_ADDRESS(p) + __CPU_DESC_INDEX(cpu, ldt));
   22.18  	desc->limit0 = ents*8-1;
   22.19  	desc->base0 = LDT_VIRT_START&0xffff;
   22.20  	desc->base1 = (LDT_VIRT_START&0xff0000)>>16;
   22.21  	desc->type = DESC_LDT;
   22.22  	desc->dpl = 0;
   22.23  	desc->p = 1;
   22.24 -	desc->limit = 0;
   22.25 +	desc->limit1 = 0;
   22.26  	desc->zero0 = 0;
   22.27  	desc->g = 0;
   22.28 -	desc->base2 = (LDT_VIRST_START&0xff000000)>>24;
   22.29 +	desc->base2 = (LDT_VIRT_START&0xff000000)>>24;
   22.30  	desc->base3 = LDT_VIRT_START>>32;
   22.31  	desc->zero1 = 0;
   22.32  	__load_LDT(cpu);
    23.1 --- a/xen/include/asm-x86_64/page.h	Tue Mar 16 12:37:49 2004 +0000
    23.2 +++ b/xen/include/asm-x86_64/page.h	Wed Mar 17 13:35:01 2004 +0000
    23.3 @@ -265,7 +265,6 @@ struct bug_frame {
    23.4  #ifndef CONFIG_DISCONTIGMEM
    23.5  #define virt_to_page(kaddr)	(frame_table + (__pa(kaddr) >> PAGE_SHIFT))
    23.6  #define pfn_to_page(pfn)	(frame_table + (pfn)) 
    23.7 -#define page_to_pfn(page)   ((page) - frame_table)
    23.8  #define page_address(_p)        (__va(((_p) - frame_table) << PAGE_SHIFT))
    23.9  #define VALID_PAGE(page)	(((page) - frame_table) < max_mapnr)
   23.10  #endif
    24.1 --- a/xen/include/asm-x86_64/pci.h	Tue Mar 16 12:37:49 2004 +0000
    24.2 +++ b/xen/include/asm-x86_64/pci.h	Wed Mar 17 13:35:01 2004 +0000
    24.3 @@ -27,10 +27,8 @@ int pcibios_set_irq_routing(struct pci_d
    24.4  #include <xeno/types.h>
    24.5  #include <xeno/slab.h>
    24.6  #include <asm/scatterlist.h>
    24.7 -/*#include <xeno/string.h>*/
    24.8  #include <asm/io.h>
    24.9  #include <asm/page.h>
   24.10 -#include <asm/mmzone.h>
   24.11  
   24.12  struct pci_dev;
   24.13  extern int force_mmu;
   24.14 @@ -96,14 +94,16 @@ static inline void pci_dma_sync_single(s
   24.15  				       dma_addr_t dma_handle,
   24.16  				       size_t size, int direction)
   24.17  {
   24.18 -	BUG_ON(direction == PCI_DMA_NONE); 
   24.19 +	if (direction == PCI_DMA_NONE)
   24.20 +		out_of_line_bug();
   24.21  } 
   24.22  
   24.23  static inline void pci_dma_sync_sg(struct pci_dev *hwdev, 
   24.24  				   struct scatterlist *sg,
   24.25  				   int nelems, int direction)
   24.26  { 
   24.27 -	BUG_ON(direction == PCI_DMA_NONE); 
   24.28 +	if (direction == PCI_DMA_NONE)
   24.29 +		out_of_line_bug();
   24.30  } 
   24.31  
   24.32  /* The PCI address space does equal the physical memory
   24.33 @@ -162,6 +162,19 @@ static inline dma_addr_t pci_map_page(st
   24.34  
   24.35  #define BAD_DMA_ADDRESS (-1UL)
   24.36  
   24.37 +
   24.38 +/* Unmap a set of streaming mode DMA translations.
   24.39 + * Again, cpu read rules concerning calls here are the same as for
   24.40 + * pci_unmap_single() above.
   24.41 + */
   24.42 +static inline void pci_unmap_sg(struct pci_dev *dev, struct scatterlist *sg,
   24.43 +                                  int nents, int dir)
   24.44 +{
   24.45 +	if (dir == PCI_DMA_NONE)
   24.46 +		out_of_line_bug();
   24.47 +}
   24.48 +	
   24.49 +
   24.50  /* Map a set of buffers described by scatterlist in streaming
    24.51  * mode for DMA.  This is the scatter-gather version of the
   24.52   * above pci_map_single interface.  Here the scatter gather list
   24.53 @@ -181,18 +194,20 @@ static inline int pci_map_sg(struct pci_
   24.54  			     int nents, int direction)
   24.55  {
   24.56  	int i;
   24.57 -											   
   24.58 -	BUG_ON(direction == PCI_DMA_NONE);
   24.59 -											   
   24.60 +
   24.61 +	if (direction == PCI_DMA_NONE)
   24.62 +		out_of_line_bug();
   24.63 +
   24.64  	/*
   24.65  	 * temporary 2.4 hack
   24.66  	 */
   24.67  	for (i = 0; i < nents; i++ ) {
   24.68  		struct scatterlist *s = &sg[i];
   24.69  		void *addr = s->address;
   24.70 -		if (addr)
   24.71 -			BUG_ON(s->page || s->offset);
   24.72 -		else if (s->page)
   24.73 +		if (addr) {
   24.74 +			if (s->page || s->offset)
   24.75 +				out_of_line_bug();
   24.76 +		} else if (s->page)
   24.77  			addr = page_address(s->page) + s->offset;
   24.78  #if 0
   24.79  		/* Invalid check, since address==0 is valid. */
   24.80 @@ -209,17 +224,6 @@ static inline int pci_map_sg(struct pci_
   24.81  	pci_unmap_sg(hwdev, sg, i, direction);
   24.82  	return 0;
   24.83  }
   24.84 -											   
   24.85 -/* Unmap a set of streaming mode DMA translations.
   24.86 - * Again, cpu read rules concerning calls here are the same as for
   24.87 - * pci_unmap_single() above.
   24.88 - */
   24.89 -static inline void pci_unmap_sg(struct pci_dev *dev, struct scatterlist *sg,
   24.90 -                                  int nents, int dir)
   24.91 -{
   24.92 -	if (direction == PCI_DMA_NONE)
   24.93 -		out_of_line_bug();
   24.94 -}
   24.95  
   24.96  	
   24.97  /* Make physical memory consistent for a single
   24.98 @@ -259,11 +263,6 @@ static inline void pci_dma_sync_sg(struc
   24.99  
  24.100  #endif
  24.101  
  24.102 -extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg,
  24.103 -		      int nents, int direction);
  24.104 -extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg,
  24.105 -			 int nents, int direction);
  24.106 -
  24.107  #define pci_unmap_page pci_unmap_single
  24.108  
  24.109  /* Return whether the given PCI device DMA address mask can
  24.110 @@ -297,6 +296,7 @@ pci_dac_page_to_dma(struct pci_dev *pdev
  24.111  static __inline__ struct pfn_info *
  24.112  pci_dac_dma_to_page(struct pci_dev *pdev, dma64_addr_t dma_addr)
  24.113  {
  24.114 +	unsigned long poff = (dma_addr >> PAGE_SHIFT);
  24.115  	return frame_table + poff;
  24.116  }
  24.117  
    25.1 --- a/xen/include/asm-x86_64/processor.h	Tue Mar 16 12:37:49 2004 +0000
    25.2 +++ b/xen/include/asm-x86_64/processor.h	Wed Mar 17 13:35:01 2004 +0000
    25.3 @@ -340,7 +340,8 @@ struct tss_struct {
    25.4  } __attribute__((packed)) ____cacheline_aligned;
    25.5  
    25.6  struct thread_struct {
    25.7 -	unsigned long	rsp0;
    25.8 +	unsigned long	guestos_sp;
    25.9 +	unsigned long	guestos_ss;
   25.10  	unsigned long	rip;
   25.11  	unsigned long	rsp;
   25.12  	unsigned long 	userrsp;	/* Copy from PDA */ 
    26.1 --- a/xen/include/asm-x86_64/types.h	Tue Mar 16 12:37:49 2004 +0000
    26.2 +++ b/xen/include/asm-x86_64/types.h	Wed Mar 17 13:35:01 2004 +0000
    26.3 @@ -3,6 +3,8 @@
    26.4  
    26.5  typedef unsigned short umode_t;
    26.6  
    26.7 +typedef unsigned long size_t;
    26.8 +
    26.9  /*
   26.10   * __xx is ok: it doesn't pollute the POSIX namespace. Use these in the
   26.11   * header files exported to user space
    27.1 --- a/xen/include/asm-x86_64/uaccess.h	Tue Mar 16 12:37:49 2004 +0000
    27.2 +++ b/xen/include/asm-x86_64/uaccess.h	Wed Mar 17 13:35:01 2004 +0000
    27.3 @@ -103,11 +103,11 @@ extern void __get_user_8(void);
    27.4  ({	long __val_gu;							\
    27.5  	int __ret_gu=1;							\
    27.6  	switch(sizeof (*(ptr))) {					\
    27.7 -	case 1:  _ret_gu=copy_from_user(&__val_gu,ptr,1);break;		\
    27.8 -	case 2:  _ret_gu=copy_from_user(&__val_gu,ptr,2);break;		\
    27.9 -	case 4:  _ret_gu=copy_from_user(&__val_gu,ptr,4);break;		\
   27.10 -	case 8:  _ret_gu=copy_from_user(&__val_gu,ptr,8);break;		\
   27.11 -	default: _ret_gu=copy_from_user(&__val_gu,ptr,sizeof(*(ptr)));break;\
    27.12 +	case 1:  __ret_gu=copy_from_user(&__val_gu,ptr,1);break;		\
    27.13 +	case 2:  __ret_gu=copy_from_user(&__val_gu,ptr,2);break;		\
    27.14 +	case 4:  __ret_gu=copy_from_user(&__val_gu,ptr,4);break;		\
    27.15 +	case 8:  __ret_gu=copy_from_user(&__val_gu,ptr,8);break;		\
    27.16 +	default: __ret_gu=copy_from_user(&__val_gu,ptr,sizeof(*(ptr)));break;\
   27.17  	/*case 1:  __get_user_x(1,__ret_gu,__val_gu,ptr); break;*/	\
   27.18  	/*case 2:  __get_user_x(2,__ret_gu,__val_gu,ptr); break;*/	\
   27.19  	/*case 4:  __get_user_x(4,__ret_gu,__val_gu,ptr); break;*/	\
    28.1 --- a/xen/include/hypervisor-ifs/arch-i386/hypervisor-if.h	Tue Mar 16 12:37:49 2004 +0000
    28.2 +++ b/xen/include/hypervisor-ifs/arch-i386/hypervisor-if.h	Wed Mar 17 13:35:01 2004 +0000
    28.3 @@ -35,6 +35,11 @@
    28.4  #define FLAT_RING3_CS 0x082b    /* GDT index 261 */
    28.5  #define FLAT_RING3_DS 0x0833    /* GDT index 262 */
    28.6  
    28.7 +#define FLAT_GUESTOS_CS FLAT_RING1_CS
    28.8 +#define FLAT_GUESTOS_DS FLAT_RING1_DS
    28.9 +#define FLAT_USER_CS    FLAT_RING3_CS
   28.10 +#define FLAT_USER_DS    FLAT_RING3_DS
   28.11 +
   28.12  /* And the trap vector is... */
   28.13  #define TRAP_INSTR "int $0x82"
   28.14  
   28.15 @@ -99,13 +104,13 @@ typedef struct full_execution_context_st
   28.16  {
   28.17  #define ECF_I387_VALID (1<<0)
   28.18      unsigned long flags;
   28.19 -    execution_context_t i386_ctxt;          /* User-level CPU registers     */
   28.20 -    char          i387_ctxt[256];           /* User-level FPU registers     */
   28.21 +    execution_context_t cpu_ctxt;           /* User-level CPU registers     */
   28.22 +    char          fpu_ctxt[256];            /* User-level FPU registers     */
   28.23      trap_info_t   trap_ctxt[256];           /* Virtual IDT                  */
   28.24      unsigned int  fast_trap_idx;            /* "Fast trap" vector offset    */
   28.25      unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
   28.26      unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
   28.27 -    unsigned long ring1_ss, ring1_esp;      /* Virtual TSS (only SS1/ESP1)  */
   28.28 +    unsigned long guestos_ss, guestos_esp;  /* Virtual TSS (only SS1/ESP1)  */
   28.29      unsigned long pt_base;                  /* CR3 (pagetable base)         */
   28.30      unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
   28.31      unsigned long event_callback_cs;        /* CS:EIP of event callback     */
   28.32 @@ -114,6 +119,8 @@ typedef struct full_execution_context_st
   28.33      unsigned long failsafe_callback_eip;
   28.34  } full_execution_context_t;
   28.35  
   28.36 +#define ARCH_HAS_FAST_TRAP
   28.37 +
   28.38  #endif
   28.39  
   28.40  #endif
    29.1 --- a/xen/include/hypervisor-ifs/arch-x86_64/hypervisor-if.h	Tue Mar 16 12:37:49 2004 +0000
    29.2 +++ b/xen/include/hypervisor-ifs/arch-x86_64/hypervisor-if.h	Wed Mar 17 13:35:01 2004 +0000
    29.3 @@ -35,13 +35,20 @@
    29.4  #define FLAT_RING3_CS64 0x082b	/* GDT index 261 */
    29.5  #define FLAT_RING3_DS 0x0833	/* GDT index 262 */
    29.6  
    29.7 +#define FLAT_GUESTOS_DS   FLAT_RING3_DS
    29.8 +#define FLAT_GUESTOS_CS   FLAT_RING3_CS64
    29.9 +#define FLAT_GUESTOS_CS32 FLAT_RING3_CS32
   29.10 +
   29.11 +#define FLAT_USER_DS      FLAT_RING3_DS
   29.12 +#define FLAT_USER_CS      FLAT_RING3_CS64
   29.13 +#define FLAT_USER_CS32    FLAT_RING3_CS32
   29.14  
   29.15  /* And the trap vector is... */
   29.16  #define TRAP_INSTR "syscall"
   29.17  
   29.18  
   29.19  #ifndef machine_to_phys_mapping
   29.20 -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
   29.21 +#define machine_to_phys_mapping ((unsigned long *)0xffff810000000000ULL)
   29.22  #endif
   29.23  
   29.24  #ifndef __ASSEMBLY__
   29.25 @@ -98,13 +105,12 @@ typedef struct full_execution_context_st
   29.26  {
   29.27  #define ECF_I387_VALID (1<<0)
   29.28      unsigned long flags;
   29.29 -    execution_context_t x86_64_ctxt;          /* User-level CPU registers     */
   29.30 -    char          i387_ctxt[512];           /* User-level FPU registers     */
   29.31 +    execution_context_t cpu_ctxt;           /* User-level CPU registers     */
   29.32 +    char          fpu_ctxt[512];            /* User-level FPU registers     */
   29.33      trap_info_t   trap_ctxt[256];           /* Virtual IDT                  */
   29.34 -    unsigned int  fast_trap_idx;            /* "Fast trap" vector offset    */
   29.35      unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
   29.36      unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
   29.37 -    unsigned long ring1_ss, ring1_esp;      /* Virtual TSS (only SS1/ESP1)  */
   29.38 +    unsigned long guestos_ss, guestos_esp;  /* Virtual TSS (only SS1/ESP1)  */
   29.39      unsigned long pt_base;                  /* CR3 (pagetable base)         */
   29.40      unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
   29.41      unsigned long event_callback_cs;        /* CS:EIP of event callback     */
    30.1 --- a/xen/include/hypervisor-ifs/dom0_ops.h	Tue Mar 16 12:37:49 2004 +0000
    30.2 +++ b/xen/include/hypervisor-ifs/dom0_ops.h	Wed Mar 17 13:35:01 2004 +0000
    30.3 @@ -116,10 +116,11 @@ typedef struct dom0_getdomaininfo_st
    30.4  typedef struct dom0_getpageframeinfo_st
    30.5  {
    30.6      /* IN variables. */
    30.7 -    unsigned long pfn;          /* Machine page frame number to query.       */
    30.8 +    unsigned long pfn;     /* Machine page frame number to query.       */
    30.9      domid_t domain;        /* To which domain does the frame belong?    */
   30.10      /* OUT variables. */
   30.11 -    enum { NONE, L1TAB, L2TAB } type; /* Is the page PINNED to a type?       */
   30.12 +    /* Is the page PINNED to a type? */
   30.13 +    enum { NONE, L1TAB, L2TAB, L3TAB, L4TAB } type;
   30.14  } dom0_getpageframeinfo_t;
   30.15  
   30.16  #define DOM0_IOPL             14
    31.1 --- a/xen/include/xeno/lib.h	Tue Mar 16 12:37:49 2004 +0000
    31.2 +++ b/xen/include/xeno/lib.h	Wed Mar 17 13:35:01 2004 +0000
    31.3 @@ -51,6 +51,6 @@ unsigned long simple_strtoul(const char 
    31.4  long long simple_strtoll(const char *cp,char **endp,unsigned int base);
    31.5  
    31.6  /* Produce a 32-bit hash from a key string 'k' of length 'len' bytes. */
    31.7 -unsigned long hash(unsigned char *k, unsigned long len);
    31.8 +u32 hash(unsigned char *k, unsigned long len);
    31.9  
   31.10  #endif /* __LIB_H__ */
    32.1 --- a/xen/include/xeno/types.h	Tue Mar 16 12:37:49 2004 +0000
    32.2 +++ b/xen/include/xeno/types.h	Wed Mar 17 13:35:01 2004 +0000
    32.3 @@ -14,8 +14,6 @@
    32.4  #define LONG_MIN        (-LONG_MAX - 1)
    32.5  #define ULONG_MAX       (~0UL)
    32.6  
    32.7 -typedef unsigned int size_t;
    32.8 -
    32.9  /* bsd */
   32.10  typedef unsigned char           u_char;
   32.11  typedef unsigned short          u_short;
    33.1 --- a/xen/net/dev.c	Tue Mar 16 12:37:49 2004 +0000
    33.2 +++ b/xen/net/dev.c	Wed Mar 17 13:35:01 2004 +0000
    33.3 @@ -627,7 +627,7 @@ static void net_rx_action(struct softirq
    33.4           * for ethernet header, plus any other alignment padding added by the
    33.5           * driver.
    33.6           */
    33.7 -        offset = (int)skb->data & ~PAGE_MASK; 
    33.8 +        offset = (int)(long)skb->data & ~PAGE_MASK; 
    33.9          skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << 
   33.10                                            PAGE_SHIFT));
   33.11          skb->data = skb->nh.raw = skb->head + offset;
    34.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/Makefile	Tue Mar 16 12:37:49 2004 +0000
    34.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/Makefile	Wed Mar 17 13:35:01 2004 +0000
    34.3 @@ -51,20 +51,21 @@ HEAD := arch/xeno/kernel/head.o arch/xen
    34.4  SUBDIRS += arch/xeno/kernel arch/xeno/mm arch/xeno/lib
    34.5  SUBDIRS += arch/xeno/drivers/console arch/xeno/drivers/network
    34.6  SUBDIRS += arch/xeno/drivers/evtchn arch/xeno/drivers/block
    34.7 -SUBDIRS += arch/xeno/drivers/balloon
    34.8 +SUBDIRS += arch/xeno/drivers/balloon arch/xeno/drivers/vnetif
    34.9  ifdef CONFIG_XENO_PRIV
   34.10  SUBDIRS += arch/xeno/drivers/dom0 
   34.11  endif
   34.12  
   34.13  CORE_FILES += arch/xeno/kernel/kernel.o arch/xeno/mm/mm.o
   34.14 -CORE_FILES += arch/xeno/drivers/evtchn/evtchn.o
   34.15 -CORE_FILES += arch/xeno/drivers/console/con.o
   34.16 -CORE_FILES += arch/xeno/drivers/block/blk.o
   34.17 -CORE_FILES += arch/xeno/drivers/network/net.o
   34.18 +CORE_FILES += arch/xeno/drivers/evtchn/drv.o
   34.19 +CORE_FILES += arch/xeno/drivers/console/drv.o
   34.20 +CORE_FILES += arch/xeno/drivers/block/drv.o
   34.21 +CORE_FILES += arch/xeno/drivers/network/drv.o
   34.22 +CORE_FILES += arch/xeno/drivers/vnetif/drv.o
   34.23  ifdef CONFIG_XENO_PRIV
   34.24 -CORE_FILES += arch/xeno/drivers/dom0/dom0.o
   34.25 +CORE_FILES += arch/xeno/drivers/dom0/drv.o
   34.26  endif
   34.27 -CORE_FILES += arch/xeno/drivers/balloon/balloon_driver.o
   34.28 +CORE_FILES += arch/xeno/drivers/balloon/drv.o
   34.29  LIBS := $(TOPDIR)/arch/xeno/lib/lib.a $(LIBS) $(TOPDIR)/arch/xeno/lib/lib.a
   34.30  
   34.31  arch/xeno/kernel: dummy
    35.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/Makefile	Tue Mar 16 12:37:49 2004 +0000
    35.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/balloon/Makefile	Wed Mar 17 13:35:01 2004 +0000
    35.3 @@ -1,3 +1,3 @@
    35.4 -O_TARGET := balloon_driver.o
    35.5 +O_TARGET := drv.o
    35.6  obj-y := balloon.o
    35.7  include $(TOPDIR)/Rules.make
    36.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/Makefile	Tue Mar 16 12:37:49 2004 +0000
    36.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/Makefile	Wed Mar 17 13:35:01 2004 +0000
    36.3 @@ -1,3 +1,3 @@
    36.4 -O_TARGET := blk.o
    36.5 -obj-y := xl_block.o xl_vbd.o
    36.6 +O_TARGET := drv.o
    36.7 +obj-y := block.o vbd.o
    36.8  include $(TOPDIR)/Rules.make
    37.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.c	Wed Mar 17 13:35:01 2004 +0000
    37.3 @@ -0,0 +1,621 @@
    37.4 +/******************************************************************************
    37.5 + * block.c
    37.6 + * 
    37.7 + * Xenolinux virtual block-device driver.
    37.8 + * 
    37.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   37.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   37.11 + */
   37.12 +
   37.13 +#include "block.h"
   37.14 +#include <linux/blk.h>
   37.15 +#include <linux/cdrom.h>
   37.16 +#include <linux/tqueue.h>
   37.17 +#include <linux/sched.h>
   37.18 +#include <scsi/scsi.h>
   37.19 +
   37.20 +#include <linux/interrupt.h>
   37.21 +
   37.22 +typedef unsigned char byte; /* from linux/ide.h */
   37.23 +
   37.24 +#define XLBLK_RESPONSE_IRQ HYPEREVENT_IRQ(_EVENT_BLKDEV)
   37.25 +#define XLBLK_UPDATE_IRQ   HYPEREVENT_IRQ(_EVENT_VBD_UPD)
   37.26 +#define DEBUG_IRQ          HYPEREVENT_IRQ(_EVENT_DEBUG)
   37.27 +
   37.28 +#define STATE_ACTIVE    0
   37.29 +#define STATE_SUSPENDED 1
   37.30 +#define STATE_CLOSED    2
   37.31 +static unsigned int state = STATE_SUSPENDED;
   37.32 +
   37.33 +static blk_ring_t *blk_ring;
   37.34 +static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
   37.35 +static BLK_RING_IDX req_prod;  /* Private request producer.         */
   37.36 +
   37.37 +/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   37.38 +#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
   37.39 +                      (state != STATE_ACTIVE))
   37.40 +
   37.41 +
   37.42 +/*
   37.43 + * Request queues with outstanding work, but ring is currently full.
   37.44 + * We need no special lock here, as we always access this with the
   37.45 + * io_request_lock held. We only need a small maximum list.
   37.46 + */
   37.47 +#define MAX_PENDING 8
   37.48 +static request_queue_t *pending_queues[MAX_PENDING];
   37.49 +static int nr_pending;
   37.50 +
   37.51 +static kdev_t        sg_dev;
   37.52 +static int           sg_operation = -1;
   37.53 +static unsigned long sg_next_sect;
   37.54 +#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   37.55 +
   37.56 +static inline void signal_requests_to_xen(void)
   37.57 +{
   37.58 +    block_io_op_t op; 
   37.59 +
   37.60 +    DISABLE_SCATTERGATHER();
   37.61 +    blk_ring->req_prod = req_prod;
   37.62 +
   37.63 +    op.cmd = BLOCK_IO_OP_SIGNAL; 
   37.64 +    HYPERVISOR_block_io_op(&op);
   37.65 +    return;
   37.66 +}
   37.67 +
   37.68 +
   37.69 +/*
    37.70 + * xlblk_update_int/update_vbds_task - handle VBD update events from Xen
   37.71 + * 
   37.72 + * Schedule a task for keventd to run, which will update the VBDs and perform 
    37.73 + * the corresponding updates to our view of VBD state, so that XenoLinux will 
   37.74 + * respond to changes / additions / deletions to the set of VBDs automatically.
   37.75 + */
   37.76 +static struct tq_struct update_tq;
   37.77 +static void update_vbds_task(void *unused)
   37.78 +{ 
   37.79 +    xlvbd_update_vbds();
   37.80 +}
   37.81 +static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
   37.82 +{
   37.83 +    update_tq.routine = update_vbds_task;
   37.84 +    schedule_task(&update_tq);
   37.85 +}
   37.86 +
   37.87 +
   37.88 +int xenolinux_block_open(struct inode *inode, struct file *filep)
   37.89 +{
   37.90 +    short xldev = inode->i_rdev; 
   37.91 +    struct gendisk *gd = get_gendisk(xldev);
   37.92 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   37.93 +    short minor = MINOR(xldev); 
   37.94 +
   37.95 +    if ( gd->part[minor].nr_sects == 0 )
   37.96 +    { 
   37.97 +        /*
   37.98 +         * Device either doesn't exist, or has zero capacity; we use a few
   37.99 +         * cheesy heuristics to return the relevant error code
  37.100 +         */
  37.101 +        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
  37.102 +             ((minor & (gd->max_p - 1)) != 0) )
  37.103 +        { 
  37.104 +            /*
  37.105 +             * We have a real device, but no such partition, or we just have a
  37.106 +             * partition number so guess this is the problem.
  37.107 +             */
  37.108 +            return -ENXIO;     /* no such device or address */
  37.109 +        }
  37.110 +        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  37.111 +        {
  37.112 +            /* This is a removable device => assume that media is missing. */ 
  37.113 +            return -ENOMEDIUM; /* media not present (this is a guess) */
  37.114 +        } 
  37.115 +        else
  37.116 +        { 
  37.117 +            /* Just go for the general 'no such device' error. */
  37.118 +            return -ENODEV;    /* no such device */
  37.119 +        }
  37.120 +    }
  37.121 +    
  37.122 +    /* Update of usage count is protected by per-device semaphore. */
  37.123 +    disk->usage++;
  37.124 +
  37.125 +    return 0;
  37.126 +}
  37.127 +
  37.128 +
  37.129 +int xenolinux_block_release(struct inode *inode, struct file *filep)
  37.130 +{
  37.131 +    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  37.132 +
  37.133 +    /*
  37.134 +     * When usage drops to zero it may allow more VBD updates to occur.
  37.135 +     * Update of usage count is protected by a per-device semaphore.
  37.136 +     */
  37.137 +    if ( --disk->usage == 0 )
  37.138 +    {
  37.139 +        update_tq.routine = update_vbds_task;
  37.140 +        schedule_task(&update_tq);
  37.141 +    }
  37.142 +
  37.143 +    return 0;
  37.144 +}
  37.145 +
  37.146 +
  37.147 +int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
  37.148 +                          unsigned command, unsigned long argument)
  37.149 +{
  37.150 +    kdev_t dev = inode->i_rdev;
  37.151 +    struct hd_geometry *geo = (struct hd_geometry *)argument;
  37.152 +    struct gendisk *gd;     
  37.153 +    struct hd_struct *part; 
  37.154 +    int i;
  37.155 +
  37.156 +    /* NB. No need to check permissions. That is done for us. */
  37.157 +    
  37.158 +    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  37.159 +                  command, (long) argument, dev); 
  37.160 +  
  37.161 +    gd = get_gendisk(dev);
  37.162 +    part = &gd->part[MINOR(dev)]; 
  37.163 +
  37.164 +    switch ( command )
  37.165 +    {
  37.166 +    case BLKGETSIZE:
  37.167 +        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  37.168 +        return put_user(part->nr_sects, (unsigned long *) argument);
  37.169 +
  37.170 +    case BLKGETSIZE64:
  37.171 +        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  37.172 +                      (u64)part->nr_sects * 512);
  37.173 +        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  37.174 +
  37.175 +    case BLKRRPART:                               /* re-read partition table */
  37.176 +        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  37.177 +        return xenolinux_block_revalidate(dev);
  37.178 +
  37.179 +    case BLKSSZGET:
  37.180 +        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  37.181 +
  37.182 +    case BLKBSZGET:                                        /* get block size */
  37.183 +        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  37.184 +        break;
  37.185 +
  37.186 +    case BLKBSZSET:                                        /* set block size */
  37.187 +        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  37.188 +        break;
  37.189 +
  37.190 +    case BLKRASET:                                         /* set read-ahead */
  37.191 +        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  37.192 +        break;
  37.193 +
  37.194 +    case BLKRAGET:                                         /* get read-ahead */
  37.195 +        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
  37.196 +        break;
  37.197 +
  37.198 +    case HDIO_GETGEO:
  37.199 +        /* note: these values are complete garbage */
  37.200 +        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  37.201 +        if (!argument) return -EINVAL;
  37.202 +        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  37.203 +        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  37.204 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  37.205 +        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  37.206 +        return 0;
  37.207 +
  37.208 +    case HDIO_GETGEO_BIG: 
  37.209 +        /* note: these values are complete garbage */
  37.210 +        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  37.211 +        if (!argument) return -EINVAL;
  37.212 +        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  37.213 +        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  37.214 +        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  37.215 +        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  37.216 +        return 0;
  37.217 +
  37.218 +    case CDROMMULTISESSION:
  37.219 +        DPRINTK("FIXME: support multisession CDs later\n");
  37.220 +        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  37.221 +            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  37.222 +        return 0;
  37.223 +
  37.224 +    case SCSI_IOCTL_GET_BUS_NUMBER:
  37.225 +        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in Xen blkdev");
  37.226 +        return -ENOSYS;
  37.227 +
  37.228 +    default:
  37.229 +        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", command);
  37.230 +        return -ENOSYS;
  37.231 +    }
  37.232 +    
  37.233 +    return 0;
  37.234 +}
  37.235 +
  37.236 +/* check media change: should probably do something here in some cases :-) */
  37.237 +int xenolinux_block_check(kdev_t dev)
  37.238 +{
  37.239 +    DPRINTK("xenolinux_block_check\n");
  37.240 +    return 0;
  37.241 +}
  37.242 +
  37.243 +int xenolinux_block_revalidate(kdev_t dev)
  37.244 +{
  37.245 +    struct block_device *bd;
  37.246 +    struct gendisk *gd;
  37.247 +    xl_disk_t *disk;
  37.248 +    unsigned long capacity;
  37.249 +    int i, rc = 0;
  37.250 +    
  37.251 +    if ( (bd = bdget(dev)) == NULL )
  37.252 +        return -EINVAL;
  37.253 +
  37.254 +    /*
  37.255 +     * Update of partition info, and check of usage count, is protected
  37.256 +     * by the per-block-device semaphore.
  37.257 +     */
  37.258 +    down(&bd->bd_sem);
  37.259 +
  37.260 +    if ( ((gd = get_gendisk(dev)) == NULL) ||
  37.261 +         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  37.262 +         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  37.263 +    {
  37.264 +        rc = -EINVAL;
  37.265 +        goto out;
  37.266 +    }
  37.267 +
  37.268 +    if ( disk->usage > 1 )
  37.269 +    {
  37.270 +        rc = -EBUSY;
  37.271 +        goto out;
  37.272 +    }
  37.273 +
  37.274 +    /* Only reread partition table if VBDs aren't mapped to partitions. */
  37.275 +    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  37.276 +    {
  37.277 +        for ( i = gd->max_p - 1; i >= 0; i-- )
  37.278 +        {
  37.279 +            invalidate_device(dev+i, 1);
  37.280 +            gd->part[MINOR(dev+i)].start_sect = 0;
  37.281 +            gd->part[MINOR(dev+i)].nr_sects   = 0;
  37.282 +            gd->sizes[MINOR(dev+i)]           = 0;
  37.283 +        }
  37.284 +
  37.285 +        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  37.286 +    }
  37.287 +
  37.288 + out:
  37.289 +    up(&bd->bd_sem);
  37.290 +    bdput(bd);
  37.291 +    return rc;
  37.292 +}
  37.293 +
  37.294 +
  37.295 +/*
  37.296 + * hypervisor_request
  37.297 + *
  37.298 + * request block io 
  37.299 + * 
  37.300 + * id: for guest use only.
  37.301 + * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
  37.302 + * buffer: buffer to read/write into. this should be a
  37.303 + *   virtual address in the guest os.
  37.304 + */
static int hypervisor_request(unsigned long   id,
                              int             operation,
                              char *          buffer,
                              unsigned long   sector_number,
                              unsigned short  nr_sectors,
                              kdev_t          device)
{
    /* Xen deals in machine addresses, so translate the guest-virtual
     * buffer before placing it on the shared ring. */
    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
    struct gendisk *gd;
    blk_ring_req_entry_t *req;
    struct buffer_head *bh;

    /* nr_sectors is packed into the low 9 bits of buffer_and_sects[],
     * so it must fit in 9 bits; the buffer must be 512-byte aligned. */
    if ( unlikely(nr_sectors >= (1<<9)) )
        BUG();
    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
        BUG();

    /* Interface torn down (suspend): tell the caller to back off. */
    if ( unlikely(state == STATE_CLOSED) )
        return 1;

    switch ( operation )
    {

    case XEN_BLOCK_READ:
    case XEN_BLOCK_WRITE:
        gd = get_gendisk(device); 

        /*
         * Update the sector_number we'll pass down as appropriate; note that
         * we could sanity check that resulting sector will be in this
         * partition, but this will happen in xen anyhow.
         */
        sector_number += gd->part[MINOR(device)].start_sect;

        /*
         * If this unit doesn't consist of virtual (i.e., Xen-specified)
         * partitions then we clear the partn bits from the device number.
         */
        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
               GENHD_FL_VIRT_PARTNS) )
            device &= ~(gd->max_p - 1);

        /* Scatter-gather: if this request continues the run we are
         * currently building (same op, same device, contiguous sector),
         * fold it into the most recently produced ring entry. */
        if ( (sg_operation == operation) &&
             (sg_dev == device) &&
             (sg_next_sect == sector_number) )
        {
            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
            /* Chain this buffer_head onto the entry's id chain so the
             * response handler can complete every merged bh. */
            bh = (struct buffer_head *)id;
            bh->b_reqnext = (struct buffer_head *)req->id;
            req->id = id;
            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
            if ( ++req->nr_segments < MAX_BLK_SEGS )
                sg_next_sect += nr_sectors;
            else
                DISABLE_SCATTERGATHER();
            return 0;
        }
        else if ( RING_PLUGGED )
        {
            /* Ring is full: fail so the caller parks this queue. */
            return 1;
        }
        else
        {
            /* Start a fresh scatter-gather run from this request. */
            sg_operation = operation;
            sg_dev       = device;
            sg_next_sect = sector_number + nr_sectors;
        }
        break;

    default:
        panic("unknown op %d\n", operation);
    }

    /* Fill out a communications ring structure. */
    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
    req->id            = id;
    req->operation     = operation;
    req->sector_number = (xen_sector_t)sector_number;
    req->device        = device; 
    req->nr_segments   = 1;
    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
    req_prod++;

    return 0;
}
  37.390 +
  37.391 +
  37.392 +/*
  37.393 + * do_xlblk_request
  37.394 + *  read a block; request is in a request queue
  37.395 + */
void do_xlblk_request(request_queue_t *rq)
{
    struct request *req;
    struct buffer_head *bh, *next_bh;
    int rw, nsect, full, queued = 0;

    DPRINTK("xlblk.c::do_xlblk_request\n"); 

    /* Drain the queue, translating each buffer_head into a (possibly
     * merged) entry on the shared ring with Xen.
     * NOTE(review): presumably called with io_request_lock held, as is
     * standard for 2.4 request functions -- confirm against callers. */
    while ( !rq->plugged && !list_empty(&rq->queue_head))
    {
        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
            goto out;
  
        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
                req, req->cmd, req->sector,
                req->current_nr_sectors, req->nr_sectors, req->bh);

        rw = req->cmd;
        if ( rw == READA )      /* treat read-ahead as an ordinary read */
            rw = READ;
        if ( unlikely((rw != READ) && (rw != WRITE)) )
            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);

        req->errors = 0;

        bh = req->bh;
        while ( bh != NULL )
        {
            /* Unlink bh from the chain before handing it to Xen. */
            next_bh = bh->b_reqnext;
            bh->b_reqnext = NULL;

            full = hypervisor_request(
                (unsigned long)bh,
                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);

            if ( full )
            { 
                /* Ring is full: re-link bh and park this queue so a
                 * later response interrupt can kick it again. */
                bh->b_reqnext = next_bh;
                pending_queues[nr_pending++] = rq;
                if ( unlikely(nr_pending >= MAX_PENDING) )
                    BUG();
                goto out; 
            }

            queued++;

            /* Dequeue the buffer head from the request. */
            nsect = bh->b_size >> 9;
            bh = req->bh = next_bh;
            
            if ( bh != NULL )
            {
                /* There's another buffer head to do. Update the request. */
                req->hard_sector += nsect;
                req->hard_nr_sectors -= nsect;
                req->sector = req->hard_sector;
                req->nr_sectors = req->hard_nr_sectors;
                req->current_nr_sectors = bh->b_size >> 9;
                req->buffer = bh->b_data;
            }
            else
            {
                /* That was the last buffer head. Finalise the request. */
                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
                    BUG();
                blkdev_dequeue_request(req);
                end_that_request_last(req);
            }
        }
    }

 out:
    /* Only issue the hypercall if something was put on the ring. */
    if ( queued != 0 ) signal_requests_to_xen();
}
  37.471 +
  37.472 +
  37.473 +static void kick_pending_request_queues(void)
  37.474 +{
  37.475 +    /* We kick pending request queues if the ring is reasonably empty. */
  37.476 +    if ( (nr_pending != 0) && 
  37.477 +         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  37.478 +    {
  37.479 +        /* Attempt to drain the queue, but bail if the ring becomes full. */
  37.480 +        while ( (nr_pending != 0) && !RING_PLUGGED )
  37.481 +            do_xlblk_request(pending_queues[--nr_pending]);
  37.482 +    }
  37.483 +}
  37.484 +
  37.485 +
static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    BLK_RING_IDX i; 
    unsigned long flags; 
    struct buffer_head *bh, *next_bh;
    
    /* Ignore interrupts while the interface is torn down. */
    if ( unlikely(state == STATE_CLOSED) )
        return;
    
    spin_lock_irqsave(&io_request_lock, flags);     

    /* Consume every new response Xen has produced on the shared ring. */
    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
    {
        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
        switch ( bret->operation )
        {
        case XEN_BLOCK_READ:
        case XEN_BLOCK_WRITE:
            if ( unlikely(bret->status != 0) )
                DPRINTK("Bad return from blkdev data request: %lx\n",
                        bret->status);
            /* id heads a chain of buffer_heads that were merged into one
             * scatter-gather entry; complete each of them. */
            for ( bh = (struct buffer_head *)bret->id; 
                  bh != NULL; 
                  bh = next_bh )
            {
                next_bh = bh->b_reqnext;
                bh->b_reqnext = NULL;
                bh->b_end_io(bh, !bret->status);
            }
            break;
     
        default:
            BUG();
        }
    }
    
    resp_cons = i;

    /* Ring space was just freed -- restart any stalled request queues. */
    kick_pending_request_queues();

    spin_unlock_irqrestore(&io_request_lock, flags);
}
  37.528 +
  37.529 +
/*
 * (Re)establish the shared block-I/O ring with Xen and reset all local
 * producer/consumer state. Used both at driver init and on resume.
 */
static void reset_xlblk_interface(void)
{
    block_io_op_t op; 

    nr_pending = 0;

    /* Ask Xen to reset the shared request/response ring. */
    op.cmd = BLOCK_IO_OP_RESET;
    if ( HYPERVISOR_block_io_op(&op) != 0 )
        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");

    /* Fetch the machine frame of the ring page and map it via a fixmap. */
    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
    (void)HYPERVISOR_block_io_op(&op);

    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;

    /* Ensure the ring is fully initialised before declaring it active. */
    wmb();
    state = STATE_ACTIVE;
}
  37.550 +
  37.551 +
  37.552 +int __init xlblk_init(void)
  37.553 +{
  37.554 +    int error; 
  37.555 +
  37.556 +    reset_xlblk_interface();
  37.557 +
  37.558 +    error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, 
  37.559 +                        SA_SAMPLE_RANDOM, "blkdev", NULL);
  37.560 +    if ( error )
  37.561 +    {
  37.562 +        printk(KERN_ALERT "Could not allocate receive interrupt\n");
  37.563 +        goto fail;
  37.564 +    }
  37.565 +
  37.566 +    error = request_irq(XLBLK_UPDATE_IRQ, xlblk_update_int,
  37.567 +                        SA_INTERRUPT, "blkdev", NULL);
  37.568 +
  37.569 +    if ( error )
  37.570 +    {
  37.571 +        printk(KERN_ALERT "Could not allocate block update interrupt\n");
  37.572 +        goto fail;
  37.573 +    }
  37.574 +
  37.575 +    (void)xlvbd_init();
  37.576 +
  37.577 +    return 0;
  37.578 +
  37.579 + fail:
  37.580 +    return error;
  37.581 +}
  37.582 +
  37.583 +
  37.584 +static void __exit xlblk_cleanup(void)
  37.585 +{
  37.586 +    xlvbd_cleanup();
  37.587 +    free_irq(XLBLK_RESPONSE_IRQ, NULL);
  37.588 +    free_irq(XLBLK_UPDATE_IRQ, NULL);
  37.589 +}
  37.590 +
  37.591 +
  37.592 +#ifdef MODULE
  37.593 +module_init(xlblk_init);
  37.594 +module_exit(xlblk_cleanup);
  37.595 +#endif
  37.596 +
  37.597 +
/*
 * Quiesce the block interface prior to domain suspend: stop issuing new
 * requests, wait for all in-flight requests to be answered, then close
 * the interface and unmap the shared ring page.
 */
void blkdev_suspend(void)
{
    state = STATE_SUSPENDED;
    wmb();

    /* Poll until Xen has produced a response for every request. */
    while ( resp_cons != blk_ring->req_prod )
    {
        barrier();
        current->state = TASK_INTERRUPTIBLE;
        schedule_timeout(1);
    }

    /* Ring is idle: mark the interface closed before unmapping. */
    wmb();
    state = STATE_CLOSED;
    wmb();

    clear_fixmap(FIX_BLKRING_BASE);
}
  37.616 +
  37.617 +
/*
 * Re-attach the block interface after domain resume: rebuild the shared
 * ring, then replay any request queues that stalled during suspend.
 */
void blkdev_resume(void)
{
    reset_xlblk_interface();
    spin_lock_irq(&io_request_lock);
    kick_pending_request_queues();
    spin_unlock_irq(&io_request_lock);
}
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/block.h	Wed Mar 17 13:35:01 2004 +0000
    38.3 @@ -0,0 +1,82 @@
    38.4 +/******************************************************************************
    38.5 + * block.h
    38.6 + * 
    38.7 + * Shared definitions between all levels of XenoLinux Virtual block devices.
    38.8 + */
    38.9 +
   38.10 +#ifndef __XENO_DRIVERS_BLOCK_H__
   38.11 +#define __XENO_DRIVERS_BLOCK_H__
   38.12 +
   38.13 +#include <linux/config.h>
   38.14 +#include <linux/module.h>
   38.15 +
   38.16 +#include <linux/kernel.h>
   38.17 +#include <linux/sched.h>
   38.18 +#include <linux/slab.h>
   38.19 +#include <linux/string.h>
   38.20 +#include <linux/errno.h>
   38.21 +
   38.22 +#include <linux/fs.h>
   38.23 +#include <linux/hdreg.h>
   38.24 +#include <linux/blkdev.h>
   38.25 +#include <linux/major.h>
   38.26 +
   38.27 +#include <asm/hypervisor-ifs/hypervisor-if.h>
   38.28 +#include <asm/hypervisor-ifs/vbd.h>
   38.29 +#include <asm/io.h>
   38.30 +#include <asm/atomic.h>
   38.31 +#include <asm/uaccess.h>
   38.32 +
/* Debug tracing: compiled out by default; flip the '#if 0' to enable. */
#if 0
#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
#else
#define DPRINTK(_f, _a...) ((void)0)
#endif

/* As DPRINTK, but used on the ioctl paths. */
#if 0
#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
#else
#define DPRINTK_IOCTL(_f, _a...) ((void)0)
#endif

/* Private gendisk->flags[] values. */
#define GENHD_FL_XENO        2 /* Is unit a Xen block device?  */
#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */

/*
 * We have one of these per vbd, whether ide, scsi or 'other'.
 * They hang in an array off the gendisk structure. We may end up putting
 * all kinds of interesting stuff here :-)
 */
typedef struct xl_disk {
    int usage;  /* non-zero => device in use; blocks VBD update/removal */
} xl_disk_t;
   38.57 +
   38.58 +extern int xenolinux_control_msg(int operration, char *buffer, int size);
   38.59 +extern int xenolinux_block_open(struct inode *inode, struct file *filep);
   38.60 +extern int xenolinux_block_release(struct inode *inode, struct file *filep);
   38.61 +extern int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
   38.62 +                                 unsigned command, unsigned long argument);
   38.63 +extern int xenolinux_block_check(kdev_t dev);
   38.64 +extern int xenolinux_block_revalidate(kdev_t dev);
   38.65 +extern void do_xlblk_request (request_queue_t *rq); 
   38.66 +
   38.67 +extern void xlvbd_update_vbds(void);
   38.68 +
   38.69 +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   38.70 +{
   38.71 +    struct gendisk *gd = get_gendisk(xldev);
   38.72 +    
   38.73 +    if ( gd == NULL ) 
   38.74 +        return NULL;
   38.75 +    
   38.76 +    return (xl_disk_t *)gd->real_devices + 
   38.77 +        (MINOR(xldev) >> gd->minor_shift);
   38.78 +}
   38.79 +
   38.80 +
   38.81 +/* Virtual block-device subsystem. */
   38.82 +extern int  xlvbd_init(void);
   38.83 +extern void xlvbd_cleanup(void); 
   38.84 +
   38.85 +#endif /* __XENO_DRIVERS_BLOCK_H__ */
    39.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    39.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/vbd.c	Wed Mar 17 13:35:01 2004 +0000
    39.3 @@ -0,0 +1,561 @@
    39.4 +/******************************************************************************
    39.5 + * vbd.c
    39.6 + * 
    39.7 + * Xenolinux virtual block-device driver (xvd).
    39.8 + * 
    39.9 + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   39.10 + * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   39.11 + */
   39.12 +
   39.13 +#include "block.h"
   39.14 +#include <linux/blk.h>
   39.15 +
   39.16 +/*
   39.17 + * For convenience we distinguish between ide, scsi and 'other' (i.e.
   39.18 + * potentially combinations of the two) in the naming scheme and in a few 
   39.19 + * other places (like default readahead, etc).
   39.20 + */
/* Major-name prefixes used when constructing device names. */
#define XLIDE_MAJOR_NAME  "hd"
#define XLSCSI_MAJOR_NAME "sd"
#define XLVBD_MAJOR_NAME "xvd"

/* Number of whole devices multiplexed onto each major. */
#define XLIDE_DEVS_PER_MAJOR   2
#define XLSCSI_DEVS_PER_MAJOR 16
#define XLVBD_DEVS_PER_MAJOR  16

#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */

#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */

#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */

/* The below are for the generic drivers/block/ll_rw_block.c code. */
/* One entry per minor; installed into blksize_size[], hardsect_size[]
 * and max_sectors[] when we register each major (see xlvbd_init_device). */
static int xlide_blksize_size[256];
static int xlide_hardsect_size[256];
static int xlide_max_sectors[256];
static int xlscsi_blksize_size[256];
static int xlscsi_hardsect_size[256];
static int xlscsi_max_sectors[256];
static int xlvbd_blksize_size[256];
static int xlvbd_hardsect_size[256];
static int xlvbd_max_sectors[256];

/* Information from Xen about our VBDs. */
#define MAX_VBDS 64
static int nr_vbds;            /* number of entries currently in vbd_info */
static xen_disk_t *vbd_info;   /* snapshot of Xen's view of our VBDs */
   39.53 +
/* Block-device file operations installed for every major we register;
 * shared across the ide/scsi/'other' variants. */
static struct block_device_operations xlvbd_block_fops = 
{
    open:               xenolinux_block_open,
    release:            xenolinux_block_release,
    ioctl:              xenolinux_block_ioctl,
    check_media_change: xenolinux_block_check,
    revalidate:         xenolinux_block_revalidate,
};
   39.62 +
   39.63 +static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
   39.64 +{
   39.65 +    int error;
   39.66 +    block_io_op_t op; 
   39.67 +
   39.68 +    /* Probe for disk information. */
   39.69 +    memset(&op, 0, sizeof(op)); 
   39.70 +    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
   39.71 +    op.u.probe_params.domain    = 0; 
   39.72 +    op.u.probe_params.xdi.max   = MAX_VBDS;
   39.73 +    op.u.probe_params.xdi.disks = disk_info;
   39.74 +    op.u.probe_params.xdi.count = 0;
   39.75 +
   39.76 +    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
   39.77 +    {
   39.78 +        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
   39.79 +        return -1;
   39.80 +    }
   39.81 +
   39.82 +    return op.u.probe_params.xdi.count;
   39.83 +}
   39.84 +
   39.85 +/*
   39.86 + * xlvbd_init_device - initialise a VBD device
   39.87 + * @disk:              a xen_disk_t describing the VBD
   39.88 + *
   39.89 + * Takes a xen_disk_t * that describes a VBD the domain has access to.
   39.90 + * Performs appropriate initialisation and registration of the device.
   39.91 + *
   39.92 + * Care needs to be taken when making re-entrant calls to ensure that
   39.93 + * corruption does not occur.  Also, devices that are in use should not have
   39.94 + * their details updated.  This is the caller's responsibility.
   39.95 + */
static int xlvbd_init_device(xen_disk_t *xd)
{
    int device = xd->device;
    int major  = MAJOR(device); 
    int minor  = MINOR(device);
    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
    char *major_name;
    struct gendisk *gd;
    struct block_device *bd;
    xl_disk_t *disk;
    int i, rc = 0, max_part, partno;
    unsigned long capacity;

    unsigned char buf[64];   /* scratch space for disk_name() */

    if ( (bd = bdget(device)) == NULL )
        return -1;

    /*
     * Update of partition info, and check of usage count, is protected
     * by the per-block-device semaphore.
     */
    down(&bd->bd_sem);

    /* Refuse to (re)initialise a device that is currently open. */
    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
    {
        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
        rc = -1;
        goto out;
    }

    /* Choose naming scheme and partition granularity by device class. */
    if ( is_ide ) {

        major_name = XLIDE_MAJOR_NAME; 
        max_part   = XLIDE_MAX_PART;

    } else if ( is_scsi ) {

        major_name = XLSCSI_MAJOR_NAME;
        max_part   = XLSCSI_MAX_PART;

    } else if (XD_VIRTUAL(xd->info)) {

        major_name = XLVBD_MAJOR_NAME;
        max_part   = XLVBD_MAX_PART;

    } else { 

        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
        printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", 
               major, minor);
        is_scsi    = 1; 
        major_name = "cciss"; 
        max_part   = XLSCSI_MAX_PART;

    }
    
    partno = minor & (max_part - 1); 
    
    /* First VBD on this major? Register it and build a gendisk together
     * with all its per-minor tables. */
    if ( (gd = get_gendisk(device)) == NULL )
    {
        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
        if ( rc < 0 )
        {
            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
            goto out;
        }

        /* Install the per-minor parameter tables for this device class. */
        if ( is_ide )
        { 
            blksize_size[major]  = xlide_blksize_size;
            hardsect_size[major] = xlide_hardsect_size;
            max_sectors[major]   = xlide_max_sectors;
            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
        } 
        else if ( is_scsi )
        { 
            blksize_size[major]  = xlscsi_blksize_size;
            hardsect_size[major] = xlscsi_hardsect_size;
            max_sectors[major]   = xlscsi_max_sectors;
            read_ahead[major]    = 0; /* XXX 8; -- guessing */
        }
        else
        { 
            blksize_size[major]  = xlvbd_blksize_size;
            hardsect_size[major] = xlvbd_hardsect_size;
            max_sectors[major]   = xlvbd_max_sectors;
            read_ahead[major]    = 8;
        }

        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);

        /*
         * Turn off barking 'headactive' mode. We dequeue buffer heads as
         * soon as we pass them down to Xen.
         */
        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);

        /* Construct an appropriate gendisk structure. */
        /* NOTE(review): none of the kmalloc() results below (gd, sizes,
         * part, real_devices, de_arr, flags) are checked for NULL -- an
         * allocation failure here will oops. Worth hardening. */
        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
        gd->major      = major;
        gd->major_name = major_name; 
    
        gd->max_p      = max_part; 
        if ( is_ide )
        { 
            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
        } 
        else if ( is_scsi )
        { 
            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
        }
        else
        { 
            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
        }

        /* 
        ** The sizes[] and part[] arrays hold the sizes and other 
        ** information about every partition with this 'major' (i.e. 
        ** every disk sharing the 8 bit prefix * max partns per disk) 
        */
        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
                            GFP_KERNEL);
        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
        memset(gd->part,  0, max_part * gd->nr_real 
               * sizeof(struct hd_struct));

        /* One xl_disk_t per real unit; see xldev_to_xldisk(). */
        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
                                   GFP_KERNEL);
        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));

        gd->next   = NULL;            
        gd->fops   = &xlvbd_block_fops;

        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
                             GFP_KERNEL);
        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
    
        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));

        add_gendisk(gd);

        blk_size[major] = gd->sizes;
    }

    if ( XD_READONLY(xd->info) )
        set_device_ro(device, 1); 

    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XENO;

    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
    capacity = (unsigned long)xd->capacity;

    if ( partno != 0 )
    {
        /*
         * If this was previously set up as a real disc we will have set 
         * up partition-table information. Virtual partitions override 
         * 'real' partitions, and the two cannot coexist on a device.
         */
        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
             (gd->sizes[minor & ~(max_part-1)] != 0) )
        {
            /*
             * Any non-zero sub-partition entries must be cleaned out before
             * installing 'virtual' partition entries. The two types cannot
             * coexist, and virtual partitions are favoured.
             */
            kdev_t dev = device & ~(max_part-1);
            for ( i = max_part - 1; i > 0; i-- )
            {
                invalidate_device(dev+i, 1);
                gd->part[MINOR(dev+i)].start_sect = 0;
                gd->part[MINOR(dev+i)].nr_sects   = 0;
                gd->sizes[MINOR(dev+i)]           = 0;
            }
            printk(KERN_ALERT
                   "Virtual partitions found for /dev/%s - ignoring any "
                   "real partition information we may have found.\n",
                   disk_name(gd, MINOR(device), buf));
        }

        /* Need to skankily setup 'partition' information */
        gd->part[minor].start_sect = 0; 
        gd->part[minor].nr_sects   = capacity; 
        gd->sizes[minor]           = capacity; 

        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
    }
    else
    {
        /* Whole-unit VBD: publish capacity (sizes[] is in 1kB units,
         * hence the shift from 512-byte sectors). */
        gd->part[minor].nr_sects = capacity;
        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
        
        /* Some final fix-ups depending on the device type */
        switch ( XD_TYPE(xd->info) )
        { 
        case XD_TYPE_CDROM:
        case XD_TYPE_FLOPPY: 
        case XD_TYPE_TAPE:
            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
            printk(KERN_ALERT 
                   "Skipping partition check on %s /dev/%s\n", 
                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : 
                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : 
                    "floppy"), disk_name(gd, MINOR(device), buf)); 
            break; 

        case XD_TYPE_DISK:
            /* Only check partitions on real discs (not virtual!). */
            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
            {
                printk(KERN_ALERT
                       "Skipping partition check on virtual /dev/%s\n",
                       disk_name(gd, MINOR(device), buf));
                break;
            }
            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
            break; 

        default:
            printk(KERN_ALERT "XenoLinux: unknown device type %d\n", 
                   XD_TYPE(xd->info)); 
            break; 
        }
    }

 out:
    up(&bd->bd_sem);
    bdput(bd);    
    return rc;
}
  39.336 +
  39.337 +
  39.338 +/*
  39.339 + * xlvbd_remove_device - remove a device node if possible
  39.340 + * @device:       numeric device ID
  39.341 + *
  39.342 + * Updates the gendisk structure and invalidates devices.
  39.343 + *
  39.344 + * This is OK for now but in future, should perhaps consider where this should
  39.345 + * deallocate gendisks / unregister devices.
  39.346 + */
static int xlvbd_remove_device(int device)
{
    int i, rc = 0, minor = MINOR(device);
    struct gendisk *gd;
    struct block_device *bd;
    xl_disk_t *disk = NULL;

    if ( (bd = bdget(device)) == NULL )
        return -1;

    /*
     * Update of partition info, and check of usage count, is protected
     * by the per-block-device semaphore.
     */
    down(&bd->bd_sem);

    /* Any device we remove must have been set up by xlvbd_init_device. */
    if ( ((gd = get_gendisk(device)) == NULL) ||
         ((disk = xldev_to_xldisk(device)) == NULL) )
        BUG();

    /* Never tear down a device that is currently open. */
    if ( disk->usage != 0 )
    {
        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
        rc = -1;
        goto out;
    }
 
    if ( (minor & (gd->max_p-1)) != 0 )
    {
        /* 1: The VBD is mapped to a partition rather than a whole unit. */
        invalidate_device(device, 1);
        gd->part[minor].start_sect = 0;
        gd->part[minor].nr_sects   = 0;
        gd->sizes[minor]           = 0;

        /* Clear the consists-of-virtual-partitions flag if possible. */
        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
        for ( i = 1; i < gd->max_p; i++ )
            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;

        /*
         * If all virtual partitions are now gone, and a 'whole unit' VBD is
         * present, then we can try to grok the unit's real partition table.
         */
        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
        {
            register_disk(gd,
                          device&~(gd->max_p-1), 
                          gd->max_p, 
                          &xlvbd_block_fops,
                          gd->part[minor&~(gd->max_p-1)].nr_sects);
        }
    }
    else
    {
        /*
         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
         * NB. The partition entries are only cleared if there are no VBDs
         * mapped to individual partitions on this unit.
         */
        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
        while ( i >= 0 )
        {
            invalidate_device(device+i, 1);
            gd->part[minor+i].start_sect = 0;
            gd->part[minor+i].nr_sects   = 0;
            gd->sizes[minor+i]           = 0;
            i--;
        }
    }

 out:
    up(&bd->bd_sem);
    bdput(bd);
    return rc;
}
  39.428 +
  39.429 +/*
  39.430 + * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
  39.431 + * state. The VBDs need to be updated in this way when the domain is
  39.432 + * initialised and also each time we receive an XLBLK_UPDATE event.
  39.433 + */
  39.434 +void xlvbd_update_vbds(void)
  39.435 +{
  39.436 +    int i, j, k, old_nr, new_nr;
  39.437 +    xen_disk_t *old_info, *new_info, *merged_info;
  39.438 +
  39.439 +    old_info = vbd_info;
  39.440 +    old_nr   = nr_vbds;
  39.441 +
  39.442 +    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  39.443 +    if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
  39.444 +    {
  39.445 +        kfree(new_info);
  39.446 +        return;
  39.447 +    }
  39.448 +
  39.449 +    /*
  39.450 +     * Final list maximum size is old list + new list. This occurs only when
  39.451 +     * old list and new list do not overlap at all, and we cannot yet destroy
  39.452 +     * VBDs in the old list because the usage counts are busy.
  39.453 +     */
  39.454 +    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
  39.455 +
  39.456 +    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  39.457 +    i = j = k = 0;
  39.458 +
  39.459 +    while ( (i < old_nr) && (j < new_nr) )
  39.460 +    {
  39.461 +        if ( old_info[i].device < new_info[j].device )
  39.462 +        {
  39.463 +            if ( xlvbd_remove_device(old_info[i].device) != 0 )
  39.464 +                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  39.465 +            i++;
  39.466 +        }
  39.467 +        else if ( old_info[i].device > new_info[j].device )
  39.468 +        {
  39.469 +            if ( xlvbd_init_device(&new_info[j]) == 0 )
  39.470 +                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  39.471 +            j++;
  39.472 +        }
  39.473 +        else
  39.474 +        {
  39.475 +            if ( ((old_info[i].capacity == new_info[j].capacity) &&
  39.476 +                  (old_info[i].info == new_info[j].info)) ||
  39.477 +                 (xlvbd_remove_device(old_info[i].device) != 0) )
  39.478 +                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  39.479 +            else if ( xlvbd_init_device(&new_info[j]) == 0 )
  39.480 +                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  39.481 +            i++; j++;
  39.482 +        }
  39.483 +    }
  39.484 +
  39.485 +    for ( ; i < old_nr; i++ )
  39.486 +    {
  39.487 +        if ( xlvbd_remove_device(old_info[i].device) != 0 )
  39.488 +            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  39.489 +    }
  39.490 +
  39.491 +    for ( ; j < new_nr; j++ )
  39.492 +    {
  39.493 +        if ( xlvbd_init_device(&new_info[j]) == 0 )
  39.494 +            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  39.495 +    }
  39.496 +
  39.497 +    vbd_info = merged_info;
  39.498 +    nr_vbds  = k;
  39.499 +
  39.500 +    kfree(old_info);
  39.501 +    kfree(new_info);
  39.502 +}
  39.503 +
  39.504 +
  39.505 +/*
  39.506 + * Set up all the linux device goop for the virtual block devices (vbd's) that 
  39.507 + * xen tells us about. Note that although from xen's pov VBDs are addressed 
  39.508 + * simply an opaque 16-bit device number, the domain creation tools 
  39.509 + * conventionally allocate these numbers to correspond to those used by 'real' 
  39.510 + * linux -- this is just for convenience as it means e.g. that the same 
  39.511 + * /etc/fstab can be used when booting with or without xen.
  39.512 + */
  39.513 +int __init xlvbd_init(void)
  39.514 +{
  39.515 +    int i;
  39.516 +    
  39.517 +    /*
  39.518 +     * If compiled as a module, we don't support unloading yet. We therefore 
  39.519 +     * permanently increment the reference count to disallow it.
  39.520 +     */
  39.521 +    SET_MODULE_OWNER(&xlvbd_block_fops);
  39.522 +    MOD_INC_USE_COUNT;
  39.523 +
  39.524 +    /* Initialize the global arrays. */
  39.525 +    for ( i = 0; i < 256; i++ ) 
  39.526 +    {
  39.527 +        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
  39.528 +        xlide_blksize_size[i]  = 1024;
  39.529 +        xlide_hardsect_size[i] = 512;
  39.530 +        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
  39.531 +
  39.532 +        /* from the generic scsi disk code (drivers/scsi/sd.c) */
  39.533 +        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
  39.534 +        xlscsi_hardsect_size[i] = 512;
  39.535 +        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
  39.536 +
  39.537 +        /* we don't really know what to set these too since it depends */
  39.538 +        xlvbd_blksize_size[i]  = 512;
  39.539 +        xlvbd_hardsect_size[i] = 512;
  39.540 +        xlvbd_max_sectors[i]   = 128;
  39.541 +    }
  39.542 +
  39.543 +    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  39.544 +    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  39.545 +
  39.546 +    if ( nr_vbds < 0 )
  39.547 +    {
  39.548 +        kfree(vbd_info);
  39.549 +        vbd_info = NULL;
  39.550 +        nr_vbds  = 0;
  39.551 +    }
  39.552 +    else
  39.553 +    {
  39.554 +        for ( i = 0; i < nr_vbds; i++ )
  39.555 +            xlvbd_init_device(&vbd_info[i]);
  39.556 +    }
  39.557 +
  39.558 +    return 0;
  39.559 +}
  39.560 +
  39.561 +
  39.562 +#ifdef MODULE
  39.563 +module_init(xlvbd_init);
  39.564 +#endif
    40.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_block.c	Tue Mar 16 12:37:49 2004 +0000
    40.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    40.3 @@ -1,621 +0,0 @@
    40.4 -/******************************************************************************
    40.5 - * xl_block.c
    40.6 - * 
    40.7 - * Xenolinux virtual block-device driver.
    40.8 - * 
    40.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   40.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   40.11 - */
   40.12 -
   40.13 -#include "xl_block.h"
   40.14 -#include <linux/blk.h>
   40.15 -#include <linux/cdrom.h>
   40.16 -#include <linux/tqueue.h>
   40.17 -#include <linux/sched.h>
   40.18 -#include <scsi/scsi.h>
   40.19 -
   40.20 -#include <linux/interrupt.h>
   40.21 -
   40.22 -typedef unsigned char byte; /* from linux/ide.h */
   40.23 -
   40.24 -#define XLBLK_RESPONSE_IRQ HYPEREVENT_IRQ(_EVENT_BLKDEV)
   40.25 -#define XLBLK_UPDATE_IRQ   HYPEREVENT_IRQ(_EVENT_VBD_UPD)
   40.26 -#define DEBUG_IRQ          HYPEREVENT_IRQ(_EVENT_DEBUG)
   40.27 -
   40.28 -#define STATE_ACTIVE    0
   40.29 -#define STATE_SUSPENDED 1
   40.30 -#define STATE_CLOSED    2
   40.31 -static unsigned int state = STATE_SUSPENDED;
   40.32 -
   40.33 -static blk_ring_t *blk_ring;
   40.34 -static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */
   40.35 -static BLK_RING_IDX req_prod;  /* Private request producer.         */
   40.36 -
   40.37 -/* We plug the I/O ring if the driver is suspended or if the ring is full. */
   40.38 -#define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \
   40.39 -                      (state != STATE_ACTIVE))
   40.40 -
   40.41 -
   40.42 -/*
   40.43 - * Request queues with outstanding work, but ring is currently full.
   40.44 - * We need no special lock here, as we always access this with the
   40.45 - * io_request_lock held. We only need a small maximum list.
   40.46 - */
   40.47 -#define MAX_PENDING 8
   40.48 -static request_queue_t *pending_queues[MAX_PENDING];
   40.49 -static int nr_pending;
   40.50 -
   40.51 -static kdev_t        sg_dev;
   40.52 -static int           sg_operation = -1;
   40.53 -static unsigned long sg_next_sect;
   40.54 -#define DISABLE_SCATTERGATHER() (sg_operation = -1)
   40.55 -
   40.56 -static inline void signal_requests_to_xen(void)
   40.57 -{
   40.58 -    block_io_op_t op; 
   40.59 -
   40.60 -    DISABLE_SCATTERGATHER();
   40.61 -    blk_ring->req_prod = req_prod;
   40.62 -
   40.63 -    op.cmd = BLOCK_IO_OP_SIGNAL; 
   40.64 -    HYPERVISOR_block_io_op(&op);
   40.65 -    return;
   40.66 -}
   40.67 -
   40.68 -
   40.69 -/*
   40.70 - * xlblk_update_int/update-vbds_task - handle VBD update events from Xen
   40.71 - * 
   40.72 - * Schedule a task for keventd to run, which will update the VBDs and perform 
   40.73 - * the corresponding updates to our view of VBD state, so the XenoLinux will 
   40.74 - * respond to changes / additions / deletions to the set of VBDs automatically.
   40.75 - */
   40.76 -static struct tq_struct update_tq;
   40.77 -static void update_vbds_task(void *unused)
   40.78 -{ 
   40.79 -    xlvbd_update_vbds();
   40.80 -}
   40.81 -static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs)
   40.82 -{
   40.83 -    update_tq.routine = update_vbds_task;
   40.84 -    schedule_task(&update_tq);
   40.85 -}
   40.86 -
   40.87 -
   40.88 -int xenolinux_block_open(struct inode *inode, struct file *filep)
   40.89 -{
   40.90 -    short xldev = inode->i_rdev; 
   40.91 -    struct gendisk *gd = get_gendisk(xldev);
   40.92 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
   40.93 -    short minor = MINOR(xldev); 
   40.94 -
   40.95 -    if ( gd->part[minor].nr_sects == 0 )
   40.96 -    { 
   40.97 -        /*
   40.98 -         * Device either doesn't exist, or has zero capacity; we use a few
   40.99 -         * cheesy heuristics to return the relevant error code
  40.100 -         */
  40.101 -        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
  40.102 -             ((minor & (gd->max_p - 1)) != 0) )
  40.103 -        { 
  40.104 -            /*
  40.105 -             * We have a real device, but no such partition, or we just have a
  40.106 -             * partition number so guess this is the problem.
  40.107 -             */
  40.108 -            return -ENXIO;     /* no such device or address */
  40.109 -        }
  40.110 -        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
  40.111 -        {
  40.112 -            /* This is a removable device => assume that media is missing. */ 
  40.113 -            return -ENOMEDIUM; /* media not present (this is a guess) */
  40.114 -        } 
  40.115 -        else
  40.116 -        { 
  40.117 -            /* Just go for the general 'no such device' error. */
  40.118 -            return -ENODEV;    /* no such device */
  40.119 -        }
  40.120 -    }
  40.121 -    
  40.122 -    /* Update of usage count is protected by per-device semaphore. */
  40.123 -    disk->usage++;
  40.124 -
  40.125 -    return 0;
  40.126 -}
  40.127 -
  40.128 -
  40.129 -int xenolinux_block_release(struct inode *inode, struct file *filep)
  40.130 -{
  40.131 -    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
  40.132 -
  40.133 -    /*
  40.134 -     * When usage drops to zero it may allow more VBD updates to occur.
  40.135 -     * Update of usage count is protected by a per-device semaphore.
  40.136 -     */
  40.137 -    if ( --disk->usage == 0 )
  40.138 -    {
  40.139 -        update_tq.routine = update_vbds_task;
  40.140 -        schedule_task(&update_tq);
  40.141 -    }
  40.142 -
  40.143 -    return 0;
  40.144 -}
  40.145 -
  40.146 -
  40.147 -int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
  40.148 -                          unsigned command, unsigned long argument)
  40.149 -{
  40.150 -    kdev_t dev = inode->i_rdev;
  40.151 -    struct hd_geometry *geo = (struct hd_geometry *)argument;
  40.152 -    struct gendisk *gd;     
  40.153 -    struct hd_struct *part; 
  40.154 -    int i;
  40.155 -
  40.156 -    /* NB. No need to check permissions. That is done for us. */
  40.157 -    
  40.158 -    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
  40.159 -                  command, (long) argument, dev); 
  40.160 -  
  40.161 -    gd = get_gendisk(dev);
  40.162 -    part = &gd->part[MINOR(dev)]; 
  40.163 -
  40.164 -    switch ( command )
  40.165 -    {
  40.166 -    case BLKGETSIZE:
  40.167 -        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects); 
  40.168 -        return put_user(part->nr_sects, (unsigned long *) argument);
  40.169 -
  40.170 -    case BLKGETSIZE64:
  40.171 -        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
  40.172 -                      (u64)part->nr_sects * 512);
  40.173 -        return put_user((u64)part->nr_sects * 512, (u64 *) argument);
  40.174 -
  40.175 -    case BLKRRPART:                               /* re-read partition table */
  40.176 -        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
  40.177 -        return xenolinux_block_revalidate(dev);
  40.178 -
  40.179 -    case BLKSSZGET:
  40.180 -        return hardsect_size[MAJOR(dev)][MINOR(dev)]; 
  40.181 -
  40.182 -    case BLKBSZGET:                                        /* get block size */
  40.183 -        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
  40.184 -        break;
  40.185 -
  40.186 -    case BLKBSZSET:                                        /* set block size */
  40.187 -        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
  40.188 -        break;
  40.189 -
  40.190 -    case BLKRASET:                                         /* set read-ahead */
  40.191 -        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
  40.192 -        break;
  40.193 -
  40.194 -    case BLKRAGET:                                         /* get read-ahead */
  40.195 -        DPRINTK_IOCTL("   BLKRAFET: %x\n", BLKRAGET);
  40.196 -        break;
  40.197 -
  40.198 -    case HDIO_GETGEO:
  40.199 -        /* note: these values are complete garbage */
  40.200 -        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
  40.201 -        if (!argument) return -EINVAL;
  40.202 -        if (put_user(0x00,  (unsigned long *) &geo->start)) return -EFAULT;
  40.203 -        if (put_user(0xff,  (byte *)&geo->heads)) return -EFAULT;
  40.204 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  40.205 -        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
  40.206 -        return 0;
  40.207 -
  40.208 -    case HDIO_GETGEO_BIG: 
  40.209 -        /* note: these values are complete garbage */
  40.210 -        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
  40.211 -        if (!argument) return -EINVAL;
  40.212 -        if (put_user(0x00,  (unsigned long *) &geo->start))  return -EFAULT;
  40.213 -        if (put_user(0xff,  (byte *)&geo->heads))   return -EFAULT;
  40.214 -        if (put_user(0x3f,  (byte *)&geo->sectors)) return -EFAULT;
  40.215 -        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
  40.216 -        return 0;
  40.217 -
  40.218 -    case CDROMMULTISESSION:
  40.219 -        DPRINTK("FIXME: support multisession CDs later\n");
  40.220 -        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
  40.221 -            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
  40.222 -        return 0;
  40.223 -
  40.224 -    case SCSI_IOCTL_GET_BUS_NUMBER:
  40.225 -        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in xl_block");
  40.226 -        return -ENOSYS;
  40.227 -
  40.228 -    default:
  40.229 -        printk(KERN_ALERT "ioctl %08x not supported by xl_block\n", command);
  40.230 -        return -ENOSYS;
  40.231 -    }
  40.232 -    
  40.233 -    return 0;
  40.234 -}
  40.235 -
  40.236 -/* check media change: should probably do something here in some cases :-) */
  40.237 -int xenolinux_block_check(kdev_t dev)
  40.238 -{
  40.239 -    DPRINTK("xenolinux_block_check\n");
  40.240 -    return 0;
  40.241 -}
  40.242 -
  40.243 -int xenolinux_block_revalidate(kdev_t dev)
  40.244 -{
  40.245 -    struct block_device *bd;
  40.246 -    struct gendisk *gd;
  40.247 -    xl_disk_t *disk;
  40.248 -    unsigned long capacity;
  40.249 -    int i, rc = 0;
  40.250 -    
  40.251 -    if ( (bd = bdget(dev)) == NULL )
  40.252 -        return -EINVAL;
  40.253 -
  40.254 -    /*
  40.255 -     * Update of partition info, and check of usage count, is protected
  40.256 -     * by the per-block-device semaphore.
  40.257 -     */
  40.258 -    down(&bd->bd_sem);
  40.259 -
  40.260 -    if ( ((gd = get_gendisk(dev)) == NULL) ||
  40.261 -         ((disk = xldev_to_xldisk(dev)) == NULL) ||
  40.262 -         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
  40.263 -    {
  40.264 -        rc = -EINVAL;
  40.265 -        goto out;
  40.266 -    }
  40.267 -
  40.268 -    if ( disk->usage > 1 )
  40.269 -    {
  40.270 -        rc = -EBUSY;
  40.271 -        goto out;
  40.272 -    }
  40.273 -
  40.274 -    /* Only reread partition table if VBDs aren't mapped to partitions. */
  40.275 -    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
  40.276 -    {
  40.277 -        for ( i = gd->max_p - 1; i >= 0; i-- )
  40.278 -        {
  40.279 -            invalidate_device(dev+i, 1);
  40.280 -            gd->part[MINOR(dev+i)].start_sect = 0;
  40.281 -            gd->part[MINOR(dev+i)].nr_sects   = 0;
  40.282 -            gd->sizes[MINOR(dev+i)]           = 0;
  40.283 -        }
  40.284 -
  40.285 -        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
  40.286 -    }
  40.287 -
  40.288 - out:
  40.289 -    up(&bd->bd_sem);
  40.290 -    bdput(bd);
  40.291 -    return rc;
  40.292 -}
  40.293 -
  40.294 -
  40.295 -/*
  40.296 - * hypervisor_request
  40.297 - *
  40.298 - * request block io 
  40.299 - * 
  40.300 - * id: for guest use only.
  40.301 - * operation: XEN_BLOCK_{READ,WRITE,PROBE,VBD*}
  40.302 - * buffer: buffer to read/write into. this should be a
  40.303 - *   virtual address in the guest os.
  40.304 - */
  40.305 -static int hypervisor_request(unsigned long   id,
  40.306 -                              int             operation,
  40.307 -                              char *          buffer,
  40.308 -                              unsigned long   sector_number,
  40.309 -                              unsigned short  nr_sectors,
  40.310 -                              kdev_t          device)
  40.311 -{
  40.312 -    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer)); 
  40.313 -    struct gendisk *gd;
  40.314 -    blk_ring_req_entry_t *req;
  40.315 -    struct buffer_head *bh;
  40.316 -
  40.317 -    if ( unlikely(nr_sectors >= (1<<9)) )
  40.318 -        BUG();
  40.319 -    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
  40.320 -        BUG();
  40.321 -
  40.322 -    if ( unlikely(state == STATE_CLOSED) )
  40.323 -        return 1;
  40.324 -
  40.325 -    switch ( operation )
  40.326 -    {
  40.327 -
  40.328 -    case XEN_BLOCK_READ:
  40.329 -    case XEN_BLOCK_WRITE:
  40.330 -        gd = get_gendisk(device); 
  40.331 -
  40.332 -        /*
  40.333 -         * Update the sector_number we'll pass down as appropriate; note that
  40.334 -         * we could sanity check that resulting sector will be in this
  40.335 -         * partition, but this will happen in xen anyhow.
  40.336 -         */
  40.337 -        sector_number += gd->part[MINOR(device)].start_sect;
  40.338 -
  40.339 -        /*
  40.340 -         * If this unit doesn't consist of virtual (i.e., Xen-specified)
  40.341 -         * partitions then we clear the partn bits from the device number.
  40.342 -         */
  40.343 -        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] & 
  40.344 -               GENHD_FL_VIRT_PARTNS) )
  40.345 -            device &= ~(gd->max_p - 1);
  40.346 -
  40.347 -        if ( (sg_operation == operation) &&
  40.348 -             (sg_dev == device) &&
  40.349 -             (sg_next_sect == sector_number) )
  40.350 -        {
  40.351 -            req = &blk_ring->ring[MASK_BLK_IDX(req_prod-1)].req;
  40.352 -            bh = (struct buffer_head *)id;
  40.353 -            bh->b_reqnext = (struct buffer_head *)req->id;
  40.354 -            req->id = id;
  40.355 -            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
  40.356 -            if ( ++req->nr_segments < MAX_BLK_SEGS )
  40.357 -                sg_next_sect += nr_sectors;
  40.358 -            else
  40.359 -                DISABLE_SCATTERGATHER();
  40.360 -            return 0;
  40.361 -        }
  40.362 -        else if ( RING_PLUGGED )
  40.363 -        {
  40.364 -            return 1;
  40.365 -        }
  40.366 -        else
  40.367 -        {
  40.368 -            sg_operation = operation;
  40.369 -            sg_dev       = device;
  40.370 -            sg_next_sect = sector_number + nr_sectors;
  40.371 -        }
  40.372 -        break;
  40.373 -
  40.374 -    default:
  40.375 -        panic("unknown op %d\n", operation);
  40.376 -    }
  40.377 -
  40.378 -    /* Fill out a communications ring structure. */
  40.379 -    req = &blk_ring->ring[MASK_BLK_IDX(req_prod)].req;
  40.380 -    req->id            = id;
  40.381 -    req->operation     = operation;
  40.382 -    req->sector_number = (xen_sector_t)sector_number;
  40.383 -    req->device        = device; 
  40.384 -    req->nr_segments   = 1;
  40.385 -    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
  40.386 -    req_prod++;
  40.387 -
  40.388 -    return 0;
  40.389 -}
  40.390 -
  40.391 -
  40.392 -/*
  40.393 - * do_xlblk_request
  40.394 - *  read a block; request is in a request queue
  40.395 - */
  40.396 -void do_xlblk_request(request_queue_t *rq)
  40.397 -{
  40.398 -    struct request *req;
  40.399 -    struct buffer_head *bh, *next_bh;
  40.400 -    int rw, nsect, full, queued = 0;
  40.401 -
  40.402 -    DPRINTK("xlblk.c::do_xlblk_request\n"); 
  40.403 -
  40.404 -    while ( !rq->plugged && !list_empty(&rq->queue_head))
  40.405 -    {
  40.406 -        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL ) 
  40.407 -            goto out;
  40.408 -  
  40.409 -        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
  40.410 -                req, req->cmd, req->sector,
  40.411 -                req->current_nr_sectors, req->nr_sectors, req->bh);
  40.412 -
  40.413 -        rw = req->cmd;
  40.414 -        if ( rw == READA )
  40.415 -            rw = READ;
  40.416 -        if ( unlikely((rw != READ) && (rw != WRITE)) )
  40.417 -            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
  40.418 -
  40.419 -        req->errors = 0;
  40.420 -
  40.421 -        bh = req->bh;
  40.422 -        while ( bh != NULL )
  40.423 -        {
  40.424 -            next_bh = bh->b_reqnext;
  40.425 -            bh->b_reqnext = NULL;
  40.426 -
  40.427 -            full = hypervisor_request(
  40.428 -                (unsigned long)bh,
  40.429 -                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE, 
  40.430 -                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
  40.431 -
  40.432 -            if ( full )
  40.433 -            { 
  40.434 -                bh->b_reqnext = next_bh;
  40.435 -                pending_queues[nr_pending++] = rq;
  40.436 -                if ( unlikely(nr_pending >= MAX_PENDING) )
  40.437 -                    BUG();
  40.438 -                goto out; 
  40.439 -            }
  40.440 -
  40.441 -            queued++;
  40.442 -
  40.443 -            /* Dequeue the buffer head from the request. */
  40.444 -            nsect = bh->b_size >> 9;
  40.445 -            bh = req->bh = next_bh;
  40.446 -            
  40.447 -            if ( bh != NULL )
  40.448 -            {
  40.449 -                /* There's another buffer head to do. Update the request. */
  40.450 -                req->hard_sector += nsect;
  40.451 -                req->hard_nr_sectors -= nsect;
  40.452 -                req->sector = req->hard_sector;
  40.453 -                req->nr_sectors = req->hard_nr_sectors;
  40.454 -                req->current_nr_sectors = bh->b_size >> 9;
  40.455 -                req->buffer = bh->b_data;
  40.456 -            }
  40.457 -            else
  40.458 -            {
  40.459 -                /* That was the last buffer head. Finalise the request. */
  40.460 -                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
  40.461 -                    BUG();
  40.462 -                blkdev_dequeue_request(req);
  40.463 -                end_that_request_last(req);
  40.464 -            }
  40.465 -        }
  40.466 -    }
  40.467 -
  40.468 - out:
  40.469 -    if ( queued != 0 ) signal_requests_to_xen();
  40.470 -}
  40.471 -
  40.472 -
  40.473 -static void kick_pending_request_queues(void)
  40.474 -{
  40.475 -    /* We kick pending request queues if the ring is reasonably empty. */
  40.476 -    if ( (nr_pending != 0) && 
  40.477 -         ((req_prod - resp_cons) < (BLK_RING_SIZE >> 1)) )
  40.478 -    {
  40.479 -        /* Attempt to drain the queue, but bail if the ring becomes full. */
  40.480 -        while ( (nr_pending != 0) && !RING_PLUGGED )
  40.481 -            do_xlblk_request(pending_queues[--nr_pending]);
  40.482 -    }
  40.483 -}
  40.484 -
  40.485 -
  40.486 -static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
  40.487 -{
  40.488 -    BLK_RING_IDX i; 
  40.489 -    unsigned long flags; 
  40.490 -    struct buffer_head *bh, *next_bh;
  40.491 -    
  40.492 -    if ( unlikely(state == STATE_CLOSED) )
  40.493 -        return;
  40.494 -    
  40.495 -    spin_lock_irqsave(&io_request_lock, flags);     
  40.496 -
  40.497 -    for ( i = resp_cons; i != blk_ring->resp_prod; i++ )
  40.498 -    {
  40.499 -        blk_ring_resp_entry_t *bret = &blk_ring->ring[MASK_BLK_IDX(i)].resp;
  40.500 -        switch ( bret->operation )
  40.501 -        {
  40.502 -        case XEN_BLOCK_READ:
  40.503 -        case XEN_BLOCK_WRITE:
  40.504 -            if ( unlikely(bret->status != 0) )
  40.505 -                DPRINTK("Bad return from blkdev data request: %lx\n",
  40.506 -                        bret->status);
  40.507 -            for ( bh = (struct buffer_head *)bret->id; 
  40.508 -                  bh != NULL; 
  40.509 -                  bh = next_bh )
  40.510 -            {
  40.511 -                next_bh = bh->b_reqnext;
  40.512 -                bh->b_reqnext = NULL;
  40.513 -                bh->b_end_io(bh, !bret->status);
  40.514 -            }
  40.515 -            break;
  40.516 -     
  40.517 -        default:
  40.518 -            BUG();
  40.519 -        }
  40.520 -    }
  40.521 -    
  40.522 -    resp_cons = i;
  40.523 -
  40.524 -    kick_pending_request_queues();
  40.525 -
  40.526 -    spin_unlock_irqrestore(&io_request_lock, flags);
  40.527 -}
  40.528 -
  40.529 -
  40.530 -static void reset_xlblk_interface(void)
  40.531 -{
  40.532 -    block_io_op_t op; 
  40.533 -
  40.534 -    nr_pending = 0;
  40.535 -
  40.536 -    op.cmd = BLOCK_IO_OP_RESET;
  40.537 -    if ( HYPERVISOR_block_io_op(&op) != 0 )
  40.538 -        printk(KERN_ALERT "Possible blkdev trouble: couldn't reset ring\n");
  40.539 -
  40.540 -    op.cmd = BLOCK_IO_OP_RING_ADDRESS;
  40.541 -    (void)HYPERVISOR_block_io_op(&op);
  40.542 -
  40.543 -    set_fixmap(FIX_BLKRING_BASE, op.u.ring_mfn << PAGE_SHIFT);
  40.544 -    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
  40.545 -    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
  40.546 -
  40.547 -    wmb();
  40.548 -    state = STATE_ACTIVE;
  40.549 -}
  40.550 -
  40.551 -
  40.552 -int __init xlblk_init(void)
  40.553 -{
  40.554 -    int error; 
  40.555 -
  40.556 -    reset_xlblk_interface();
  40.557 -
  40.558 -    error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int, 
  40.559 -                        SA_SAMPLE_RANDOM, "blkdev", NULL);
  40.560 -    if ( error )
  40.561 -    {
  40.562 -        printk(KERN_ALERT "Could not allocate receive interrupt\n");
  40.563 -        goto fail;
  40.564 -    }
  40.565 -
  40.566 -    error = request_irq(XLBLK_UPDATE_IRQ, xlblk_update_int,
  40.567 -                        SA_INTERRUPT, "blkdev", NULL);
  40.568 -
  40.569 -    if ( error )
  40.570 -    {
  40.571 -        printk(KERN_ALERT "Could not allocate block update interrupt\n");
  40.572 -        goto fail;
  40.573 -    }
  40.574 -
  40.575 -    (void)xlvbd_init();
  40.576 -
  40.577 -    return 0;
  40.578 -
  40.579 - fail:
  40.580 -    return error;
  40.581 -}
  40.582 -
  40.583 -
  40.584 -static void __exit xlblk_cleanup(void)
  40.585 -{
  40.586 -    xlvbd_cleanup();
  40.587 -    free_irq(XLBLK_RESPONSE_IRQ, NULL);
  40.588 -    free_irq(XLBLK_UPDATE_IRQ, NULL);
  40.589 -}
  40.590 -
  40.591 -
  40.592 -#ifdef MODULE
  40.593 -module_init(xlblk_init);
  40.594 -module_exit(xlblk_cleanup);
  40.595 -#endif
  40.596 -
  40.597 -
  40.598 -void blkdev_suspend(void)
  40.599 -{
  40.600 -    state = STATE_SUSPENDED;
  40.601 -    wmb();
  40.602 -
  40.603 -    while ( resp_cons != blk_ring->req_prod )
  40.604 -    {
  40.605 -        barrier();
  40.606 -        current->state = TASK_INTERRUPTIBLE;
  40.607 -        schedule_timeout(1);
  40.608 -    }
  40.609 -
  40.610 -    wmb();
  40.611 -    state = STATE_CLOSED;
  40.612 -    wmb();
  40.613 -
  40.614 -    clear_fixmap(FIX_BLKRING_BASE);
  40.615 -}
  40.616 -
  40.617 -
  40.618 -void blkdev_resume(void)
  40.619 -{
  40.620 -    reset_xlblk_interface();
  40.621 -    spin_lock_irq(&io_request_lock);
  40.622 -    kick_pending_request_queues();
  40.623 -    spin_unlock_irq(&io_request_lock);
  40.624 -}
    41.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_block.h	Tue Mar 16 12:37:49 2004 +0000
    41.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    41.3 @@ -1,82 +0,0 @@
    41.4 -/******************************************************************************
    41.5 - * xl_block.h
    41.6 - * 
    41.7 - * Shared definitions between all levels of XenoLinux Virtual block devices.
    41.8 - */
    41.9 -
   41.10 -#ifndef __XL_BLOCK_H__
   41.11 -#define __XL_BLOCK_H__
   41.12 -
   41.13 -#include <linux/config.h>
   41.14 -#include <linux/module.h>
   41.15 -
   41.16 -#include <linux/kernel.h>
   41.17 -#include <linux/sched.h>
   41.18 -#include <linux/slab.h>
   41.19 -#include <linux/string.h>
   41.20 -#include <linux/errno.h>
   41.21 -
   41.22 -#include <linux/fs.h>
   41.23 -#include <linux/hdreg.h>
   41.24 -#include <linux/blkdev.h>
   41.25 -#include <linux/major.h>
   41.26 -
   41.27 -#include <asm/hypervisor-ifs/hypervisor-if.h>
   41.28 -#include <asm/hypervisor-ifs/vbd.h>
   41.29 -#include <asm/io.h>
   41.30 -#include <asm/atomic.h>
   41.31 -#include <asm/uaccess.h>
   41.32 -
   41.33 -#if 0
   41.34 -#define DPRINTK(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   41.35 -#else
   41.36 -#define DPRINTK(_f, _a...) ((void)0)
   41.37 -#endif
   41.38 -
   41.39 -#if 0
   41.40 -#define DPRINTK_IOCTL(_f, _a...) printk ( KERN_ALERT _f , ## _a )
   41.41 -#else
   41.42 -#define DPRINTK_IOCTL(_f, _a...) ((void)0)
   41.43 -#endif
   41.44 -
   41.45 -/* Private gendisk->flags[] values. */
   41.46 -#define GENHD_FL_XENO        2 /* Is unit a Xen block device?  */
   41.47 -#define GENHD_FL_VIRT_PARTNS 4 /* Are unit partitions virtual? */
   41.48 -
   41.49 -/*
   41.50 - * We have one of these per vbd, whether ide, scsi or 'other'.
   41.51 - * They hang in an array off the gendisk structure. We may end up putting
   41.52 - * all kinds of interesting stuff here :-)
   41.53 - */
   41.54 -typedef struct xl_disk {
   41.55 -    int usage;
   41.56 -} xl_disk_t;
   41.57 -
   41.58 -extern int xenolinux_control_msg(int operration, char *buffer, int size);
   41.59 -extern int xenolinux_block_open(struct inode *inode, struct file *filep);
   41.60 -extern int xenolinux_block_release(struct inode *inode, struct file *filep);
   41.61 -extern int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
   41.62 -                                 unsigned command, unsigned long argument);
   41.63 -extern int xenolinux_block_check(kdev_t dev);
   41.64 -extern int xenolinux_block_revalidate(kdev_t dev);
   41.65 -extern void do_xlblk_request (request_queue_t *rq); 
   41.66 -
   41.67 -extern void xlvbd_update_vbds(void);
   41.68 -
   41.69 -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
   41.70 -{
   41.71 -    struct gendisk *gd = get_gendisk(xldev);
   41.72 -    
   41.73 -    if ( gd == NULL ) 
   41.74 -        return NULL;
   41.75 -    
   41.76 -    return (xl_disk_t *)gd->real_devices + 
   41.77 -        (MINOR(xldev) >> gd->minor_shift);
   41.78 -}
   41.79 -
   41.80 -
   41.81 -/* Virtual block-device subsystem. */
   41.82 -extern int  xlvbd_init(void);
   41.83 -extern void xlvbd_cleanup(void); 
   41.84 -
   41.85 -#endif /* __XL_BLOCK_H__ */
    42.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/block/xl_vbd.c	Tue Mar 16 12:37:49 2004 +0000
    42.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    42.3 @@ -1,561 +0,0 @@
    42.4 -/******************************************************************************
    42.5 - * xl_vbd.c
    42.6 - * 
    42.7 - * Xenolinux virtual block-device driver (xvd).
    42.8 - * 
    42.9 - * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
   42.10 - * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
   42.11 - */
   42.12 -
   42.13 -#include "xl_block.h"
   42.14 -#include <linux/blk.h>
   42.15 -
   42.16 -/*
   42.17 - * For convenience we distinguish between ide, scsi and 'other' (i.e.
   42.18 - * potentially combinations of the two) in the naming scheme and in a few 
   42.19 - * other places (like default readahead, etc).
   42.20 - */
   42.21 -#define XLIDE_MAJOR_NAME  "hd"
   42.22 -#define XLSCSI_MAJOR_NAME "sd"
   42.23 -#define XLVBD_MAJOR_NAME "xvd"
   42.24 -
   42.25 -#define XLIDE_DEVS_PER_MAJOR   2
   42.26 -#define XLSCSI_DEVS_PER_MAJOR 16
   42.27 -#define XLVBD_DEVS_PER_MAJOR  16
   42.28 -
   42.29 -#define XLIDE_PARTN_SHIFT  6    /* amount to shift minor to get 'real' minor */
   42.30 -#define XLIDE_MAX_PART    (1 << XLIDE_PARTN_SHIFT)     /* minors per ide vbd */
   42.31 -
   42.32 -#define XLSCSI_PARTN_SHIFT 4    /* amount to shift minor to get 'real' minor */
   42.33 -#define XLSCSI_MAX_PART   (1 << XLSCSI_PARTN_SHIFT)   /* minors per scsi vbd */
   42.34 -
   42.35 -#define XLVBD_PARTN_SHIFT  4    /* amount to shift minor to get 'real' minor */
   42.36 -#define XLVBD_MAX_PART    (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */
   42.37 -
   42.38 -/* The below are for the generic drivers/block/ll_rw_block.c code. */
   42.39 -static int xlide_blksize_size[256];
   42.40 -static int xlide_hardsect_size[256];
   42.41 -static int xlide_max_sectors[256];
   42.42 -static int xlscsi_blksize_size[256];
   42.43 -static int xlscsi_hardsect_size[256];
   42.44 -static int xlscsi_max_sectors[256];
   42.45 -static int xlvbd_blksize_size[256];
   42.46 -static int xlvbd_hardsect_size[256];
   42.47 -static int xlvbd_max_sectors[256];
   42.48 -
   42.49 -/* Information from Xen about our VBDs. */
   42.50 -#define MAX_VBDS 64
   42.51 -static int nr_vbds;
   42.52 -static xen_disk_t *vbd_info;
   42.53 -
   42.54 -static struct block_device_operations xlvbd_block_fops = 
   42.55 -{
   42.56 -    open:               xenolinux_block_open,
   42.57 -    release:            xenolinux_block_release,
   42.58 -    ioctl:              xenolinux_block_ioctl,
   42.59 -    check_media_change: xenolinux_block_check,
   42.60 -    revalidate:         xenolinux_block_revalidate,
   42.61 -};
   42.62 -
   42.63 -static int xlvbd_get_vbd_info(xen_disk_t *disk_info)
   42.64 -{
   42.65 -    int error;
   42.66 -    block_io_op_t op; 
   42.67 -
   42.68 -    /* Probe for disk information. */
   42.69 -    memset(&op, 0, sizeof(op)); 
   42.70 -    op.cmd = BLOCK_IO_OP_VBD_PROBE; 
   42.71 -    op.u.probe_params.domain    = 0; 
   42.72 -    op.u.probe_params.xdi.max   = MAX_VBDS;
   42.73 -    op.u.probe_params.xdi.disks = disk_info;
   42.74 -    op.u.probe_params.xdi.count = 0;
   42.75 -
   42.76 -    if ( (error = HYPERVISOR_block_io_op(&op)) != 0 )
   42.77 -    {
   42.78 -        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
   42.79 -        return -1;
   42.80 -    }
   42.81 -
   42.82 -    return op.u.probe_params.xdi.count;
   42.83 -}
   42.84 -
   42.85 -/*
   42.86 - * xlvbd_init_device - initialise a VBD device
   42.87 - * @disk:              a xen_disk_t describing the VBD
   42.88 - *
   42.89 - * Takes a xen_disk_t * that describes a VBD the domain has access to.
   42.90 - * Performs appropriate initialisation and registration of the device.
   42.91 - *
   42.92 - * Care needs to be taken when making re-entrant calls to ensure that
   42.93 - * corruption does not occur.  Also, devices that are in use should not have
   42.94 - * their details updated.  This is the caller's responsibility.
   42.95 - */
   42.96 -static int xlvbd_init_device(xen_disk_t *xd)
   42.97 -{
   42.98 -    int device = xd->device;
   42.99 -    int major  = MAJOR(device); 
  42.100 -    int minor  = MINOR(device);
  42.101 -    int is_ide = IDE_DISK_MAJOR(major);  /* is this an ide device? */
  42.102 -    int is_scsi= SCSI_BLK_MAJOR(major);  /* is this a scsi device? */
  42.103 -    char *major_name;
  42.104 -    struct gendisk *gd;
  42.105 -    struct block_device *bd;
  42.106 -    xl_disk_t *disk;
  42.107 -    int i, rc = 0, max_part, partno;
  42.108 -    unsigned long capacity;
  42.109 -
  42.110 -    unsigned char buf[64];
  42.111 -
  42.112 -    if ( (bd = bdget(device)) == NULL )
  42.113 -        return -1;
  42.114 -
  42.115 -    /*
  42.116 -     * Update of partition info, and check of usage count, is protected
  42.117 -     * by the per-block-device semaphore.
  42.118 -     */
  42.119 -    down(&bd->bd_sem);
  42.120 -
  42.121 -    if ( ((disk = xldev_to_xldisk(device)) != NULL) && (disk->usage != 0) )
  42.122 -    {
  42.123 -        printk(KERN_ALERT "VBD update failed - in use [dev=%x]\n", device);
  42.124 -        rc = -1;
  42.125 -        goto out;
  42.126 -    }
  42.127 -
  42.128 -    if ( is_ide ) {
  42.129 -
  42.130 -	major_name = XLIDE_MAJOR_NAME; 
  42.131 -	max_part   = XLIDE_MAX_PART;
  42.132 -
  42.133 -    } else if ( is_scsi ) {
  42.134 -
  42.135 -	major_name = XLSCSI_MAJOR_NAME;
  42.136 -	max_part   = XLSCSI_MAX_PART;
  42.137 -
  42.138 -    } else if (XD_VIRTUAL(xd->info)) {
  42.139 -
  42.140 -	major_name = XLVBD_MAJOR_NAME;
  42.141 -	max_part   = XLVBD_MAX_PART;
  42.142 -
  42.143 -    } else { 
  42.144 -
  42.145 -        /* SMH: hmm - probably a CCISS driver or sim; assume CCISS for now */
  42.146 -	printk(KERN_ALERT "Assuming device %02x:%02x is CCISS/SCSI\n", 
  42.147 -	       major, minor);
  42.148 -	is_scsi    = 1; 
  42.149 -	major_name = "cciss"; 
  42.150 -	max_part   = XLSCSI_MAX_PART;
  42.151 -
  42.152 -    }
  42.153 -    
  42.154 -    partno = minor & (max_part - 1); 
  42.155 -    
  42.156 -    if ( (gd = get_gendisk(device)) == NULL )
  42.157 -    {
  42.158 -        rc = register_blkdev(major, major_name, &xlvbd_block_fops);
  42.159 -        if ( rc < 0 )
  42.160 -        {
  42.161 -            printk(KERN_ALERT "XL VBD: can't get major %d\n", major);
  42.162 -            goto out;
  42.163 -        }
  42.164 -
  42.165 -        if ( is_ide )
  42.166 -        { 
  42.167 -            blksize_size[major]  = xlide_blksize_size;
  42.168 -            hardsect_size[major] = xlide_hardsect_size;
  42.169 -            max_sectors[major]   = xlide_max_sectors;
  42.170 -            read_ahead[major]    = 8; /* from drivers/ide/ide-probe.c */
  42.171 -        } 
  42.172 -        else if ( is_scsi )
  42.173 -        { 
  42.174 -            blksize_size[major]  = xlscsi_blksize_size;
  42.175 -            hardsect_size[major] = xlscsi_hardsect_size;
  42.176 -            max_sectors[major]   = xlscsi_max_sectors;
  42.177 -            read_ahead[major]    = 0; /* XXX 8; -- guessing */
  42.178 -        }
  42.179 -        else
  42.180 -        { 
  42.181 -            blksize_size[major]  = xlvbd_blksize_size;
  42.182 -            hardsect_size[major] = xlvbd_hardsect_size;
  42.183 -            max_sectors[major]   = xlvbd_max_sectors;
  42.184 -            read_ahead[major]    = 8;
  42.185 -        }
  42.186 -
  42.187 -        blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request);
  42.188 -
  42.189 -        /*
  42.190 -         * Turn off barking 'headactive' mode. We dequeue buffer heads as
  42.191 -         * soon as we pass them down to Xen.
  42.192 -         */
  42.193 -        blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0);
  42.194 -
  42.195 -        /* Construct an appropriate gendisk structure. */
  42.196 -        gd             = kmalloc(sizeof(struct gendisk), GFP_KERNEL);
  42.197 -        gd->major      = major;
  42.198 -        gd->major_name = major_name; 
  42.199 -    
  42.200 -        gd->max_p      = max_part; 
  42.201 -        if ( is_ide )
  42.202 -        { 
  42.203 -            gd->minor_shift  = XLIDE_PARTN_SHIFT; 
  42.204 -            gd->nr_real      = XLIDE_DEVS_PER_MAJOR; 
  42.205 -        } 
  42.206 -        else if ( is_scsi )
  42.207 -        { 
  42.208 -            gd->minor_shift  = XLSCSI_PARTN_SHIFT; 
  42.209 -            gd->nr_real      = XLSCSI_DEVS_PER_MAJOR; 
  42.210 -        }
  42.211 -        else
  42.212 -        { 
  42.213 -            gd->minor_shift  = XLVBD_PARTN_SHIFT; 
  42.214 -            gd->nr_real      = XLVBD_DEVS_PER_MAJOR; 
  42.215 -        }
  42.216 -
  42.217 -        /* 
  42.218 -        ** The sizes[] and part[] arrays hold the sizes and other 
  42.219 -        ** information about every partition with this 'major' (i.e. 
  42.220 -        ** every disk sharing the 8 bit prefix * max partns per disk) 
  42.221 -        */
  42.222 -        gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL);
  42.223 -        gd->part  = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), 
  42.224 -                            GFP_KERNEL);
  42.225 -        memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int));
  42.226 -        memset(gd->part,  0, max_part * gd->nr_real 
  42.227 -               * sizeof(struct hd_struct));
  42.228 -
  42.229 -
  42.230 -        gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), 
  42.231 -                                   GFP_KERNEL);
  42.232 -        memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t));
  42.233 -
  42.234 -        gd->next   = NULL;            
  42.235 -        gd->fops   = &xlvbd_block_fops;
  42.236 -
  42.237 -        gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), 
  42.238 -                             GFP_KERNEL);
  42.239 -        gd->flags  = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL);
  42.240 -    
  42.241 -        memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr));
  42.242 -        memset(gd->flags, 0, gd->nr_real *  sizeof(*gd->flags));
  42.243 -
  42.244 -        add_gendisk(gd);
  42.245 -
  42.246 -        blk_size[major] = gd->sizes;
  42.247 -    }
  42.248 -
  42.249 -    if ( XD_READONLY(xd->info) )
  42.250 -        set_device_ro(device, 1); 
  42.251 -
  42.252 -    gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XENO;
  42.253 -
  42.254 -    /* NB. Linux 2.4 only handles 32-bit sector offsets and capacities. */
  42.255 -    capacity = (unsigned long)xd->capacity;
  42.256 -
  42.257 -    if ( partno != 0 )
  42.258 -    {
  42.259 -        /*
  42.260 -         * If this was previously set up as a real disc we will have set 
  42.261 -         * up partition-table information. Virtual partitions override 
  42.262 -         * 'real' partitions, and the two cannot coexist on a device.
  42.263 -         */
  42.264 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  42.265 -             (gd->sizes[minor & ~(max_part-1)] != 0) )
  42.266 -        {
  42.267 -            /*
  42.268 -             * Any non-zero sub-partition entries must be cleaned out before
  42.269 -             * installing 'virtual' partition entries. The two types cannot
  42.270 -             * coexist, and virtual partitions are favoured.
  42.271 -             */
  42.272 -            kdev_t dev = device & ~(max_part-1);
  42.273 -            for ( i = max_part - 1; i > 0; i-- )
  42.274 -            {
  42.275 -                invalidate_device(dev+i, 1);
  42.276 -                gd->part[MINOR(dev+i)].start_sect = 0;
  42.277 -                gd->part[MINOR(dev+i)].nr_sects   = 0;
  42.278 -                gd->sizes[MINOR(dev+i)]           = 0;
  42.279 -            }
  42.280 -            printk(KERN_ALERT
  42.281 -                   "Virtual partitions found for /dev/%s - ignoring any "
  42.282 -                   "real partition information we may have found.\n",
  42.283 -                   disk_name(gd, MINOR(device), buf));
  42.284 -        }
  42.285 -
  42.286 -        /* Need to skankily setup 'partition' information */
  42.287 -        gd->part[minor].start_sect = 0; 
  42.288 -        gd->part[minor].nr_sects   = capacity; 
  42.289 -        gd->sizes[minor]           = capacity; 
  42.290 -
  42.291 -        gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  42.292 -    }
  42.293 -    else
  42.294 -    {
  42.295 -        gd->part[minor].nr_sects = capacity;
  42.296 -        gd->sizes[minor] = capacity>>(BLOCK_SIZE_BITS-9);
  42.297 -        
  42.298 -        /* Some final fix-ups depending on the device type */
  42.299 -        switch ( XD_TYPE(xd->info) )
  42.300 -        { 
  42.301 -        case XD_TYPE_CDROM:
  42.302 -        case XD_TYPE_FLOPPY: 
  42.303 -        case XD_TYPE_TAPE:
  42.304 -            gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; 
  42.305 -            printk(KERN_ALERT 
  42.306 -                   "Skipping partition check on %s /dev/%s\n", 
  42.307 -                   XD_TYPE(xd->info)==XD_TYPE_CDROM ? "cdrom" : 
  42.308 -                   (XD_TYPE(xd->info)==XD_TYPE_TAPE ? "tape" : 
  42.309 -                    "floppy"), disk_name(gd, MINOR(device), buf)); 
  42.310 -            break; 
  42.311 -
  42.312 -        case XD_TYPE_DISK:
  42.313 -            /* Only check partitions on real discs (not virtual!). */
  42.314 -            if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  42.315 -            {
  42.316 -                printk(KERN_ALERT
  42.317 -                       "Skipping partition check on virtual /dev/%s\n",
  42.318 -                       disk_name(gd, MINOR(device), buf));
  42.319 -                break;
  42.320 -            }
  42.321 -            register_disk(gd, device, gd->max_p, &xlvbd_block_fops, capacity);
  42.322 -            break; 
  42.323 -
  42.324 -        default:
  42.325 -            printk(KERN_ALERT "XenoLinux: unknown device type %d\n", 
  42.326 -                   XD_TYPE(xd->info)); 
  42.327 -            break; 
  42.328 -        }
  42.329 -    }
  42.330 -
  42.331 - out:
  42.332 -    up(&bd->bd_sem);
  42.333 -    bdput(bd);    
  42.334 -    return rc;
  42.335 -}
  42.336 -
  42.337 -
  42.338 -/*
  42.339 - * xlvbd_remove_device - remove a device node if possible
  42.340 - * @device:       numeric device ID
  42.341 - *
  42.342 - * Updates the gendisk structure and invalidates devices.
  42.343 - *
  42.344 - * This is OK for now but in future, should perhaps consider where this should
  42.345 - * deallocate gendisks / unregister devices.
  42.346 - */
  42.347 -static int xlvbd_remove_device(int device)
  42.348 -{
  42.349 -    int i, rc = 0, minor = MINOR(device);
  42.350 -    struct gendisk *gd;
  42.351 -    struct block_device *bd;
  42.352 -    xl_disk_t *disk = NULL;
  42.353 -
  42.354 -    if ( (bd = bdget(device)) == NULL )
  42.355 -        return -1;
  42.356 -
  42.357 -    /*
  42.358 -     * Update of partition info, and check of usage count, is protected
  42.359 -     * by the per-block-device semaphore.
  42.360 -     */
  42.361 -    down(&bd->bd_sem);
  42.362 -
  42.363 -    if ( ((gd = get_gendisk(device)) == NULL) ||
  42.364 -         ((disk = xldev_to_xldisk(device)) == NULL) )
  42.365 -        BUG();
  42.366 -
  42.367 -    if ( disk->usage != 0 )
  42.368 -    {
  42.369 -        printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
  42.370 -        rc = -1;
  42.371 -        goto out;
  42.372 -    }
  42.373 - 
  42.374 -    if ( (minor & (gd->max_p-1)) != 0 )
  42.375 -    {
  42.376 -        /* 1: The VBD is mapped to a partition rather than a whole unit. */
  42.377 -        invalidate_device(device, 1);
  42.378 -	gd->part[minor].start_sect = 0;
  42.379 -        gd->part[minor].nr_sects   = 0;
  42.380 -        gd->sizes[minor]           = 0;
  42.381 -
  42.382 -        /* Clear the consists-of-virtual-partitions flag if possible. */
  42.383 -        gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
  42.384 -        for ( i = 1; i < gd->max_p; i++ )
  42.385 -            if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
  42.386 -                gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
  42.387 -
  42.388 -        /*
  42.389 -         * If all virtual partitions are now gone, and a 'whole unit' VBD is
  42.390 -         * present, then we can try to grok the unit's real partition table.
  42.391 -         */
  42.392 -        if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
  42.393 -             (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
  42.394 -             !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
  42.395 -        {
  42.396 -            register_disk(gd,
  42.397 -                          device&~(gd->max_p-1), 
  42.398 -                          gd->max_p, 
  42.399 -                          &xlvbd_block_fops,
  42.400 -                          gd->part[minor&~(gd->max_p-1)].nr_sects);
  42.401 -        }
  42.402 -    }
  42.403 -    else
  42.404 -    {
  42.405 -        /*
  42.406 -         * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
  42.407 -         * NB. The partition entries are only cleared if there are no VBDs
  42.408 -         * mapped to individual partitions on this unit.
  42.409 -         */
  42.410 -        i = gd->max_p - 1; /* Default: clear subpartitions as well. */
  42.411 -        if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
  42.412 -            i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
  42.413 -        while ( i >= 0 )
  42.414 -        {
  42.415 -            invalidate_device(device+i, 1);
  42.416 -            gd->part[minor+i].start_sect = 0;
  42.417 -            gd->part[minor+i].nr_sects   = 0;
  42.418 -            gd->sizes[minor+i]           = 0;
  42.419 -            i--;
  42.420 -        }
  42.421 -    }
  42.422 -
  42.423 - out:
  42.424 -    up(&bd->bd_sem);
  42.425 -    bdput(bd);
  42.426 -    return rc;
  42.427 -}
  42.428 -
  42.429 -/*
  42.430 - * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
  42.431 - * state. The VBDs need to be updated in this way when the domain is
  42.432 - * initialised and also each time we receive an XLBLK_UPDATE event.
  42.433 - */
  42.434 -void xlvbd_update_vbds(void)
  42.435 -{
  42.436 -    int i, j, k, old_nr, new_nr;
  42.437 -    xen_disk_t *old_info, *new_info, *merged_info;
  42.438 -
  42.439 -    old_info = vbd_info;
  42.440 -    old_nr   = nr_vbds;
  42.441 -
  42.442 -    new_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  42.443 -    if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
  42.444 -    {
  42.445 -        kfree(new_info);
  42.446 -        return;
  42.447 -    }
  42.448 -
  42.449 -    /*
  42.450 -     * Final list maximum size is old list + new list. This occurs only when
  42.451 -     * old list and new list do not overlap at all, and we cannot yet destroy
  42.452 -     * VBDs in the old list because the usage counts are busy.
  42.453 -     */
  42.454 -    merged_info = kmalloc((old_nr + new_nr) * sizeof(xen_disk_t), GFP_KERNEL);
  42.455 -
  42.456 -    /* @i tracks old list; @j tracks new list; @k tracks merged list. */
  42.457 -    i = j = k = 0;
  42.458 -
  42.459 -    while ( (i < old_nr) && (j < new_nr) )
  42.460 -    {
  42.461 -        if ( old_info[i].device < new_info[j].device )
  42.462 -        {
  42.463 -            if ( xlvbd_remove_device(old_info[i].device) != 0 )
  42.464 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  42.465 -            i++;
  42.466 -        }
  42.467 -        else if ( old_info[i].device > new_info[j].device )
  42.468 -        {
  42.469 -            if ( xlvbd_init_device(&new_info[j]) == 0 )
  42.470 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  42.471 -            j++;
  42.472 -        }
  42.473 -        else
  42.474 -        {
  42.475 -            if ( ((old_info[i].capacity == new_info[j].capacity) &&
  42.476 -                  (old_info[i].info == new_info[j].info)) ||
  42.477 -                 (xlvbd_remove_device(old_info[i].device) != 0) )
  42.478 -                memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  42.479 -            else if ( xlvbd_init_device(&new_info[j]) == 0 )
  42.480 -                memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  42.481 -            i++; j++;
  42.482 -        }
  42.483 -    }
  42.484 -
  42.485 -    for ( ; i < old_nr; i++ )
  42.486 -    {
  42.487 -        if ( xlvbd_remove_device(old_info[i].device) != 0 )
  42.488 -            memcpy(&merged_info[k++], &old_info[i], sizeof(xen_disk_t));
  42.489 -    }
  42.490 -
  42.491 -    for ( ; j < new_nr; j++ )
  42.492 -    {
  42.493 -        if ( xlvbd_init_device(&new_info[j]) == 0 )
  42.494 -            memcpy(&merged_info[k++], &new_info[j], sizeof(xen_disk_t));
  42.495 -    }
  42.496 -
  42.497 -    vbd_info = merged_info;
  42.498 -    nr_vbds  = k;
  42.499 -
  42.500 -    kfree(old_info);
  42.501 -    kfree(new_info);
  42.502 -}
  42.503 -
  42.504 -
  42.505 -/*
  42.506 - * Set up all the linux device goop for the virtual block devices (vbd's) that 
  42.507 - * xen tells us about. Note that although from xen's pov VBDs are addressed 
  42.508 - * simply an opaque 16-bit device number, the domain creation tools 
  42.509 - * conventionally allocate these numbers to correspond to those used by 'real' 
  42.510 - * linux -- this is just for convenience as it means e.g. that the same 
  42.511 - * /etc/fstab can be used when booting with or without xen.
  42.512 - */
  42.513 -int __init xlvbd_init(void)
  42.514 -{
  42.515 -    int i;
  42.516 -    
  42.517 -    /*
  42.518 -     * If compiled as a module, we don't support unloading yet. We therefore 
  42.519 -     * permanently increment the reference count to disallow it.
  42.520 -     */
  42.521 -    SET_MODULE_OWNER(&xlvbd_block_fops);
  42.522 -    MOD_INC_USE_COUNT;
  42.523 -
  42.524 -    /* Initialize the global arrays. */
  42.525 -    for ( i = 0; i < 256; i++ ) 
  42.526 -    {
  42.527 -        /* from the generic ide code (drivers/ide/ide-probe.c, etc) */
  42.528 -        xlide_blksize_size[i]  = 1024;
  42.529 -        xlide_hardsect_size[i] = 512;
  42.530 -        xlide_max_sectors[i]   = 128;  /* 'hwif->rqsize' if we knew it */
  42.531 -
  42.532 -        /* from the generic scsi disk code (drivers/scsi/sd.c) */
  42.533 -        xlscsi_blksize_size[i]  = 1024; /* XXX 512; */
  42.534 -        xlscsi_hardsect_size[i] = 512;
  42.535 -        xlscsi_max_sectors[i]   = 128*8; /* XXX 128; */
  42.536 -
  42.537 -        /* we don't really know what to set these too since it depends */
  42.538 -        xlvbd_blksize_size[i]  = 512;
  42.539 -        xlvbd_hardsect_size[i] = 512;
  42.540 -        xlvbd_max_sectors[i]   = 128;
  42.541 -    }
  42.542 -
  42.543 -    vbd_info = kmalloc(MAX_VBDS * sizeof(xen_disk_t), GFP_KERNEL);
  42.544 -    nr_vbds  = xlvbd_get_vbd_info(vbd_info);
  42.545 -
  42.546 -    if ( nr_vbds < 0 )
  42.547 -    {
  42.548 -        kfree(vbd_info);
  42.549 -        vbd_info = NULL;
  42.550 -        nr_vbds  = 0;
  42.551 -    }
  42.552 -    else
  42.553 -    {
  42.554 -        for ( i = 0; i < nr_vbds; i++ )
  42.555 -            xlvbd_init_device(&vbd_info[i]);
  42.556 -    }
  42.557 -
  42.558 -    return 0;
  42.559 -}
  42.560 -
  42.561 -
  42.562 -#ifdef MODULE
  42.563 -module_init(xlvbd_init);
  42.564 -#endif
    43.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/Makefile	Tue Mar 16 12:37:49 2004 +0000
    43.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/console/Makefile	Wed Mar 17 13:35:01 2004 +0000
    43.3 @@ -1,3 +1,3 @@
    43.4 -O_TARGET := con.o
    43.5 +O_TARGET := drv.o
    43.6  obj-$(CONFIG_XEN_CONSOLE) := console.o
    43.7  include $(TOPDIR)/Rules.make
    44.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/Makefile	Tue Mar 16 12:37:49 2004 +0000
    44.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/Makefile	Wed Mar 17 13:35:01 2004 +0000
    44.3 @@ -1,3 +1,3 @@
    44.4 -O_TARGET := dom0.o
    44.5 -obj-y := dom0_core.o vfr.o
    44.6 +O_TARGET := drv.o
    44.7 +obj-y := core.o vfr.o
    44.8  include $(TOPDIR)/Rules.make
    45.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    45.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/core.c	Wed Mar 17 13:35:01 2004 +0000
    45.3 @@ -0,0 +1,104 @@
    45.4 +/******************************************************************************
    45.5 + * core.c
    45.6 + * 
    45.7 + * Interface to privileged domain-0 commands.
    45.8 + * 
    45.9 + * Copyright (c) 2002-2004, K A Fraser, B Dragovic
   45.10 + */
   45.11 +
   45.12 +#include <linux/config.h>
   45.13 +#include <linux/module.h>
   45.14 +#include <linux/kernel.h>
   45.15 +#include <linux/sched.h>
   45.16 +#include <linux/slab.h>
   45.17 +#include <linux/string.h>
   45.18 +#include <linux/errno.h>
   45.19 +#include <linux/mm.h>
   45.20 +#include <linux/mman.h>
   45.21 +#include <linux/swap.h>
   45.22 +#include <linux/smp_lock.h>
   45.23 +#include <linux/swapctl.h>
   45.24 +#include <linux/iobuf.h>
   45.25 +#include <linux/highmem.h>
   45.26 +#include <linux/pagemap.h>
   45.27 +#include <linux/seq_file.h>
   45.28 +
   45.29 +#include <asm/pgalloc.h>
   45.30 +#include <asm/pgtable.h>
   45.31 +#include <asm/uaccess.h>
   45.32 +#include <asm/tlb.h>
   45.33 +#include <asm/proc_cmd.h>
   45.34 +#include <asm/hypervisor-ifs/dom0_ops.h>
   45.35 +#include <asm/xeno_proc.h>
   45.36 +
   45.37 +static struct proc_dir_entry *privcmd_intf;
   45.38 +
   45.39 +static int privcmd_ioctl(struct inode *inode, struct file *file,
   45.40 +                         unsigned int cmd, unsigned long data)
   45.41 +{
   45.42 +    int ret = 0;
   45.43 +
   45.44 +    switch ( cmd )
   45.45 +    {
   45.46 +    case IOCTL_PRIVCMD_HYPERCALL:
   45.47 +    {
   45.48 +        privcmd_hypercall_t hypercall;
   45.49 +  
   45.50 +        if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
   45.51 +            return -EFAULT;
   45.52 +
   45.53 +        __asm__ __volatile__ (
   45.54 +            "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
   45.55 +            "movl  4(%%eax),%%ebx ;"
   45.56 +            "movl  8(%%eax),%%ecx ;"
   45.57 +            "movl 12(%%eax),%%edx ;"
   45.58 +            "movl 16(%%eax),%%esi ;"
   45.59 +            "movl 20(%%eax),%%edi ;"
   45.60 +            "movl   (%%eax),%%eax ;"
   45.61 +            TRAP_INSTR "; "
   45.62 +            "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
   45.63 +            : "=a" (ret) : "0" (&hypercall) : "memory" );
   45.64 +
   45.65 +    }
   45.66 +    break;
   45.67 +
   45.68 +    default:
   45.69 +        ret = -EINVAL;
   45.70 +    	break;
   45.71 +	}
   45.72 +    return ret;
   45.73 +}
   45.74 +
   45.75 +
   45.76 +static struct file_operations privcmd_file_ops = {
   45.77 +  ioctl : privcmd_ioctl
   45.78 +};
   45.79 +
   45.80 +
   45.81 +static int __init init_module(void)
   45.82 +{
   45.83 +    if ( !(start_info.flags & SIF_PRIVILEGED) )
   45.84 +        return 0;
   45.85 +
   45.86 +    privcmd_intf = create_xeno_proc_entry("privcmd", 0400);
   45.87 +    if ( privcmd_intf != NULL )
   45.88 +    {
   45.89 +        privcmd_intf->owner      = THIS_MODULE;
   45.90 +        privcmd_intf->nlink      = 1;
   45.91 +	privcmd_intf->proc_fops  = &privcmd_file_ops;
   45.92 +    }
   45.93 +
   45.94 +    return 0;
   45.95 +}
   45.96 +
   45.97 +
   45.98 +static void __exit cleanup_module(void)
   45.99 +{
  45.100 +    if ( privcmd_intf == NULL ) return;
  45.101 +    remove_xeno_proc_entry("privcmd");
  45.102 +    privcmd_intf = NULL;
  45.103 +}
  45.104 +
  45.105 +
  45.106 +module_init(init_module);
  45.107 +module_exit(cleanup_module);
    46.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/dom0/dom0_core.c	Tue Mar 16 12:37:49 2004 +0000
    46.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    46.3 @@ -1,108 +0,0 @@
    46.4 -/******************************************************************************
    46.5 - * dom0_core.c
    46.6 - * 
    46.7 - * Interface to privileged domain-0 commands.
    46.8 - * 
    46.9 - * Copyright (c) 2002-2003, K A Fraser, B Dragovic
   46.10 - */
   46.11 -
   46.12 -#include <linux/config.h>
   46.13 -#include <linux/module.h>
   46.14 -#include <linux/kernel.h>
   46.15 -#include <linux/sched.h>
   46.16 -#include <linux/slab.h>
   46.17 -#include <linux/string.h>
   46.18 -#include <linux/errno.h>
   46.19 -#include <linux/mm.h>
   46.20 -#include <linux/mman.h>
   46.21 -#include <linux/swap.h>
   46.22 -#include <linux/smp_lock.h>
   46.23 -#include <linux/swapctl.h>
   46.24 -#include <linux/iobuf.h>
   46.25 -#include <linux/highmem.h>
   46.26 -#include <linux/pagemap.h>
   46.27 -#include <linux/seq_file.h>
   46.28 -
   46.29 -#include <asm/pgalloc.h>
   46.30 -#include <asm/pgtable.h>
   46.31 -#include <asm/uaccess.h>
   46.32 -#include <asm/tlb.h>
   46.33 -#include <asm/proc_cmd.h>
   46.34 -#include <asm/hypervisor-ifs/dom0_ops.h>
   46.35 -#include <asm/xeno_proc.h>
   46.36 -
   46.37 -#include "../block/xl_block.h"
   46.38 -
   46.39 -static struct proc_dir_entry *privcmd_intf;
   46.40 -
   46.41 -
   46.42 -static int privcmd_ioctl(struct inode *inode, struct file *file,
   46.43 -                         unsigned int cmd, unsigned long data)
   46.44 -{
   46.45 -    int ret = 0;
   46.46 -
   46.47 -    switch ( cmd )
   46.48 -    {
   46.49 -    case IOCTL_PRIVCMD_HYPERCALL:
   46.50 -    {
   46.51 -        privcmd_hypercall_t hypercall;
   46.52 -  
   46.53 -        if ( copy_from_user(&hypercall, (void *)data, sizeof(hypercall)) )
   46.54 -            return -EFAULT;
   46.55 -
   46.56 -        __asm__ __volatile__ (
   46.57 -            "pushl %%ebx; pushl %%ecx; pushl %%edx; pushl %%esi; pushl %%edi; "
   46.58 -            "movl  4(%%eax),%%ebx ;"
   46.59 -            "movl  8(%%eax),%%ecx ;"
   46.60 -            "movl 12(%%eax),%%edx ;"
   46.61 -            "movl 16(%%eax),%%esi ;"
   46.62 -            "movl 20(%%eax),%%edi ;"
   46.63 -            "movl   (%%eax),%%eax ;"
   46.64 -            TRAP_INSTR "; "
   46.65 -            "popl %%edi; popl %%esi; popl %%edx; popl %%ecx; popl %%ebx"
   46.66 -            : "=a" (ret) : "0" (&hypercall) : "memory" );
   46.67 -
   46.68 -    }
   46.69 -    break;
   46.70 -
   46.71 -    default:
   46.72 -        ret = -EINVAL;
   46.73 -    	break;
   46.74 -	}
   46.75 -    return ret;
   46.76 -}
   46.77 -
   46.78 -
   46.79 -static struct file_operations privcmd_file_ops = {
   46.80 -  ioctl : privcmd_ioctl
   46.81 -};
   46.82 -
   46.83 -
   46.84 -static int __init init_module(void)
   46.85 -{
   46.86 -    if ( !(start_info.flags & SIF_PRIVILEGED) )
   46.87 -        return 0;
   46.88 -
   46.89 -    /* xeno control interface */
   46.90 -    privcmd_intf = create_xeno_proc_entry("privcmd", 0400);
   46.91 -    if ( privcmd_intf != NULL )
   46.92 -    {
   46.93 -        privcmd_intf->owner      = THIS_MODULE;
   46.94 -        privcmd_intf->nlink      = 1;
   46.95 -	privcmd_intf->proc_fops  = &privcmd_file_ops;
   46.96 -    }
   46.97 -
   46.98 -    return 0;
   46.99 -}
  46.100 -
  46.101 -
  46.102 -static void __exit cleanup_module(void)
  46.103 -{
  46.104 -    if ( privcmd_intf == NULL ) return;
  46.105 -    remove_xeno_proc_entry("privcmd");
  46.106 -    privcmd_intf = NULL;
  46.107 -}
  46.108 -
  46.109 -
  46.110 -module_init(init_module);
  46.111 -module_exit(cleanup_module);
    47.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/Makefile	Tue Mar 16 12:37:49 2004 +0000
    47.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/Makefile	Wed Mar 17 13:35:01 2004 +0000
    47.3 @@ -1,3 +1,3 @@
    47.4 -O_TARGET := evtchn.o
    47.5 -obj-y := xl_evtchn.o
    47.6 +O_TARGET := drv.o
    47.7 +obj-y := evtchn.o
    47.8  include $(TOPDIR)/Rules.make
    48.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/evtchn.c	Wed Mar 17 13:35:01 2004 +0000
    48.3 @@ -0,0 +1,481 @@
    48.4 +/******************************************************************************
    48.5 + * evtchn.c
    48.6 + * 
    48.7 + * Xenolinux driver for receiving and demuxing event-channel signals.
    48.8 + * 
    48.9 + * Copyright (c) 2004, K A Fraser
   48.10 + */
   48.11 +
   48.12 +#include <linux/config.h>
   48.13 +#include <linux/module.h>
   48.14 +#include <linux/kernel.h>
   48.15 +#include <linux/sched.h>
   48.16 +#include <linux/slab.h>
   48.17 +#include <linux/string.h>
   48.18 +#include <linux/errno.h>
   48.19 +#include <linux/fs.h>
   48.20 +#include <linux/errno.h>
   48.21 +#include <linux/miscdevice.h>
   48.22 +#include <linux/major.h>
   48.23 +#include <linux/proc_fs.h>
   48.24 +#include <linux/devfs_fs_kernel.h>
   48.25 +#include <linux/stat.h>
   48.26 +#include <linux/poll.h>
   48.27 +#include <linux/irq.h>
   48.28 +#include <asm/evtchn.h>
   48.29 +
   48.30 +/* NB. This must be shared amongst drivers if more things go in /dev/xen */
   48.31 +static devfs_handle_t xen_dev_dir;
   48.32 +
   48.33 +/* Only one process may open /dev/xen/evtchn at any time. */
   48.34 +static unsigned long evtchn_dev_inuse;
   48.35 +
   48.36 +/* Notification ring, accessed via /dev/xen/evtchn. */
   48.37 +#define RING_SIZE     2048  /* 2048 16-bit entries */
   48.38 +#define RING_MASK(_i) ((_i)&(RING_SIZE-1))
   48.39 +static u16 *ring;
   48.40 +static unsigned int ring_cons, ring_prod, ring_overflow;
   48.41 +
   48.42 +/* Processes wait on this queue via /dev/xen/evtchn when ring is empty. */
   48.43 +static DECLARE_WAIT_QUEUE_HEAD(evtchn_wait);
   48.44 +static struct fasync_struct *evtchn_async_queue;
   48.45 +
   48.46 +static evtchn_receiver_t rx_fns[1024];
   48.47 +
   48.48 +static u32 pend_outstanding[32];
   48.49 +static u32 disc_outstanding[32];
   48.50 +
   48.51 +static spinlock_t lock;
   48.52 +
   48.53 +int evtchn_request_port(unsigned int port, evtchn_receiver_t rx_fn)
   48.54 +{
   48.55 +    unsigned long flags;
   48.56 +    int rc;
   48.57 +
   48.58 +    spin_lock_irqsave(&lock, flags);
   48.59 +
   48.60 +    if ( rx_fns[port] != NULL )
   48.61 +    {
   48.62 +        printk(KERN_ALERT "Event channel port %d already in use.\n", port);
   48.63 +        rc = -EINVAL;
   48.64 +    }
   48.65 +    else
   48.66 +    {
   48.67 +        rx_fns[port] = rx_fn;
   48.68 +        rc = 0;
   48.69 +    }
   48.70 +
   48.71 +    spin_unlock_irqrestore(&lock, flags);
   48.72 +
   48.73 +    return rc;
   48.74 +}
   48.75 +
   48.76 +int evtchn_free_port(unsigned int port)
   48.77 +{
   48.78 +    unsigned long flags;
   48.79 +    int rc;
   48.80 +
   48.81 +    spin_lock_irqsave(&lock, flags);
   48.82 +
   48.83 +    if ( rx_fns[port] == NULL )
   48.84 +    {
   48.85 +        printk(KERN_ALERT "Event channel port %d not in use.\n", port);
   48.86 +        rc = -EINVAL;
   48.87 +    }
   48.88 +    else
   48.89 +    {
   48.90 +        rx_fns[port] = NULL;
   48.91 +        rc = 0;
   48.92 +    }
   48.93 +
   48.94 +    spin_unlock_irqrestore(&lock, flags);
   48.95 +
   48.96 +    return rc;
   48.97 +}
   48.98 +
   48.99 +/*
  48.100 + * NB. Clearing port can race a notification from remote end. Caller must
  48.101 + * therefore recheck notification status on return to avoid missing events.
  48.102 + */
  48.103 +void evtchn_clear_port(unsigned int port)
  48.104 +{
  48.105 +    unsigned int p = port & PORTIDX_MASK;
  48.106 +    unsigned long flags;
  48.107 +
  48.108 +    spin_lock_irqsave(&lock, flags);
  48.109 +
  48.110 +    if ( unlikely(port & PORT_DISCONNECT) )
  48.111 +    {
  48.112 +        clear_bit(p, &disc_outstanding[0]);
  48.113 +        clear_bit(p, &HYPERVISOR_shared_info->event_channel_disc[0]);
  48.114 +    }
  48.115 +    else
  48.116 +    {
  48.117 +        clear_bit(p, &pend_outstanding[0]);
  48.118 +        clear_bit(p, &HYPERVISOR_shared_info->event_channel_pend[0]);
  48.119 +    }
  48.120 +
  48.121 +    spin_unlock_irqrestore(&lock, flags);
  48.122 +}
  48.123 +
  48.124 +static inline void process_bitmask(u32 *sel, 
  48.125 +                                   u32 *mask,
  48.126 +                                   u32 *outstanding,
  48.127 +                                   unsigned int port_subtype)
  48.128 +{
  48.129 +    unsigned long l1, l2;
  48.130 +    unsigned int  l1_idx, l2_idx, port;
  48.131 +
  48.132 +    l1 = xchg(sel, 0);
  48.133 +    while ( (l1_idx = ffs(l1)) != 0 )
  48.134 +    {
  48.135 +        l1_idx--;
  48.136 +        l1 &= ~(1 << l1_idx);
  48.137 +
  48.138 +        l2 = mask[l1_idx] & ~outstanding[l1_idx];
  48.139 +        outstanding[l1_idx] |= l2;
  48.140 +        while ( (l2_idx = ffs(l2)) != 0 )
  48.141 +        {
  48.142 +            l2_idx--;
  48.143 +            l2 &= ~(1 << l2_idx);
  48.144 +
  48.145 +            port = (l1_idx * 32) + l2_idx;
  48.146 +            if ( rx_fns[port] != NULL )
  48.147 +            {
  48.148 +                (*rx_fns[port])(port | port_subtype);
  48.149 +            }
  48.150 +            else if ( ring != NULL )
  48.151 +            {
  48.152 +                if ( (ring_prod - ring_cons) < RING_SIZE )
  48.153 +                {
  48.154 +                    ring[RING_MASK(ring_prod)] = (u16)(port | port_subtype);
  48.155 +                    if ( ring_cons == ring_prod++ )
  48.156 +                    {
  48.157 +                        wake_up_interruptible(&evtchn_wait);
  48.158 +                        kill_fasync(&evtchn_async_queue, SIGIO, POLL_IN);
  48.159 +                    }
  48.160 +                }
  48.161 +                else
  48.162 +                {
  48.163 +                    ring_overflow = 1;
  48.164 +                }
  48.165 +            }
  48.166 +        }
  48.167 +    }
  48.168 +}
  48.169 +
  48.170 +static void evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  48.171 +{
  48.172 +    shared_info_t *si = HYPERVISOR_shared_info;
  48.173 +    unsigned long flags;
  48.174 +
  48.175 +    spin_lock_irqsave(&lock, flags);
  48.176 +
  48.177 +    process_bitmask(&si->event_channel_pend_sel, 
  48.178 +                    &si->event_channel_pend[0],
  48.179 +                    &pend_outstanding[0],
  48.180 +                    PORT_NORMAL);
  48.181 +        
  48.182 +    process_bitmask(&si->event_channel_disc_sel,
  48.183 +                    &si->event_channel_disc[0],
  48.184 +                    &disc_outstanding[0],
  48.185 +                    PORT_DISCONNECT);
  48.186 +        
  48.187 +    spin_unlock_irqrestore(&lock, flags);
  48.188 +}
  48.189 +
  48.190 +static void __evtchn_reset_buffer_ring(void)
  48.191 +{
  48.192 +    u32          m;
  48.193 +    unsigned int i, j;
  48.194 +
  48.195 +    /* Initialise the ring with currently outstanding notifications. */
  48.196 +    ring_cons = ring_prod = ring_overflow = 0;
  48.197 +
  48.198 +    for ( i = 0; i < 32; i++ )
  48.199 +    {
  48.200 +        m = pend_outstanding[i];
  48.201 +        while ( (j = ffs(m)) != 0 )
  48.202 +        {
  48.203 +            m &= ~(1 << --j);
  48.204 +            if ( rx_fns[(i * 32) + j] == NULL )
  48.205 +                ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_NORMAL);
  48.206 +        }
  48.207 +
  48.208 +        m = disc_outstanding[i];
  48.209 +        while ( (j = ffs(m)) != 0 )
  48.210 +        {
  48.211 +            m &= ~(1 << --j);
  48.212 +            if ( rx_fns[(i * 32) + j] == NULL )
  48.213 +                ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_DISCONNECT);
  48.214 +        }
  48.215 +    }
  48.216 +}
  48.217 +
  48.218 +static ssize_t evtchn_read(struct file *file, char *buf,
  48.219 +                           size_t count, loff_t *ppos)
  48.220 +{
  48.221 +    int rc;
  48.222 +    unsigned int c, p, bytes1 = 0, bytes2 = 0;
  48.223 +    DECLARE_WAITQUEUE(wait, current);
  48.224 +
  48.225 +    add_wait_queue(&evtchn_wait, &wait);
  48.226 +
  48.227 +    count &= ~1; /* even number of bytes */
  48.228 +
  48.229 +    if ( count == 0 )
  48.230 +    {
  48.231 +        rc = 0;
  48.232 +        goto out;
  48.233 +    }
  48.234 +
  48.235 +    if ( count > PAGE_SIZE )
  48.236 +        count = PAGE_SIZE;
  48.237 +
  48.238 +    for ( ; ; )
  48.239 +    {
  48.240 +        set_current_state(TASK_INTERRUPTIBLE);
  48.241 +
  48.242 +        if ( (c = ring_cons) != (p = ring_prod) )
  48.243 +            break;
  48.244 +
  48.245 +        if ( ring_overflow )
  48.246 +        {
  48.247 +            rc = -EFBIG;
  48.248 +            goto out;
  48.249 +        }
  48.250 +
  48.251 +        if ( file->f_flags & O_NONBLOCK )
  48.252 +        {
  48.253 +            rc = -EAGAIN;
  48.254 +            goto out;
  48.255 +        }
  48.256 +
  48.257 +        if ( signal_pending(current) )
  48.258 +        {
  48.259 +            rc = -ERESTARTSYS;
  48.260 +            goto out;
  48.261 +        }
  48.262 +
  48.263 +        schedule();
  48.264 +    }
  48.265 +
  48.266 +    /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
  48.267 +    if ( ((c ^ p) & RING_SIZE) != 0 )
  48.268 +    {
  48.269 +        bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(u16);
  48.270 +        bytes2 = RING_MASK(p) * sizeof(u16);
  48.271 +    }
  48.272 +    else
  48.273 +    {
  48.274 +        bytes1 = (p - c) * sizeof(u16);
  48.275 +        bytes2 = 0;
  48.276 +    }
  48.277 +
  48.278 +    /* Truncate chunks according to caller's maximum byte count. */
  48.279 +    if ( bytes1 > count )
  48.280 +    {
  48.281 +        bytes1 = count;
  48.282 +        bytes2 = 0;
  48.283 +    }
  48.284 +    else if ( (bytes1 + bytes2) > count )
  48.285 +    {
  48.286 +        bytes2 = count - bytes1;
  48.287 +    }
  48.288 +
  48.289 +    if ( copy_to_user(buf, &ring[RING_MASK(c)], bytes1) ||
  48.290 +         ((bytes2 != 0) && copy_to_user(&buf[bytes1], &ring[0], bytes2)) )
  48.291 +    {
  48.292 +        rc = -EFAULT;
  48.293 +        goto out;
  48.294 +    }
  48.295 +
  48.296 +    ring_cons += (bytes1 + bytes2) / sizeof(u16);
  48.297 +
  48.298 +    rc = bytes1 + bytes2;
  48.299 +
  48.300 + out:
  48.301 +    __set_current_state(TASK_RUNNING);
  48.302 +    remove_wait_queue(&evtchn_wait, &wait);
  48.303 +    return rc;
  48.304 +}
  48.305 +
  48.306 +static ssize_t evtchn_write(struct file *file, const char *buf,
  48.307 +                            size_t count, loff_t *ppos)
  48.308 +{
  48.309 +    int  rc, i;
  48.310 +    u16 *kbuf = (u16 *)get_free_page(GFP_KERNEL);
  48.311 +
  48.312 +    if ( kbuf == NULL )
  48.313 +        return -ENOMEM;
  48.314 +
  48.315 +    count &= ~1; /* even number of bytes */
  48.316 +
  48.317 +    if ( count == 0 )
  48.318 +    {
  48.319 +        rc = 0;
  48.320 +        goto out;
  48.321 +    }
  48.322 +
  48.323 +    if ( count > PAGE_SIZE )
  48.324 +        count = PAGE_SIZE;
  48.325 +
  48.326 +    if ( copy_from_user(kbuf, buf, count) != 0 )
  48.327 +    {
  48.328 +        rc = -EFAULT;
  48.329 +        goto out;
  48.330 +    }
  48.331 +
  48.332 +    for ( i = 0; i < (count/2); i++ )
  48.333 +        evtchn_clear_port(kbuf[i]);
  48.334 +
  48.335 +    rc = count;
  48.336 +
  48.337 + out:
  48.338 +    free_page((unsigned long)kbuf);
  48.339 +    return rc;
  48.340 +}
  48.341 +
  48.342 +static int evtchn_ioctl(struct inode *inode, struct file *file,
  48.343 +                        unsigned int cmd, unsigned long arg)
  48.344 +{
  48.345 +    if ( cmd != EVTCHN_RESET )
  48.346 +        return -EINVAL;
  48.347 +
  48.348 +    spin_lock_irq(&lock);
  48.349 +    __evtchn_reset_buffer_ring();
  48.350 +    spin_unlock_irq(&lock);   
  48.351 +
  48.352 +    return 0;
  48.353 +}
  48.354 +
  48.355 +static unsigned int evtchn_poll(struct file *file, poll_table *wait)
  48.356 +{
  48.357 +    unsigned int mask = POLLOUT | POLLWRNORM;
  48.358 +    poll_wait(file, &evtchn_wait, wait);
  48.359 +    if ( ring_cons != ring_prod )
  48.360 +        mask |= POLLIN | POLLRDNORM;
  48.361 +    if ( ring_overflow )
  48.362 +        mask = POLLERR;
  48.363 +    return mask;
  48.364 +}
  48.365 +
  48.366 +static int evtchn_fasync(int fd, struct file *filp, int on)
  48.367 +{
  48.368 +    return fasync_helper(fd, filp, on, &evtchn_async_queue);
  48.369 +}
  48.370 +
  48.371 +static int evtchn_open(struct inode *inode, struct file *filp)
  48.372 +{
  48.373 +    u16 *_ring;
  48.374 +
  48.375 +    if ( test_and_set_bit(0, &evtchn_dev_inuse) )
  48.376 +        return -EBUSY;
  48.377 +
  48.378 +    /* Allocate outside locked region so that we can use GFP_KERNEL. */
  48.379 +    if ( (_ring = (u16 *)get_free_page(GFP_KERNEL)) == NULL )
  48.380 +        return -ENOMEM;
  48.381 +
  48.382 +    spin_lock_irq(&lock);
  48.383 +    ring = _ring;
  48.384 +    __evtchn_reset_buffer_ring();
  48.385 +    spin_unlock_irq(&lock);
  48.386 +
  48.387 +    MOD_INC_USE_COUNT;
  48.388 +
  48.389 +    return 0;
  48.390 +}
  48.391 +
  48.392 +static int evtchn_release(struct inode *inode, struct file *filp)
  48.393 +{
  48.394 +    spin_lock_irq(&lock);
  48.395 +    if ( ring != NULL )
  48.396 +    {
  48.397 +        free_page((unsigned long)ring);
  48.398 +        ring = NULL;
  48.399 +    }
  48.400 +    spin_unlock_irq(&lock);
  48.401 +
  48.402 +    evtchn_dev_inuse = 0;
  48.403 +
  48.404 +    MOD_DEC_USE_COUNT;
  48.405 +
  48.406 +    return 0;
  48.407 +}
  48.408 +
  48.409 +static struct file_operations evtchn_fops = {
  48.410 +    owner:    THIS_MODULE,
  48.411 +    read:     evtchn_read,
  48.412 +    write:    evtchn_write,
  48.413 +    ioctl:    evtchn_ioctl,
  48.414 +    poll:     evtchn_poll,
  48.415 +    fasync:   evtchn_fasync,
  48.416 +    open:     evtchn_open,
  48.417 +    release:  evtchn_release
  48.418 +};
  48.419 +
  48.420 +static struct miscdevice evtchn_miscdev = {
  48.421 +    minor:    EVTCHN_MINOR,
  48.422 +    name:     "evtchn",
  48.423 +    fops:     &evtchn_fops
  48.424 +};
  48.425 +
  48.426 +static int __init init_module(void)
  48.427 +{
  48.428 +    devfs_handle_t symlink_handle;
  48.429 +    int            err, pos;
  48.430 +    char           link_dest[64];
  48.431 +
  48.432 +    /* (DEVFS) create '/dev/misc/evtchn'. */
  48.433 +    err = misc_register(&evtchn_miscdev);
  48.434 +    if ( err != 0 )
  48.435 +    {
  48.436 +        printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
  48.437 +        return err;
  48.438 +    }
  48.439 +
  48.440 +    /* (DEVFS) create directory '/dev/xen'. */
  48.441 +    xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
  48.442 +
  48.443 +    /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
  48.444 +    pos = devfs_generate_path(evtchn_miscdev.devfs_handle, 
  48.445 +                              &link_dest[3], 
  48.446 +                              sizeof(link_dest) - 3);
  48.447 +    if ( pos >= 0 )
  48.448 +        strncpy(&link_dest[pos], "../", 3);
  48.449 +
  48.450 +    /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
  48.451 +    (void)devfs_mk_symlink(xen_dev_dir, 
  48.452 +                           "evtchn", 
  48.453 +                           DEVFS_FL_DEFAULT, 
  48.454 +                           &link_dest[pos],
  48.455 +                           &symlink_handle, 
  48.456 +                           NULL);
  48.457 +
  48.458 +    /* (DEVFS) automatically destroy the symlink with its destination. */
  48.459 +    devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
  48.460 +
  48.461 +    err = request_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN),
  48.462 +                      evtchn_interrupt, 0, "evtchn", NULL);
  48.463 +    if ( err != 0 )
  48.464 +    {
  48.465 +        printk(KERN_ALERT "Could not allocate evtchn receive interrupt\n");
  48.466 +        return err;
  48.467 +    }
  48.468 +
  48.469 +    /* Kickstart servicing of notifications. */
  48.470 +    evtchn_interrupt(0, NULL, NULL);
  48.471 +
  48.472 +    printk("Event-channel driver installed.\n");
  48.473 +
  48.474 +    return 0;
  48.475 +}
  48.476 +
  48.477 +static void cleanup_module(void)
  48.478 +{
  48.479 +    free_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN), NULL);
  48.480 +    misc_deregister(&evtchn_miscdev);
  48.481 +}
  48.482 +
  48.483 +module_init(init_module);
  48.484 +module_exit(cleanup_module);
    49.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/evtchn/xl_evtchn.c	Tue Mar 16 12:37:49 2004 +0000
    49.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    49.3 @@ -1,481 +0,0 @@
    49.4 -/******************************************************************************
    49.5 - * xl_evtchn.c
    49.6 - * 
    49.7 - * Xenolinux driver for receiving and demuxing event-channel signals.
    49.8 - * 
    49.9 - * Copyright (c) 2004, K A Fraser
   49.10 - */
   49.11 -
   49.12 -#include <linux/config.h>
   49.13 -#include <linux/module.h>
   49.14 -#include <linux/kernel.h>
   49.15 -#include <linux/sched.h>
   49.16 -#include <linux/slab.h>
   49.17 -#include <linux/string.h>
   49.18 -#include <linux/errno.h>
   49.19 -#include <linux/fs.h>
   49.20 -#include <linux/errno.h>
   49.21 -#include <linux/miscdevice.h>
   49.22 -#include <linux/major.h>
   49.23 -#include <linux/proc_fs.h>
   49.24 -#include <linux/devfs_fs_kernel.h>
   49.25 -#include <linux/stat.h>
   49.26 -#include <linux/poll.h>
   49.27 -#include <linux/irq.h>
   49.28 -#include <asm/evtchn.h>
   49.29 -
   49.30 -/* NB. This must be shared amongst drivers if more things go in /dev/xen */
   49.31 -static devfs_handle_t xen_dev_dir;
   49.32 -
   49.33 -/* Only one process may open /dev/xen/evtchn at any time. */
   49.34 -static unsigned long evtchn_dev_inuse;
   49.35 -
   49.36 -/* Notification ring, accessed via /dev/xen/evtchn. */
   49.37 -#define RING_SIZE     2048  /* 2048 16-bit entries */
   49.38 -#define RING_MASK(_i) ((_i)&(RING_SIZE-1))
   49.39 -static u16 *ring;
   49.40 -static unsigned int ring_cons, ring_prod, ring_overflow;
   49.41 -
   49.42 -/* Processes wait on this queue via /dev/xen/evtchn when ring is empty. */
   49.43 -static DECLARE_WAIT_QUEUE_HEAD(evtchn_wait);
   49.44 -static struct fasync_struct *evtchn_async_queue;
   49.45 -
   49.46 -static evtchn_receiver_t rx_fns[1024];
   49.47 -
   49.48 -static u32 pend_outstanding[32];
   49.49 -static u32 disc_outstanding[32];
   49.50 -
   49.51 -static spinlock_t lock;
   49.52 -
   49.53 -int evtchn_request_port(unsigned int port, evtchn_receiver_t rx_fn)
   49.54 -{
   49.55 -    unsigned long flags;
   49.56 -    int rc;
   49.57 -
   49.58 -    spin_lock_irqsave(&lock, flags);
   49.59 -
   49.60 -    if ( rx_fns[port] != NULL )
   49.61 -    {
   49.62 -        printk(KERN_ALERT "Event channel port %d already in use.\n", port);
   49.63 -        rc = -EINVAL;
   49.64 -    }
   49.65 -    else
   49.66 -    {
   49.67 -        rx_fns[port] = rx_fn;
   49.68 -        rc = 0;
   49.69 -    }
   49.70 -
   49.71 -    spin_unlock_irqrestore(&lock, flags);
   49.72 -
   49.73 -    return rc;
   49.74 -}
   49.75 -
   49.76 -int evtchn_free_port(unsigned int port)
   49.77 -{
   49.78 -    unsigned long flags;
   49.79 -    int rc;
   49.80 -
   49.81 -    spin_lock_irqsave(&lock, flags);
   49.82 -
   49.83 -    if ( rx_fns[port] == NULL )
   49.84 -    {
   49.85 -        printk(KERN_ALERT "Event channel port %d not in use.\n", port);
   49.86 -        rc = -EINVAL;
   49.87 -    }
   49.88 -    else
   49.89 -    {
   49.90 -        rx_fns[port] = NULL;
   49.91 -        rc = 0;
   49.92 -    }
   49.93 -
   49.94 -    spin_unlock_irqrestore(&lock, flags);
   49.95 -
   49.96 -    return rc;
   49.97 -}
   49.98 -
   49.99 -/*
  49.100 - * NB. Clearing port can race a notification from remote end. Caller must
  49.101 - * therefore recheck notification status on return to avoid missing events.
  49.102 - */
  49.103 -void evtchn_clear_port(unsigned int port)
  49.104 -{
  49.105 -    unsigned int p = port & PORTIDX_MASK;
  49.106 -    unsigned long flags;
  49.107 -
  49.108 -    spin_lock_irqsave(&lock, flags);
  49.109 -
  49.110 -    if ( unlikely(port & PORT_DISCONNECT) )
  49.111 -    {
  49.112 -        clear_bit(p, &disc_outstanding[0]);
  49.113 -        clear_bit(p, &HYPERVISOR_shared_info->event_channel_disc[0]);
  49.114 -    }
  49.115 -    else
  49.116 -    {
  49.117 -        clear_bit(p, &pend_outstanding[0]);
  49.118 -        clear_bit(p, &HYPERVISOR_shared_info->event_channel_pend[0]);
  49.119 -    }
  49.120 -
  49.121 -    spin_unlock_irqrestore(&lock, flags);
  49.122 -}
  49.123 -
  49.124 -static inline void process_bitmask(u32 *sel, 
  49.125 -                                   u32 *mask,
  49.126 -                                   u32 *outstanding,
  49.127 -                                   unsigned int port_subtype)
  49.128 -{
  49.129 -    unsigned long l1, l2;
  49.130 -    unsigned int  l1_idx, l2_idx, port;
  49.131 -
  49.132 -    l1 = xchg(sel, 0);
  49.133 -    while ( (l1_idx = ffs(l1)) != 0 )
  49.134 -    {
  49.135 -        l1_idx--;
  49.136 -        l1 &= ~(1 << l1_idx);
  49.137 -
  49.138 -        l2 = mask[l1_idx] & ~outstanding[l1_idx];
  49.139 -        outstanding[l1_idx] |= l2;
  49.140 -        while ( (l2_idx = ffs(l2)) != 0 )
  49.141 -        {
  49.142 -            l2_idx--;
  49.143 -            l2 &= ~(1 << l2_idx);
  49.144 -
  49.145 -            port = (l1_idx * 32) + l2_idx;
  49.146 -            if ( rx_fns[port] != NULL )
  49.147 -            {
  49.148 -                (*rx_fns[port])(port | port_subtype);
  49.149 -            }
  49.150 -            else if ( ring != NULL )
  49.151 -            {
  49.152 -                if ( (ring_prod - ring_cons) < RING_SIZE )
  49.153 -                {
  49.154 -                    ring[RING_MASK(ring_prod)] = (u16)(port | port_subtype);
  49.155 -                    if ( ring_cons == ring_prod++ )
  49.156 -                    {
  49.157 -                        wake_up_interruptible(&evtchn_wait);
  49.158 -                        kill_fasync(&evtchn_async_queue, SIGIO, POLL_IN);
  49.159 -                    }
  49.160 -                }
  49.161 -                else
  49.162 -                {
  49.163 -                    ring_overflow = 1;
  49.164 -                }
  49.165 -            }
  49.166 -        }
  49.167 -    }
  49.168 -}
  49.169 -
  49.170 -static void evtchn_interrupt(int irq, void *dev_id, struct pt_regs *regs)
  49.171 -{
  49.172 -    shared_info_t *si = HYPERVISOR_shared_info;
  49.173 -    unsigned long flags;
  49.174 -
  49.175 -    spin_lock_irqsave(&lock, flags);
  49.176 -
  49.177 -    process_bitmask(&si->event_channel_pend_sel, 
  49.178 -                    &si->event_channel_pend[0],
  49.179 -                    &pend_outstanding[0],
  49.180 -                    PORT_NORMAL);
  49.181 -        
  49.182 -    process_bitmask(&si->event_channel_disc_sel,
  49.183 -                    &si->event_channel_disc[0],
  49.184 -                    &disc_outstanding[0],
  49.185 -                    PORT_DISCONNECT);
  49.186 -        
  49.187 -    spin_unlock_irqrestore(&lock, flags);
  49.188 -}
  49.189 -
  49.190 -static void __evtchn_reset_buffer_ring(void)
  49.191 -{
  49.192 -    u32          m;
  49.193 -    unsigned int i, j;
  49.194 -
  49.195 -    /* Initialise the ring with currently outstanding notifications. */
  49.196 -    ring_cons = ring_prod = ring_overflow = 0;
  49.197 -
  49.198 -    for ( i = 0; i < 32; i++ )
  49.199 -    {
  49.200 -        m = pend_outstanding[i];
  49.201 -        while ( (j = ffs(m)) != 0 )
  49.202 -        {
  49.203 -            m &= ~(1 << --j);
  49.204 -            if ( rx_fns[(i * 32) + j] == NULL )
  49.205 -                ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_NORMAL);
  49.206 -        }
  49.207 -
  49.208 -        m = disc_outstanding[i];
  49.209 -        while ( (j = ffs(m)) != 0 )
  49.210 -        {
  49.211 -            m &= ~(1 << --j);
  49.212 -            if ( rx_fns[(i * 32) + j] == NULL )
  49.213 -                ring[ring_prod++] = (u16)(((i * 32) + j) | PORT_DISCONNECT);
  49.214 -        }
  49.215 -    }
  49.216 -}
  49.217 -
  49.218 -static ssize_t evtchn_read(struct file *file, char *buf,
  49.219 -                           size_t count, loff_t *ppos)
  49.220 -{
  49.221 -    int rc;
  49.222 -    unsigned int c, p, bytes1 = 0, bytes2 = 0;
  49.223 -    DECLARE_WAITQUEUE(wait, current);
  49.224 -
  49.225 -    add_wait_queue(&evtchn_wait, &wait);
  49.226 -
  49.227 -    count &= ~1; /* even number of bytes */
  49.228 -
  49.229 -    if ( count == 0 )
  49.230 -    {
  49.231 -        rc = 0;
  49.232 -        goto out;
  49.233 -    }
  49.234 -
  49.235 -    if ( count > PAGE_SIZE )
  49.236 -        count = PAGE_SIZE;
  49.237 -
  49.238 -    for ( ; ; )
  49.239 -    {
  49.240 -        set_current_state(TASK_INTERRUPTIBLE);
  49.241 -
  49.242 -        if ( (c = ring_cons) != (p = ring_prod) )
  49.243 -            break;
  49.244 -
  49.245 -        if ( ring_overflow )
  49.246 -        {
  49.247 -            rc = -EFBIG;
  49.248 -            goto out;
  49.249 -        }
  49.250 -
  49.251 -        if ( file->f_flags & O_NONBLOCK )
  49.252 -        {
  49.253 -            rc = -EAGAIN;
  49.254 -            goto out;
  49.255 -        }
  49.256 -
  49.257 -        if ( signal_pending(current) )
  49.258 -        {
  49.259 -            rc = -ERESTARTSYS;
  49.260 -            goto out;
  49.261 -        }
  49.262 -
  49.263 -        schedule();
  49.264 -    }
  49.265 -
  49.266 -    /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
  49.267 -    if ( ((c ^ p) & RING_SIZE) != 0 )
  49.268 -    {
  49.269 -        bytes1 = (RING_SIZE - RING_MASK(c)) * sizeof(u16);
  49.270 -        bytes2 = RING_MASK(p) * sizeof(u16);
  49.271 -    }
  49.272 -    else
  49.273 -    {
  49.274 -        bytes1 = (p - c) * sizeof(u16);
  49.275 -        bytes2 = 0;
  49.276 -    }
  49.277 -
  49.278 -    /* Truncate chunks according to caller's maximum byte count. */
  49.279 -    if ( bytes1 > count )
  49.280 -    {
  49.281 -        bytes1 = count;
  49.282 -        bytes2 = 0;
  49.283 -    }
  49.284 -    else if ( (bytes1 + bytes2) > count )
  49.285 -    {
  49.286 -        bytes2 = count - bytes1;
  49.287 -    }
  49.288 -
  49.289 -    if ( copy_to_user(buf, &ring[RING_MASK(c)], bytes1) ||
  49.290 -         ((bytes2 != 0) && copy_to_user(&buf[bytes1], &ring[0], bytes2)) )
  49.291 -    {
  49.292 -        rc = -EFAULT;
  49.293 -        goto out;
  49.294 -    }
  49.295 -
  49.296 -    ring_cons += (bytes1 + bytes2) / sizeof(u16);
  49.297 -
  49.298 -    rc = bytes1 + bytes2;
  49.299 -
  49.300 - out:
  49.301 -    __set_current_state(TASK_RUNNING);
  49.302 -    remove_wait_queue(&evtchn_wait, &wait);
  49.303 -    return rc;
  49.304 -}
  49.305 -
  49.306 -static ssize_t evtchn_write(struct file *file, const char *buf,
  49.307 -                            size_t count, loff_t *ppos)
  49.308 -{
  49.309 -    int  rc, i;
  49.310 -    u16 *kbuf = (u16 *)get_free_page(GFP_KERNEL);
  49.311 -
  49.312 -    if ( kbuf == NULL )
  49.313 -        return -ENOMEM;
  49.314 -
  49.315 -    count &= ~1; /* even number of bytes */
  49.316 -
  49.317 -    if ( count == 0 )
  49.318 -    {
  49.319 -        rc = 0;
  49.320 -        goto out;
  49.321 -    }
  49.322 -
  49.323 -    if ( count > PAGE_SIZE )
  49.324 -        count = PAGE_SIZE;
  49.325 -
  49.326 -    if ( copy_from_user(kbuf, buf, count) != 0 )
  49.327 -    {
  49.328 -        rc = -EFAULT;
  49.329 -        goto out;
  49.330 -    }
  49.331 -
  49.332 -    for ( i = 0; i < (count/2); i++ )
  49.333 -        evtchn_clear_port(kbuf[i]);
  49.334 -
  49.335 -    rc = count;
  49.336 -
  49.337 - out:
  49.338 -    free_page((unsigned long)kbuf);
  49.339 -    return rc;
  49.340 -}
  49.341 -
  49.342 -static int evtchn_ioctl(struct inode *inode, struct file *file,
  49.343 -                        unsigned int cmd, unsigned long arg)
  49.344 -{
  49.345 -    if ( cmd != EVTCHN_RESET )
  49.346 -        return -EINVAL;
  49.347 -
  49.348 -    spin_lock_irq(&lock);
  49.349 -    __evtchn_reset_buffer_ring();
  49.350 -    spin_unlock_irq(&lock);   
  49.351 -
  49.352 -    return 0;
  49.353 -}
  49.354 -
  49.355 -static unsigned int evtchn_poll(struct file *file, poll_table *wait)
  49.356 -{
  49.357 -    unsigned int mask = POLLOUT | POLLWRNORM;
  49.358 -    poll_wait(file, &evtchn_wait, wait);
  49.359 -    if ( ring_cons != ring_prod )
  49.360 -        mask |= POLLIN | POLLRDNORM;
  49.361 -    if ( ring_overflow )
  49.362 -        mask = POLLERR;
  49.363 -    return mask;
  49.364 -}
  49.365 -
  49.366 -static int evtchn_fasync(int fd, struct file *filp, int on)
  49.367 -{
  49.368 -    return fasync_helper(fd, filp, on, &evtchn_async_queue);
  49.369 -}
  49.370 -
  49.371 -static int evtchn_open(struct inode *inode, struct file *filp)
  49.372 -{
  49.373 -    u16 *_ring;
  49.374 -
  49.375 -    if ( test_and_set_bit(0, &evtchn_dev_inuse) )
  49.376 -        return -EBUSY;
  49.377 -
  49.378 -    /* Allocate outside locked region so that we can use GFP_KERNEL. */
  49.379 -    if ( (_ring = (u16 *)get_free_page(GFP_KERNEL)) == NULL )
  49.380 -        return -ENOMEM;
  49.381 -
  49.382 -    spin_lock_irq(&lock);
  49.383 -    ring = _ring;
  49.384 -    __evtchn_reset_buffer_ring();
  49.385 -    spin_unlock_irq(&lock);
  49.386 -
  49.387 -    MOD_INC_USE_COUNT;
  49.388 -
  49.389 -    return 0;
  49.390 -}
  49.391 -
  49.392 -static int evtchn_release(struct inode *inode, struct file *filp)
  49.393 -{
  49.394 -    spin_lock_irq(&lock);
  49.395 -    if ( ring != NULL )
  49.396 -    {
  49.397 -        free_page((unsigned long)ring);
  49.398 -        ring = NULL;
  49.399 -    }
  49.400 -    spin_unlock_irq(&lock);
  49.401 -
  49.402 -    evtchn_dev_inuse = 0;
  49.403 -
  49.404 -    MOD_DEC_USE_COUNT;
  49.405 -
  49.406 -    return 0;
  49.407 -}
  49.408 -
  49.409 -static struct file_operations evtchn_fops = {
  49.410 -    owner:    THIS_MODULE,
  49.411 -    read:     evtchn_read,
  49.412 -    write:    evtchn_write,
  49.413 -    ioctl:    evtchn_ioctl,
  49.414 -    poll:     evtchn_poll,
  49.415 -    fasync:   evtchn_fasync,
  49.416 -    open:     evtchn_open,
  49.417 -    release:  evtchn_release
  49.418 -};
  49.419 -
  49.420 -static struct miscdevice evtchn_miscdev = {
  49.421 -    minor:    EVTCHN_MINOR,
  49.422 -    name:     "evtchn",
  49.423 -    fops:     &evtchn_fops
  49.424 -};
  49.425 -
  49.426 -static int __init init_module(void)
  49.427 -{
  49.428 -    devfs_handle_t symlink_handle;
  49.429 -    int            err, pos;
  49.430 -    char           link_dest[64];
  49.431 -
  49.432 -    /* (DEVFS) create '/dev/misc/evtchn'. */
  49.433 -    err = misc_register(&evtchn_miscdev);
  49.434 -    if ( err != 0 )
  49.435 -    {
  49.436 -        printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
  49.437 -        return err;
  49.438 -    }
  49.439 -
  49.440 -    /* (DEVFS) create directory '/dev/xen'. */
  49.441 -    xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
  49.442 -
  49.443 -    /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
  49.444 -    pos = devfs_generate_path(evtchn_miscdev.devfs_handle, 
  49.445 -                              &link_dest[3], 
  49.446 -                              sizeof(link_dest) - 3);
  49.447 -    if ( pos >= 0 )
  49.448 -        strncpy(&link_dest[pos], "../", 3);
  49.449 -
  49.450 -    /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
  49.451 -    (void)devfs_mk_symlink(xen_dev_dir, 
  49.452 -                           "evtchn", 
  49.453 -                           DEVFS_FL_DEFAULT, 
  49.454 -                           &link_dest[pos],
  49.455 -                           &symlink_handle, 
  49.456 -                           NULL);
  49.457 -
  49.458 -    /* (DEVFS) automatically destroy the symlink with its destination. */
  49.459 -    devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
  49.460 -
  49.461 -    err = request_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN),
  49.462 -                      evtchn_interrupt, 0, "evtchn", NULL);
  49.463 -    if ( err != 0 )
  49.464 -    {
  49.465 -        printk(KERN_ALERT "Could not allocate evtchn receive interrupt\n");
  49.466 -        return err;
  49.467 -    }
  49.468 -
  49.469 -    /* Kickstart servicing of notifications. */
  49.470 -    evtchn_interrupt(0, NULL, NULL);
  49.471 -
  49.472 -    printk("Event-channel driver installed.\n");
  49.473 -
  49.474 -    return 0;
  49.475 -}
  49.476 -
  49.477 -static void cleanup_module(void)
  49.478 -{
  49.479 -    free_irq(HYPEREVENT_IRQ(_EVENT_EVTCHN), NULL);
  49.480 -    misc_deregister(&evtchn_miscdev);
  49.481 -}
  49.482 -
  49.483 -module_init(init_module);
  49.484 -module_exit(cleanup_module);
    50.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile	Tue Mar 16 12:37:49 2004 +0000
    50.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/Makefile	Wed Mar 17 13:35:01 2004 +0000
    50.3 @@ -1,3 +1,3 @@
    50.4 -O_TARGET := net.o
    50.5 +O_TARGET := drv.o
    50.6  obj-y := network.o
    50.7  include $(TOPDIR)/Rules.make
    51.1 --- a/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c	Tue Mar 16 12:37:49 2004 +0000
    51.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/network/network.c	Wed Mar 17 13:35:01 2004 +0000
    51.3 @@ -415,7 +415,7 @@ static void network_interrupt(int irq, v
    51.4  }
    51.5  
    51.6  
    51.7 -int network_close(struct net_device *dev)
    51.8 +static int network_close(struct net_device *dev)
    51.9  {
   51.10      struct net_private *np = dev->priv;
   51.11      netop_t netop;
    52.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    52.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/Makefile	Wed Mar 17 13:35:01 2004 +0000
    52.3 @@ -0,0 +1,3 @@
    52.4 +O_TARGET := drv.o
    52.5 +obj-y := vnetif.o
    52.6 +include $(TOPDIR)/Rules.make
    53.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    53.2 +++ b/xenolinux-2.4.25-sparse/arch/xeno/drivers/vnetif/vnetif.c	Wed Mar 17 13:35:01 2004 +0000
    53.3 @@ -0,0 +1,552 @@
    53.4 +/******************************************************************************
    53.5 + * vnetif.c
    53.6 + * 
    53.7 + * Virtual network driver for XenoLinux.
    53.8 + * 
    53.9 + * Copyright (c) 2002-2004, K A Fraser
   53.10 + */
   53.11 +
   53.12 +#include <linux/config.h>
   53.13 +#include <linux/module.h>
   53.14 +
   53.15 +#include <linux/kernel.h>
   53.16 +#include <linux/sched.h>
   53.17 +#include <linux/slab.h>
   53.18 +#include <linux/string.h>
   53.19 +#include <linux/errno.h>
   53.20 +
   53.21 +#include <linux/netdevice.h>
   53.22 +#include <linux/inetdevice.h>
   53.23 +#include <linux/etherdevice.h>
   53.24 +#include <linux/skbuff.h>
   53.25 +#include <linux/init.h>
   53.26 +
   53.27 +#include <asm/io.h>
   53.28 +#include <net/sock.h>
   53.29 +#include <net/pkt_sched.h>
   53.30 +
   53.31 +#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */
   53.32 +
   53.33 +static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
   53.34 +static void network_tx_buf_gc(struct net_device *dev);
   53.35 +static void network_alloc_rx_buffers(struct net_device *dev);
   53.36 +static void cleanup_module(void);
   53.37 +
   53.38 +static struct list_head dev_list;
   53.39 +
   53.40 +struct net_private
   53.41 +{
   53.42 +    struct list_head list;
   53.43 +    struct net_device *dev;
   53.44 +
   53.45 +    struct net_device_stats stats;
   53.46 +    NET_RING_IDX rx_resp_cons, tx_resp_cons;
   53.47 +    unsigned int net_ring_fixmap_idx, tx_full;
   53.48 +    net_ring_t  *net_ring;
   53.49 +    net_idx_t   *net_idx;
   53.50 +    spinlock_t   tx_lock;
   53.51 +    unsigned int idx; /* Domain-specific index of this VIF. */
   53.52 +
   53.53 +    unsigned int rx_bufs_to_notify;
   53.54 +
   53.55 +#define STATE_ACTIVE    0
   53.56 +#define STATE_SUSPENDED 1
   53.57 +#define STATE_CLOSED    2
   53.58 +    unsigned int state;
   53.59 +
   53.60 +    /*
   53.61 +     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
   53.62 +     * array is an index into a chain of free entries.
   53.63 +     */
   53.64 +    struct sk_buff *tx_skbs[TX_RING_SIZE+1];
   53.65 +    struct sk_buff *rx_skbs[RX_RING_SIZE+1];
   53.66 +};
   53.67 +
   53.68 +/* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
   53.69 +#define ADD_ID_TO_FREELIST(_list, _id)             \
   53.70 +    (_list)[(_id)] = (_list)[0];                   \
   53.71 +    (_list)[0]     = (void *)(unsigned long)(_id);
   53.72 +#define GET_ID_FROM_FREELIST(_list)                \
   53.73 + ({ unsigned long _id = (unsigned long)(_list)[0]; \
   53.74 +    (_list)[0]  = (_list)[_id];                    \
   53.75 +    (unsigned short)_id; })
   53.76 +
   53.77 +
   53.78 +static void _dbg_network_int(struct net_device *dev)
   53.79 +{
   53.80 +    struct net_private *np = dev->priv;
   53.81 +
   53.82 +    if ( np->state == STATE_CLOSED )
   53.83 +        return;
   53.84 +    
   53.85 +    printk(KERN_ALERT "net: tx_full=%d, tx_resp_cons=0x%08x,"
   53.86 +           " tx_req_prod=0x%08x\nnet: tx_resp_prod=0x%08x,"
   53.87 +           " tx_event=0x%08x, state=%d\n",
   53.88 +           np->tx_full, np->tx_resp_cons, 
   53.89 +           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod, 
   53.90 +           np->net_idx->tx_event,
   53.91 +           test_bit(__LINK_STATE_XOFF, &dev->state));
   53.92 +    printk(KERN_ALERT "net: rx_resp_cons=0x%08x,"
   53.93 +           " rx_req_prod=0x%08x\nnet: rx_resp_prod=0x%08x, rx_event=0x%08x\n",
   53.94 +           np->rx_resp_cons, np->net_idx->rx_req_prod,
   53.95 +           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
   53.96 +}
   53.97 +
   53.98 +
   53.99 +static void dbg_network_int(int irq, void *unused, struct pt_regs *ptregs)
  53.100 +{
  53.101 +    struct list_head *ent;
  53.102 +    struct net_private *np;
  53.103 +    list_for_each ( ent, &dev_list )
  53.104 +    {
  53.105 +        np = list_entry(ent, struct net_private, list);
  53.106 +        _dbg_network_int(np->dev);
  53.107 +    }
  53.108 +}
  53.109 +
  53.110 +
  53.111 +static int network_open(struct net_device *dev)
  53.112 +{
  53.113 +    struct net_private *np = dev->priv;
  53.114 +    netop_t netop;
  53.115 +    int i, ret;
  53.116 +
  53.117 +    netop.cmd = NETOP_RESET_RINGS;
  53.118 +    netop.vif = np->idx;
  53.119 +    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  53.120 +    {
  53.121 +        printk(KERN_ALERT "Possible net trouble: couldn't reset ring idxs\n");
  53.122 +        return ret;
  53.123 +    }
  53.124 +
  53.125 +    netop.cmd = NETOP_GET_VIF_INFO;
  53.126 +    netop.vif = np->idx;
  53.127 +    if ( (ret = HYPERVISOR_net_io_op(&netop)) != 0 )
  53.128 +    {
  53.129 +        printk(KERN_ALERT "Couldn't get info for vif %d\n", np->idx);
  53.130 +        return ret;
  53.131 +    }
  53.132 +
  53.133 +    memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  53.134 +
  53.135 +    set_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx, 
  53.136 +               netop.u.get_vif_info.ring_mfn << PAGE_SHIFT);
  53.137 +    np->net_ring = (net_ring_t *)fix_to_virt(
  53.138 +        FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  53.139 +    np->net_idx  = &HYPERVISOR_shared_info->net_idx[np->idx];
  53.140 +
  53.141 +    np->rx_bufs_to_notify = 0;
  53.142 +    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
  53.143 +    memset(&np->stats, 0, sizeof(np->stats));
  53.144 +    spin_lock_init(&np->tx_lock);
  53.145 +    memset(np->net_ring, 0, sizeof(*np->net_ring));
  53.146 +    memset(np->net_idx, 0, sizeof(*np->net_idx));
  53.147 +
  53.148 +    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
  53.149 +    for ( i = 0; i <= TX_RING_SIZE; i++ )
  53.150 +        np->tx_skbs[i] = (void *)(i+1);
  53.151 +    for ( i = 0; i <= RX_RING_SIZE; i++ )
  53.152 +        np->rx_skbs[i] = (void *)(i+1);
  53.153 +
  53.154 +    wmb();
  53.155 +    np->state = STATE_ACTIVE;
  53.156 +
  53.157 +    network_alloc_rx_buffers(dev);
  53.158 +
  53.159 +    netif_start_queue(dev);
  53.160 +
  53.161 +    MOD_INC_USE_COUNT;
  53.162 +
  53.163 +    return 0;
  53.164 +}
  53.165 +
  53.166 +
  53.167 +static void network_tx_buf_gc(struct net_device *dev)
  53.168 +{
  53.169 +    NET_RING_IDX i, prod;
  53.170 +    unsigned short id;
  53.171 +    struct net_private *np = dev->priv;
  53.172 +    struct sk_buff *skb;
  53.173 +    tx_entry_t *tx_ring = np->net_ring->tx_ring;
  53.174 +
  53.175 +    do {
  53.176 +        prod = np->net_idx->tx_resp_prod;
  53.177 +
  53.178 +        for ( i = np->tx_resp_cons; i != prod; i++ )
  53.179 +        {
  53.180 +            id  = tx_ring[MASK_NET_TX_IDX(i)].resp.id;
  53.181 +            skb = np->tx_skbs[id];
  53.182 +            ADD_ID_TO_FREELIST(np->tx_skbs, id);
  53.183 +            dev_kfree_skb_any(skb);
  53.184 +        }
  53.185 +        
  53.186 +        np->tx_resp_cons = prod;
  53.187 +        
  53.188 +        /*
  53.189 +         * Set a new event, then check for race with update of tx_cons. Note
  53.190 +         * that it is essential to schedule a callback, no matter how few
  53.191 +         * buffers are pending. Even if there is space in the transmit ring,
  53.192 +         * higher layers may be blocked because too much data is outstanding:
  53.193 +         * in such cases notification from Xen is likely to be the only kick
  53.194 +         * that we'll get.
  53.195 +         */
  53.196 +        np->net_idx->tx_event = 
  53.197 +            prod + ((np->net_idx->tx_req_prod - prod) >> 1) + 1;
  53.198 +        mb();
  53.199 +    }
  53.200 +    while ( prod != np->net_idx->tx_resp_prod );
  53.201 +
  53.202 +    if ( np->tx_full && ((np->net_idx->tx_req_prod - prod) < TX_RING_SIZE) )
  53.203 +    {
  53.204 +        np->tx_full = 0;
  53.205 +        if ( np->state == STATE_ACTIVE )
  53.206 +            netif_wake_queue(dev);
  53.207 +    }
  53.208 +}
  53.209 +
  53.210 +
  53.211 +static inline pte_t *get_ppte(void *addr)
  53.212 +{
  53.213 +    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
  53.214 +    pgd = pgd_offset_k(   (unsigned long)addr);
  53.215 +    pmd = pmd_offset(pgd, (unsigned long)addr);
  53.216 +    pte = pte_offset(pmd, (unsigned long)addr);
  53.217 +    return pte;
  53.218 +}
  53.219 +
  53.220 +
  53.221 +static void network_alloc_rx_buffers(struct net_device *dev)
  53.222 +{
  53.223 +    unsigned short id;
  53.224 +    struct net_private *np = dev->priv;
  53.225 +    struct sk_buff *skb;
  53.226 +    netop_t netop;
  53.227 +    NET_RING_IDX i = np->net_idx->rx_req_prod;
  53.228 +
  53.229 +    if ( unlikely((i - np->rx_resp_cons) == RX_RING_SIZE) || 
  53.230 +         unlikely(np->state != STATE_ACTIVE) )
  53.231 +        return;
  53.232 +
  53.233 +    do {
  53.234 +        skb = dev_alloc_skb(RX_BUF_SIZE);
  53.235 +        if ( unlikely(skb == NULL) )
  53.236 +            break;
  53.237 +
  53.238 +        skb->dev = dev;
  53.239 +
  53.240 +        if ( unlikely(((unsigned long)skb->head & (PAGE_SIZE-1)) != 0) )
  53.241 +            panic("alloc_skb needs to provide us page-aligned buffers.");
  53.242 +
  53.243 +        id = GET_ID_FROM_FREELIST(np->rx_skbs);
  53.244 +        np->rx_skbs[id] = skb;
  53.245 +
  53.246 +        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.id   = id;
  53.247 +        np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].req.addr = 
  53.248 +            virt_to_machine(get_ppte(skb->head));
  53.249 +
  53.250 +        np->rx_bufs_to_notify++;
  53.251 +    }
  53.252 +    while ( (++i - np->rx_resp_cons) != RX_RING_SIZE );
  53.253 +
  53.254 +    /*
  53.255 +     * We may have allocated buffers which have entries outstanding in the page
  53.256 +     * update queue -- make sure we flush those first!
  53.257 +     */
  53.258 +    flush_page_update_queue();
  53.259 +
  53.260 +    np->net_idx->rx_req_prod = i;
  53.261 +    np->net_idx->rx_event    = np->rx_resp_cons + 1;
  53.262 +        
  53.263 +    /* Batch Xen notifications. */
  53.264 +    if ( np->rx_bufs_to_notify > (RX_RING_SIZE/4) )
  53.265 +    {
  53.266 +        netop.cmd = NETOP_PUSH_BUFFERS;
  53.267 +        netop.vif = np->idx;
  53.268 +        (void)HYPERVISOR_net_io_op(&netop);
  53.269 +        np->rx_bufs_to_notify = 0;
  53.270 +    }
  53.271 +}
  53.272 +
  53.273 +
  53.274 +static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
  53.275 +{
  53.276 +    unsigned short id;
  53.277 +    struct net_private *np = (struct net_private *)dev->priv;
  53.278 +    tx_req_entry_t *tx;
  53.279 +    netop_t netop;
  53.280 +    NET_RING_IDX i;
  53.281 +
  53.282 +    if ( unlikely(np->tx_full) )
  53.283 +    {
  53.284 +        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
  53.285 +        netif_stop_queue(dev);
  53.286 +        return -ENOBUFS;
  53.287 +    }
  53.288 +
  53.289 +    if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
  53.290 +                  PAGE_SIZE) )
  53.291 +    {
  53.292 +        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
  53.293 +        if ( unlikely(new_skb == NULL) )
  53.294 +            return 1;
  53.295 +        skb_put(new_skb, skb->len);
  53.296 +        memcpy(new_skb->data, skb->data, skb->len);
  53.297 +        dev_kfree_skb(skb);
  53.298 +        skb = new_skb;
  53.299 +    }   
  53.300 +    
  53.301 +    spin_lock_irq(&np->tx_lock);
  53.302 +
  53.303 +    i = np->net_idx->tx_req_prod;
  53.304 +
  53.305 +    id = GET_ID_FROM_FREELIST(np->tx_skbs);
  53.306 +    np->tx_skbs[id] = skb;
  53.307 +
  53.308 +    tx = &np->net_ring->tx_ring[MASK_NET_TX_IDX(i)].req;
  53.309 +
  53.310 +    tx->id   = id;
  53.311 +    tx->addr = phys_to_machine(virt_to_phys(skb->data));
  53.312 +    tx->size = skb->len;
  53.313 +
  53.314 +    wmb();
  53.315 +    np->net_idx->tx_req_prod = i + 1;
  53.316 +
  53.317 +    network_tx_buf_gc(dev);
  53.318 +
  53.319 +    if ( (i - np->tx_resp_cons) == (TX_RING_SIZE - 1) )
  53.320 +    {
  53.321 +        np->tx_full = 1;
  53.322 +        netif_stop_queue(dev);
  53.323 +    }
  53.324 +
  53.325 +    spin_unlock_irq(&np->tx_lock);
  53.326 +
  53.327 +    np->stats.tx_bytes += skb->len;
  53.328 +    np->stats.tx_packets++;
  53.329 +
  53.330 +    /* Only notify Xen if there are no outstanding responses. */
  53.331 +    mb();
  53.332 +    if ( np->net_idx->tx_resp_prod == i )
  53.333 +    {
  53.334 +        netop.cmd = NETOP_PUSH_BUFFERS;
  53.335 +        netop.vif = np->idx;
  53.336 +        (void)HYPERVISOR_net_io_op(&netop);
  53.337 +    }
  53.338 +
  53.339 +    return 0;
  53.340 +}
  53.341 +
  53.342 +
  53.343 +static inline void _network_interrupt(struct net_device *dev)
  53.344 +{
  53.345 +    struct net_private *np = dev->priv;
  53.346 +    unsigned long flags;
  53.347 +    struct sk_buff *skb;
  53.348 +    rx_resp_entry_t *rx;
  53.349 +    NET_RING_IDX i;
  53.350 +
  53.351 +    if ( unlikely(np->state == STATE_CLOSED) )
  53.352 +        return;
  53.353 +    
  53.354 +    spin_lock_irqsave(&np->tx_lock, flags);
  53.355 +    network_tx_buf_gc(dev);
  53.356 +    spin_unlock_irqrestore(&np->tx_lock, flags);
  53.357 +
  53.358 + again:
  53.359 +    for ( i = np->rx_resp_cons; i != np->net_idx->rx_resp_prod; i++ )
  53.360 +    {
  53.361 +        rx = &np->net_ring->rx_ring[MASK_NET_RX_IDX(i)].resp;
  53.362 +
  53.363 +        skb = np->rx_skbs[rx->id];
  53.364 +        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
  53.365 +
  53.366 +        if ( unlikely(rx->status != RING_STATUS_OK) )
  53.367 +        {
  53.368 +            /* Gate this error. We get a (valid) slew of them on suspend. */
  53.369 +            if ( np->state == STATE_ACTIVE )
  53.370 +                printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
  53.371 +            dev_kfree_skb_any(skb);
  53.372 +            continue;
  53.373 +        }
  53.374 +
  53.375 +        /*
  53.376 +         * Set up shinfo -- from alloc_skb This was particularily nasty:  the
  53.377 +         * shared info is hidden at the back of the data area (presumably so it
  53.378 +         * can be shared), but on page flip it gets very spunked.
  53.379 +         */
  53.380 +        atomic_set(&(skb_shinfo(skb)->dataref), 1);
  53.381 +        skb_shinfo(skb)->nr_frags = 0;
  53.382 +        skb_shinfo(skb)->frag_list = NULL;
  53.383 +                                
  53.384 +        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
  53.385 +            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;
  53.386 +
  53.387 +        skb->data = skb->tail = skb->head + rx->offset;
  53.388 +        skb_put(skb, rx->size);
  53.389 +        skb->protocol = eth_type_trans(skb, dev);
  53.390 +
  53.391 +        np->stats.rx_packets++;
  53.392 +
  53.393 +        np->stats.rx_bytes += rx->size;
  53.394 +        netif_rx(skb);
  53.395 +        dev->last_rx = jiffies;
  53.396 +    }
  53.397 +
  53.398 +    np->rx_resp_cons = i;
  53.399 +
  53.400 +    network_alloc_rx_buffers(dev);
  53.401 +    
  53.402 +    /* Deal with hypervisor racing our resetting of rx_event. */
  53.403 +    mb();
  53.404 +    if ( np->net_idx->rx_resp_prod != i )
  53.405 +        goto again;
  53.406 +}
  53.407 +
  53.408 +
  53.409 +static void network_interrupt(int irq, void *unused, struct pt_regs *ptregs)
  53.410 +{
  53.411 +    struct list_head *ent;
  53.412 +    struct net_private *np;
  53.413 +    list_for_each ( ent, &dev_list )
  53.414 +    {
  53.415 +        np = list_entry(ent, struct net_private, list);
  53.416 +        _network_interrupt(np->dev);
  53.417 +    }
  53.418 +}
  53.419 +
  53.420 +
  53.421 +static int network_close(struct net_device *dev)
  53.422 +{
  53.423 +    struct net_private *np = dev->priv;
  53.424 +    netop_t netop;
  53.425 +
  53.426 +    np->state = STATE_SUSPENDED;
  53.427 +    wmb();
  53.428 +
  53.429 +    netif_stop_queue(np->dev);
  53.430 +
  53.431 +    netop.cmd = NETOP_FLUSH_BUFFERS;
  53.432 +    netop.vif = np->idx;
  53.433 +    (void)HYPERVISOR_net_io_op(&netop);
  53.434 +
  53.435 +    while ( (np->rx_resp_cons != np->net_idx->rx_req_prod) ||
  53.436 +            (np->tx_resp_cons != np->net_idx->tx_req_prod) )
  53.437 +    {
  53.438 +        barrier();
  53.439 +        current->state = TASK_INTERRUPTIBLE;
  53.440 +        schedule_timeout(1);
  53.441 +    }
  53.442 +
  53.443 +    wmb();
  53.444 +    np->state = STATE_CLOSED;
  53.445 +    wmb();
  53.446 +
  53.447 +    /* Now no longer safe to take interrupts for this device. */
  53.448 +    clear_fixmap(FIX_NETRING0_BASE + np->net_ring_fixmap_idx);
  53.449 +
  53.450 +    MOD_DEC_USE_COUNT;
  53.451 +
  53.452 +    return 0;
  53.453 +}
  53.454 +
  53.455 +
  53.456 +static struct net_device_stats *network_get_stats(struct net_device *dev)
  53.457 +{
  53.458 +    struct net_private *np = (struct net_private *)dev->priv;
  53.459 +    return &np->stats;
  53.460 +}
  53.461 +
  53.462 +
  53.463 +static int __init init_module(void)
  53.464 +{
  53.465 +#if 0
  53.466 +    int i, fixmap_idx=-1, err;
  53.467 +    struct net_device *dev;
  53.468 +    struct net_private *np;
  53.469 +    netop_t netop;
  53.470 +
  53.471 +    INIT_LIST_HEAD(&dev_list);
  53.472 +
  53.473 +    err = request_irq(HYPEREVENT_IRQ(_EVENT_NET), network_interrupt, 
  53.474 +                      SA_SAMPLE_RANDOM, "network", NULL);
  53.475 +    if ( err )
  53.476 +    {
  53.477 +        printk(KERN_WARNING "Could not allocate network interrupt\n");
  53.478 +        goto fail;
  53.479 +    }
  53.480 +    
  53.481 +    err = request_irq(HYPEREVENT_IRQ(_EVENT_DEBUG), dbg_network_int, 
  53.482 +                      SA_SHIRQ, "net_dbg", &dbg_network_int);
  53.483 +    if ( err )
  53.484 +        printk(KERN_WARNING "Non-fatal error -- no debug interrupt\n");
  53.485 +
  53.486 +    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
  53.487 +    {
  53.488 +        /* If the VIF is invalid then the query hypercall will fail. */
  53.489 +        netop.cmd = NETOP_GET_VIF_INFO;
  53.490 +        netop.vif = i;
  53.491 +        if ( HYPERVISOR_net_io_op(&netop) != 0 )
  53.492 +            continue;
  53.493 +
  53.494 +        /* We actually only support up to 4 vifs right now. */
  53.495 +        if ( ++fixmap_idx == 4 )
  53.496 +            break;
  53.497 +
  53.498 +        dev = alloc_etherdev(sizeof(struct net_private));
  53.499 +        if ( dev == NULL )
  53.500 +        {
  53.501 +            err = -ENOMEM;
  53.502 +            goto fail;
  53.503 +        }
  53.504 +
  53.505 +        np = dev->priv;
  53.506 +        np->state               = STATE_CLOSED;
  53.507 +        np->net_ring_fixmap_idx = fixmap_idx;
  53.508 +        np->idx                 = i;
  53.509 +
  53.510 +        SET_MODULE_OWNER(dev);
  53.511 +        dev->open            = network_open;
  53.512 +        dev->hard_start_xmit = network_start_xmit;
  53.513 +        dev->stop            = network_close;
  53.514 +        dev->get_stats       = network_get_stats;
  53.515 +
  53.516 +        memcpy(dev->dev_addr, netop.u.get_vif_info.vmac, ETH_ALEN);
  53.517 +
  53.518 +        if ( (err = register_netdev(dev)) != 0 )
  53.519 +        {
  53.520 +            kfree(dev);
  53.521 +            goto fail;
  53.522 +        }
  53.523 +
  53.524 +        np->dev = dev;
  53.525 +        list_add(&np->list, &dev_list);
  53.526 +    }
  53.527 +
  53.528 +    return 0;
  53.529 +
  53.530 + fail:
  53.531 +    cleanup_module();
  53.532 +    return err;
  53.533 +#endif
  53.534 +    return 0;
  53.535 +}
  53.536 +
  53.537 +
  53.538 +static void cleanup_module(void)
  53.539 +{
  53.540 +    struct net_private *np;
  53.541 +    struct net_device *dev;
  53.542 +
  53.543 +    while ( !list_empty(&dev_list) )
  53.544 +    {
  53.545 +        np = list_entry(dev_list.next, struct net_private, list);
  53.546 +        list_del(&np->list);
  53.547 +        dev = np->dev;
  53.548 +        unregister_netdev(dev);
  53.549 +        kfree(dev);
  53.550 +    }
  53.551 +}
  53.552 +
  53.553 +
  53.554 +module_init(init_module);
  53.555 +module_exit(cleanup_module);