ia64/xen-unstable

changeset 4275:989b998fd8ca

bitkeeper revision 1.1236.1.117 (4241709cmfgF-94U74o-PmIJUmoksQ)

Fix iopl/iobitmap interfaces. Control tools can set per-domain
access limits via DOM0_IOPL_PERMISSION/DOM0_IOPORT_PERMISSION.
Guests can set current permissions per VCPU via physdev ops.
These will always succeed -- checking against admin-set limits is
done at access time.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Mar 23 13:35:24 2005 +0000 (2005-03-23)
parents bc69e161b5fe
children 5c8a42bc39b3
files .rootkeys linux-2.4.29-xen-sparse/arch/xen/kernel/ioport.c linux-2.4.29-xen-sparse/arch/xen/kernel/process.c linux-2.4.29-xen-sparse/arch/xen/kernel/setup.c linux-2.4.29-xen-sparse/mkbuildtree linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c xen/arch/x86/dom0_ops.c xen/arch/x86/domain.c xen/arch/x86/setup.c xen/arch/x86/traps.c xen/common/domain.c xen/common/physdev.c xen/include/asm-x86/domain.h xen/include/asm-x86/processor.h xen/include/public/dom0_ops.h xen/include/public/physdev.h
line diff
     1.1 --- a/.rootkeys	Wed Mar 23 09:08:07 2005 +0000
     1.2 +++ b/.rootkeys	Wed Mar 23 13:35:24 2005 +0000
     1.3 @@ -145,6 +145,7 @@ 3e5a4e65lWzkiPXsZdzPt2RNnJGG1g linux-2.4
     1.4  3e5a4e65_hqfuxtGG8IUy6wRM86Ecg linux-2.4.29-xen-sparse/arch/xen/kernel/entry.S
     1.5  3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg linux-2.4.29-xen-sparse/arch/xen/kernel/head.S
     1.6  3e5a4e65RMGcuA-HCn3-wNx3fFQwdg linux-2.4.29-xen-sparse/arch/xen/kernel/i386_ksyms.c
     1.7 +4241709bNBs1q4Ss32YW0CyFVOGhEg linux-2.4.29-xen-sparse/arch/xen/kernel/ioport.c
     1.8  3e5a4e653U6cELGv528IxOLHvCq8iA linux-2.4.29-xen-sparse/arch/xen/kernel/irq.c
     1.9  3e5a4e65muT6SU3ck47IP87Q7Ti5hA linux-2.4.29-xen-sparse/arch/xen/kernel/ldt.c
    1.10  4051db95N9N99FjsRwi49YKUNHWI8A linux-2.4.29-xen-sparse/arch/xen/kernel/pci-pc.c
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/ioport.c	Wed Mar 23 13:35:24 2005 +0000
     2.3 @@ -0,0 +1,43 @@
     2.4 +#include <linux/sched.h>
     2.5 +#include <linux/kernel.h>
     2.6 +#include <linux/errno.h>
     2.7 +#include <linux/types.h>
     2.8 +#include <linux/ioport.h>
     2.9 +#include <linux/mm.h>
    2.10 +#include <linux/smp.h>
    2.11 +#include <linux/smp_lock.h>
    2.12 +#include <linux/stddef.h>
    2.13 +#include <linux/slab.h>
    2.14 +#include <asm-xen/xen-public/physdev.h>
    2.15 +
    2.16 +asmlinkage long sys_iopl(unsigned int new_io_pl)
    2.17 +{
    2.18 +    unsigned int old_io_pl = current->thread.io_pl;
    2.19 +    physdev_op_t op;
    2.20 +
    2.21 +    if (new_io_pl > 3)
    2.22 +        return -EINVAL;
    2.23 +
    2.24 +	/* Need "raw I/O" privileges for direct port access. */
    2.25 +    if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
    2.26 +        return -EPERM;
    2.27 +
    2.28 +	/* Maintain OS privileges even if user attempts to relinquish them. */
    2.29 +    if (new_io_pl == 0)
    2.30 +        new_io_pl = 1;
    2.31 +
    2.32 +	/* Change our version of the privilege levels. */
    2.33 +    current->thread.io_pl = new_io_pl;
    2.34 +
    2.35 +    /* Force the change at ring 0. */
    2.36 +    op.cmd             = PHYSDEVOP_SET_IOPL;
    2.37 +    op.u.set_iopl.iopl = new_io_pl;
    2.38 +    HYPERVISOR_physdev_op(&op);
    2.39 +
    2.40 +    return 0;
    2.41 +}
    2.42 +
    2.43 +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
    2.44 +{
    2.45 +    return turn_on ? sys_iopl(3) : 0;
    2.46 +}
     3.1 --- a/linux-2.4.29-xen-sparse/arch/xen/kernel/process.c	Wed Mar 23 09:08:07 2005 +0000
     3.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/process.c	Wed Mar 23 13:35:24 2005 +0000
     3.3 @@ -44,7 +44,7 @@
     3.4  #include <asm/desc.h>
     3.5  #include <asm/mmu_context.h>
     3.6  #include <asm/multicall.h>
     3.7 -#include <asm-xen/xen-public/dom0_ops.h>
     3.8 +#include <asm-xen/xen-public/physdev.h>
     3.9  
    3.10  #include <linux/irq.h>
    3.11  
    3.12 @@ -304,6 +304,7 @@ void dump_thread(struct pt_regs * regs, 
    3.13  void fastcall __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
    3.14  {
    3.15      struct thread_struct *next = &next_p->thread;
    3.16 +    physdev_op_t op;
    3.17  
    3.18      __cli();
    3.19  
    3.20 @@ -335,14 +336,12 @@ void fastcall __switch_to(struct task_st
    3.21      }
    3.22  
    3.23      queue_multicall2(__HYPERVISOR_stack_switch, __KERNEL_DS, next->esp0);
    3.24 -    if ( xen_start_info.flags & SIF_PRIVILEGED ) 
    3.25 +
    3.26 +    if ( prev_p->thread.io_pl != next->io_pl ) 
    3.27      {
    3.28 -        dom0_op_t op;
    3.29 -        op.cmd           = DOM0_IOPL;
    3.30 -        op.u.iopl.domain = DOMID_SELF;
    3.31 -        op.u.iopl.iopl   = next->io_pl;
    3.32 -        op.interface_version = DOM0_INTERFACE_VERSION;
    3.33 -        queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op);
    3.34 +        op.cmd             = PHYSDEVOP_SET_IOPL;
    3.35 +	op.u.set_iopl.iopl = next->io_pl;
    3.36 +        queue_multicall1(__HYPERVISOR_physdev_op, (unsigned long)&op);
    3.37      }
    3.38  
    3.39      /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */
     4.1 --- a/linux-2.4.29-xen-sparse/arch/xen/kernel/setup.c	Wed Mar 23 09:08:07 2005 +0000
     4.2 +++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/setup.c	Wed Mar 23 13:35:24 2005 +0000
     4.3 @@ -48,7 +48,7 @@ static int errno;
     4.4  #include <asm/mmu_context.h>
     4.5  #include <asm/ctrl_if.h>
     4.6  #include <asm/hypervisor.h>
     4.7 -#include <asm-xen/xen-public/dom0_ops.h>
     4.8 +#include <asm-xen/xen-public/physdev.h>
     4.9  #include <linux/netdevice.h>
    4.10  #include <linux/rtnetlink.h>
    4.11  #include <linux/tqueue.h>
    4.12 @@ -206,6 +206,7 @@ void __init setup_arch(char **cmdline_p)
    4.13      unsigned long bootmap_size, start_pfn, lmax_low_pfn;
    4.14      int mem_param;  /* user specified memory size in pages */
    4.15      int boot_pfn;   /* low pages available for bootmem */
    4.16 +    physdev_op_t op;
    4.17  
    4.18      extern void hypervisor_callback(void);
    4.19      extern void failsafe_callback(void);
    4.20 @@ -416,17 +417,9 @@ void __init setup_arch(char **cmdline_p)
    4.21      HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
    4.22  	virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
    4.23  
    4.24 -    /* If we are a privileged guest OS then we should request IO privileges. */
    4.25 -    if ( xen_start_info.flags & SIF_PRIVILEGED ) 
    4.26 -    {
    4.27 -        dom0_op_t op;
    4.28 -        op.cmd           = DOM0_IOPL;
    4.29 -        op.u.iopl.domain = DOMID_SELF;
    4.30 -        op.u.iopl.iopl   = 1;
    4.31 -        if( HYPERVISOR_dom0_op(&op) != 0 )
    4.32 -            panic("Unable to obtain IOPL, despite being SIF_PRIVILEGED");
    4.33 -        current->thread.io_pl = 1;
    4.34 -    }
    4.35 +    op.cmd             = PHYSDEVOP_SET_IOPL;
    4.36 +    op.u.set_iopl.iopl = current->thread.io_pl = 1;
    4.37 +    HYPERVISOR_physdev_op(&op);
    4.38  
    4.39      if (xen_start_info.flags & SIF_INITDOMAIN )
    4.40      {
     5.1 --- a/linux-2.4.29-xen-sparse/mkbuildtree	Wed Mar 23 09:08:07 2005 +0000
     5.2 +++ b/linux-2.4.29-xen-sparse/mkbuildtree	Wed Mar 23 13:35:24 2005 +0000
     5.3 @@ -232,7 +232,6 @@ ln -sf ../../../${LINUX_26}/arch/xen/ker
     5.4  ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c
     5.5  ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c
     5.6  ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c
     5.7 -ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/ioport.c
     5.8  ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/pci-dma.c
     5.9  
    5.10  cd ${AD}/arch/xen/lib
     6.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c	Wed Mar 23 09:08:07 2005 +0000
     6.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c	Wed Mar 23 13:35:24 2005 +0000
     6.3 @@ -1,19 +1,110 @@
     6.4 +/*
     6.5 + *	linux/arch/i386/kernel/ioport.c
     6.6 + *
     6.7 + * This contains the io-permission bitmap code - written by obz, with changes
     6.8 + * by Linus.
     6.9 + */
    6.10 +
    6.11  #include <linux/sched.h>
    6.12  #include <linux/kernel.h>
    6.13  #include <linux/errno.h>
    6.14  #include <linux/types.h>
    6.15  #include <linux/ioport.h>
    6.16 -#include <linux/mm.h>
    6.17  #include <linux/smp.h>
    6.18  #include <linux/smp_lock.h>
    6.19  #include <linux/stddef.h>
    6.20  #include <linux/slab.h>
    6.21 -#include <asm-xen/xen-public/dom0_ops.h>
    6.22 +#include <linux/thread_info.h>
    6.23 +#include <asm-xen/xen-public/physdev.h>
    6.24 +
    6.25 +/* Set EXTENT bits starting at BASE in BITMAP to value NEW_VALUE. */
    6.26 +static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
    6.27 +{
    6.28 +	unsigned long mask;
    6.29 +	unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
    6.30 +	unsigned int low_index = base & (BITS_PER_LONG-1);
    6.31 +	int length = low_index + extent;
    6.32 +
    6.33 +	if (low_index != 0) {
    6.34 +		mask = (~0UL << low_index);
    6.35 +		if (length < BITS_PER_LONG)
    6.36 +			mask &= ~(~0UL << length);
    6.37 +		if (new_value)
    6.38 +			*bitmap_base++ |= mask;
    6.39 +		else
    6.40 +			*bitmap_base++ &= ~mask;
    6.41 +		length -= BITS_PER_LONG;
    6.42 +	}
    6.43 +
    6.44 +	mask = (new_value ? ~0UL : 0UL);
    6.45 +	while (length >= BITS_PER_LONG) {
    6.46 +		*bitmap_base++ = mask;
    6.47 +		length -= BITS_PER_LONG;
    6.48 +	}
    6.49 +
    6.50 +	if (length > 0) {
    6.51 +		mask = ~(~0UL << length);
    6.52 +		if (new_value)
    6.53 +			*bitmap_base++ |= mask;
    6.54 +		else
    6.55 +			*bitmap_base++ &= ~mask;
    6.56 +	}
    6.57 +}
    6.58 +
    6.59 +
    6.60 +/*
    6.61 + * this changes the io permissions bitmap in the current task.
    6.62 + */
    6.63 +asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
    6.64 +{
    6.65 +	struct thread_struct * t = &current->thread;
    6.66 +	unsigned long *bitmap;
    6.67 +	physdev_op_t op;
    6.68 +
    6.69 +	if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
    6.70 +		return -EINVAL;
    6.71 +	if (turn_on && !capable(CAP_SYS_RAWIO))
    6.72 +		return -EPERM;
    6.73 +
    6.74 +	/*
    6.75 +	 * If it's the first ioperm() call in this thread's lifetime, set the
    6.76 +	 * IO bitmap up. ioperm() is much less timing critical than clone(),
    6.77 +	 * this is why we delay this operation until now:
    6.78 +	 */
    6.79 +	if (!t->io_bitmap_ptr) {
    6.80 +		bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
    6.81 +		if (!bitmap)
    6.82 +			return -ENOMEM;
    6.83 +
    6.84 +		memset(bitmap, 0xff, IO_BITMAP_BYTES);
    6.85 +		t->io_bitmap_ptr = bitmap;
    6.86 +
    6.87 +		op.cmd = PHYSDEVOP_SET_IOBITMAP;
    6.88 +		op.u.set_iobitmap.bitmap   = (unsigned long)bitmap;
    6.89 +		op.u.set_iobitmap.nr_ports = IO_BITMAP_BITS;
    6.90 +		HYPERVISOR_physdev_op(&op);
    6.91 +	}
    6.92 +
    6.93 +	set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
    6.94 +
    6.95 +	return 0;
    6.96 +}
    6.97 +
    6.98 +/*
    6.99 + * sys_iopl has to be used when you want to access the IO ports
   6.100 + * beyond the 0x3ff range: to get the full 65536 ports bitmapped
   6.101 + * you'd need 8kB of bitmaps/process, which is a bit excessive.
   6.102 + *
   6.103 + * Under Xen the requested level is recorded in thread.io_pl and
   6.104 + * passed to the hypervisor with PHYSDEVOP_SET_IOPL. Raising the
   6.105 + * level requires CAP_SYS_RAWIO, and a request for level 0 is
   6.106 + * stored as level 1 to maintain OS privileges.
   6.107 + */
   6.108  
   6.109  asmlinkage long sys_iopl(unsigned int new_io_pl)
   6.110  {
   6.111  	unsigned int old_io_pl = current->thread.io_pl;
   6.112 -	dom0_op_t op;
   6.113 +	physdev_op_t op;
   6.114  
   6.115  	if (new_io_pl > 3)
   6.116  		return -EINVAL;
   6.117 @@ -22,9 +113,6 @@ asmlinkage long sys_iopl(unsigned int ne
   6.118  	if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
   6.119  		return -EPERM;
   6.120  
   6.121 -	if (!(xen_start_info.flags & SIF_PRIVILEGED))
   6.122 -		return -EPERM;
   6.123 -
   6.124  	/* Maintain OS privileges even if user attempts to relinquish them. */
   6.125  	if (new_io_pl == 0)
   6.126  		new_io_pl = 1;
   6.127 @@ -33,19 +121,9 @@ asmlinkage long sys_iopl(unsigned int ne
   6.128  	current->thread.io_pl = new_io_pl;
   6.129  
   6.130  	/* Force the change at ring 0. */
   6.131 -	op.cmd           = DOM0_IOPL;
   6.132 -	op.u.iopl.domain = DOMID_SELF;
   6.133 -	op.u.iopl.iopl   = new_io_pl;
   6.134 -	HYPERVISOR_dom0_op(&op);
   6.135 +	op.cmd             = PHYSDEVOP_SET_IOPL;
   6.136 +	op.u.set_iopl.iopl = new_io_pl;
   6.137 +	HYPERVISOR_physdev_op(&op);
   6.138  
   6.139  	return 0;
   6.140  }
   6.141 -
   6.142 -asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
   6.143 -{
   6.144 -#if 0
   6.145 -	printk(KERN_INFO "ioperm not fully supported - %s\n",
   6.146 -		turn_on ? "set iopl to 3" : "ignore resource release");
   6.147 -#endif
   6.148 -	return turn_on ? sys_iopl(3) : 0;
   6.149 -}
     7.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c	Wed Mar 23 09:08:07 2005 +0000
     7.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c	Wed Mar 23 13:35:24 2005 +0000
     7.3 @@ -47,7 +47,7 @@
     7.4  #include <asm/irq.h>
     7.5  #include <asm/desc.h>
     7.6  #include <asm-xen/multicall.h>
     7.7 -#include <asm-xen/xen-public/dom0_ops.h>
     7.8 +#include <asm-xen/xen-public/physdev.h>
     7.9  #ifdef CONFIG_MATH_EMULATION
    7.10  #include <asm/math_emu.h>
    7.11  #endif
    7.12 @@ -228,20 +228,11 @@ void exit_thread(void)
    7.13  
    7.14  	/* The process may have allocated an io port bitmap... nuke it. */
    7.15  	if (unlikely(NULL != t->io_bitmap_ptr)) {
    7.16 -		int cpu = get_cpu();
    7.17 -		struct tss_struct *tss = &per_cpu(init_tss, cpu);
    7.18 -
    7.19 +		physdev_op_t op = { 0 };
    7.20 +		op.cmd = PHYSDEVOP_SET_IOBITMAP;
    7.21 +		HYPERVISOR_physdev_op(&op);
    7.22  		kfree(t->io_bitmap_ptr);
    7.23  		t->io_bitmap_ptr = NULL;
    7.24 -		/*
    7.25 -		 * Careful, clear this in the TSS too:
    7.26 -		 */
    7.27 -		memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
    7.28 -		t->io_bitmap_max = 0;
    7.29 -		tss->io_bitmap_owner = NULL;
    7.30 -		tss->io_bitmap_max = 0;
    7.31 -		tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
    7.32 -		put_cpu();
    7.33  	}
    7.34  }
    7.35  
    7.36 @@ -412,37 +403,6 @@ int dump_task_regs(struct task_struct *t
    7.37  	return 1;
    7.38  }
    7.39  
    7.40 -static inline void
    7.41 -handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
    7.42 -{
    7.43 -	if (!next->io_bitmap_ptr) {
    7.44 -		/*
    7.45 -		 * Disable the bitmap via an invalid offset. We still cache
    7.46 -		 * the previous bitmap owner and the IO bitmap contents:
    7.47 -		 */
    7.48 -		tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
    7.49 -		return;
    7.50 -	}
    7.51 -	if (likely(next == tss->io_bitmap_owner)) {
    7.52 -		/*
    7.53 -		 * Previous owner of the bitmap (hence the bitmap content)
    7.54 -		 * matches the next task, we dont have to do anything but
    7.55 -		 * to set a valid offset in the TSS:
    7.56 -		 */
    7.57 -		tss->io_bitmap_base = IO_BITMAP_OFFSET;
    7.58 -		return;
    7.59 -	}
    7.60 -	/*
    7.61 -	 * Lazy TSS's I/O bitmap copy. We set an invalid offset here
    7.62 -	 * and we let the task to get a GPF in case an I/O instruction
    7.63 -	 * is performed.  The handler of the GPF will verify that the
    7.64 -	 * faulting task has a valid I/O bitmap and, it true, does the
    7.65 -	 * real copy and restart the instruction.  This will save us
    7.66 -	 * redundant copies when the currently switched task does not
    7.67 -	 * perform any I/O during its timeslice.
    7.68 -	 */
    7.69 -	tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
    7.70 -}
    7.71  /*
    7.72   * This special macro can be used to load a debugging register
    7.73   */
    7.74 @@ -483,7 +443,7 @@ struct task_struct fastcall * __switch_t
    7.75  				 *next = &next_p->thread;
    7.76  	int cpu = smp_processor_id();
    7.77  	struct tss_struct *tss = &per_cpu(init_tss, cpu);
    7.78 -	dom0_op_t op;
    7.79 +	physdev_op_t iopl_op, iobmp_op;
    7.80  
    7.81          /* NB. No need to disable interrupts as already done in sched.c */
    7.82          /* __cli(); */
    7.83 @@ -540,12 +500,22 @@ struct task_struct fastcall * __switch_t
    7.84  	C(0); C(1); C(2);
    7.85  #undef C
    7.86  
    7.87 -	if (xen_start_info.flags & SIF_PRIVILEGED) {
    7.88 -		op.cmd           = DOM0_IOPL;
    7.89 -		op.u.iopl.domain = DOMID_SELF;
    7.90 -		op.u.iopl.iopl   = next->io_pl;
    7.91 -		op.interface_version = DOM0_INTERFACE_VERSION;
    7.92 -		queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op);
    7.93 +	if (unlikely(prev->io_pl != next->io_pl)) {
    7.94 +		iopl_op.cmd             = PHYSDEVOP_SET_IOPL;
    7.95 +		iopl_op.u.set_iopl.iopl = next->io_pl;
    7.96 +		queue_multicall1(__HYPERVISOR_physdev_op,
    7.97 +				(unsigned long)&iopl_op);
    7.98 +	}
    7.99 +
   7.100 +	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
   7.101 +		iobmp_op.cmd                     =
   7.102 +			PHYSDEVOP_SET_IOBITMAP;
   7.103 +		iobmp_op.u.set_iobitmap.bitmap   =
   7.104 +			(unsigned long)next->io_bitmap_ptr;
   7.105 +		iobmp_op.u.set_iobitmap.nr_ports =
   7.106 +			next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
   7.107 +		queue_multicall1(__HYPERVISOR_physdev_op,
   7.108 +				(unsigned long)&iobmp_op);
   7.109  	}
   7.110  
   7.111  	/* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */
   7.112 @@ -573,9 +543,6 @@ struct task_struct fastcall * __switch_t
   7.113  		loaddebug(next, 7);
   7.114  	}
   7.115  
   7.116 -	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
   7.117 -		handle_io_bitmap(next, tss);
   7.118 -
   7.119  	return prev_p;
   7.120  }
   7.121  
     8.1 --- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c	Wed Mar 23 09:08:07 2005 +0000
     8.2 +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c	Wed Mar 23 13:35:24 2005 +0000
     8.3 @@ -52,6 +52,7 @@
     8.4  #include <asm/ist.h>
     8.5  #include <asm/io.h>
     8.6  #include <asm-xen/hypervisor.h>
     8.7 +#include <asm-xen/xen-public/physdev.h>
     8.8  #include "setup_arch_pre.h"
     8.9  #include <bios_ebda.h>
    8.10  
    8.11 @@ -1399,9 +1400,9 @@ static void set_mca_bus(int x) { }
    8.12   */
    8.13  void __init setup_arch(char **cmdline_p)
    8.14  {
    8.15 -        int i,j;
    8.16 -
    8.17 -        unsigned long max_low_pfn;
    8.18 +	int i,j;
    8.19 +	physdev_op_t op;
    8.20 +	unsigned long max_low_pfn;
    8.21  
    8.22  	/* Force a quick death if the kernel panics. */
    8.23  	extern int panic_timeout;
    8.24 @@ -1585,16 +1586,9 @@ void __init setup_arch(char **cmdline_p)
    8.25  
    8.26  	register_memory();
    8.27  
    8.28 -	/* If we are a privileged guest OS then we should request IO privs. */
    8.29 -	if (xen_start_info.flags & SIF_PRIVILEGED) {
    8.30 -		dom0_op_t op;
    8.31 -		op.cmd           = DOM0_IOPL;
    8.32 -		op.u.iopl.domain = DOMID_SELF;
    8.33 -		op.u.iopl.iopl   = 1;
    8.34 -		if (HYPERVISOR_dom0_op(&op) != 0)
    8.35 -			panic("Unable to obtain IOPL, despite SIF_PRIVILEGED");
    8.36 -		current->thread.io_pl = 1;
    8.37 -	}
    8.38 +	op.cmd             = PHYSDEVOP_SET_IOPL;
    8.39 +	op.u.set_iopl.iopl = current->thread.io_pl = 1;
    8.40 +	HYPERVISOR_physdev_op(&op);
    8.41  
    8.42  	if (xen_start_info.flags & SIF_INITDOMAIN) {
    8.43  		if (!(xen_start_info.flags & SIF_PRIVILEGED))
     9.1 --- a/xen/arch/x86/dom0_ops.c	Wed Mar 23 09:08:07 2005 +0000
     9.2 +++ b/xen/arch/x86/dom0_ops.c	Wed Mar 23 13:35:24 2005 +0000
     9.3 @@ -136,14 +136,68 @@ long arch_do_dom0_op(dom0_op_t *op, dom0
     9.4      }
     9.5      break;
     9.6  
     9.7 -    case DOM0_IOPL:
     9.8 +    case DOM0_IOPL_PERMISSION:
     9.9      {
    9.10 +        struct domain *d;
    9.11 +
    9.12 +        ret = -EINVAL;
    9.13 +        if ( op->u.iopl_permission.max_iopl > 3 )
    9.14 +            break;
    9.15 +
    9.16 +        ret = -ESRCH;
    9.17 +        if ( unlikely((d = find_domain_by_id(
    9.18 +            op->u.iopl_permission.domain)) == NULL) )
    9.19 +            break;
    9.20 +
    9.21 +        ret = 0;
    9.22 +        d->arch.max_iopl = op->u.iopl_permission.max_iopl;
    9.23 +
    9.24 +        put_domain(d);
    9.25 +    }
    9.26 +    break;
    9.27 +
    9.28 +    case DOM0_IOPORT_PERMISSION:
    9.29 +    {
    9.30 +        struct domain *d;
    9.31 +        unsigned int fp = op->u.ioport_permission.first_port;
    9.32 +        unsigned int np = op->u.ioport_permission.nr_ports;
    9.33 +        unsigned int p;
    9.34 +
    9.35          ret = -EINVAL;
    9.36 -        if ( op->u.iopl.domain == DOMID_SELF )
    9.37 +        /* Exclusive end fp+np may equal 65536; written so the sum cannot wrap. */
    9.38 +        if ( (fp > 65536) || (np > (65536 - fp)) )
    9.39 +            break;
    9.40 +
    9.41 +        ret = -ESRCH;
    9.42 +        if ( unlikely((d = find_domain_by_id(
    9.43 +            op->u.ioport_permission.domain)) == NULL) )
    9.44 +            break;
    9.45 +
    9.46 +        /* Allocate the mask on first use, with every port initially denied. */
    9.47 +        ret = -ENOMEM;
    9.48 +        if ( d->arch.iobmp_mask == NULL )
    9.49          {
    9.50 -            current->arch.iopl = op->u.iopl.iopl & 3;
    9.51 -            ret = 0;
    9.52 +            if ( (d->arch.iobmp_mask = xmalloc_array(
    9.53 +                u8, IOBMP_BYTES)) == NULL )
    9.54 +            {
    9.55 +                /* Drop the reference taken by find_domain_by_id(). */
    9.56 +                put_domain(d);
    9.57 +                break;
    9.58 +            }
    9.59 +            memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
    9.60          }
    9.61 +
    9.62 +        /* A clear bit grants access to the port; a set bit denies it. */
    9.63 +        ret = 0;
    9.64 +        for ( p = fp; p < (fp + np); p++ )
    9.65 +        {
    9.66 +            if ( op->u.ioport_permission.allow_access )
    9.67 +                clear_bit(p, d->arch.iobmp_mask);
    9.68 +            else
    9.69 +                set_bit(p, d->arch.iobmp_mask);
    9.70 +        }
    9.71 +
    9.72 +        put_domain(d);
    9.73      }
    9.74      break;
    9.75  
    10.1 --- a/xen/arch/x86/domain.c	Wed Mar 23 09:08:07 2005 +0000
    10.2 +++ b/xen/arch/x86/domain.c	Wed Mar 23 13:35:24 2005 +0000
    10.3 @@ -735,7 +735,6 @@ void context_switch(struct exec_domain *
    10.4  {
    10.5      struct tss_struct *tss = init_tss + smp_processor_id();
    10.6      execution_context_t *stack_ec = get_execution_context();
    10.7 -    int i;
    10.8  
    10.9      __cli();
   10.10  
   10.11 @@ -767,57 +766,33 @@ void context_switch(struct exec_domain *
   10.12              loaddebug(&next_p->arch, 7);
   10.13          }
   10.14  
   10.15 -        if ( VMX_DOMAIN(next_p) )
   10.16 +        if ( !VMX_DOMAIN(next_p) )
   10.17          {
   10.18 -            write_ptbase(next_p);
   10.19 -            set_current(next_p);
   10.20 -            __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
   10.21 -            __sti();
   10.22 -            goto done;
   10.23 -        }
   10.24 - 
   10.25 -        SET_FAST_TRAP(&next_p->arch);
   10.26 +            SET_FAST_TRAP(&next_p->arch);
   10.27  
   10.28  #ifdef __i386__
   10.29 -        /* Switch the kernel ring-1 stack. */
   10.30 -        tss->esp1 = next_p->arch.kernel_sp;
   10.31 -        tss->ss1  = next_p->arch.kernel_ss;
   10.32 +            /* Switch the kernel ring-1 stack. */
   10.33 +            tss->esp1 = next_p->arch.kernel_sp;
   10.34 +            tss->ss1  = next_p->arch.kernel_ss;
   10.35  #endif
   10.36 +        }
   10.37  
   10.38          /* Switch page tables. */
   10.39          write_ptbase(next_p);
   10.40      }
   10.41  
   10.42 -    if ( unlikely(prev_p->arch.io_bitmap != NULL) )
   10.43 -    {
   10.44 -        for ( i = 0; i < sizeof(prev_p->arch.io_bitmap_sel) * 8; i++ )
   10.45 -            if ( !test_bit(i, &prev_p->arch.io_bitmap_sel) )
   10.46 -                memset(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
   10.47 -                       ~0U, IOBMP_BYTES_PER_SELBIT);
   10.48 -        tss->bitmap = IOBMP_INVALID_OFFSET;
   10.49 -    }
   10.50 -
   10.51 -    if ( unlikely(next_p->arch.io_bitmap != NULL) )
   10.52 -    {
   10.53 -        for ( i = 0; i < sizeof(next_p->arch.io_bitmap_sel) * 8; i++ )
   10.54 -            if ( !test_bit(i, &next_p->arch.io_bitmap_sel) )
   10.55 -                memcpy(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
   10.56 -                       &next_p->arch.io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
   10.57 -                       IOBMP_BYTES_PER_SELBIT);
   10.58 -        tss->bitmap = IOBMP_OFFSET;
   10.59 -    }
   10.60 -
   10.61      set_current(next_p);
   10.62  
   10.63 -    /* Switch GDT and LDT. */
   10.64      __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
   10.65 -    load_LDT(next_p);
   10.66  
   10.67      __sti();
   10.68  
   10.69 -    switch_segments(stack_ec, prev_p, next_p);
   10.70 +    if ( !VMX_DOMAIN(next_p) )
   10.71 +    {
   10.72 +        load_LDT(next_p);
   10.73 +        switch_segments(stack_ec, prev_p, next_p);
   10.74 +    }
   10.75  
   10.76 - done:
   10.77      /*
   10.78       * We do this late on because it doesn't need to be protected by the
   10.79       * schedule_lock, and because we want this to be the very last use of
    11.1 --- a/xen/arch/x86/setup.c	Wed Mar 23 09:08:07 2005 +0000
    11.2 +++ b/xen/arch/x86/setup.c	Wed Mar 23 13:35:24 2005 +0000
    11.3 @@ -312,7 +312,6 @@ void __init cpu_init(void)
    11.4  
    11.5      /* Set up and load the per-CPU TSS and LDT. */
    11.6      t->bitmap = IOBMP_INVALID_OFFSET;
    11.7 -    memset(t->io_bitmap, ~0, sizeof(t->io_bitmap));
    11.8  #if defined(__i386__)
    11.9      t->ss0  = __HYPERVISOR_DS;
   11.10      t->esp0 = get_stack_bottom();
    12.1 --- a/xen/arch/x86/traps.c	Wed Mar 23 09:08:07 2005 +0000
    12.2 +++ b/xen/arch/x86/traps.c	Wed Mar 23 13:35:24 2005 +0000
    12.3 @@ -379,15 +379,50 @@ long do_fpu_taskswitch(int set)
    12.4      return 0;
    12.5  }
    12.6  
    12.7 -static inline int user_io_okay(
    12.8 +/* Has the guest requested sufficient permission for this I/O access? */
    12.9 +static inline int guest_io_okay(
   12.10 +    unsigned int port, unsigned int bytes,
   12.11 +    struct exec_domain *ed, struct xen_regs *regs)
   12.12 +{
   12.13 +    u16 x;
   12.14 +    if ( ed->arch.iopl >= (KERNEL_MODE(ed, regs) ? 1 : 3) )
   12.15 +        return 1;
   12.16 +    if ( (ed->arch.iobmp_limit > (port + bytes)) &&
   12.17 +         (__get_user(x, (u16 *)(ed->arch.iobmp+(port>>3))) == 0) &&
   12.18 +         ((x & (((1<<bytes)-1) << (port&7))) == 0) )
   12.19 +        return 1;
   12.20 +    return 0;
   12.21 +}
   12.22 +
   12.23 +/* Has the administrator granted sufficient permission for this I/O access? */
   12.24 +static inline int admin_io_okay(
   12.25      unsigned int port, unsigned int bytes,
   12.26      struct exec_domain *ed, struct xen_regs *regs)
   12.27  {
   12.28 -    if ( ed->arch.iopl < (KERNEL_MODE(ed, regs) ? 1 : 3) )
   12.29 -        return 0;
   12.30 -    return 1;
   12.31 +    struct domain *d = ed->domain;
   12.32 +    u16 x;
   12.33 +    if ( IS_PRIV(d) || (d->arch.max_iopl >= (KERNEL_MODE(ed, regs) ? 1 : 3)) )
   12.34 +        return 1;
   12.35 +    if ( d->arch.iobmp_mask != NULL )
   12.36 +    {
   12.37 +        x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
   12.38 +        if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
   12.39 +            return 1;
   12.40 +    }
   12.41 +    return 0;
   12.42  }
   12.43  
   12.44 +/* Check admin limits. Silently fail the access if it is disallowed. */
   12.45 +#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
   12.46 +#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
   12.47 +#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
   12.48 +#define outb_user(_v, _p, _d, _r) \
   12.49 +    (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
   12.50 +#define outw_user(_v, _p, _d, _r) \
   12.51 +    (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
   12.52 +#define outl_user(_v, _p, _d, _r) \
   12.53 +    (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
   12.54 +
   12.55  #define insn_fetch(_type, _size, _ptr)          \
   12.56  ({  unsigned long _x;                           \
   12.57      if ( get_user(_x, (_type *)eip) )           \
   12.58 @@ -450,22 +485,22 @@ static int emulate_privileged_op(struct 
   12.59          case 0x6c: /* INSB */
   12.60              op_bytes = 1;
   12.61          case 0x6d: /* INSW/INSL */
   12.62 -            if ( !user_io_okay((u16)regs->edx, op_bytes, ed, regs) )
   12.63 +            if ( !guest_io_okay((u16)regs->edx, op_bytes, ed, regs) )
   12.64                  goto fail;
   12.65              switch ( op_bytes )
   12.66              {
   12.67              case 1:
   12.68 -                data = (u8)inb((u16)regs->edx);
   12.69 +                data = (u8)inb_user((u16)regs->edx, ed, regs);
   12.70                  if ( put_user((u8)data, (u8 *)regs->edi) )
   12.71                      goto write_fault;
   12.72                  break;
   12.73              case 2:
   12.74 -                data = (u16)inw((u16)regs->edx);
   12.75 +                data = (u16)inw_user((u16)regs->edx, ed, regs);
   12.76                  if ( put_user((u16)data, (u16 *)regs->edi) )
   12.77                      goto write_fault;
   12.78                  break;
   12.79              case 4:
   12.80 -                data = (u32)inl((u16)regs->edx);
   12.81 +                data = (u32)inl_user((u16)regs->edx, ed, regs);
   12.82                  if ( put_user((u32)data, (u32 *)regs->edi) )
   12.83                      goto write_fault;
   12.84                  break;
   12.85 @@ -476,24 +511,24 @@ static int emulate_privileged_op(struct 
   12.86          case 0x6e: /* OUTSB */
   12.87              op_bytes = 1;
   12.88          case 0x6f: /* OUTSW/OUTSL */
   12.89 -            if ( !user_io_okay((u16)regs->edx, op_bytes, ed, regs) )
   12.90 +            if ( !guest_io_okay((u16)regs->edx, op_bytes, ed, regs) )
   12.91                  goto fail;
   12.92              switch ( op_bytes )
   12.93              {
   12.94              case 1:
   12.95                  if ( get_user(data, (u8 *)regs->esi) )
   12.96                      goto read_fault;
   12.97 -                outb((u8)data, (u16)regs->edx);
   12.98 +                outb_user((u8)data, (u16)regs->edx, ed, regs);
   12.99                  break;
  12.100              case 2:
  12.101                  if ( get_user(data, (u16 *)regs->esi) )
  12.102                      goto read_fault;
  12.103 -                outw((u16)data, (u16)regs->edx);
  12.104 +                outw_user((u16)data, (u16)regs->edx, ed, regs);
  12.105                  break;
  12.106              case 4:
  12.107                  if ( get_user(data, (u32 *)regs->esi) )
  12.108                      goto read_fault;
  12.109 -                outl((u32)data, (u16)regs->edx);
  12.110 +                outl_user((u32)data, (u16)regs->edx, ed, regs);
  12.111                  break;
  12.112              }
  12.113              regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
  12.114 @@ -518,20 +553,20 @@ static int emulate_privileged_op(struct 
  12.115      case 0xe5: /* IN imm8,%eax */
  12.116          port = insn_fetch(u8, 1, eip);
  12.117      exec_in:
  12.118 -        if ( !user_io_okay(port, op_bytes, ed, regs) )
  12.119 +        if ( !guest_io_okay(port, op_bytes, ed, regs) )
  12.120              goto fail;
  12.121          switch ( op_bytes )
  12.122          {
  12.123          case 1:
  12.124              regs->eax &= ~0xffUL;
  12.125 -            regs->eax |= (u8)inb(port);
  12.126 +            regs->eax |= (u8)inb_user(port, ed, regs);
  12.127              break;
  12.128          case 2:
  12.129              regs->eax &= ~0xffffUL;
  12.130 -            regs->eax |= (u16)inw(port);
  12.131 +            regs->eax |= (u16)inw_user(port, ed, regs);
  12.132              break;
  12.133          case 4:
  12.134 -            regs->eax = (u32)inl(port);
  12.135 +            regs->eax = (u32)inl_user(port, ed, regs);
  12.136              break;
  12.137          }
  12.138          goto done;
  12.139 @@ -547,18 +582,18 @@ static int emulate_privileged_op(struct 
  12.140      case 0xe7: /* OUT %eax,imm8 */
  12.141          port = insn_fetch(u8, 1, eip);
  12.142      exec_out:
  12.143 -        if ( !user_io_okay(port, op_bytes, ed, regs) )
  12.144 +        if ( !guest_io_okay(port, op_bytes, ed, regs) )
  12.145              goto fail;
  12.146          switch ( op_bytes )
  12.147          {
  12.148          case 1:
  12.149 -            outb((u8)regs->eax, port);
  12.150 +            outb_user((u8)regs->eax, port, ed, regs);
  12.151              break;
  12.152          case 2:
  12.153 -            outw((u16)regs->eax, port);
  12.154 +            outw_user((u16)regs->eax, port, ed, regs);
  12.155              break;
  12.156          case 4:
  12.157 -            outl((u32)regs->eax, port);
  12.158 +            outl_user((u32)regs->eax, port, ed, regs);
  12.159              break;
  12.160          }
  12.161          goto done;
    13.1 --- a/xen/common/domain.c	Wed Mar 23 09:08:07 2005 +0000
    13.2 +++ b/xen/common/domain.c	Wed Mar 23 13:35:24 2005 +0000
    13.3 @@ -129,6 +129,12 @@ struct domain *find_last_domain(void)
    13.4  }
    13.5  
    13.6  
    13.7 +#ifndef CONFIG_IA64
    13.8 +extern void physdev_destroy_state(struct domain *d);
    13.9 +#else
   13.10 +#define physdev_destroy_state(_d) ((void)0)
   13.11 +#endif
   13.12 +
   13.13  void domain_kill(struct domain *d)
   13.14  {
   13.15      struct exec_domain *ed;
   13.16 @@ -139,6 +145,7 @@ void domain_kill(struct domain *d)
   13.17          for_each_exec_domain(d, ed)
   13.18              sched_rem_domain(ed);
   13.19          domain_relinquish_memory(d);
   13.20 +        physdev_destroy_state(d);
   13.21          put_domain(d);
   13.22      }
   13.23  }
    14.1 --- a/xen/common/physdev.c	Wed Mar 23 09:08:07 2005 +0000
    14.2 +++ b/xen/common/physdev.c	Wed Mar 23 13:35:24 2005 +0000
    14.3 @@ -69,26 +69,47 @@ typedef struct _phys_dev_st {
    14.4  
    14.5  
    14.6  /* Find a device on a per-domain device list. */
    14.7 -static phys_dev_t *find_pdev(struct domain *p, struct pci_dev *dev)
    14.8 +static phys_dev_t *find_pdev(struct domain *d, struct pci_dev *dev)
    14.9  {
   14.10 -    phys_dev_t *t, *res = NULL;
   14.11 +    phys_dev_t *t;
   14.12 +    list_for_each_entry ( t, &d->pcidev_list, node )
   14.13 +        if ( dev == t->dev )
   14.14 +            return t;
   14.15 +    return NULL;
   14.16 +}
   14.17 +
   14.18 +static int setup_ioport_memory_access(struct domain *d, struct pci_dev *pdev)
   14.19 +{
   14.20 +    struct resource *r;
   14.21 +    int i, j;
   14.22  
   14.23 -    list_for_each_entry ( t, &p->pcidev_list, node )
   14.24 +    if ( d->arch.iobmp_mask == NULL )
   14.25      {
   14.26 -        if ( dev == t->dev )
   14.27 +        if ( (d->arch.iobmp_mask = xmalloc_array(u8, IOBMP_BYTES)) == NULL )
   14.28 +            return -ENOMEM;
   14.29 +        memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
   14.30 +    }
   14.31 +
   14.32 +    for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
   14.33 +    {
   14.34 +        r = &pdev->resource[i];         
   14.35 +        if ( r->flags & IORESOURCE_IO )
   14.36          {
   14.37 -            res = t;
   14.38 -            break;
   14.39 +            INFO("Giving domain %u IO resources (%lx - %lx) "
   14.40 +                 "for device %s\n", d->id, r->start, r->end, pdev->slot_name);
   14.41 +            for ( j = r->start; j < r->end + 1; j++ )
   14.42 +                clear_bit(j, d->arch.iobmp_mask);
   14.43          }
   14.44      }
   14.45 -    return res;
   14.46 +
   14.47 +    return 0;
   14.48  }
   14.49  
   14.50  /* Add a device to a per-domain device-access list. */
   14.51 -static int add_dev_to_task(struct domain *p, struct pci_dev *dev, 
   14.52 -                           int acc)
   14.53 +static int add_dev_to_task(struct domain *d, struct pci_dev *dev, int acc)
   14.54  {
   14.55      phys_dev_t *physdev;
   14.56 +    int         rc;
   14.57      
   14.58      if ( (physdev = xmalloc(phys_dev_t)) == NULL )
   14.59      {
   14.60 @@ -96,104 +117,56 @@ static int add_dev_to_task(struct domain
   14.61          return -ENOMEM;
   14.62      }
   14.63      
   14.64 +    if ( (rc = setup_ioport_memory_access(d, dev)) < 0 )
   14.65 +    {
   14.66 +        xfree(physdev);
   14.67 +        return rc;
   14.68 +    }
   14.69 +
   14.70      physdev->dev = dev;
   14.71      physdev->flags = acc;
   14.72      physdev->state = 0;
   14.73 -    list_add(&physdev->node, &p->pcidev_list);
   14.74 +    list_add(&physdev->node, &d->pcidev_list);
   14.75  
   14.76      if ( acc == ACC_WRITE )
   14.77 -        physdev->owner = p;
   14.78 +        physdev->owner = d;
   14.79  
   14.80      return 0;
   14.81  }
   14.82  
   14.83 -/* Remove a device from a per-domain device-access list. */
   14.84 -static void remove_dev_from_task(struct domain *p, struct pci_dev *dev)
   14.85 -{
   14.86 -    phys_dev_t *physdev = find_pdev(p, dev);
   14.87 -
   14.88 -    if ( physdev == NULL )
   14.89 -        BUG();
   14.90 -    
   14.91 -    list_del(&physdev->node);
   14.92 -
   14.93 -    xfree(physdev);
   14.94 -}
   14.95 -
   14.96 -static int setup_ioport_memory_access(domid_t dom, struct domain* p, 
   14.97 -                                      struct exec_domain* ed,
   14.98 -                                      struct pci_dev *pdev)
   14.99 +void physdev_destroy_state(struct domain *d)
  14.100  {
  14.101 -    struct exec_domain* edc;
  14.102 -    int i, j;
  14.103 -
  14.104 -    /* Now, setup access to the IO ports and memory regions for the device. */
  14.105 -    if ( ed->arch.io_bitmap == NULL )
  14.106 -    {
  14.107 -        if ( (ed->arch.io_bitmap = xmalloc_array(u8, IOBMP_BYTES)) == NULL )
  14.108 -            return -ENOMEM;
  14.109 +    struct list_head *ent;
  14.110  
  14.111 -        memset(ed->arch.io_bitmap, 0xFF, IOBMP_BYTES);
  14.112 -
  14.113 -        ed->arch.io_bitmap_sel = ~0ULL;
  14.114 -
  14.115 -        for_each_exec_domain(p, edc) {
  14.116 -            if (edc == ed)
  14.117 -                continue;
  14.118 -            edc->arch.io_bitmap = ed->arch.io_bitmap;
  14.119 -        }
  14.120 +    if ( d->arch.iobmp_mask != NULL )
  14.121 +    {
  14.122 +        xfree(d->arch.iobmp_mask);
  14.123 +        d->arch.iobmp_mask = NULL;
  14.124      }
  14.125  
  14.126 -    for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
  14.127 +    while ( (ent = d->pcidev_list.next) != &d->pcidev_list )
  14.128      {
  14.129 -        struct resource *r = &pdev->resource[i];
  14.130 -        
  14.131 -        if ( r->flags & IORESOURCE_IO )
  14.132 -        {
  14.133 -            /* Give the domain access to the IO ports it needs.  Currently,
  14.134 -             * this will allow all processes in that domain access to those
  14.135 -             * ports as well.  This will do for now, since driver domains don't
  14.136 -             * run untrusted processes! */
  14.137 -            INFO("Giving domain %u IO resources (%lx - %lx) "
  14.138 -                 "for device %s\n", dom, r->start, r->end, pdev->slot_name);
  14.139 -            for ( j = r->start; j < r->end + 1; j++ )
  14.140 -            {
  14.141 -                clear_bit(j, ed->arch.io_bitmap);
  14.142 -                clear_bit(j / IOBMP_BITS_PER_SELBIT, &ed->arch.io_bitmap_sel);
  14.143 -            }
  14.144 -        }
  14.145 -        /* rights to IO memory regions are checked when the domain maps them */
  14.146 +        list_del(ent);
  14.147 +        xfree(list_entry(ent, phys_dev_t, node));
  14.148      }
  14.149 -
  14.150 -    for_each_exec_domain(p, edc) {
  14.151 -        if (edc == ed)
  14.152 -            continue;
  14.153 -        edc->arch.io_bitmap_sel = ed->arch.io_bitmap_sel;
  14.154 -    }
  14.155 -
  14.156 -    return 0;
  14.157  }
  14.158  
  14.159  /*
  14.160   * physdev_pci_access_modify:
  14.161   * Allow/disallow access to a specific PCI device.  Guests should not be
  14.162   * allowed to see bridge devices as it needlessly complicates things (one
  14.163 - * possible exception to this is the AGP bridge).  If the given device is a
  14.164 - * bridge, then the domain should get access to all the leaf devices below
  14.165 - * that bridge (XXX this is unimplemented!).
  14.166 + * possible exception to this is the AGP bridge).
  14.167   */
  14.168  int physdev_pci_access_modify(domid_t dom, int bus, int dev, int func, 
  14.169                                int enable)
  14.170  {
  14.171      struct domain *p;
  14.172 -    struct exec_domain *ed;
  14.173      struct pci_dev *pdev;
  14.174      phys_dev_t *physdev;
  14.175      int rc = 0;
  14.176 -    int oldacc = -1, allocated_physdev = 0;
  14.177 +    int oldacc = -1;
  14.178  
  14.179 -    if ( !IS_PRIV(current->domain) )
  14.180 -        BUG();
  14.181 +    BUG_ON(!IS_PRIV(current->domain));
  14.182  
  14.183      if ( (bus > PCI_BUSMAX) || (dev > PCI_DEVMAX) || (func > PCI_FUNCMAX) )
  14.184          return -EINVAL;
  14.185 @@ -209,8 +182,6 @@ int physdev_pci_access_modify(domid_t do
  14.186      if ( (p = find_domain_by_id(dom)) == NULL ) 
  14.187          return -ESRCH;
  14.188  
  14.189 -    ed = p->exec_domain[0];     /* XXX */
  14.190 -
  14.191      /* Make the domain privileged. */
  14.192      set_bit(DF_PHYSDEV, &p->d_flags);
  14.193      /* FIXME: MAW for now make the domain REALLY privileged so that it
  14.194 @@ -222,47 +193,23 @@ int physdev_pci_access_modify(domid_t do
  14.195      {
  14.196          INFO("  dev does not exist\n");
  14.197          rc = -ENODEV;
  14.198 -        goto clear_privilege;
  14.199 +        goto out;
  14.200      }
  14.201      
  14.202 -    if ( (physdev = find_pdev(p, pdev)) != NULL) {
  14.203 -        /* Sevice already on list: update access permissions. */
  14.204 -        oldacc = physdev->flags;
  14.205 -        physdev->flags = ACC_WRITE;
  14.206 -    } else {
  14.207 -        if ( (rc = add_dev_to_task(p, pdev, ACC_WRITE)) < 0)
  14.208 -            goto clear_privilege;
  14.209 -        allocated_physdev = 1;
  14.210 -    }
  14.211 -
  14.212      INFO("  add RW %02x:%02x:%02x\n", pdev->bus->number,
  14.213           PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
  14.214  
  14.215 -    /* Is the device a bridge or cardbus? */
  14.216 -    if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL ) {
  14.217 -        INFO("XXX can't give access to bridge devices yet\n");
  14.218 -        rc = -EPERM;
  14.219 -        goto remove_dev;
  14.220 +    if ( (physdev = find_pdev(p, pdev)) != NULL )
  14.221 +    {
  14.222 +        oldacc = physdev->flags;
  14.223 +        physdev->flags = ACC_WRITE;
  14.224 +    }
  14.225 +    else
  14.226 +    {
  14.227 +        rc = add_dev_to_task(p, pdev, ACC_WRITE);
  14.228      }
  14.229  
  14.230 -    if ( (rc = setup_ioport_memory_access(dom, p, ed, pdev)) < 0 )
  14.231 -        goto remove_dev;
  14.232 -
  14.233 -    put_domain(p);
  14.234 -    return rc;
  14.235 -
  14.236 -remove_dev:
  14.237 -    if (allocated_physdev) {
  14.238 -        /* new device was added - remove it from the list */
  14.239 -        remove_dev_from_task(p, pdev);
  14.240 -    } else {
  14.241 -        /* device already existed - just undo the access changes */
  14.242 -        physdev->flags = oldacc;
  14.243 -    }
  14.244 -    
  14.245 -clear_privilege:
  14.246 -    clear_bit(DF_PHYSDEV, &p->d_flags);
  14.247 -    clear_bit(DF_PRIVILEGED, &p->d_flags);
  14.248 + out:
  14.249      put_domain(p);
  14.250      return rc;
  14.251  }
  14.252 @@ -308,9 +255,8 @@ int domain_iomem_in_pfn(struct domain *p
  14.253  }
  14.254  
  14.255  /* check if a domain has general access to a device */
  14.256 -inline static int check_dev_acc (struct domain *p,
  14.257 -                                 int bus, int dev, int func,
  14.258 -                                 phys_dev_t **pdev) 
  14.259 +static inline int check_dev_acc(
  14.260 +    struct domain *d, int bus, int dev, int func, phys_dev_t **pdev)
  14.261  {
  14.262      struct pci_dev *target_dev;
  14.263      phys_dev_t     *target_pdev;
  14.264 @@ -318,10 +264,10 @@ inline static int check_dev_acc (struct 
  14.265  
  14.266      *pdev = NULL;
  14.267  
  14.268 -     if ( !IS_CAPABLE_PHYSDEV(p) )
  14.269 -         return -EPERM; /* no pci access permission */
  14.270 +     if ( !IS_CAPABLE_PHYSDEV(d) )
  14.271 +         return -EPERM;
  14.272  
  14.273 -    if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX )
  14.274 +    if ( (bus > PCI_BUSMAX) || (dev > PCI_DEVMAX) || (func > PCI_FUNCMAX) )
  14.275          return -EINVAL;
  14.276  
  14.277      VERBOSE_INFO("b=%x d=%x f=%x ", bus, dev, func);
  14.278 @@ -336,7 +282,7 @@ inline static int check_dev_acc (struct 
  14.279      }
  14.280  
  14.281      /* check access */
  14.282 -    target_pdev = find_pdev(p, target_dev);
  14.283 +    target_pdev = find_pdev(d, target_dev);
  14.284      if ( !target_pdev )
  14.285      {
  14.286          VERBOSE_INFO("dom has no access to target\n");
  14.287 @@ -748,6 +694,24 @@ long do_physdev_op(physdev_op_t *uop)
  14.288          ret = 0;
  14.289          break;
  14.290  
  14.291 +    case PHYSDEVOP_SET_IOPL:
  14.292 +        ret = -EINVAL;
  14.293 +        if ( op.u.set_iopl.iopl > 3 )
  14.294 +            break;
  14.295 +        ret = 0;
  14.296 +        current->arch.iopl = op.u.set_iopl.iopl;
  14.297 +        break;
  14.298 +
  14.299 +    case PHYSDEVOP_SET_IOBITMAP:
  14.300 +        ret = -EINVAL;
  14.301 +        if ( !access_ok(VERIFY_READ, op.u.set_iobitmap.bitmap, IOBMP_BYTES) ||
  14.302 +             (op.u.set_iobitmap.nr_ports > 65536) )
  14.303 +            break;
  14.304 +        ret = 0;
  14.305 +        current->arch.iobmp       = (u8 *)op.u.set_iobitmap.bitmap;
  14.306 +        current->arch.iobmp_limit = op.u.set_iobitmap.nr_ports;
  14.307 +        break;
  14.308 +
  14.309      default:
  14.310          ret = -EINVAL;
  14.311          break;
    15.1 --- a/xen/include/asm-x86/domain.h	Wed Mar 23 09:08:07 2005 +0000
    15.2 +++ b/xen/include/asm-x86/domain.h	Wed Mar 23 13:35:24 2005 +0000
    15.3 @@ -18,6 +18,10 @@ struct arch_domain
    15.4      l3_pgentry_t *mm_perdomain_l3;
    15.5  #endif
    15.6  
    15.7 +    /* I/O-port access bitmap mask. */
    15.8 +    u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.      */
    15.9 +    int max_iopl;         /* Maximum achievable IOPL. */
   15.10 +
   15.11      /* shadow mode status and controls */
   15.12      unsigned int shadow_mode;  /* flags to control shadow table operation */
   15.13      spinlock_t   shadow_lock;
   15.14 @@ -57,7 +61,6 @@ struct arch_exec_domain
   15.15  
   15.16      /* general user-visible register state */
   15.17      execution_context_t user_ctxt;
   15.18 -    unsigned int iopl;
   15.19  
   15.20      void (*schedule_tail) (struct exec_domain *);
   15.21  
   15.22 @@ -81,9 +84,9 @@ struct arch_exec_domain
   15.23      struct trap_bounce trap_bounce;
   15.24  
   15.25      /* I/O-port access bitmap. */
   15.26 -    u64 io_bitmap_sel; /* Selector to tell us which part of the IO bitmap are
   15.27 -                        * "interesting" (i.e. have clear bits) */
   15.28 -    u8 *io_bitmap; /* Pointer to task's IO bitmap or NULL */
   15.29 +    u8 *iobmp;        /* Guest kernel virtual address of the bitmap. */
   15.30 +    int iobmp_limit;  /* Number of ports represented in the bitmap.  */
   15.31 +    int iopl;         /* Current IOPL for this VCPU. */
   15.32  
   15.33      /* Trap info. */
   15.34  #ifdef ARCH_HAS_FAST_TRAP
    16.1 --- a/xen/include/asm-x86/processor.h	Wed Mar 23 09:08:07 2005 +0000
    16.2 +++ b/xen/include/asm-x86/processor.h	Wed Mar 23 13:35:24 2005 +0000
    16.3 @@ -328,9 +328,6 @@ static inline void clear_in_cr4 (unsigne
    16.4  } while (0)
    16.5  
    16.6  #define IOBMP_BYTES             8192
    16.7 -#define IOBMP_BYTES_PER_SELBIT  (IOBMP_BYTES / 64)
    16.8 -#define IOBMP_BITS_PER_SELBIT   (IOBMP_BYTES_PER_SELBIT * 8)
    16.9 -#define IOBMP_OFFSET            offsetof(struct tss_struct, io_bitmap)
   16.10  #define IOBMP_INVALID_OFFSET    0x8000
   16.11  
   16.12  struct i387_state {
   16.13 @@ -372,9 +369,8 @@ struct tss_struct {
   16.14      u16 trace;
   16.15  #endif
   16.16      u16 bitmap;
   16.17 -    u8  io_bitmap[IOBMP_BYTES+1];
   16.18 -    /* Pads the TSS to be cacheline-aligned (total size is 0x2080). */
   16.19 -    u8 __cacheline_filler[23];
   16.20 +    /* Pads the TSS to be cacheline-aligned (total size is 0x80). */
   16.21 +    u8 __cacheline_filler[24];
   16.22  } __cacheline_aligned PACKED;
   16.23  
   16.24  #define IDT_ENTRIES 256
    17.1 --- a/xen/include/public/dom0_ops.h	Wed Mar 23 09:08:07 2005 +0000
    17.2 +++ b/xen/include/public/dom0_ops.h	Wed Mar 23 13:35:24 2005 +0000
    17.3 @@ -19,7 +19,7 @@
    17.4   * This makes sure that old versions of dom0 tools will stop working in a
    17.5   * well-defined way (rather than crashing the machine, for instance).
    17.6   */
    17.7 -#define DOM0_INTERFACE_VERSION   0xAAAA1001
    17.8 +#define DOM0_INTERFACE_VERSION   0xAAAA1002
    17.9  
   17.10  /************************************************************************/
   17.11  
   17.12 @@ -120,13 +120,6 @@ typedef struct {
   17.13      MEMORY_PADDING;
   17.14  } PACKED dom0_setdomaininfo_t;              /* 16 bytes */
   17.15  
   17.16 -#define DOM0_IOPL             14
   17.17 -typedef struct {
   17.18 -    domid_t domain;                   /*  0 */
   17.19 -    u16     __pad;
   17.20 -    u32     iopl;                     /*  4 */
   17.21 -} PACKED dom0_iopl_t; /* 8 bytes */
   17.22 -
   17.23  #define DOM0_MSR              15
   17.24  typedef struct {
   17.25      /* IN variables. */
   17.26 @@ -414,6 +407,20 @@ typedef struct {
   17.27      u32     _pad0;
   17.28  } PACKED dom0_microcode_t; /* 16 bytes */
   17.29  
   17.30 +#define DOM0_IOPL_PERMISSION     36
   17.31 +typedef struct {
   17.32 +    domid_t domain;                   /* 0: domain to be affected */
   17.33 +    u16     max_iopl;                 /* 2: new effective IOPL limit */
   17.34 +} PACKED dom0_iopl_permission_t; /* 4 bytes */
   17.35 +
   17.36 +#define DOM0_IOPORT_PERMISSION   37
   17.37 +typedef struct {
   17.38 +    domid_t domain;                   /* 0: domain to be affected */
   17.39 +    u16     first_port;               /* 2: first port in range */
   17.40 +    u16     nr_ports;                 /* 4: size of port range */
   17.41 +    u16     allow_access;             /* 6: allow or deny access to range? */
   17.42 +} PACKED dom0_ioport_permission_t; /* 8 bytes */
   17.43 +
   17.44  typedef struct {
   17.45      u32 cmd;                          /* 0 */
   17.46      u32 interface_version;            /* 4 */ /* DOM0_INTERFACE_VERSION */
   17.47 @@ -429,7 +436,6 @@ typedef struct {
   17.48          dom0_setdomaininfo_t     setdomaininfo;
   17.49          dom0_getdomaininfo_t     getdomaininfo;
   17.50          dom0_getpageframeinfo_t  getpageframeinfo;
   17.51 -        dom0_iopl_t              iopl;
   17.52  	dom0_msr_t               msr;
   17.53  	dom0_debug_t             debug;
   17.54  	dom0_settime_t           settime;
   17.55 @@ -449,6 +455,8 @@ typedef struct {
   17.56          dom0_read_memtype_t      read_memtype;
   17.57          dom0_perfccontrol_t      perfccontrol;
   17.58          dom0_microcode_t         microcode;
   17.59 +        dom0_iopl_permission_t   iopl_permission;
   17.60 +        dom0_ioport_permission_t ioport_permission;
   17.61      } PACKED u;
   17.62  } PACKED dom0_op_t; /* 80 bytes */
   17.63  
    18.1 --- a/xen/include/public/physdev.h	Wed Mar 23 09:08:07 2005 +0000
    18.2 +++ b/xen/include/public/physdev.h	Wed Mar 23 13:35:24 2005 +0000
    18.3 @@ -15,6 +15,8 @@
    18.4  #define PHYSDEVOP_PCI_PROBE_ROOT_BUSES  3
    18.5  #define PHYSDEVOP_IRQ_UNMASK_NOTIFY     4
    18.6  #define PHYSDEVOP_IRQ_STATUS_QUERY      5
    18.7 +#define PHYSDEVOP_SET_IOPL              6
    18.8 +#define PHYSDEVOP_SET_IOBITMAP          7
    18.9  
   18.10  /* Read from PCI configuration space. */
   18.11  typedef struct {
   18.12 @@ -62,6 +64,19 @@ typedef struct {
   18.13      u32 flags;                        /*  4 */
   18.14  } PACKED physdevop_irq_status_query_t; /* 8 bytes */
   18.15  
   18.16 +typedef struct {
   18.17 +    /* IN */
   18.18 +    u32 iopl;                         /*  0 */
   18.19 +} PACKED physdevop_set_iopl_t; /* 4 bytes */
   18.20 +
   18.21 +typedef struct {
   18.22 +    /* IN */
   18.23 +    memory_t bitmap;                  /*  0 */
   18.24 +    MEMORY_PADDING;
   18.25 +    u32      nr_ports;                /*  8 */
   18.26 +    u32      __pad0;                  /* 12 */
   18.27 +} PACKED physdevop_set_iobitmap_t; /* 16 bytes */
   18.28 +
   18.29  typedef struct _physdev_op_st 
   18.30  {
   18.31      u32 cmd;                          /*  0 */
   18.32 @@ -72,6 +87,8 @@ typedef struct _physdev_op_st
   18.33          physdevop_pci_initialise_device_t pci_initialise_device;
   18.34          physdevop_pci_probe_root_buses_t  pci_probe_root_buses;
   18.35          physdevop_irq_status_query_t      irq_status_query;
   18.36 +        physdevop_set_iopl_t              set_iopl;
   18.37 +        physdevop_set_iobitmap_t          set_iobitmap;
   18.38          u8                                __dummy[32];
   18.39      } PACKED u;
   18.40  } PACKED physdev_op_t; /* 40 bytes */