direct-io.hg
changeset 4275:989b998fd8ca
bitkeeper revision 1.1236.1.117 (4241709cmfgF-94U74o-PmIJUmoksQ)
Fix iopl/iobitmap interfaces. Control tools can set per-domain
access limits via DOM0_IOPL_PERMISSION/DOM0_IOPORT_PERMISSION.
Guests can set current permissions per VCPU via physdev ops.
These will always succeed -- checking against admin-set limits is
done at access time.
Signed-off-by: Keir Fraser <keir@xensource.com>
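
For reference, a minimal sketch (not part of this changeset) of how a guest kernel might issue the new per-VCPU physdev operations. It assumes only the physdev_op_t layout and the HYPERVISOR_physdev_op() wrapper already used in the patched files below; the function name is illustrative.

    #include <asm-xen/xen-public/physdev.h>

    /* Request IOPL 1 and register an 8192-byte I/O bitmap for this VCPU. */
    static void example_set_io_permissions(unsigned long *bitmap)
    {
        physdev_op_t op;

        op.cmd = PHYSDEVOP_SET_IOPL;
        op.u.set_iopl.iopl = 1;
        HYPERVISOR_physdev_op(&op);   /* always succeeds; limits are checked at access time */

        op.cmd = PHYSDEVOP_SET_IOBITMAP;
        op.u.set_iobitmap.bitmap   = (unsigned long)bitmap;
        op.u.set_iobitmap.nr_ports = 65536;
        HYPERVISOR_physdev_op(&op);
    }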
line diff
--- a/.rootkeys Wed Mar 23 09:08:07 2005 +0000
+++ b/.rootkeys Wed Mar 23 13:35:24 2005 +0000
@@ -145,6 +145,7 @@ 3e5a4e65lWzkiPXsZdzPt2RNnJGG1g linux-2.4
 3e5a4e65_hqfuxtGG8IUy6wRM86Ecg linux-2.4.29-xen-sparse/arch/xen/kernel/entry.S
 3e5a4e65Hy_1iUvMTPsNqGNXd9uFpg linux-2.4.29-xen-sparse/arch/xen/kernel/head.S
 3e5a4e65RMGcuA-HCn3-wNx3fFQwdg linux-2.4.29-xen-sparse/arch/xen/kernel/i386_ksyms.c
+4241709bNBs1q4Ss32YW0CyFVOGhEg linux-2.4.29-xen-sparse/arch/xen/kernel/ioport.c
 3e5a4e653U6cELGv528IxOLHvCq8iA linux-2.4.29-xen-sparse/arch/xen/kernel/irq.c
 3e5a4e65muT6SU3ck47IP87Q7Ti5hA linux-2.4.29-xen-sparse/arch/xen/kernel/ldt.c
 4051db95N9N99FjsRwi49YKUNHWI8A linux-2.4.29-xen-sparse/arch/xen/kernel/pci-pc.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/ioport.c Wed Mar 23 13:35:24 2005 +0000
@@ -0,0 +1,43 @@
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <asm-xen/xen-public/physdev.h>
+
+asmlinkage long sys_iopl(unsigned int new_io_pl)
+{
+    unsigned int old_io_pl = current->thread.io_pl;
+    physdev_op_t op;
+
+    if (new_io_pl > 3)
+        return -EINVAL;
+
+    /* Need "raw I/O" privileges for direct port access. */
+    if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
+        return -EPERM;
+
+    /* Maintain OS privileges even if user attempts to relinquish them. */
+    if (new_io_pl == 0)
+        new_io_pl = 1;
+
+    /* Change our version of the privilege levels. */
+    current->thread.io_pl = new_io_pl;
+
+    /* Force the change at ring 0. */
+    op.cmd = PHYSDEVOP_SET_IOPL;
+    op.u.set_iopl.iopl = new_io_pl;
+    HYPERVISOR_physdev_op(&op);
+
+    return 0;
+}
+
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+    return turn_on ? sys_iopl(3) : 0;
+}
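
As context for the syscalls added above, a typical userspace caller (unchanged by this patch) might look like the following; the port choice is purely illustrative.

    #include <sys/io.h>
    #include <stdio.h>

    int main(void)
    {
        /* Request access to the legacy RTC ports 0x70-0x71; must run as root. */
        if (ioperm(0x70, 2, 1) != 0) {
            perror("ioperm");
            return 1;
        }
        outb(0x0a, 0x70);                    /* select RTC status register A */
        printf("RTC status A: %#x\n", inb(0x71));
        return 0;
    }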
--- a/linux-2.4.29-xen-sparse/arch/xen/kernel/process.c Wed Mar 23 09:08:07 2005 +0000
+++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/process.c Wed Mar 23 13:35:24 2005 +0000
@@ -44,7 +44,7 @@
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/multicall.h>
-#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/xen-public/physdev.h>
 
 #include <linux/irq.h>
 
@@ -304,6 +304,7 @@ void dump_thread(struct pt_regs * regs, 
 void fastcall __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
     struct thread_struct *next = &next_p->thread;
+    physdev_op_t op;
 
     __cli();
 
@@ -335,14 +336,12 @@ void fastcall __switch_to(struct task_st
     }
 
     queue_multicall2(__HYPERVISOR_stack_switch, __KERNEL_DS, next->esp0);
-    if ( xen_start_info.flags & SIF_PRIVILEGED )
+
+    if ( prev_p->thread.io_pl != next->io_pl )
     {
-        dom0_op_t op;
-        op.cmd = DOM0_IOPL;
-        op.u.iopl.domain = DOMID_SELF;
-        op.u.iopl.iopl = next->io_pl;
-        op.interface_version = DOM0_INTERFACE_VERSION;
-        queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op);
+        op.cmd = PHYSDEVOP_SET_IOPL;
+        op.u.set_iopl.iopl = next->io_pl;
+        queue_multicall1(__HYPERVISOR_physdev_op, (unsigned long)&op);
     }
 
     /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */
--- a/linux-2.4.29-xen-sparse/arch/xen/kernel/setup.c Wed Mar 23 09:08:07 2005 +0000
+++ b/linux-2.4.29-xen-sparse/arch/xen/kernel/setup.c Wed Mar 23 13:35:24 2005 +0000
@@ -48,7 +48,7 @@ static int errno;
 #include <asm/mmu_context.h>
 #include <asm/ctrl_if.h>
 #include <asm/hypervisor.h>
-#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/xen-public/physdev.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/tqueue.h>
@@ -206,6 +206,7 @@ void __init setup_arch(char **cmdline_p)
     unsigned long bootmap_size, start_pfn, lmax_low_pfn;
     int mem_param;  /* user specified memory size in pages */
     int boot_pfn;   /* low pages available for bootmem */
+    physdev_op_t op;
 
     extern void hypervisor_callback(void);
     extern void failsafe_callback(void);
@@ -416,17 +417,9 @@ void __init setup_arch(char **cmdline_p)
     HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list =
         virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT;
 
-    /* If we are a privileged guest OS then we should request IO privileges. */
-    if ( xen_start_info.flags & SIF_PRIVILEGED )
-    {
-        dom0_op_t op;
-        op.cmd = DOM0_IOPL;
-        op.u.iopl.domain = DOMID_SELF;
-        op.u.iopl.iopl = 1;
-        if( HYPERVISOR_dom0_op(&op) != 0 )
-            panic("Unable to obtain IOPL, despite being SIF_PRIVILEGED");
-        current->thread.io_pl = 1;
-    }
+    op.cmd = PHYSDEVOP_SET_IOPL;
+    op.u.set_iopl.iopl = current->thread.io_pl = 1;
+    HYPERVISOR_physdev_op(&op);
 
     if (xen_start_info.flags & SIF_INITDOMAIN )
     {
--- a/linux-2.4.29-xen-sparse/mkbuildtree Wed Mar 23 09:08:07 2005 +0000
+++ b/linux-2.4.29-xen-sparse/mkbuildtree Wed Mar 23 13:35:24 2005 +0000
@@ -232,7 +232,6 @@ ln -sf ../../../${LINUX_26}/arch/xen/ker
 ln -sf ../../../${LINUX_26}/arch/xen/kernel/gnttab.c
 ln -sf ../../../${LINUX_26}/arch/xen/kernel/reboot.c
 ln -sf ../../../${LINUX_26}/arch/xen/kernel/skbuff.c
-ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/ioport.c
 ln -sf ../../../${LINUX_26}/arch/xen/i386/kernel/pci-dma.c
 
 cd ${AD}/arch/xen/lib
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c Wed Mar 23 09:08:07 2005 +0000
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c Wed Mar 23 13:35:24 2005 +0000
@@ -1,19 +1,110 @@
+/*
+ * linux/arch/i386/kernel/ioport.c
+ *
+ * This contains the io-permission bitmap code - written by obz, with changes
+ * by Linus.
+ */
+
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/ioport.h>
-#include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/stddef.h>
 #include <linux/slab.h>
-#include <asm-xen/xen-public/dom0_ops.h>
+#include <linux/thread_info.h>
+#include <asm-xen/xen-public/physdev.h>
+
+/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
+static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
+{
+    unsigned long mask;
+    unsigned long *bitmap_base = bitmap + (base / BITS_PER_LONG);
+    unsigned int low_index = base & (BITS_PER_LONG-1);
+    int length = low_index + extent;
+
+    if (low_index != 0) {
+        mask = (~0UL << low_index);
+        if (length < BITS_PER_LONG)
+            mask &= ~(~0UL << length);
+        if (new_value)
+            *bitmap_base++ |= mask;
+        else
+            *bitmap_base++ &= ~mask;
+        length -= BITS_PER_LONG;
+    }
+
+    mask = (new_value ? ~0UL : 0UL);
+    while (length >= BITS_PER_LONG) {
+        *bitmap_base++ = mask;
+        length -= BITS_PER_LONG;
+    }
+
+    if (length > 0) {
+        mask = ~(~0UL << length);
+        if (new_value)
+            *bitmap_base++ |= mask;
+        else
+            *bitmap_base++ &= ~mask;
+    }
+}
+
+
+/*
+ * this changes the io permissions bitmap in the current task.
+ */
+asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+    struct thread_struct * t = &current->thread;
+    unsigned long *bitmap;
+    physdev_op_t op;
+
+    if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
+        return -EINVAL;
+    if (turn_on && !capable(CAP_SYS_RAWIO))
+        return -EPERM;
+
+    /*
+     * If it's the first ioperm() call in this thread's lifetime, set the
+     * IO bitmap up. ioperm() is much less timing critical than clone(),
+     * this is why we delay this operation until now:
+     */
+    if (!t->io_bitmap_ptr) {
+        bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
+        if (!bitmap)
+            return -ENOMEM;
+
+        memset(bitmap, 0xff, IO_BITMAP_BYTES);
+        t->io_bitmap_ptr = bitmap;
+
+        op.cmd = PHYSDEVOP_SET_IOBITMAP;
+        op.u.set_iobitmap.bitmap = (unsigned long)bitmap;
+        op.u.set_iobitmap.nr_ports = IO_BITMAP_BITS;
+        HYPERVISOR_physdev_op(&op);
+    }
+
+    set_bitmap(t->io_bitmap_ptr, from, num, !turn_on);
+
+    return 0;
+}
+
+/*
+ * sys_iopl has to be used when you want to access the IO ports
+ * beyond the 0x3ff range: to get the full 65536 ports bitmapped
+ * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ *
+ * Here we just change the eflags value on the stack: we allow
+ * only the super-user to do it. This depends on the stack-layout
+ * on system-call entry - see also fork() and the signal handling
+ * code.
+ */
 
 asmlinkage long sys_iopl(unsigned int new_io_pl)
 {
     unsigned int old_io_pl = current->thread.io_pl;
-    dom0_op_t op;
+    physdev_op_t op;
 
     if (new_io_pl > 3)
         return -EINVAL;
@@ -22,9 +113,6 @@ asmlinkage long sys_iopl(unsigned int ne
     if ((new_io_pl > old_io_pl) && !capable(CAP_SYS_RAWIO))
         return -EPERM;
 
-    if (!(xen_start_info.flags & SIF_PRIVILEGED))
-        return -EPERM;
-
     /* Maintain OS privileges even if user attempts to relinquish them. */
     if (new_io_pl == 0)
         new_io_pl = 1;
@@ -33,19 +121,9 @@ asmlinkage long sys_iopl(unsigned int ne
     current->thread.io_pl = new_io_pl;
 
     /* Force the change at ring 0. */
-    op.cmd = DOM0_IOPL;
-    op.u.iopl.domain = DOMID_SELF;
-    op.u.iopl.iopl = new_io_pl;
-    HYPERVISOR_dom0_op(&op);
+    op.cmd = PHYSDEVOP_SET_IOPL;
+    op.u.set_iopl.iopl = new_io_pl;
+    HYPERVISOR_physdev_op(&op);
 
     return 0;
 }
-
-asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
-{
-#if 0
-    printk(KERN_INFO "ioperm not fully supported - %s\n",
-           turn_on ? "set iopl to 3" : "ignore resource release");
-#endif
-    return turn_on ? sys_iopl(3) : 0;
-}
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c Wed Mar 23 09:08:07 2005 +0000
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c Wed Mar 23 13:35:24 2005 +0000
@@ -47,7 +47,7 @@
 #include <asm/irq.h>
 #include <asm/desc.h>
 #include <asm-xen/multicall.h>
-#include <asm-xen/xen-public/dom0_ops.h>
+#include <asm-xen/xen-public/physdev.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
@@ -228,20 +228,11 @@ void exit_thread(void)
 
     /* The process may have allocated an io port bitmap... nuke it. */
     if (unlikely(NULL != t->io_bitmap_ptr)) {
-        int cpu = get_cpu();
-        struct tss_struct *tss = &per_cpu(init_tss, cpu);
-
+        physdev_op_t op = { 0 };
+        op.cmd = PHYSDEVOP_SET_IOBITMAP;
+        HYPERVISOR_physdev_op(&op);
         kfree(t->io_bitmap_ptr);
         t->io_bitmap_ptr = NULL;
-        /*
-         * Careful, clear this in the TSS too:
-         */
-        memset(tss->io_bitmap, 0xff, tss->io_bitmap_max);
-        t->io_bitmap_max = 0;
-        tss->io_bitmap_owner = NULL;
-        tss->io_bitmap_max = 0;
-        tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
-        put_cpu();
     }
 }
 
@@ -412,37 +403,6 @@ int dump_task_regs(struct task_struct *t
     return 1;
 }
 
-static inline void
-handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss)
-{
-    if (!next->io_bitmap_ptr) {
-        /*
-         * Disable the bitmap via an invalid offset. We still cache
-         * the previous bitmap owner and the IO bitmap contents:
-         */
-        tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
-        return;
-    }
-    if (likely(next == tss->io_bitmap_owner)) {
-        /*
-         * Previous owner of the bitmap (hence the bitmap content)
-         * matches the next task, we dont have to do anything but
-         * to set a valid offset in the TSS:
-         */
-        tss->io_bitmap_base = IO_BITMAP_OFFSET;
-        return;
-    }
-    /*
-     * Lazy TSS's I/O bitmap copy. We set an invalid offset here
-     * and we let the task to get a GPF in case an I/O instruction
-     * is performed. The handler of the GPF will verify that the
-     * faulting task has a valid I/O bitmap and, it true, does the
-     * real copy and restart the instruction. This will save us
-     * redundant copies when the currently switched task does not
-     * perform any I/O during its timeslice.
-     */
-    tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
-}
 /*
  * This special macro can be used to load a debugging register
  */
@@ -483,7 +443,7 @@ struct task_struct fastcall * __switch_t
         *next = &next_p->thread;
     int cpu = smp_processor_id();
     struct tss_struct *tss = &per_cpu(init_tss, cpu);
-    dom0_op_t op;
+    physdev_op_t iopl_op, iobmp_op;
 
     /* NB. No need to disable interrupts as already done in sched.c */
     /* __cli(); */
@@ -540,12 +500,22 @@ struct task_struct fastcall * __switch_t
     C(0); C(1); C(2);
 #undef C
 
-    if (xen_start_info.flags & SIF_PRIVILEGED) {
-        op.cmd           = DOM0_IOPL;
-        op.u.iopl.domain = DOMID_SELF;
-        op.u.iopl.iopl   = next->io_pl;
-        op.interface_version = DOM0_INTERFACE_VERSION;
-        queue_multicall1(__HYPERVISOR_dom0_op, (unsigned long)&op);
+    if (unlikely(prev->io_pl != next->io_pl)) {
+        iopl_op.cmd             = PHYSDEVOP_SET_IOPL;
+        iopl_op.u.set_iopl.iopl = next->io_pl;
+        queue_multicall1(__HYPERVISOR_physdev_op,
+                         (unsigned long)&iopl_op);
+    }
+
+    if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
+        iobmp_op.cmd =
+            PHYSDEVOP_SET_IOBITMAP;
+        iobmp_op.u.set_iobitmap.bitmap =
+            (unsigned long)next->io_bitmap_ptr;
+        iobmp_op.u.set_iobitmap.nr_ports =
+            next->io_bitmap_ptr ? IO_BITMAP_BITS : 0;
+        queue_multicall1(__HYPERVISOR_physdev_op,
+                         (unsigned long)&iobmp_op);
     }
 
     /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */
@@ -573,9 +543,6 @@ struct task_struct fastcall * __switch_t
         loaddebug(next, 7);
     }
 
-    if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr))
-        handle_io_bitmap(next, tss);
-
     return prev_p;
 }
 
--- a/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c Wed Mar 23 09:08:07 2005 +0000
+++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c Wed Mar 23 13:35:24 2005 +0000
@@ -52,6 +52,7 @@
 #include <asm/ist.h>
 #include <asm/io.h>
 #include <asm-xen/hypervisor.h>
+#include <asm-xen/xen-public/physdev.h>
 #include "setup_arch_pre.h"
 #include <bios_ebda.h>
 
@@ -1399,9 +1400,9 @@ static void set_mca_bus(int x) { }
  */
 void __init setup_arch(char **cmdline_p)
 {
-    int i,j;
-
-    unsigned long max_low_pfn;
+    int i,j;
+    physdev_op_t op;
+    unsigned long max_low_pfn;
 
     /* Force a quick death if the kernel panics. */
     extern int panic_timeout;
@@ -1585,16 +1586,9 @@ void __init setup_arch(char **cmdline_p)
 
     register_memory();
 
-    /* If we are a privileged guest OS then we should request IO privs. */
-    if (xen_start_info.flags & SIF_PRIVILEGED) {
-        dom0_op_t op;
-        op.cmd           = DOM0_IOPL;
-        op.u.iopl.domain = DOMID_SELF;
-        op.u.iopl.iopl   = 1;
-        if (HYPERVISOR_dom0_op(&op) != 0)
-            panic("Unable to obtain IOPL, despite SIF_PRIVILEGED");
-        current->thread.io_pl = 1;
-    }
+    op.cmd = PHYSDEVOP_SET_IOPL;
+    op.u.set_iopl.iopl = current->thread.io_pl = 1;
+    HYPERVISOR_physdev_op(&op);
 
     if (xen_start_info.flags & SIF_INITDOMAIN) {
         if (!(xen_start_info.flags & SIF_PRIVILEGED))
--- a/xen/arch/x86/dom0_ops.c Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/arch/x86/dom0_ops.c Wed Mar 23 13:35:24 2005 +0000
@@ -136,14 +136,61 @@ long arch_do_dom0_op(dom0_op_t *op, dom0
     }
     break;
 
-    case DOM0_IOPL:
+    case DOM0_IOPL_PERMISSION:
     {
+        struct domain *d;
+
+        ret = -EINVAL;
+        if ( op->u.iopl_permission.max_iopl > 3 )
+            break;
+
+        ret = -ESRCH;
+        if ( unlikely((d = find_domain_by_id(
+            op->u.iopl_permission.domain)) == NULL) )
+            break;
+
+        ret = 0;
+        d->arch.max_iopl = op->u.iopl_permission.max_iopl;
+
+        put_domain(d);
+    }
+    break;
+
+    case DOM0_IOPORT_PERMISSION:
+    {
+        struct domain *d;
+        unsigned int fp = op->u.ioport_permission.first_port;
+        unsigned int np = op->u.ioport_permission.nr_ports;
+        unsigned int p;
+
         ret = -EINVAL;
-        if ( op->u.iopl.domain == DOMID_SELF )
+        if ( (fp + np) >= 65536 )
+            break;
+
+        ret = -ESRCH;
+        if ( unlikely((d = find_domain_by_id(
+            op->u.ioport_permission.domain)) == NULL) )
+            break;
+
+        ret = -ENOMEM;
+        if ( d->arch.iobmp_mask != NULL )
         {
-            current->arch.iopl = op->u.iopl.iopl & 3;
-            ret = 0;
+            if ( (d->arch.iobmp_mask = xmalloc_array(
+                u8, IOBMP_BYTES)) == NULL )
+                break;
+            memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
         }
+
+        ret = 0;
+        for ( p = fp; p < (fp + np); p++ )
+        {
+            if ( op->u.ioport_permission.allow_access )
+                clear_bit(p, d->arch.iobmp_mask);
+            else
+                set_bit(p, d->arch.iobmp_mask);
+        }
+
+        put_domain(d);
     }
     break;
 
--- a/xen/arch/x86/domain.c Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/arch/x86/domain.c Wed Mar 23 13:35:24 2005 +0000
@@ -735,7 +735,6 @@ void context_switch(struct exec_domain *
 {
     struct tss_struct *tss = init_tss + smp_processor_id();
     execution_context_t *stack_ec = get_execution_context();
-    int i;
 
     __cli();
 
@@ -767,57 +766,33 @@ void context_switch(struct exec_domain *
             loaddebug(&next_p->arch, 7);
         }
 
-        if ( VMX_DOMAIN(next_p) )
+        if ( !VMX_DOMAIN(next_p) )
         {
-            write_ptbase(next_p);
-            set_current(next_p);
-            __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
-            __sti();
-            goto done;
-        }
-
-        SET_FAST_TRAP(&next_p->arch);
+            SET_FAST_TRAP(&next_p->arch);
 
 #ifdef __i386__
-        /* Switch the kernel ring-1 stack. */
-        tss->esp1 = next_p->arch.kernel_sp;
-        tss->ss1  = next_p->arch.kernel_ss;
+            /* Switch the kernel ring-1 stack. */
+            tss->esp1 = next_p->arch.kernel_sp;
+            tss->ss1  = next_p->arch.kernel_ss;
 #endif
+        }
 
         /* Switch page tables. */
         write_ptbase(next_p);
     }
 
-    if ( unlikely(prev_p->arch.io_bitmap != NULL) )
-    {
-        for ( i = 0; i < sizeof(prev_p->arch.io_bitmap_sel) * 8; i++ )
-            if ( !test_bit(i, &prev_p->arch.io_bitmap_sel) )
-                memset(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
-                       ~0U, IOBMP_BYTES_PER_SELBIT);
-        tss->bitmap = IOBMP_INVALID_OFFSET;
-    }
-
-    if ( unlikely(next_p->arch.io_bitmap != NULL) )
-    {
-        for ( i = 0; i < sizeof(next_p->arch.io_bitmap_sel) * 8; i++ )
-            if ( !test_bit(i, &next_p->arch.io_bitmap_sel) )
-                memcpy(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
-                       &next_p->arch.io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
-                       IOBMP_BYTES_PER_SELBIT);
-        tss->bitmap = IOBMP_OFFSET;
-    }
-
     set_current(next_p);
 
-    /* Switch GDT and LDT. */
     __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
-    load_LDT(next_p);
 
     __sti();
 
-    switch_segments(stack_ec, prev_p, next_p);
+    if ( !VMX_DOMAIN(next_p) )
+    {
+        load_LDT(next_p);
+        switch_segments(stack_ec, prev_p, next_p);
+    }
 
- done:
     /*
      * We do this late on because it doesn't need to be protected by the
      * schedule_lock, and because we want this to be the very last use of
--- a/xen/arch/x86/setup.c Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/arch/x86/setup.c Wed Mar 23 13:35:24 2005 +0000
@@ -312,7 +312,6 @@ void __init cpu_init(void)
 
     /* Set up and load the per-CPU TSS and LDT. */
     t->bitmap = IOBMP_INVALID_OFFSET;
-    memset(t->io_bitmap, ~0, sizeof(t->io_bitmap));
 #if defined(__i386__)
     t->ss0  = __HYPERVISOR_DS;
     t->esp0 = get_stack_bottom();
--- a/xen/arch/x86/traps.c Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/arch/x86/traps.c Wed Mar 23 13:35:24 2005 +0000
@@ -379,15 +379,50 @@ long do_fpu_taskswitch(int set)
     return 0;
 }
 
-static inline int user_io_okay(
+/* Has the guest requested sufficient permission for this I/O access? */
+static inline int guest_io_okay(
+    unsigned int port, unsigned int bytes,
+    struct exec_domain *ed, struct xen_regs *regs)
+{
+    u16 x;
+    if ( ed->arch.iopl >= (KERNEL_MODE(ed, regs) ? 1 : 3) )
+        return 1;
+    if ( (ed->arch.iobmp_limit > (port + bytes)) &&
+         (__get_user(x, (u16 *)(ed->arch.iobmp+(port>>3))) == 0) &&
+         ((x & (((1<<bytes)-1) << (port&7))) == 0) )
+        return 1;
+    return 0;
+}
+
+/* Has the administrator granted sufficient permission for this I/O access? */
+static inline int admin_io_okay(
     unsigned int port, unsigned int bytes,
     struct exec_domain *ed, struct xen_regs *regs)
 {
-    if ( ed->arch.iopl < (KERNEL_MODE(ed, regs) ? 1 : 3) )
-        return 0;
-    return 1;
+    struct domain *d = ed->domain;
+    u16 x;
+    if ( IS_PRIV(d) || (d->arch.max_iopl >= (KERNEL_MODE(ed, regs) ? 1 : 3)) )
+        return 1;
+    if ( d->arch.iobmp_mask != NULL )
+    {
+        x = *(u16 *)(d->arch.iobmp_mask + (port >> 3));
+        if ( (x & (((1<<bytes)-1) << (port&7))) == 0 )
+            return 1;
+    }
+    return 0;
 }
 
+/* Check admin limits. Silently fail the access if it is disallowed. */
+#define inb_user(_p, _d, _r) (admin_io_okay(_p, 1, _d, _r) ? inb(_p) : ~0)
+#define inw_user(_p, _d, _r) (admin_io_okay(_p, 2, _d, _r) ? inw(_p) : ~0)
+#define inl_user(_p, _d, _r) (admin_io_okay(_p, 4, _d, _r) ? inl(_p) : ~0)
+#define outb_user(_v, _p, _d, _r) \
+    (admin_io_okay(_p, 1, _d, _r) ? outb(_v, _p) : ((void)0))
+#define outw_user(_v, _p, _d, _r) \
+    (admin_io_okay(_p, 2, _d, _r) ? outw(_v, _p) : ((void)0))
+#define outl_user(_v, _p, _d, _r) \
+    (admin_io_okay(_p, 4, _d, _r) ? outl(_v, _p) : ((void)0))
+
 #define insn_fetch(_type, _size, _ptr) \
 ({  unsigned long _x; \
     if ( get_user(_x, (_type *)eip) ) \
@@ -450,22 +485,22 @@ static int emulate_privileged_op(struct 
     case 0x6c: /* INSB */
         op_bytes = 1;
     case 0x6d: /* INSW/INSL */
-        if ( !user_io_okay((u16)regs->edx, op_bytes, ed, regs) )
+        if ( !guest_io_okay((u16)regs->edx, op_bytes, ed, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
-            data = (u8)inb((u16)regs->edx);
+            data = (u8)inb_user((u16)regs->edx, ed, regs);
            if ( put_user((u8)data, (u8 *)regs->edi) )
                goto write_fault;
            break;
        case 2:
-            data = (u16)inw((u16)regs->edx);
+            data = (u16)inw_user((u16)regs->edx, ed, regs);
            if ( put_user((u16)data, (u16 *)regs->edi) )
                goto write_fault;
            break;
        case 4:
-            data = (u32)inl((u16)regs->edx);
+            data = (u32)inl_user((u16)regs->edx, ed, regs);
            if ( put_user((u32)data, (u32 *)regs->edi) )
                goto write_fault;
            break;
@@ -476,24 +511,24 @@ static int emulate_privileged_op(struct 
    case 0x6e: /* OUTSB */
        op_bytes = 1;
    case 0x6f: /* OUTSW/OUTSL */
-        if ( !user_io_okay((u16)regs->edx, op_bytes, ed, regs) )
+        if ( !guest_io_okay((u16)regs->edx, op_bytes, ed, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
            if ( get_user(data, (u8 *)regs->esi) )
                goto read_fault;
-            outb((u8)data, (u16)regs->edx);
+            outb_user((u8)data, (u16)regs->edx, ed, regs);
            break;
        case 2:
            if ( get_user(data, (u16 *)regs->esi) )
                goto read_fault;
-            outw((u16)data, (u16)regs->edx);
+            outw_user((u16)data, (u16)regs->edx, ed, regs);
            break;
        case 4:
            if ( get_user(data, (u32 *)regs->esi) )
                goto read_fault;
-            outl((u32)data, (u16)regs->edx);
+            outl_user((u32)data, (u16)regs->edx, ed, regs);
            break;
        }
        regs->esi += (regs->eflags & EF_DF) ? -op_bytes : op_bytes;
@@ -518,20 +553,20 @@ static int emulate_privileged_op(struct 
    case 0xe5: /* IN imm8,%eax */
        port = insn_fetch(u8, 1, eip);
    exec_in:
-        if ( !user_io_okay(port, op_bytes, ed, regs) )
+        if ( !guest_io_okay(port, op_bytes, ed, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
            regs->eax &= ~0xffUL;
-            regs->eax |= (u8)inb(port);
+            regs->eax |= (u8)inb_user(port, ed, regs);
            break;
        case 2:
            regs->eax &= ~0xffffUL;
-            regs->eax |= (u16)inw(port);
+            regs->eax |= (u16)inw_user(port, ed, regs);
            break;
        case 4:
-            regs->eax = (u32)inl(port);
+            regs->eax = (u32)inl_user(port, ed, regs);
            break;
        }
        goto done;
@@ -547,18 +582,18 @@ static int emulate_privileged_op(struct 
    case 0xe7: /* OUT %eax,imm8 */
        port = insn_fetch(u8, 1, eip);
    exec_out:
-        if ( !user_io_okay(port, op_bytes, ed, regs) )
+        if ( !guest_io_okay(port, op_bytes, ed, regs) )
            goto fail;
        switch ( op_bytes )
        {
        case 1:
-            outb((u8)regs->eax, port);
+            outb_user((u8)regs->eax, port, ed, regs);
            break;
        case 2:
-            outw((u16)regs->eax, port);
+            outw_user((u16)regs->eax, port, ed, regs);
            break;
        case 4:
-            outl((u32)regs->eax, port);
+            outl_user((u32)regs->eax, port, ed, regs);
            break;
        }
        goto done;
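
To make the bitmap test in guest_io_okay()/admin_io_okay() concrete: an access is permitted only if every bit covering the affected ports is clear. A stand-alone restatement of that check, with illustrative names that are not part of the patch:

    /* Return 1 if an access of 'bytes' bytes at 'port' is permitted by 'bitmap',
     * where a clear bit means "access allowed", as in the hypervisor checks above. */
    static int port_access_allowed(const unsigned char *bitmap,
                                   unsigned int port, unsigned int bytes)
    {
        unsigned short x = *(const unsigned short *)(bitmap + (port >> 3));
        return (x & (((1 << bytes) - 1) << (port & 7))) == 0;
    }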
--- a/xen/common/domain.c Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/common/domain.c Wed Mar 23 13:35:24 2005 +0000
@@ -129,6 +129,12 @@ struct domain *find_last_domain(void)
 }
 
 
+#ifndef CONFIG_IA64
+extern void physdev_destroy_state(struct domain *d);
+#else
+#define physdev_destroy_state(_d) ((void)0)
+#endif
+
 void domain_kill(struct domain *d)
 {
     struct exec_domain *ed;
@@ -139,6 +145,7 @@ void domain_kill(struct domain *d)
         for_each_exec_domain(d, ed)
             sched_rem_domain(ed);
         domain_relinquish_memory(d);
+        physdev_destroy_state(d);
         put_domain(d);
     }
 }
--- a/xen/common/physdev.c Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/common/physdev.c Wed Mar 23 13:35:24 2005 +0000
@@ -69,26 +69,47 @@ typedef struct _phys_dev_st {
 
 
 /* Find a device on a per-domain device list. */
-static phys_dev_t *find_pdev(struct domain *p, struct pci_dev *dev)
+static phys_dev_t *find_pdev(struct domain *d, struct pci_dev *dev)
 {
-    phys_dev_t *t, *res = NULL;
+    phys_dev_t *t;
+    list_for_each_entry ( t, &d->pcidev_list, node )
+        if ( dev == t->dev )
+            return t;
+    return NULL;
+}
+
+static int setup_ioport_memory_access(struct domain *d, struct pci_dev *pdev)
+{
+    struct resource *r;
+    int i, j;
 
-    list_for_each_entry ( t, &p->pcidev_list, node )
+    if ( d->arch.iobmp_mask == NULL )
     {
-        if ( dev == t->dev )
+        if ( (d->arch.iobmp_mask = xmalloc_array(u8, IOBMP_BYTES)) == NULL )
+            return -ENOMEM;
+        memset(d->arch.iobmp_mask, 0xFF, IOBMP_BYTES);
+    }
+
+    for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
+    {
+        r = &pdev->resource[i];
+        if ( r->flags & IORESOURCE_IO )
        {
-            res = t;
-            break;
+            INFO("Giving domain %u IO resources (%lx - %lx) "
+                 "for device %s\n", d->id, r->start, r->end, pdev->slot_name);
+            for ( j = r->start; j < r->end + 1; j++ )
+                clear_bit(j, d->arch.iobmp_mask);
        }
    }
-    return res;
+
+    return 0;
 }
 
 /* Add a device to a per-domain device-access list. */
-static int add_dev_to_task(struct domain *p, struct pci_dev *dev, 
-                           int acc)
+static int add_dev_to_task(struct domain *d, struct pci_dev *dev, int acc)
 {
     phys_dev_t *physdev;
+    int rc;
 
     if ( (physdev = xmalloc(phys_dev_t)) == NULL )
     {
@@ -96,104 +117,56 @@ static int add_dev_to_task(struct domain
         return -ENOMEM;
     }
 
+    if ( (rc = setup_ioport_memory_access(d, dev)) < 0 )
+    {
+        xfree(physdev);
+        return rc;
+    }
+
     physdev->dev = dev;
     physdev->flags = acc;
     physdev->state = 0;
-    list_add(&physdev->node, &p->pcidev_list);
+    list_add(&physdev->node, &d->pcidev_list);
 
     if ( acc == ACC_WRITE )
-        physdev->owner = p;
+        physdev->owner = d;
 
     return 0;
 }
 
-/* Remove a device from a per-domain device-access list. */
-static void remove_dev_from_task(struct domain *p, struct pci_dev *dev)
-{
-    phys_dev_t *physdev = find_pdev(p, dev);
-
-    if ( physdev == NULL )
-        BUG();
-
-    list_del(&physdev->node);
-
-    xfree(physdev);
-}
-
-static int setup_ioport_memory_access(domid_t dom, struct domain* p, 
-                                      struct exec_domain* ed,
-                                      struct pci_dev *pdev)
+void physdev_destroy_state(struct domain *d)
 {
-    struct exec_domain* edc;
-    int i, j;
-
-    /* Now, setup access to the IO ports and memory regions for the device. */
-    if ( ed->arch.io_bitmap == NULL )
-    {
-        if ( (ed->arch.io_bitmap = xmalloc_array(u8, IOBMP_BYTES)) == NULL )
-            return -ENOMEM;
+    struct list_head *ent;
 
-        memset(ed->arch.io_bitmap, 0xFF, IOBMP_BYTES);
-
-        ed->arch.io_bitmap_sel = ~0ULL;
-
-        for_each_exec_domain(p, edc) {
-            if (edc == ed)
-                continue;
-            edc->arch.io_bitmap = ed->arch.io_bitmap;
-        }
+    if ( d->arch.iobmp_mask != NULL )
+    {
+        xfree(d->arch.iobmp_mask);
+        d->arch.iobmp_mask = NULL;
     }
 
-    for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
+    while ( (ent = d->pcidev_list.next) != &d->pcidev_list )
     {
-        struct resource *r = &pdev->resource[i];
-        
-        if ( r->flags & IORESOURCE_IO )
-        {
-            /* Give the domain access to the IO ports it needs.  Currently,
-             * this will allow all processes in that domain access to those
-             * ports as well.  This will do for now, since driver domains don't
-             * run untrusted processes! */
-            INFO("Giving domain %u IO resources (%lx - %lx) "
-                 "for device %s\n", dom, r->start, r->end, pdev->slot_name);
-            for ( j = r->start; j < r->end + 1; j++ )
-            {
-                clear_bit(j, ed->arch.io_bitmap);
-                clear_bit(j / IOBMP_BITS_PER_SELBIT, &ed->arch.io_bitmap_sel);
-            }
-        }
-        /* rights to IO memory regions are checked when the domain maps them */
+        list_del(ent);
+        xfree(list_entry(ent, phys_dev_t, node));
     }
-
-    for_each_exec_domain(p, edc) {
-        if (edc == ed)
-            continue;
-        edc->arch.io_bitmap_sel = ed->arch.io_bitmap_sel;
-    }
-
-    return 0;
 }
 
 /*
  * physdev_pci_access_modify:
  * Allow/disallow access to a specific PCI device.  Guests should not be
  * allowed to see bridge devices as it needlessly complicates things (one
- * possible exception to this is the AGP bridge).  If the given device is a
- * bridge, then the domain should get access to all the leaf devices below
- * that bridge (XXX this is unimplemented!).
+ * possible exception to this is the AGP bridge).
  */
 int physdev_pci_access_modify(domid_t dom, int bus, int dev, int func, 
                               int enable)
 {
     struct domain *p;
-    struct exec_domain *ed;
     struct pci_dev *pdev;
     phys_dev_t *physdev;
     int rc = 0;
-    int oldacc = -1, allocated_physdev = 0;
+    int oldacc = -1;
 
-    if ( !IS_PRIV(current->domain) )
-        BUG();
+    BUG_ON(!IS_PRIV(current->domain));
 
     if ( (bus > PCI_BUSMAX) || (dev > PCI_DEVMAX) || (func > PCI_FUNCMAX) )
         return -EINVAL;
@@ -209,8 +182,6 @@ int physdev_pci_access_modify(domid_t do
     if ( (p = find_domain_by_id(dom)) == NULL )
         return -ESRCH;
 
-    ed = p->exec_domain[0];     /* XXX */
-
     /* Make the domain privileged. */
     set_bit(DF_PHYSDEV, &p->d_flags);
     /* FIXME: MAW for now make the domain REALLY privileged so that it
@@ -222,47 +193,23 @@ int physdev_pci_access_modify(domid_t do
     {
         INFO("  dev does not exist\n");
         rc = -ENODEV;
-        goto clear_privilege;
+        goto out;
     }
 
-    if ( (physdev = find_pdev(p, pdev)) != NULL) {
-        /* Sevice already on list: update access permissions. */
-        oldacc = physdev->flags;
-        physdev->flags = ACC_WRITE;
-    } else {
-        if ( (rc = add_dev_to_task(p, pdev, ACC_WRITE)) < 0)
-            goto clear_privilege;
-        allocated_physdev = 1;
-    }
-
     INFO("  add RW %02x:%02x:%02x\n", pdev->bus->number,
          PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
 
-    /* Is the device a bridge or cardbus? */
-    if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL ) {
-        INFO("XXX can't give access to bridge devices yet\n");
-        rc = -EPERM;
-        goto remove_dev;
+    if ( (physdev = find_pdev(p, pdev)) != NULL )
+    {
+        oldacc = physdev->flags;
+        physdev->flags = ACC_WRITE;
+    }
+    else
+    {
+        rc = add_dev_to_task(p, pdev, ACC_WRITE);
     }
 
-    if ( (rc = setup_ioport_memory_access(dom, p, ed, pdev)) < 0 )
-        goto remove_dev;
-
-    put_domain(p);
-    return rc;
-
-remove_dev:
-    if (allocated_physdev) {
-        /* new device was added - remove it from the list */
-        remove_dev_from_task(p, pdev);
-    } else {
-        /* device already existed - just undo the access changes */
-        physdev->flags = oldacc;
-    }
-    
-clear_privilege:
-    clear_bit(DF_PHYSDEV, &p->d_flags);
-    clear_bit(DF_PRIVILEGED, &p->d_flags);
+ out:
     put_domain(p);
     return rc;
 }
@@ -308,9 +255,8 @@ int domain_iomem_in_pfn(struct domain *p
 }
 
 /* check if a domain has general access to a device */
-inline static int check_dev_acc (struct domain *p, 
-                                 int bus, int dev, int func,
-                                 phys_dev_t **pdev) 
+static inline int check_dev_acc(
+    struct domain *d, int bus, int dev, int func, phys_dev_t **pdev) 
 {
     struct pci_dev *target_dev;
     phys_dev_t    *target_pdev;
 
     *pdev = NULL;
 
-    if ( !IS_CAPABLE_PHYSDEV(p) )
-        return -EPERM; /* no pci access permission */
+    if ( !IS_CAPABLE_PHYSDEV(d) )
+        return -EPERM;
 
-    if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX )
+    if ( (bus > PCI_BUSMAX) || (dev > PCI_DEVMAX) || (func > PCI_FUNCMAX) )
         return -EINVAL;
 
     VERBOSE_INFO("b=%x d=%x f=%x ", bus, dev, func);
@@ -336,7 +282,7 @@ inline static int check_dev_acc (struct 
     }
 
     /* check access */
-    target_pdev = find_pdev(p, target_dev);
+    target_pdev = find_pdev(d, target_dev);
     if ( !target_pdev )
     {
         VERBOSE_INFO("dom has no access to target\n");
@@ -748,6 +694,24 @@ long do_physdev_op(physdev_op_t *uop)
         ret = 0;
         break;
 
+    case PHYSDEVOP_SET_IOPL:
+        ret = -EINVAL;
+        if ( op.u.set_iopl.iopl > 3 )
+            break;
+        ret = 0;
+        current->arch.iopl = op.u.set_iopl.iopl;
+        break;
+
+    case PHYSDEVOP_SET_IOBITMAP:
+        ret = -EINVAL;
+        if ( !access_ok(VERIFY_READ, op.u.set_iobitmap.bitmap, IOBMP_BYTES) ||
+             (op.u.set_iobitmap.nr_ports > 65536) )
+            break;
+        ret = 0;
+        current->arch.iobmp       = (u8 *)op.u.set_iobitmap.bitmap;
+        current->arch.iobmp_limit = op.u.set_iobitmap.nr_ports;
+        break;
+
     default:
         ret = -EINVAL;
         break;
--- a/xen/include/asm-x86/domain.h Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/include/asm-x86/domain.h Wed Mar 23 13:35:24 2005 +0000
@@ -18,6 +18,10 @@ struct arch_domain
     l3_pgentry_t *mm_perdomain_l3;
 #endif
 
+    /* I/O-port access bitmap mask. */
+    u8 *iobmp_mask;       /* Address of IO bitmap mask, or NULL.   */
+    int max_iopl;         /* Maximum achievable IOPL.              */
+
     /* shadow mode status and controls */
     unsigned int shadow_mode;  /* flags to control shadow table operation */
     spinlock_t   shadow_lock;
@@ -57,7 +61,6 @@ struct arch_exec_domain
 
     /* general user-visible register state */
     execution_context_t user_ctxt;
-    unsigned int iopl;
 
     void (*schedule_tail) (struct exec_domain *);
 
@@ -81,9 +84,9 @@ struct arch_exec_domain
     struct trap_bounce trap_bounce;
 
     /* I/O-port access bitmap. */
-    u64 io_bitmap_sel; /* Selector to tell us which part of the IO bitmap are
-                        * "interesting" (i.e. have clear bits) */
-    u8 *io_bitmap;     /* Pointer to task's IO bitmap or NULL */
+    u8 *iobmp;        /* Guest kernel virtual address of the bitmap. */
+    int iobmp_limit;  /* Number of ports represented in the bitmap.  */
+    int iopl;         /* Current IOPL for this VCPU. */
 
     /* Trap info. */
 #ifdef ARCH_HAS_FAST_TRAP
--- a/xen/include/asm-x86/processor.h Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/include/asm-x86/processor.h Wed Mar 23 13:35:24 2005 +0000
@@ -328,9 +328,6 @@ static inline void clear_in_cr4 (unsigne
 } while (0)
 
 #define IOBMP_BYTES             8192
-#define IOBMP_BYTES_PER_SELBIT  (IOBMP_BYTES / 64)
-#define IOBMP_BITS_PER_SELBIT   (IOBMP_BYTES_PER_SELBIT * 8)
-#define IOBMP_OFFSET            offsetof(struct tss_struct, io_bitmap)
 #define IOBMP_INVALID_OFFSET    0x8000
 
 struct i387_state {
@@ -372,9 +369,8 @@ struct tss_struct {
     u16 trace;
 #endif
     u16 bitmap;
-    u8  io_bitmap[IOBMP_BYTES+1];
-    /* Pads the TSS to be cacheline-aligned (total size is 0x2080). */
-    u8 __cacheline_filler[23];
+    /* Pads the TSS to be cacheline-aligned (total size is 0x80). */
+    u8 __cacheline_filler[24];
 } __cacheline_aligned PACKED;
 
 #define IDT_ENTRIES 256
--- a/xen/include/public/dom0_ops.h Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/include/public/dom0_ops.h Wed Mar 23 13:35:24 2005 +0000
@@ -19,7 +19,7 @@
  * This makes sure that old versions of dom0 tools will stop working in a
  * well-defined way (rather than crashing the machine, for instance).
  */
-#define DOM0_INTERFACE_VERSION   0xAAAA1001
+#define DOM0_INTERFACE_VERSION   0xAAAA1002
 
 /************************************************************************/
 
@@ -120,13 +120,6 @@ typedef struct {
     MEMORY_PADDING;
 } PACKED dom0_setdomaininfo_t; /* 16 bytes */
 
-#define DOM0_IOPL             14
-typedef struct {
-    domid_t domain;                   /*  0 */
-    u16     __pad;
-    u32     iopl;                     /*  4 */
-} PACKED dom0_iopl_t; /* 8 bytes */
-
 #define DOM0_MSR              15
 typedef struct {
     /* IN variables. */
@@ -414,6 +407,20 @@ typedef struct {
     u32 _pad0;
 } PACKED dom0_microcode_t; /* 16 bytes */
 
+#define DOM0_IOPL_PERMISSION 36
+typedef struct {
+    domid_t domain;           /* 0: domain to be affected */
+    u16     max_iopl;         /* 2: new effective IOPL limit */
+} PACKED dom0_iopl_permission_t; /* 4 bytes */
+
+#define DOM0_IOPORT_PERMISSION 37
+typedef struct {
+    domid_t domain;           /* 0: domain to be affected */
+    u16     first_port;       /* 2: first port int range */
+    u16     nr_ports;         /* 4: size of port range */
+    u16     allow_access;     /* 6: allow or deny access to range? */
+} PACKED dom0_ioport_permission_t; /* 8 bytes */
+
 typedef struct {
     u32 cmd;                          /* 0 */
     u32 interface_version;            /* 4 */ /* DOM0_INTERFACE_VERSION */
@@ -429,7 +436,6 @@ typedef struct {
         dom0_setdomaininfo_t     setdomaininfo;
         dom0_getdomaininfo_t     getdomaininfo;
         dom0_getpageframeinfo_t  getpageframeinfo;
-        dom0_iopl_t              iopl;
        dom0_msr_t               msr;
        dom0_debug_t             debug;
        dom0_settime_t           settime;
@@ -449,6 +455,8 @@ typedef struct {
        dom0_read_memtype_t      read_memtype;
        dom0_perfccontrol_t      perfccontrol;
        dom0_microcode_t         microcode;
+        dom0_iopl_permission_t   iopl_permission;
+        dom0_ioport_permission_t ioport_permission;
     } PACKED u;
 } PACKED dom0_op_t; /* 80 bytes */
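
For illustration, a control tool might grant a domain access to a port range roughly as follows. do_dom0_op() stands in for whatever privcmd wrapper the tools use, and the include path is hypothetical; neither is defined by this patch.

    #include <xen-public/dom0_ops.h>   /* assumed include path */

    /* Allow 'dom' to access I/O ports [first, first + nr). */
    static int grant_ioports(domid_t dom, u16 first, u16 nr)
    {
        dom0_op_t op;

        op.cmd = DOM0_IOPORT_PERMISSION;
        op.interface_version = DOM0_INTERFACE_VERSION;
        op.u.ioport_permission.domain       = dom;
        op.u.ioport_permission.first_port   = first;
        op.u.ioport_permission.nr_ports     = nr;
        op.u.ioport_permission.allow_access = 1;
        return do_dom0_op(&op);   /* assumed hypercall wrapper, not shown here */
    }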
--- a/xen/include/public/physdev.h Wed Mar 23 09:08:07 2005 +0000
+++ b/xen/include/public/physdev.h Wed Mar 23 13:35:24 2005 +0000
@@ -15,6 +15,8 @@
 #define PHYSDEVOP_PCI_PROBE_ROOT_BUSES   3
 #define PHYSDEVOP_IRQ_UNMASK_NOTIFY      4
 #define PHYSDEVOP_IRQ_STATUS_QUERY       5
+#define PHYSDEVOP_SET_IOPL               6
+#define PHYSDEVOP_SET_IOBITMAP           7
 
 /* Read from PCI configuration space. */
 typedef struct {
@@ -62,6 +64,19 @@ typedef struct {
     u32 flags;  /* 4 */
 } PACKED physdevop_irq_status_query_t; /* 8 bytes */
 
+typedef struct {
+    /* IN */
+    u32 iopl;   /* 0 */
+} PACKED physdevop_set_iopl_t; /* 4 bytes */
+
+typedef struct {
+    /* IN */
+    memory_t bitmap;   /* 0 */
+    MEMORY_PADDING;
+    u32      nr_ports; /* 8 */
+    u32      __pad0;   /* 12 */
+} PACKED physdevop_set_iobitmap_t; /* 16 bytes */
+
 typedef struct _physdev_op_st
 {
     u32 cmd;  /* 0 */
@@ -72,6 +87,8 @@ typedef struct _physdev_op_st
         physdevop_pci_initialise_device_t  pci_initialise_device;
         physdevop_pci_probe_root_buses_t   pci_probe_root_buses;
         physdevop_irq_status_query_t       irq_status_query;
+        physdevop_set_iopl_t               set_iopl;
+        physdevop_set_iobitmap_t           set_iobitmap;
         u8                                 __dummy[32];
     } PACKED u;
 } PACKED physdev_op_t; /* 40 bytes */