ia64/xen-unstable
changeset 309:75d5c2fc3073
bitkeeper revision 1.131 (3e7270ecL24hQl_PjDBYoS8hhB8GTA)
multicall.h:
new file
Many files:
Multicall is now available, so that Xen syscalls can be batched to amortise the cost of the trap to ring 0. It is used by xenolinux to reduce the cost of a context switch.
multicall.h:
new file
Many files:
Multicall is now available, so that Xen syscalls can be batched to amortise the cost of the trap to ring 0. It is used by xenolinux to reduce the cost of a context switch.
line diff
1.1 --- a/.rootkeys Fri Mar 14 18:21:09 2003 +0000 1.2 +++ b/.rootkeys Sat Mar 15 00:16:44 2003 +0000 1.3 @@ -529,6 +529,7 @@ 3e5a4e67Ulv-Ll8Zp4j2GwMwQ8aAXQ xenolinux 1.4 3e5a4e673p7PEOyHFm3nHkYX6HQYBg xenolinux-2.4.21-pre4-sparse/include/asm-xeno/irq.h 1.5 3e5a4e67zoNch27qYhEBpr2k6SABOg xenolinux-2.4.21-pre4-sparse/include/asm-xeno/mmu.h 1.6 3e5a4e678ddsQOpbSiRdy1GRcDc9WA xenolinux-2.4.21-pre4-sparse/include/asm-xeno/mmu_context.h 1.7 +3e7270deQqtGPSnFxcW4AvJZuTUWfg xenolinux-2.4.21-pre4-sparse/include/asm-xeno/multicall.h 1.8 3e5a4e67mnQfh-R8KcQCaVo2Oho6yg xenolinux-2.4.21-pre4-sparse/include/asm-xeno/page.h 1.9 3e5a4e67uTYU5oEnIDjxuaez8njjqg xenolinux-2.4.21-pre4-sparse/include/asm-xeno/pgalloc.h 1.10 3e5a4e67X7JyupgdYkgDX19Huj2sAw xenolinux-2.4.21-pre4-sparse/include/asm-xeno/pgtable-2level.h
2.1 --- a/xen/arch/i386/entry.S Fri Mar 14 18:21:09 2003 +0000 2.2 +++ b/xen/arch/i386/entry.S Sat Mar 15 00:16:44 2003 +0000 2.3 @@ -79,6 +79,8 @@ 2.4 */ 2.5 2.6 #include <xeno/config.h> 2.7 +#include <xeno/errno.h> 2.8 +#include <hypervisor-ifs/hypervisor-if.h> 2.9 #include <asm/smp.h> 2.10 2.11 EBX = 0x00 2.12 @@ -184,6 +186,77 @@ ENTRY(ret_from_newdomain) 2.13 jmp test_all_events 2.14 2.15 ALIGN 2.16 +/* 2.17 + * HYPERVISOR_multicall(call_list, nr_calls) 2.18 + * Execute a list of 'nr_calls' system calls, pointed at by 'call_list'. 2.19 + * This is fairly easy except that: 2.20 + * 1. We may fault reading the call list, and must patch that up; and 2.21 + * 2. We cannot recursively call HYPERVISOR_multicall, or a malicious 2.22 + * caller could cause our stack to blow up. 2.23 + */ 2.24 +stringstring: 2.25 + .asciz "%08x %08x %08x %08x %08x %08x %d\n" 2.26 +do_multicall: 2.27 + popl %eax 2.28 + cmpl $SYMBOL_NAME(ret_from_hypervisor_call),%eax 2.29 + jne multicall_exit /* bail if called recursively */ 2.30 + pushl %ebx 2.31 + movl 4(%esp),%ebx /* EBX == call_list */ 2.32 + movl 8(%esp),%ecx /* ECX == nr_calls */ 2.33 +multicall_loop: 2.34 + pushl %ecx 2.35 +multicall_fault1: 2.36 + pushl 20(%ebx) 2.37 +multicall_fault2: 2.38 + pushl 16(%ebx) 2.39 +multicall_fault3: 2.40 + pushl 12(%ebx) 2.41 +multicall_fault4: 2.42 + pushl 8(%ebx) 2.43 +multicall_fault5: 2.44 + pushl 4(%ebx) 2.45 +multicall_fault6: 2.46 + movl (%ebx),%eax 2.47 + andl $255,%eax 2.48 + call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4) 2.49 + addl $20,%esp 2.50 + popl %ecx 2.51 + addl $BYTES_PER_MULTICALL_ENTRY,%ebx 2.52 + loop multicall_loop 2.53 + popl %ebx 2.54 +multicall_exit: 2.55 + xorl %eax,%eax 2.56 + jmp ret_from_hypervisor_call 2.57 + 2.58 +.section __ex_table,"a" 2.59 + .align 4 2.60 + .long multicall_fault1, multicall_fixup1 2.61 + .long multicall_fault2, multicall_fixup2 2.62 + .long multicall_fault3, multicall_fixup3 2.63 + .long multicall_fault4, multicall_fixup4 2.64 + .long 
multicall_fault5, multicall_fixup5 2.65 + .long multicall_fault6, multicall_fixup6 2.66 +.previous 2.67 + 2.68 +.section .fixup,"ax" 2.69 +multicall_fixup6: 2.70 + addl $4,%esp 2.71 +multicall_fixup5: 2.72 + addl $4,%esp 2.73 +multicall_fixup4: 2.74 + addl $4,%esp 2.75 +multicall_fixup3: 2.76 + addl $4,%esp 2.77 +multicall_fixup2: 2.78 + addl $4,%esp 2.79 +multicall_fixup1: 2.80 + addl $4,%esp 2.81 + popl %ebx 2.82 + movl $-EFAULT,%eax 2.83 + jmp ret_from_hypervisor_call 2.84 +.previous 2.85 + 2.86 + ALIGN 2.87 restore_all: 2.88 RESTORE_ALL 2.89 2.90 @@ -194,7 +267,9 @@ ENTRY(hypervisor_call) 2.91 GET_CURRENT(%ebx) 2.92 andl $255,%eax 2.93 call *SYMBOL_NAME(hypervisor_call_table)(,%eax,4) 2.94 - movl %eax,EAX(%esp) # save the return value 2.95 + 2.96 +ret_from_hypervisor_call: 2.97 + movl %eax,EAX(%esp) # save the return value 2.98 2.99 test_all_events: 2.100 mov PROCESSOR(%ebx),%eax 2.101 @@ -517,10 +592,11 @@ ENTRY(hypervisor_call_table) 2.102 .long SYMBOL_NAME(do_process_page_updates) 2.103 .long SYMBOL_NAME(do_console_write) 2.104 .long SYMBOL_NAME(do_set_gdt) 2.105 - .long SYMBOL_NAME(do_stack_and_ldt_switch) 2.106 + .long SYMBOL_NAME(do_stack_switch) 2.107 + .long SYMBOL_NAME(do_ldt_switch) 2.108 .long SYMBOL_NAME(do_net_update) 2.109 .long SYMBOL_NAME(do_fpu_taskswitch) 2.110 - .long SYMBOL_NAME(do_sched_op) 2.111 + .long SYMBOL_NAME(do_yield) 2.112 .long SYMBOL_NAME(kill_domain) 2.113 .long SYMBOL_NAME(do_dom0_op) 2.114 .long SYMBOL_NAME(do_network_op) 2.115 @@ -530,6 +606,7 @@ ENTRY(hypervisor_call_table) 2.116 .long SYMBOL_NAME(do_update_descriptor) 2.117 .long SYMBOL_NAME(do_set_fast_trap) 2.118 .long SYMBOL_NAME(do_dom_mem_op) 2.119 - .rept NR_syscalls-(.-hypervisor_call_table)/4 2.120 + .long SYMBOL_NAME(do_multicall) 2.121 + .rept NR_syscalls-((.-hypervisor_call_table)/4) 2.122 .long SYMBOL_NAME(sys_ni_syscall) 2.123 .endr
3.1 --- a/xen/arch/i386/mm.c Fri Mar 14 18:21:09 2003 +0000 3.2 +++ b/xen/arch/i386/mm.c Sat Mar 15 00:16:44 2003 +0000 3.3 @@ -97,8 +97,7 @@ void __init zap_low_mappings (void) 3.4 } 3.5 3.6 3.7 -long do_stack_and_ldt_switch( 3.8 - unsigned long ss, unsigned long esp, unsigned long ldts) 3.9 +long do_stack_switch(unsigned long ss, unsigned long esp) 3.10 { 3.11 int nr = smp_processor_id(); 3.12 struct tss_struct *t = &init_tss[nr]; 3.13 @@ -106,19 +105,6 @@ long do_stack_and_ldt_switch( 3.14 if ( (ss == __HYPERVISOR_CS) || (ss == __HYPERVISOR_DS) ) 3.15 return -1; 3.16 3.17 - if ( ldts != current->mm.ldt_sel ) 3.18 - { 3.19 - unsigned long *ptabent; 3.20 - ptabent = (unsigned long *)GET_GDT_ADDRESS(current); 3.21 - /* Out of range for GDT table? */ 3.22 - if ( (ldts * 8) > GET_GDT_ENTRIES(current) ) return -1; 3.23 - ptabent += ldts * 2; /* 8 bytes per desc == 2 * unsigned long */ 3.24 - /* Not an LDT entry? (S=0b, type =0010b) */ 3.25 - if ( (*ptabent & 0x00001f00) != 0x00000200 ) return -1; 3.26 - current->mm.ldt_sel = ldts; 3.27 - __load_LDT(ldts); 3.28 - } 3.29 - 3.30 current->thread.ss1 = ss; 3.31 current->thread.esp1 = esp; 3.32 t->ss1 = ss; 3.33 @@ -128,6 +114,23 @@ long do_stack_and_ldt_switch( 3.34 } 3.35 3.36 3.37 +long do_ldt_switch(unsigned long ldts) 3.38 +{ 3.39 + unsigned long *ptabent; 3.40 + 3.41 + ptabent = (unsigned long *)GET_GDT_ADDRESS(current); 3.42 + /* Out of range for GDT table? */ 3.43 + if ( (ldts * 8) > GET_GDT_ENTRIES(current) ) return -1; 3.44 + ptabent += ldts * 2; /* 8 bytes per desc == 2 * unsigned long */ 3.45 + /* Not an LDT entry? (S=0b, type =0010b) */ 3.46 + if ( ldts && ((*ptabent & 0x00001f00) != 0x00000200) ) return -1; 3.47 + current->mm.ldt_sel = ldts; 3.48 + __load_LDT(ldts); 3.49 + 3.50 + return 0; 3.51 +} 3.52 + 3.53 + 3.54 long do_set_gdt(unsigned long *frame_list, int entries) 3.55 { 3.56 return -ENOSYS;
4.1 --- a/xen/common/kernel.c Fri Mar 14 18:21:09 2003 +0000 4.2 +++ b/xen/common/kernel.c Sat Mar 15 00:16:44 2003 +0000 4.3 @@ -533,7 +533,7 @@ int console_export(char *str, int len) 4.4 } 4.5 4.6 4.7 -long do_console_write(char *str, int count) 4.8 +long do_console_write(char *str, unsigned int count) 4.9 { 4.10 #define SIZEOF_BUF 256 4.11 unsigned char safe_str[SIZEOF_BUF];
5.1 --- a/xen/common/schedule.c Fri Mar 14 18:21:09 2003 +0000 5.2 +++ b/xen/common/schedule.c Sat Mar 15 00:16:44 2003 +0000 5.3 @@ -148,12 +148,10 @@ int wake_up(struct task_struct *p) 5.4 } 5.5 5.6 /**************************************************************************** 5.7 - * Domain requested scheduling operations 5.8 - * KAF: turn it back into do_yield()! 5.9 + * Voluntarily yield the processor to another domain, until an event occurs. 5.10 ****************************************************************************/ 5.11 -long do_sched_op(void) 5.12 +long do_yield(void) 5.13 { 5.14 - /* XXX implement proper */ 5.15 current->state = TASK_INTERRUPTIBLE; 5.16 schedule(); 5.17 return 0; 5.18 @@ -519,7 +517,7 @@ void schedulers_start(void) 5.19 * Schedule timeout is used at a number of places and is a bit meaningless 5.20 * in the context of Xen, as Domains are not able to call these and all 5.21 * there entry points into Xen should be asynchronous. If a domain wishes 5.22 - * to block for a while it should use Xen's sched_op entry point. 5.23 + * to block for a while it should use Xen's sched_op/yield entry point. 5.24 ****************************************************************************/ 5.25 5.26 static void process_timeout(unsigned long __data)
6.1 --- a/xen/include/hypervisor-ifs/hypervisor-if.h Fri Mar 14 18:21:09 2003 +0000 6.2 +++ b/xen/include/hypervisor-ifs/hypervisor-if.h Sat Mar 15 00:16:44 2003 +0000 6.3 @@ -4,101 +4,64 @@ 6.4 * Interface to Xeno hypervisor. 6.5 */ 6.6 6.7 -#include "network.h" 6.8 -#include "block.h" 6.9 - 6.10 #ifndef __HYPERVISOR_IF_H__ 6.11 #define __HYPERVISOR_IF_H__ 6.12 6.13 /* 6.14 - * Virtual addresses beyond this are not modifiable by guest OSes. 6.15 - * The machine->physical mapping table starts at this address, read-only 6.16 - * to all domains except DOM0. 6.17 + * SEGMENT DESCRIPTOR TABLES 6.18 */ 6.19 -#define HYPERVISOR_VIRT_START (0xFC000000UL) 6.20 -#ifndef machine_to_phys_mapping 6.21 -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) 6.22 -#endif 6.23 - 6.24 -typedef struct trap_info_st 6.25 -{ 6.26 - unsigned char vector; /* exception/interrupt vector */ 6.27 - unsigned char dpl; /* privilege level */ 6.28 - unsigned short cs; /* code selector */ 6.29 - unsigned long address; /* code address */ 6.30 -} trap_info_t; 6.31 - 6.32 - 6.33 -typedef struct 6.34 -{ 6.35 -/* 6.36 - * PGREQ_XXX: specified in least-significant bits of 'ptr' field. 6.37 - * All requests specify relevent PTE or PT address in 'ptr'. 6.38 - * Normal requests specify update value in 'value'. 6.39 - * Extended requests specify command in least 8 bits of 'value'. 6.40 - */ 6.41 -/* A normal page-table update request. */ 6.42 -#define PGREQ_NORMAL 0 6.43 -/* Update an entry in the machine->physical mapping table. */ 6.44 -#define PGREQ_MPT_UPDATE 1 6.45 -/* An extended command. */ 6.46 -#define PGREQ_EXTENDED_COMMAND 2 6.47 -/* DOM0 can make entirely unchecked updates which do not affect refcnts. */ 6.48 -#define PGREQ_UNCHECKED_UPDATE 3 6.49 - unsigned long ptr, val; /* *ptr = val */ 6.50 -/* Announce a new top-level page table. 
*/ 6.51 -#define PGEXT_PIN_L1_TABLE 0 6.52 -#define PGEXT_PIN_L2_TABLE 1 6.53 -#define PGEXT_PIN_L3_TABLE 2 6.54 -#define PGEXT_PIN_L4_TABLE 3 6.55 -#define PGEXT_UNPIN_TABLE 4 6.56 -#define PGEXT_NEW_BASEPTR 5 6.57 -#define PGEXT_TLB_FLUSH 6 6.58 -#define PGEXT_INVLPG 7 6.59 -#define PGEXT_CMD_MASK 255 6.60 -#define PGEXT_CMD_SHIFT 8 6.61 -} page_update_request_t; 6.62 +/* 8 entries, plus a TSS entry for each CPU (up to 32 CPUs). */ 6.63 +#define FIRST_DOMAIN_GDT_ENTRY 40 6.64 +/* These are flat segments for domain bootstrap and fallback. */ 6.65 +#define FLAT_RING1_CS 0x11 6.66 +#define FLAT_RING1_DS 0x19 6.67 +#define FLAT_RING3_CS 0x23 6.68 +#define FLAT_RING3_DS 0x2b 6.69 6.70 6.71 /* 6.72 - * Segment descriptor tables. 6.73 + * HYPERVISOR "SYSTEM CALLS" 6.74 */ 6.75 -/* 8 entries, plus a TSS entry for each CPU (up to 32 CPUs). */ 6.76 -#define FIRST_DOMAIN_GDT_ENTRY 40 6.77 -/* These are flat segments for domain bootstrap and fallback. */ 6.78 -#define FLAT_RING1_CS 0x11 6.79 -#define FLAT_RING1_DS 0x19 6.80 -#define FLAT_RING3_CS 0x23 6.81 -#define FLAT_RING3_DS 0x2b 6.82 - 6.83 6.84 /* EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. 
*/ 6.85 +#define __HYPERVISOR_set_trap_table 0 6.86 +#define __HYPERVISOR_pt_update 1 6.87 +#define __HYPERVISOR_console_write 2 6.88 +#define __HYPERVISOR_set_gdt 3 6.89 +#define __HYPERVISOR_stack_switch 4 6.90 +#define __HYPERVISOR_ldt_switch 5 6.91 +#define __HYPERVISOR_net_update 6 6.92 +#define __HYPERVISOR_fpu_taskswitch 7 6.93 +#define __HYPERVISOR_yield 8 6.94 +#define __HYPERVISOR_exit 9 6.95 +#define __HYPERVISOR_dom0_op 10 6.96 +#define __HYPERVISOR_network_op 11 6.97 +#define __HYPERVISOR_block_io_op 12 6.98 +#define __HYPERVISOR_set_debugreg 13 6.99 +#define __HYPERVISOR_get_debugreg 14 6.100 +#define __HYPERVISOR_update_descriptor 15 6.101 +#define __HYPERVISOR_set_fast_trap 16 6.102 +#define __HYPERVISOR_dom_mem_op 17 6.103 +#define __HYPERVISOR_multicall 18 6.104 6.105 -#define __HYPERVISOR_set_trap_table 0 6.106 -#define __HYPERVISOR_pt_update 1 6.107 -#define __HYPERVISOR_console_write 2 6.108 -#define __HYPERVISOR_set_gdt 3 6.109 -#define __HYPERVISOR_stack_and_ldt_switch 4 6.110 -#define __HYPERVISOR_net_update 5 6.111 -#define __HYPERVISOR_fpu_taskswitch 6 6.112 -#define __HYPERVISOR_sched_op 7 6.113 -#define __HYPERVISOR_exit 8 6.114 -#define __HYPERVISOR_dom0_op 9 6.115 -#define __HYPERVISOR_network_op 10 6.116 -#define __HYPERVISOR_block_io_op 11 6.117 -#define __HYPERVISOR_set_debugreg 12 6.118 -#define __HYPERVISOR_get_debugreg 13 6.119 -#define __HYPERVISOR_update_descriptor 14 6.120 -#define __HYPERVISOR_set_fast_trap 15 6.121 -#define __HYPERVISOR_dom_mem_op 16 6.122 - 6.123 +/* And the trap vector is... */ 6.124 #define TRAP_INSTR "int $0x82" 6.125 6.126 6.127 -/* Event message note: 6.128 +/* 6.129 + * MULTICALLS 6.130 + * 6.131 + * Multicalls are listed in an array, with each element being a fixed size 6.132 + * (BYTES_PER_MULTICALL_ENTRY). Each is of the form (op, arg1, ..., argN) 6.133 + * where each element of the tuple is a machine word. 
6.134 + */ 6.135 +#define BYTES_PER_MULTICALL_ENTRY 32 6.136 + 6.137 + 6.138 +/* EVENT MESSAGES 6.139 * 6.140 * Here, as in the interrupts to the guestos, additional network interfaces 6.141 - * are defined. These definitions server as placeholders for the event bits, 6.142 + * are defined. These definitions server as placeholders for the event bits, 6.143 * however, in the code these events will allways be referred to as shifted 6.144 * offsets from the base NET events. 6.145 */ 6.146 @@ -113,14 +76,88 @@ typedef struct 6.147 6.148 /* Bit offsets, as opposed to the above masks. */ 6.149 #define _EVENT_BLK_RESP 0 6.150 -#define _EVENT_TIMER 1 6.151 -#define _EVENT_DIE 2 6.152 -#define _EVENT_NET_TX 3 6.153 -#define _EVENT_NET_RX 4 6.154 -#define _EVENT_DEBUG 5 6.155 +#define _EVENT_TIMER 1 6.156 +#define _EVENT_DIE 2 6.157 +#define _EVENT_NET_TX 3 6.158 +#define _EVENT_NET_RX 4 6.159 +#define _EVENT_DEBUG 5 6.160 + 6.161 + 6.162 +/* 6.163 + * Virtual addresses beyond this are not modifiable by guest OSes. 6.164 + * The machine->physical mapping table starts at this address, read-only 6.165 + * to all domains except DOM0. 6.166 + */ 6.167 +#define HYPERVISOR_VIRT_START (0xFC000000UL) 6.168 +#ifndef machine_to_phys_mapping 6.169 +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) 6.170 +#endif 6.171 6.172 6.173 /* 6.174 + * PAGE UPDATE COMMANDS AND FLAGS 6.175 + * 6.176 + * PGREQ_XXX: specified in least-significant bits of 'ptr' field. 6.177 + * All requests specify relevent PTE or PT address in 'ptr'. 6.178 + * Normal requests specify update value in 'value'. 6.179 + * Extended requests specify command in least 8 bits of 'value'. 6.180 + */ 6.181 +/* A normal page-table update request. */ 6.182 +#define PGREQ_NORMAL 0 6.183 +/* Update an entry in the machine->physical mapping table. */ 6.184 +#define PGREQ_MPT_UPDATE 1 6.185 +/* An extended command. 
*/ 6.186 +#define PGREQ_EXTENDED_COMMAND 2 6.187 +/* DOM0 can make entirely unchecked updates which do not affect refcnts. */ 6.188 +#define PGREQ_UNCHECKED_UPDATE 3 6.189 +/* Announce a new top-level page table. */ 6.190 +#define PGEXT_PIN_L1_TABLE 0 6.191 +#define PGEXT_PIN_L2_TABLE 1 6.192 +#define PGEXT_PIN_L3_TABLE 2 6.193 +#define PGEXT_PIN_L4_TABLE 3 6.194 +#define PGEXT_UNPIN_TABLE 4 6.195 +#define PGEXT_NEW_BASEPTR 5 6.196 +#define PGEXT_TLB_FLUSH 6 6.197 +#define PGEXT_INVLPG 7 6.198 +#define PGEXT_CMD_MASK 255 6.199 +#define PGEXT_CMD_SHIFT 8 6.200 + 6.201 + 6.202 +#ifndef __ASSEMBLY__ 6.203 + 6.204 +#include "network.h" 6.205 +#include "block.h" 6.206 + 6.207 +/* 6.208 + * Send an array of these to HYPERVISOR_set_trap_table() 6.209 + */ 6.210 +typedef struct trap_info_st 6.211 +{ 6.212 + unsigned char vector; /* exception/interrupt vector */ 6.213 + unsigned char dpl; /* privilege level */ 6.214 + unsigned short cs; /* code selector */ 6.215 + unsigned long address; /* code address */ 6.216 +} trap_info_t; 6.217 + 6.218 +/* 6.219 + * Send an array of these to HYPERVISOR_pt_update() 6.220 + */ 6.221 +typedef struct 6.222 +{ 6.223 + unsigned long ptr, val; /* *ptr = val */ 6.224 +} page_update_request_t; 6.225 + 6.226 +/* 6.227 + * Send an array of these to HYPERVISOR_multicall() 6.228 + */ 6.229 +typedef struct 6.230 +{ 6.231 + unsigned long op; 6.232 + unsigned long args[7]; 6.233 +} multicall_entry_t; 6.234 + 6.235 +/* 6.236 + * Xen/guestos shared data -- pointer provided in start_info. 6.237 * NB. We expect that this struct is smaller than a page. 6.238 */ 6.239 typedef struct shared_info_st { 6.240 @@ -150,36 +187,34 @@ typedef struct shared_info_st { 6.241 * registers, and executing 'iret'. 
6.242 * This callback is provided with an extended stack frame, augmented 6.243 * with saved values for segment registers %ds and %es: 6.244 - * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss] 6.245 + * %ds, %es, %eip, %cs, %eflags [, %oldesp, %oldss] 6.246 * Code segment is the default flat selector. 6.247 * FAULTS WHEN CALLING THIS HANDLER WILL TERMINATE THE DOMAIN!!! 6.248 */ 6.249 unsigned long failsafe_address; 6.250 6.251 - /* 6.252 - * Time: 6.253 - * The following abstractions are exposed: System Time, Wall Clock 6.254 - * Time, Domain Virtual Time. Domains can access Cycle counter time 6.255 - * directly. 6.256 - * XXX RN: Need something to pass NTP scaling to GuestOS. 6.257 + /* 6.258 + * Time: The following abstractions are exposed: System Time, Clock Time, 6.259 + * Domain Virtual Time. Domains can access Cycle counter time directly. 6.260 + * XXX RN: Need something to pass NTP scaling to GuestOS. 6.261 */ 6.262 6.263 - u64 cpu_freq; /* to calculate ticks -> real time */ 6.264 + u64 cpu_freq; /* to calculate ticks -> real time */ 6.265 6.266 - /* System Time */ 6.267 - long long system_time; /* in ns */ 6.268 - unsigned long st_timestamp; /* cyclecounter at last update */ 6.269 + /* System Time */ 6.270 + long long system_time; /* in ns */ 6.271 + unsigned long st_timestamp; /* cyclecounter at last update */ 6.272 6.273 - /* Wall Clock Time */ 6.274 - u32 wc_version; /* a version number for info below */ 6.275 - long tv_sec; /* essentially a struct timeval */ 6.276 - long tv_usec; 6.277 - long long wc_timestamp; /* system time at last update */ 6.278 - 6.279 - /* Domain Virtual Time */ 6.280 - unsigned long long domain_time; 6.281 + /* Wall Clock Time */ 6.282 + u32 wc_version; /* a version number for info below */ 6.283 + long tv_sec; /* essentially a struct timeval */ 6.284 + long tv_usec; 6.285 + long long wc_timestamp; /* system time at last update */ 6.286 + 6.287 + /* Domain Virtual Time */ 6.288 + unsigned long long domain_time; 6.289 6.290 - /* 
6.291 + /* 6.292 * Timeout values: 6.293 * Allow a domain to specify a timeout value in system time and 6.294 * domain virtual time. 6.295 @@ -193,18 +228,20 @@ typedef struct shared_info_st { 6.296 * NB. We expect that this struct is smaller than a page. 6.297 */ 6.298 typedef struct start_info_st { 6.299 - unsigned long nr_pages; /* total pages allocated to this domain */ 6.300 - shared_info_t *shared_info; /* VIRTUAL address of shared info struct */ 6.301 - unsigned long pt_base; /* VIRTUAL address of page directory */ 6.302 - unsigned long mod_start; /* VIRTUAL address of pre-loaded module */ 6.303 - unsigned long mod_len; /* size (bytes) of pre-loaded module */ 6.304 - net_ring_t *net_rings; /* network rings (VIRTUAL ADDRESS) */ 6.305 + unsigned long nr_pages; /* total pages allocated to this domain */ 6.306 + shared_info_t *shared_info; /* VIRTUAL address of shared info struct */ 6.307 + unsigned long pt_base; /* VIRTUAL address of page directory */ 6.308 + unsigned long mod_start; /* VIRTUAL address of pre-loaded module */ 6.309 + unsigned long mod_len; /* size (bytes) of pre-loaded module */ 6.310 + net_ring_t *net_rings; /* network rings (VIRTUAL ADDRESS) */ 6.311 int num_net_rings; 6.312 - unsigned long blk_ring; /* block io ring (MACHINE ADDRESS) */ 6.313 - unsigned char cmd_line[1]; /* variable-length */ 6.314 + unsigned long blk_ring; /* block io ring (MACHINE ADDRESS) */ 6.315 + unsigned char cmd_line[1]; /* variable-length */ 6.316 } start_info_t; 6.317 6.318 /* For use in guest OSes. */ 6.319 extern shared_info_t *HYPERVISOR_shared_info; 6.320 6.321 +#endif /* !__ASSEMBLY__ */ 6.322 + 6.323 #endif /* __HYPERVISOR_IF_H__ */
7.1 --- a/xen/include/xeno/config.h Fri Mar 14 18:21:09 2003 +0000 7.2 +++ b/xen/include/xeno/config.h Sat Mar 15 00:16:44 2003 +0000 7.3 @@ -114,7 +114,7 @@ 7.4 #define __GUEST_CS 0x11 7.5 #define __GUEST_DS 0x19 7.6 7.7 -#define NR_syscalls 255 7.8 +#define NR_syscalls 256 7.9 7.10 #define offsetof(_p,_f) ((unsigned long)&(((_p *)0)->_f)) 7.11 #define struct_cpy(_x,_y) (memcpy((_x),(_y),sizeof(*(_x))))
8.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c Fri Mar 14 18:21:09 2003 +0000 8.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/hypervisor.c Sat Mar 15 00:16:44 2003 +0000 8.3 @@ -13,6 +13,9 @@ 8.4 #include <asm/system.h> 8.5 #include <asm/ptrace.h> 8.6 8.7 +multicall_entry_t multicall_list[8]; 8.8 +int nr_multicall_ents = 0; 8.9 + 8.10 static unsigned long event_mask = 0; 8.11 8.12 void frobb(void) {}
9.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c Fri Mar 14 18:21:09 2003 +0000 9.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/process.c Sat Mar 15 00:16:44 2003 +0000 9.3 @@ -43,6 +43,7 @@ 9.4 #include <asm/i387.h> 9.5 #include <asm/desc.h> 9.6 #include <asm/mmu_context.h> 9.7 +#include <asm/multicall.h> 9.8 9.9 #include <linux/irq.h> 9.10 9.11 @@ -85,7 +86,7 @@ void cpu_idle (void) 9.12 9.13 while (1) { 9.14 while (!current->need_resched) 9.15 - HYPERVISOR_do_sched_op(NULL); 9.16 + HYPERVISOR_yield(); 9.17 schedule(); 9.18 check_pgt_cache(); 9.19 } 9.20 @@ -334,9 +335,28 @@ void __switch_to(struct task_struct *pre 9.21 struct thread_struct *prev = &prev_p->thread, 9.22 *next = &next_p->thread; 9.23 9.24 - unlazy_fpu(prev_p); 9.25 + /* 9.26 + * This is basically 'unlazy_fpu', except that we queue a multicall to 9.27 + * indicate FPU task switch, rather than synchronously trapping to Xen. 9.28 + */ 9.29 + if ( prev_p->flags & PF_USEDFPU ) 9.30 + { 9.31 + if ( cpu_has_fxsr ) 9.32 + asm volatile( "fxsave %0 ; fnclex" 9.33 + : "=m" (prev_p->thread.i387.fxsave) ); 9.34 + else 9.35 + asm volatile( "fnsave %0 ; fwait" 9.36 + : "=m" (prev_p->thread.i387.fsave) ); 9.37 + prev_p->flags &= ~PF_USEDFPU; 9.38 + queue_multicall0(__HYPERVISOR_fpu_taskswitch); 9.39 + } 9.40 9.41 - HYPERVISOR_stack_and_ldt_switch(__KERNEL_DS, next->esp0, 0); 9.42 + if ( next->esp0 != 0 ) 9.43 + queue_multicall2(__HYPERVISOR_stack_switch, __KERNEL_DS, next->esp0); 9.44 + 9.45 + /* EXECUTE ALL TASK SWITCH XEN SYSCALLS AT THIS POINT. */ 9.46 + execute_multicall_list(); 9.47 + sti(); /* matches 'cli' in switch_mm() */ 9.48 9.49 /* 9.50 * Save away %fs and %gs. No need to save %es and %ds, as
10.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c Fri Mar 14 18:21:09 2003 +0000 10.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/kernel/setup.c Sat Mar 15 00:16:44 2003 +0000 10.3 @@ -967,7 +967,7 @@ void __init cpu_init (void) 10.4 BUG(); 10.5 enter_lazy_tlb(&init_mm, current, nr); 10.6 10.7 - HYPERVISOR_stack_and_ldt_switch(__KERNEL_DS, current->thread.esp0, 0); 10.8 + HYPERVISOR_stack_switch(__KERNEL_DS, current->thread.esp0); 10.9 10.10 /* Force FPU initialization. */ 10.11 current->flags &= ~PF_USEDFPU;
11.1 --- a/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c Fri Mar 14 18:21:09 2003 +0000 11.2 +++ b/xenolinux-2.4.21-pre4-sparse/arch/xeno/mm/hypervisor.c Sat Mar 15 00:16:44 2003 +0000 11.3 @@ -11,6 +11,7 @@ 11.4 #include <asm/hypervisor.h> 11.5 #include <asm/page.h> 11.6 #include <asm/pgtable.h> 11.7 +#include <asm/multicall.h> 11.8 11.9 /* 11.10 * This suffices to protect us if we ever move to SMP domains. 11.11 @@ -85,6 +86,28 @@ static void DEBUG_disallow_pt_read(unsig 11.12 */ 11.13 unsigned long pt_baseptr; 11.14 11.15 +/* 11.16 + * MULTICALL_flush_page_update_queue: 11.17 + * This is a version of the flush which queues as part of a multicall. 11.18 + */ 11.19 +void MULTICALL_flush_page_update_queue(void) 11.20 +{ 11.21 + unsigned long flags; 11.22 + spin_lock_irqsave(&update_lock, flags); 11.23 + if ( idx != 0 ) 11.24 + { 11.25 +#if PT_UPDATE_DEBUG > 1 11.26 + printk("Flushing %d entries from pt update queue\n", idx); 11.27 +#endif 11.28 +#if PT_UPDATE_DEBUG > 0 11.29 + DEBUG_allow_pt_reads(); 11.30 +#endif 11.31 + queue_multicall2(__HYPERVISOR_pt_update, update_queue, idx); 11.32 + idx = 0; 11.33 + } 11.34 + spin_unlock_irqrestore(&update_lock, flags); 11.35 +} 11.36 + 11.37 static inline void __flush_page_update_queue(void) 11.38 { 11.39 #if PT_UPDATE_DEBUG > 1
12.1 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h Fri Mar 14 18:21:09 2003 +0000 12.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/hypervisor.h Sat Mar 15 00:16:44 2003 +0000 12.3 @@ -129,6 +129,7 @@ static inline int flush_page_update_queu 12.4 return idx; 12.5 } 12.6 #define XENO_flush_page_update_queue() (_flush_page_update_queue()) 12.7 +void MULTICALL_flush_page_update_queue(void); 12.8 12.9 12.10 /* 12.11 @@ -183,14 +184,24 @@ static inline int HYPERVISOR_set_gdt(uns 12.12 return ret; 12.13 } 12.14 12.15 -static inline int HYPERVISOR_stack_and_ldt_switch( 12.16 - unsigned long ss, unsigned long esp, unsigned long ldts) 12.17 +static inline int HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) 12.18 { 12.19 int ret; 12.20 __asm__ __volatile__ ( 12.21 TRAP_INSTR 12.22 - : "=a" (ret) : "0" (__HYPERVISOR_stack_and_ldt_switch), 12.23 - "b" (ss), "c" (esp), "d" (ldts) ); 12.24 + : "=a" (ret) : "0" (__HYPERVISOR_stack_switch), 12.25 + "b" (ss), "c" (esp) : "memory" ); 12.26 + 12.27 + return ret; 12.28 +} 12.29 + 12.30 +static inline int HYPERVISOR_ldt_switch(unsigned long ldts) 12.31 +{ 12.32 + int ret; 12.33 + __asm__ __volatile__ ( 12.34 + TRAP_INSTR 12.35 + : "=a" (ret) : "0" (__HYPERVISOR_ldt_switch), 12.36 + "b" (ldts) : "memory" ); 12.37 12.38 return ret; 12.39 } 12.40 @@ -215,13 +226,12 @@ static inline int HYPERVISOR_fpu_taskswi 12.41 return ret; 12.42 } 12.43 12.44 -static inline int HYPERVISOR_do_sched_op(void *sched_op) 12.45 +static inline int HYPERVISOR_yield(void) 12.46 { 12.47 int ret; 12.48 __asm__ __volatile__ ( 12.49 TRAP_INSTR 12.50 - : "=a" (ret) : "0" (__HYPERVISOR_sched_op), 12.51 - "b" (sched_op) ); 12.52 + : "=a" (ret) : "0" (__HYPERVISOR_yield) ); 12.53 12.54 return ret; 12.55 } 12.56 @@ -296,7 +306,7 @@ static inline int HYPERVISOR_update_desc 12.57 int ret; 12.58 __asm__ __volatile__ ( 12.59 TRAP_INSTR 12.60 - : "=a" (ret) : "0" (__HYPERVISOR_set_gdt), 12.61 + : "=a" (ret) : "0" 
(__HYPERVISOR_update_descriptor), 12.62 "b" (pa), "c" (word1), "d" (word2) ); 12.63 12.64 return ret; 12.65 @@ -324,4 +334,15 @@ static inline int HYPERVISOR_dom_mem_op( 12.66 return ret; 12.67 } 12.68 12.69 +static inline int HYPERVISOR_multicall(void *call_list, int nr_calls) 12.70 +{ 12.71 + int ret; 12.72 + __asm__ __volatile__ ( 12.73 + TRAP_INSTR 12.74 + : "=a" (ret) : "0" (__HYPERVISOR_multicall), 12.75 + "b" (call_list), "c" (nr_calls) : "memory" ); 12.76 + 12.77 + return ret; 12.78 +} 12.79 + 12.80 #endif /* __HYPERVISOR_H__ */
13.1 --- a/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/mmu_context.h Fri Mar 14 18:21:09 2003 +0000 13.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/mmu_context.h Sat Mar 15 00:16:44 2003 +0000 13.3 @@ -5,6 +5,7 @@ 13.4 #include <asm/desc.h> 13.5 #include <asm/atomic.h> 13.6 #include <asm/pgalloc.h> 13.7 +#include <asm/multicall.h> 13.8 13.9 /* 13.10 * possibly do the LDT unload here? 13.11 @@ -33,6 +34,7 @@ extern pgd_t *cur_pgd; 13.12 13.13 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu) 13.14 { 13.15 + cli(); /* protect flush_update_queue multicall */ 13.16 if (prev != next) { 13.17 /* stop flush ipis for the previous mm */ 13.18 clear_bit(cpu, &prev->cpu_vm_mask); 13.19 @@ -50,7 +52,7 @@ static inline void switch_mm(struct mm_s 13.20 /* Re-load page tables */ 13.21 cur_pgd = next->pgd; 13.22 queue_pt_switch(__pa(cur_pgd)); 13.23 - XENO_flush_page_update_queue(); 13.24 + MULTICALL_flush_page_update_queue(); 13.25 } 13.26 #ifdef CONFIG_SMP 13.27 else { 13.28 @@ -70,6 +72,10 @@ static inline void switch_mm(struct mm_s 13.29 } 13.30 13.31 #define activate_mm(prev, next) \ 13.32 - switch_mm((prev),(next),NULL,smp_processor_id()) 13.33 +do { \ 13.34 + switch_mm((prev),(next),NULL,smp_processor_id()); \ 13.35 + execute_multicall_list(); \ 13.36 + sti(); /* matches 'cli' in switch_mm() */ \ 13.37 +} while ( 0 ) 13.38 13.39 #endif
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 14.2 +++ b/xenolinux-2.4.21-pre4-sparse/include/asm-xeno/multicall.h Sat Mar 15 00:16:44 2003 +0000 14.3 @@ -0,0 +1,45 @@ 14.4 +/****************************************************************************** 14.5 + * multicall.h 14.6 + */ 14.7 + 14.8 +#ifndef __MULTICALL_H__ 14.9 +#define __MULTICALL_H__ 14.10 + 14.11 +#include <asm/hypervisor.h> 14.12 + 14.13 +extern multicall_entry_t multicall_list[]; 14.14 +extern int nr_multicall_ents; 14.15 + 14.16 +static inline void queue_multicall0(unsigned long op) 14.17 +{ 14.18 + int i = nr_multicall_ents; 14.19 + multicall_list[i].op = op; 14.20 + nr_multicall_ents = i+1; 14.21 +} 14.22 + 14.23 +static inline void queue_multicall1(unsigned long op, unsigned long arg1) 14.24 +{ 14.25 + int i = nr_multicall_ents; 14.26 + multicall_list[i].op = op; 14.27 + multicall_list[i].args[0] = arg1; 14.28 + nr_multicall_ents = i+1; 14.29 +} 14.30 + 14.31 +static inline void queue_multicall2( 14.32 + unsigned long op, unsigned long arg1, unsigned long arg2) 14.33 +{ 14.34 + int i = nr_multicall_ents; 14.35 + multicall_list[i].op = op; 14.36 + multicall_list[i].args[0] = arg1; 14.37 + multicall_list[i].args[1] = arg2; 14.38 + nr_multicall_ents = i+1; 14.39 +} 14.40 + 14.41 +static inline void execute_multicall_list(void) 14.42 +{ 14.43 + if ( unlikely(nr_multicall_ents == 0) ) return; 14.44 + (void)HYPERVISOR_multicall(multicall_list, nr_multicall_ents); 14.45 + nr_multicall_ents = 0; 14.46 +} 14.47 + 14.48 +#endif /* __MULTICALL_H__ */