ia64/xen-unstable
changeset 4373:8396f6da60b4
bitkeeper revision 1.1236.1.154 (4249c430s6iKHaP4AAIWnJQScN1CyA)
Fix lazy state switching when context-switching to/from the idle
domain. Track which domain's state is on each CPU and, for each
domain, which CPUs are running on its page tables.
Signed-off-by: Keir Fraser <keir@xensource.com>
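
The idea behind the change can be pictured with a small standalone model (plain, single-threaded C written for this page, not Xen code; percpu_curr, real_switch and the single-idle-domain assumption are simplifications mirroring percpu_ctxt[].curr_ed and struct domain's new cpuset field): a switch to the idle domain, or back to the domain whose state is already loaded, does no work, and teardown/pause paths instead force stale state off the CPUs recorded in the domain's cpuset.

    /*
     * Illustrative model of the bookkeeping this changeset adds.
     * percpu_curr[] plays the role of percpu_ctxt[cpu].curr_ed ("whose
     * state is loaded on this CPU"); dom->cpuset mirrors the new per-domain
     * bitmask ("which CPUs are running on this domain's page tables").
     */
    #include <stdio.h>

    #define NR_CPUS 4

    struct domain {
        const char   *name;
        unsigned long cpuset;   /* CPUs still holding this domain's state */
        int           is_idle;
    };

    static struct domain *percpu_curr[NR_CPUS];  /* state loaded per CPU */

    /* Heavyweight path: actually load 'next' onto 'cpu'. */
    static void real_switch(int cpu, struct domain *next)
    {
        struct domain *prev = percpu_curr[cpu];
        next->cpuset |=  (1UL << cpu);   /* set before switching tables    */
        prev->cpuset &= ~(1UL << cpu);   /* clear only once the switch is done */
        percpu_curr[cpu] = next;
        printf("cpu%d: loaded %s (was %s)\n", cpu, next->name, prev->name);
    }

    /* Lazy policy: switching to the idle domain, or back to the domain
     * whose state is already loaded, costs nothing. */
    static void context_switch(int cpu, struct domain *next)
    {
        if ( percpu_curr[cpu] == next || next->is_idle )
        {
            printf("cpu%d: lazy switch to %s, state left in place\n",
                   cpu, next->name);
            return;
        }
        real_switch(cpu, next);
    }

    /* Counterpart of synchronise_lazy_execstate(): make every CPU in
     * 'cpuset' that still holds someone else's state load its idle state. */
    static void synchronise_lazy_execstate(struct domain *idle[],
                                           unsigned long cpuset)
    {
        for ( int cpu = 0; cpu < NR_CPUS; cpu++ )
            if ( (cpuset & (1UL << cpu)) && !percpu_curr[cpu]->is_idle )
                real_switch(cpu, idle[cpu]);
    }

    int main(void)
    {
        struct domain idle = { "idle", 0, 1 }, d1 = { "dom1", 0, 0 };
        struct domain *idle_vec[NR_CPUS] = { &idle, &idle, &idle, &idle };

        for ( int cpu = 0; cpu < NR_CPUS; cpu++ )
        {
            percpu_curr[cpu] = &idle;
            idle.cpuset |= 1UL << cpu;
        }

        context_switch(0, &d1);    /* real switch: dom1's state lands on cpu0 */
        context_switch(0, &idle);  /* lazy: dom1's tables stay on cpu0        */
        context_switch(0, &d1);    /* lazy again: nothing to reload           */
        printf("dom1 cpuset = %#lx\n", d1.cpuset);

        /* Before tearing dom1 down, push its state off every CPU. */
        synchronise_lazy_execstate(idle_vec, d1.cpuset);
        printf("dom1 cpuset = %#lx\n", d1.cpuset);
        return 0;
    }

In the real patch the same invariant shows up as BUG_ON(d->cpuset != 0) in domain_relinquish_memory(): by the time a domain's memory is torn down, no CPU may still be running on its page tables.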
author   | kaf24@firebug.cl.cam.ac.uk
date     | Tue Mar 29 21:10:08 2005 +0000 (2005-03-29)
parents  | d58e731924a7
children | 817e74623cf4
files    | xen/arch/ia64/xenmisc.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/mm.c xen/arch/x86/shadow.c xen/arch/x86/smp.c xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_64/mm.c xen/common/dom0_ops.c xen/common/page_alloc.c xen/common/schedule.c xen/include/asm-x86/mm.h xen/include/public/xen.h xen/include/xen/sched.h xen/include/xen/smp.h
line diff
--- a/xen/arch/ia64/xenmisc.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/arch/ia64/xenmisc.c  Tue Mar 29 21:10:08 2005 +0000
@@ -53,7 +53,7 @@ platform_is_hp_ski(void)
 }
 
 /* calls in xen/common code that are unused on ia64 */
-void synchronise_pagetables(unsigned long cpu_mask) { return; }
+void synchronise_execution_state(unsigned long cpu_mask) { }
 
 int grant_table_create(struct domain *d) { return 0; }
 void grant_table_destroy(struct domain *d)
--- a/xen/arch/x86/domain.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/arch/x86/domain.c  Tue Mar 29 21:10:08 2005 +0000
@@ -45,13 +45,18 @@
 static int opt_noreboot = 0;
 boolean_param("noreboot", opt_noreboot);
 
+struct percpu_ctxt {
+    struct exec_domain *curr_ed;
+} __cacheline_aligned;
+static struct percpu_ctxt percpu_ctxt[NR_CPUS];
+
 static void default_idle(void)
 {
-    __cli();
+    local_irq_disable();
     if ( !softirq_pending(smp_processor_id()) )
         safe_halt();
     else
-        __sti();
+        local_irq_enable();
 }
 
 static __attribute_used__ void idle_loop(void)
@@ -73,6 +78,8 @@ void startup_cpu_idle_loop(void)
 {
     /* Just some sanity to ensure that the scheduler is set up okay. */
     ASSERT(current->domain->id == IDLE_DOMAIN_ID);
+    percpu_ctxt[smp_processor_id()].curr_ed = current;
+    set_bit(smp_processor_id(), &current->domain->cpuset);
     domain_unpause_by_systemcontroller(current->domain);
     raise_softirq(SCHEDULE_SOFTIRQ);
     do_softirq();
@@ -110,7 +117,7 @@ void machine_restart(char * __unused)
         safe_halt();
     }
 
-    __sti();
+    local_irq_enable();
 
     /* Ensure we are the boot CPU. */
     if ( GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid )
@@ -307,10 +314,10 @@ unsigned long alloc_monitor_pagetable(st
     struct pfn_info *mmfn_info;
     struct domain *d = ed->domain;
 
-    ASSERT(!pagetable_val(ed->arch.monitor_table)); /* we should only get called once */
+    ASSERT(pagetable_val(ed->arch.monitor_table) == 0);
 
     mmfn_info = alloc_domheap_page(NULL);
-    ASSERT( mmfn_info );
+    ASSERT(mmfn_info != NULL);
 
     mmfn = (unsigned long) (mmfn_info - frame_table);
     mpl2e = (l2_pgentry_t *) map_domain_mem(mmfn << PAGE_SHIFT);
@@ -326,7 +333,7 @@ unsigned long alloc_monitor_pagetable(st
 
     ed->arch.monitor_vtable = mpl2e;
 
-    // map the phys_to_machine map into the Read-Only MPT space for this domain
+    /* Map the p2m map into the Read-Only MPT space for this domain. */
     mpl2e[l2_table_offset(RO_MPT_VIRT_START)] =
         mk_l2_pgentry(pagetable_val(ed->arch.phys_table) | __PAGE_HYPERVISOR);
 
@@ -578,19 +585,10 @@ void toggle_guest_mode(struct exec_domai
     : "=r" (__r) : "r" (value), "0" (__r) );\
     __r; })
 
-static void switch_segments(
-    struct xen_regs *regs, struct exec_domain *p, struct exec_domain *n)
+static void load_segments(struct exec_domain *p, struct exec_domain *n)
 {
     int all_segs_okay = 1;
 
-    if ( !is_idle_task(p->domain) )
-    {
-        __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (p->arch.user_ctxt.ds) );
-        __asm__ __volatile__ ( "movl %%es,%0" : "=m" (p->arch.user_ctxt.es) );
-        __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (p->arch.user_ctxt.fs) );
-        __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (p->arch.user_ctxt.gs) );
-    }
-
     /* Either selector != 0 ==> reload. */
     if ( unlikely(p->arch.user_ctxt.ds |
                   n->arch.user_ctxt.ds) )
@@ -654,7 +652,8 @@ static void switch_segments(
 
     if ( unlikely(!all_segs_okay) )
     {
-        unsigned long *rsp =
+        struct xen_regs *regs = get_execution_context();
+        unsigned long   *rsp =
             (n->arch.flags & TF_kernel_mode) ?
             (unsigned long *)regs->rsp :
             (unsigned long *)n->arch.kernel_sp;
@@ -689,6 +688,24 @@ static void switch_segments(
     }
 }
 
+static void save_segments(struct exec_domain *p)
+{
+    __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (p->arch.user_ctxt.ds) );
+    __asm__ __volatile__ ( "movl %%es,%0" : "=m" (p->arch.user_ctxt.es) );
+    __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (p->arch.user_ctxt.fs) );
+    __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (p->arch.user_ctxt.gs) );
+}
+
+static void clear_segments(void)
+{
+    __asm__ __volatile__ (
+        "movl %0,%%ds; "
+        "movl %0,%%es; "
+        "movl %0,%%fs; "
+        "movl %0,%%gs; swapgs; movl %0,%%gs"
+        : : "r" (0) );
+}
+
 long do_switch_to_user(void)
 {
     struct xen_regs *regs = get_execution_context();
@@ -720,80 +737,96 @@ long do_switch_to_user(void)
 
 #elif defined(__i386__)
 
-#define switch_segments(_r, _p, _n) ((void)0)
+#define load_segments(_p, _n) ((void)0)
+#define save_segments(_p)     ((void)0)
+#define clear_segments()      ((void)0)
 
 #endif
 
-/*
- * This special macro can be used to load a debugging register
- */
 #define loaddebug(_ed,_reg) \
-    __asm__("mov %0,%%db" #_reg \
-            : /* no output */ \
-            :"r" ((_ed)->debugreg[_reg]))
+    __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_ed)->debugreg[_reg]))
 
-void context_switch(struct exec_domain *prev_p, struct exec_domain *next_p)
+static void __context_switch(void)
 {
-#ifdef __i386__
-    struct tss_struct *tss = init_tss + smp_processor_id();
-#endif
     execution_context_t *stack_ec = get_execution_context();
+    unsigned int         cpu = smp_processor_id();
+    struct exec_domain  *p = percpu_ctxt[cpu].curr_ed;
+    struct exec_domain  *n = current;
 
-    __cli();
-
-    /* Switch guest general-register state. */
-    if ( !is_idle_task(prev_p->domain) )
+    if ( !is_idle_task(p->domain) )
     {
-        memcpy(&prev_p->arch.user_ctxt,
+        memcpy(&p->arch.user_ctxt,
               stack_ec,
               sizeof(*stack_ec));
-        unlazy_fpu(prev_p);
-        CLEAR_FAST_TRAP(&prev_p->arch);
+        unlazy_fpu(p);
+        CLEAR_FAST_TRAP(&p->arch);
+        save_segments(p);
+    }
+
+    memcpy(stack_ec,
+           &n->arch.user_ctxt,
+           sizeof(*stack_ec));
+
+    /* Maybe switch the debug registers. */
+    if ( unlikely(n->arch.debugreg[7]) )
+    {
+        loaddebug(&n->arch, 0);
+        loaddebug(&n->arch, 1);
+        loaddebug(&n->arch, 2);
+        loaddebug(&n->arch, 3);
+        /* no 4 and 5 */
+        loaddebug(&n->arch, 6);
+        loaddebug(&n->arch, 7);
     }
 
-    if ( !is_idle_task(next_p->domain) )
+    if ( !VMX_DOMAIN(n) )
    {
-        memcpy(stack_ec,
-               &next_p->arch.user_ctxt,
-               sizeof(*stack_ec));
-
-        /* Maybe switch the debug registers. */
-        if ( unlikely(next_p->arch.debugreg[7]) )
-        {
-            loaddebug(&next_p->arch, 0);
-            loaddebug(&next_p->arch, 1);
-            loaddebug(&next_p->arch, 2);
-            loaddebug(&next_p->arch, 3);
-            /* no 4 and 5 */
-            loaddebug(&next_p->arch, 6);
-            loaddebug(&next_p->arch, 7);
-        }
-
-        if ( !VMX_DOMAIN(next_p) )
-        {
-            SET_FAST_TRAP(&next_p->arch);
+        SET_FAST_TRAP(&n->arch);
 
 #ifdef __i386__
+        {
             /* Switch the kernel ring-1 stack. */
-            tss->esp1 = next_p->arch.kernel_sp;
-            tss->ss1  = next_p->arch.kernel_ss;
+            struct tss_struct *tss = &init_tss[cpu];
+            tss->esp1 = n->arch.kernel_sp;
+            tss->ss1  = n->arch.kernel_ss;
+        }
 #endif
-        }
-
-        /* Switch page tables. */
-        write_ptbase(next_p);
     }
 
-    set_current(next_p);
+    set_bit(cpu, &n->domain->cpuset);
+    write_ptbase(n);
+    clear_bit(cpu, &p->domain->cpuset);
+
+    __asm__ __volatile__ ( "lgdt %0" : "=m" (*n->arch.gdt) );
 
-    __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
+    percpu_ctxt[cpu].curr_ed = n;
+}
+
+
+void context_switch(struct exec_domain *prev, struct exec_domain *next)
+{
+    struct exec_domain *realprev;
+
+    local_irq_disable();
 
-    __sti();
+    set_current(next);
 
-    if ( !VMX_DOMAIN(next_p) )
+    if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_ed) == next) ||
+         is_idle_task(next->domain) )
+    {
+        local_irq_enable();
+    }
+    else
     {
-        load_LDT(next_p);
-        switch_segments(stack_ec, prev_p, next_p);
+        __context_switch();
+
+        local_irq_enable();
+
+        if ( !VMX_DOMAIN(next) )
+        {
+            load_LDT(next);
+            load_segments(realprev, next);
+        }
     }
 
     /*
@@ -802,13 +835,27 @@ void context_switch(struct exec_domain *
     * 'prev' (after this point, a dying domain's info structure may be freed
     * without warning).
     */
-    clear_bit(EDF_RUNNING, &prev_p->ed_flags);
+    clear_bit(EDF_RUNNING, &prev->ed_flags);
 
-    schedule_tail(next_p);
+    schedule_tail(next);
 
     BUG();
 }
 
+static void __synchronise_lazy_execstate(void *unused)
+{
+    if ( percpu_ctxt[smp_processor_id()].curr_ed != current )
+    {
+        __context_switch();
+        load_LDT(current);
+        clear_segments();
+    }
+}
+void synchronise_lazy_execstate(unsigned long cpuset)
+{
+    smp_subset_call_function(__synchronise_lazy_execstate, NULL, 1, cpuset);
+}
+
 unsigned long __hypercall_create_continuation(
     unsigned int op, unsigned int nr_args, ...)
 {
@@ -947,13 +994,11 @@ void domain_relinquish_memory(struct dom
 {
     struct exec_domain *ed;
 
-    /* Ensure that noone is running over the dead domain's page tables. */
-    synchronise_pagetables(~0UL);
+    BUG_ON(d->cpuset != 0);
 
     /* Release device mappings of other domains */
     gnttab_release_dev_mappings( d->grant_table );
 
-
     /* Exit shadow mode before deconstructing final guest page table. */
     shadow_mode_disable(d);
--- a/xen/arch/x86/domain_build.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/arch/x86/domain_build.c  Tue Mar 29 21:10:08 2005 +0000
@@ -421,7 +421,7 @@ int construct_dom0(struct domain *d,
     update_pagetables(ed);
 
     /* Install the new page tables. */
-    __cli();
+    local_irq_disable();
     write_ptbase(ed);
 
     /* Copy the OS image and free temporary buffer. */
@@ -498,7 +498,7 @@ int construct_dom0(struct domain *d,
 
     /* Reinstate the caller's page tables. */
     write_ptbase(current);
-    __sti();
+    local_irq_enable();
 
 #if defined(__i386__)
     /* Destroy low mappings - they were only for our convenience. */
--- a/xen/arch/x86/mm.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/arch/x86/mm.c  Tue Mar 29 21:10:08 2005 +0000
@@ -1147,16 +1147,13 @@ int get_page_type(struct pfn_info *page,
                  * may be unnecessary (e.g., page was GDT/LDT) but those
                  * circumstances should be very rare.
                  */
-                struct exec_domain *ed;
-                unsigned long mask = 0;
-                for_each_exec_domain ( page_get_owner(page), ed )
-                    mask |= 1 << ed->processor;
-                mask = tlbflush_filter_cpuset(mask, page->tlbflush_timestamp);
-
-                if ( unlikely(mask != 0) )
+                unsigned long cpuset = tlbflush_filter_cpuset(
+                    page_get_owner(page)->cpuset, page->tlbflush_timestamp);
+
+                if ( unlikely(cpuset != 0) )
                 {
                     perfc_incrc(need_flush_tlb_flush);
-                    flush_tlb_mask(mask);
+                    flush_tlb_mask(cpuset);
                 }
 
                 /* We lose existing type, back pointer, and validity. */
@@ -2842,7 +2839,7 @@ void audit_domain(struct domain *d)
 
     if ( d != current->domain )
         domain_pause(d);
-    synchronise_pagetables(~0UL);
+    synchronise_lazy_execstate(~0UL);
 
     printk("pt base=%lx sh_info=%x\n",
            pagetable_val(d->exec_domain[0]->arch.guest_table)>>PAGE_SHIFT,
--- a/xen/arch/x86/shadow.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/arch/x86/shadow.c  Tue Mar 29 21:10:08 2005 +0000
@@ -384,7 +384,6 @@ int shadow_mode_control(struct domain *d
     }
 
     domain_pause(d);
-    synchronise_pagetables(~0UL);
 
     shadow_lock(d);
 
--- a/xen/arch/x86/smp.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/arch/x86/smp.c  Tue Mar 29 21:10:08 2005 +0000
@@ -59,9 +59,7 @@
  */
 
 /*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ * The following functions deal with sending IPIs between CPUs.
  */
 
 static inline int __prepare_ICR (unsigned int shortcut, int vector)
@@ -82,22 +80,22 @@ static inline void __send_IPI_shortcut(u
      * of the value read we use an atomic rmw access to avoid costly
      * cli/sti. Otherwise we use an even cheaper single atomic write
      * to the APIC.
-     */
+     */
     unsigned int cfg;
 
     /*
-     * Wait for idle.
-     */
+     * Wait for idle.
+     */
     apic_wait_icr_idle();
 
     /*
-     * No need to touch the target chip field
-     */
+     * No need to touch the target chip field
+     */
     cfg = __prepare_ICR(shortcut, vector);
 
     /*
-     * Send the IPI. The write to APIC_ICR fires this off.
-     */
+     * Send the IPI. The write to APIC_ICR fires this off.
+     */
     apic_write_around(APIC_ICR, cfg);
 }
 
@@ -111,106 +109,44 @@ static inline void send_IPI_mask(int mas
     unsigned long cfg;
     unsigned long flags;
 
-    __save_flags(flags);
-    __cli();
+    local_irq_save(flags);
 
-
     /*
      * Wait for idle.
      */
     apic_wait_icr_idle();
-
+
     /*
      * prepare target chip field
      */
     cfg = __prepare_ICR2(mask);
     apic_write_around(APIC_ICR2, cfg);
-
+
     /*
      * program the ICR
      */
     cfg = __prepare_ICR(0, vector);
-
+
     /*
      * Send the IPI. The write to APIC_ICR fires this off.
      */
     apic_write_around(APIC_ICR, cfg);
 
-    __restore_flags(flags);
+    local_irq_restore(flags);
 }
 
 static inline void send_IPI_allbutself(int vector)
 {
     /*
-     * if there are no other CPUs in the system then
-     * we get an APIC send error if we try to broadcast.
-     * thus we have to avoid sending IPIs in this case.
+     * If there are no other CPUs in the system then we get an APIC send error
+     * if we try to broadcast. thus we have to avoid sending IPIs in this case.
      */
-    if (!(smp_num_cpus > 1))
+    if ( smp_num_cpus <= 1 )
         return;
 
     __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
 }
 
-/*
- * ********* XEN NOTICE **********
- * I've left the following comments lying around as they look liek they might
- * be useful to get multiprocessor guest OSes going. However, I suspect the
- * issues we face will be quite different so I've ripped out all the
- * TLBSTATE logic (I didn't understand it anyway :-). These comments do
- * not apply to Xen, therefore! -- Keir (8th Oct 2003).
- */
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) clear_bit(cpu, &old_mm.cpu_vm_mask);
- *      Stop ipi delivery for the old mm. This is not synchronized with
- *      the other cpus, but smp_invalidate_interrupt ignore flush ipis
- *      for the wrong mm, and in the worst case we perform a superflous
- *      tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- *      Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *      was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- *      Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) set_bit(cpu, &new_mm.cpu_vm_mask);
- *      Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- *      cpu_tlbstate[].active_mm is correct, cpu0 already handles
- *      flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- *      Atomically set the bit [other cpus will start sending flush ipis],
- *      and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- *
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
 static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
 static unsigned long flush_cpumask;
 
@@ -226,21 +162,19 @@ void flush_tlb_mask(unsigned long mask)
 {
     ASSERT(local_irq_is_enabled());
 
-    if ( mask & (1 << smp_processor_id()) )
+    if ( mask & (1UL << smp_processor_id()) )
     {
         local_flush_tlb();
-        mask &= ~(1 << smp_processor_id());
+        mask &= ~(1UL << smp_processor_id());
     }
 
     if ( mask != 0 )
     {
         spin_lock(&flush_lock);
-
         flush_cpumask = mask;
         send_IPI_mask(mask, INVALIDATE_TLB_VECTOR);
         while ( flush_cpumask != 0 )
             cpu_relax();
-
         spin_unlock(&flush_lock);
     }
 }
@@ -254,7 +188,8 @@ void new_tlbflush_clock_period(void)
     if ( smp_num_cpus > 1 )
     {
         spin_lock(&flush_lock);
-        flush_cpumask = ((1 << smp_num_cpus) - 1) & ~(1 << smp_processor_id());
+        flush_cpumask  = (1UL << smp_num_cpus) - 1;
+        flush_cpumask &= ~(1UL << smp_processor_id());
         send_IPI_allbutself(INVALIDATE_TLB_VECTOR);
         while ( flush_cpumask != 0 )
             cpu_relax();
@@ -266,124 +201,138 @@ void new_tlbflush_clock_period(void)
     tlbflush_clock++;
 }
 
-static void flush_tlb_all_pge_ipi(void* info)
+static void flush_tlb_all_pge_ipi(void *info)
 {
     __flush_tlb_pge();
 }
 
 void flush_tlb_all_pge(void)
 {
-    smp_call_function (flush_tlb_all_pge_ipi,0,1,1);
+    smp_call_function(flush_tlb_all_pge_ipi, 0, 1, 1);
     __flush_tlb_pge();
 }
 
 void smp_send_event_check_mask(unsigned long cpu_mask)
 {
-    cpu_mask &= ~(1<<smp_processor_id());
+    cpu_mask &= ~(1UL << smp_processor_id());
     if ( cpu_mask != 0 )
         send_IPI_mask(cpu_mask, EVENT_CHECK_VECTOR);
 }
 
 /*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
+ * Structure and data for smp_call_function().
  */
-static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
 
 struct call_data_struct {
     void (*func) (void *info);
     void *info;
-    atomic_t started;
-    atomic_t finished;
+    unsigned long started;
+    unsigned long finished;
     int wait;
 };
 
-static struct call_data_struct * call_data;
+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED;
+static struct call_data_struct *call_data;
 
 /*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
+ * Run a function on all other CPUs.
+ *  @func: The function to run. This must be fast and non-blocking.
+ *  @info: An arbitrary pointer to pass to the function.
+ *  @wait: If true, spin until function has completed on other CPUs.
+ *  Returns: 0 on success, else a negative status code.
  */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
-                       int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler, or bottom halfs.
- */
+int smp_call_function(
+    void (*func) (void *info), void *info, int unused, int wait)
 {
     struct call_data_struct data;
-    int cpus = smp_num_cpus-1;
+    unsigned long cpuset;
 
-    if (!cpus)
+    ASSERT(local_irq_is_enabled());
+
+    cpuset = ((1UL << smp_num_cpus) - 1) & ~(1UL << smp_processor_id());
+    if ( cpuset == 0 )
         return 0;
 
     data.func = func;
     data.info = info;
-    atomic_set(&data.started, 0);
+    data.started = data.finished = 0;
     data.wait = wait;
-    if (wait)
-        atomic_set(&data.finished, 0);
-
-    ASSERT(local_irq_is_enabled());
 
     spin_lock(&call_lock);
 
     call_data = &data;
     wmb();
-    /* Send a message to all other CPUs and wait for them to respond */
+
     send_IPI_allbutself(CALL_FUNCTION_VECTOR);
 
-    /* Wait for response */
-    while (atomic_read(&data.started) != cpus)
-        barrier();
-
-    if (wait)
-        while (atomic_read(&data.finished) != cpus)
-            barrier();
+    while ( (wait ? data.finished : data.started) != cpuset )
+        cpu_relax();
 
     spin_unlock(&call_lock);
 
     return 0;
 }
 
-static void stop_this_cpu (void * dummy)
+/* Run a function on a subset of CPUs (may include local CPU). */
+int smp_subset_call_function(
+    void (*func) (void *info), void *info, int wait, unsigned long cpuset)
 {
-    /*
-     * Remove this CPU:
-     */
-    clear_bit(smp_processor_id(), &cpu_online_map);
-    __cli();
-    disable_local_APIC();
-    for(;;) __asm__("hlt");
+    struct call_data_struct data;
+
+    ASSERT(local_irq_is_enabled());
+
+    if ( cpuset & (1UL << smp_processor_id()) )
+    {
+        local_irq_disable();
+        (*func)(info);
+        local_irq_enable();
+    }
+
+    cpuset &= ((1UL << smp_num_cpus) - 1) & ~(1UL << smp_processor_id());
+    if ( cpuset == 0 )
+        return 0;
+
+    data.func = func;
+    data.info = info;
+    data.started = data.finished = 0;
+    data.wait = wait;
+
+    spin_lock(&call_lock);
+
+    call_data = &data;
+    wmb();
+
+    send_IPI_mask(cpuset, CALL_FUNCTION_VECTOR);
+
+    while ( (wait ? data.finished : data.started) != cpuset )
+        cpu_relax();
+
+    spin_unlock(&call_lock);
+
+    return 0;
 }
 
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
+static void stop_this_cpu (void *dummy)
+{
+    clear_bit(smp_processor_id(), &cpu_online_map);
+
+    disable_local_APIC();
+
+    for ( ; ; )
+        __asm__ __volatile__ ( "hlt" );
+}
 
 void smp_send_stop(void)
 {
+    /* Stop all other CPUs in the system. */
     smp_call_function(stop_this_cpu, NULL, 1, 0);
     smp_num_cpus = 1;
 
-    __cli();
+    local_irq_disable();
     disable_local_APIC();
-    __sti();
+    local_irq_enable();
 }
 
-/*
- * Nothing to do, as all the work is done automatically when
- * we return from the interrupt.
- */
 asmlinkage void smp_event_check_interrupt(void)
 {
     ack_APIC_irq();
@@ -394,23 +343,20 @@ asmlinkage void smp_call_function_interr
 {
     void (*func) (void *info) = call_data->func;
     void *info = call_data->info;
-    int wait = call_data->wait;
 
     ack_APIC_irq();
     perfc_incrc(ipis);
 
-    /*
-     * Notify initiating CPU that I've grabbed the data and am
-     * about to execute the function
-     */
-    mb();
-    atomic_inc(&call_data->started);
-    /*
-     * At this point the info structure may be out of scope unless wait==1
-     */
-    (*func)(info);
-    if (wait) {
+    if ( call_data->wait )
+    {
+        (*func)(info);
         mb();
-        atomic_inc(&call_data->finished);
+        set_bit(smp_processor_id(), &call_data->finished);
+    }
+    else
+    {
+        mb();
+        set_bit(smp_processor_id(), &call_data->started);
+        (*func)(info);
     }
 }
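
For reference, the new completion handshake in smp.c can be sketched outside the hypervisor (a simplified, single-threaded stand-in written for this page; handler_on_cpu and say_hello are invented for the illustration): instead of atomic started/finished counters compared against smp_num_cpus-1, each responder sets its own bit, and the initiator waits until the bitmask equals the exact cpuset it targeted, which is what lets smp_subset_call_function() address an arbitrary subset of CPUs.

    /*
     * Standalone illustration (not Xen code) of the bitmask-based completion
     * tracking used by the reworked smp_call_function()/
     * smp_subset_call_function().  The "IPI handlers" are simply invoked in a
     * loop here rather than delivered asynchronously.
     */
    #include <stdio.h>

    #define NR_CPUS 4

    struct call_data {
        void (*func)(void *info);
        void *info;
        unsigned long started;
        unsigned long finished;
        int wait;
    };

    static void handler_on_cpu(struct call_data *d, int cpu)
    {
        if ( d->wait )
        {
            (*d->func)(d->info);           /* run first, ...              */
            d->finished |= 1UL << cpu;     /* ... then report completion  */
        }
        else
        {
            d->started |= 1UL << cpu;      /* report early: the initiator */
            (*d->func)(d->info);           /* may reuse 'd' after this    */
        }
    }

    static void say_hello(void *info)
    {
        printf("hello from a remote CPU (arg=%s)\n", (const char *)info);
    }

    int main(void)
    {
        unsigned long cpuset = (1UL << 1) | (1UL << 3);  /* target cpus 1, 3 */
        struct call_data data = { say_hello, "demo", 0, 0, 1 };

        /* Stand-in for send_IPI_mask() plus the responders' handlers. */
        for ( int cpu = 0; cpu < NR_CPUS; cpu++ )
            if ( cpuset & (1UL << cpu) )
                handler_on_cpu(&data, cpu);

        /* The initiator's wait condition from the patch. */
        if ( (data.wait ? data.finished : data.started) == cpuset )
            printf("all targeted CPUs (%#lx) have responded\n", cpuset);
        return 0;
    }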
--- a/xen/arch/x86/x86_32/mm.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/arch/x86/x86_32/mm.c  Tue Mar 29 21:10:08 2005 +0000
@@ -180,22 +180,6 @@ void subarch_init_memory(struct domain *
     }
 }
 
-/*
- * Allows shooting down of borrowed page-table use on specific CPUs.
- * Specifically, we borrow page tables when running the idle domain.
- */
-static void __synchronise_pagetables(void *mask)
-{
-    struct exec_domain *ed = current;
-    if ( ((unsigned long)mask & (1 << ed->processor)) &&
-         is_idle_task(ed->domain) )
-        write_ptbase(ed);
-}
-void synchronise_pagetables(unsigned long cpu_mask)
-{
-    __synchronise_pagetables((void *)cpu_mask);
-    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
-}
 
 long do_stack_switch(unsigned long ss, unsigned long esp)
 {
--- a/xen/arch/x86/x86_64/mm.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/arch/x86/x86_64/mm.c  Tue Mar 29 21:10:08 2005 +0000
@@ -236,23 +236,6 @@ void subarch_init_memory(struct domain *
     }
 }
 
-/*
- * Allows shooting down of borrowed page-table use on specific CPUs.
- * Specifically, we borrow page tables when running the idle domain.
- */
-static void __synchronise_pagetables(void *mask)
-{
-    struct exec_domain *ed = current;
-    if ( ((unsigned long)mask & (1 << ed->processor)) &&
-         is_idle_task(ed->domain) )
-        write_ptbase(ed);
-}
-void synchronise_pagetables(unsigned long cpu_mask)
-{
-    __synchronise_pagetables((void *)cpu_mask);
-    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
-}
-
 long do_stack_switch(unsigned long ss, unsigned long esp)
 {
     if ( (ss & 3) != 3 )
--- a/xen/common/dom0_ops.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/common/dom0_ops.c  Tue Mar 29 21:10:08 2005 +0000
@@ -266,7 +266,6 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
         else
         {
             exec_domain_pause(ed);
-            synchronise_pagetables(~0UL);
             if ( ed->processor != (cpu % smp_num_cpus) )
                 set_bit(EDF_MIGRATED, &ed->ed_flags);
             set_bit(EDF_CPUPINNED, &ed->ed_flags);
--- a/xen/common/page_alloc.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/common/page_alloc.c  Tue Mar 29 21:10:08 2005 +0000
@@ -534,8 +534,6 @@ void free_domheap_pages(struct pfn_info
 {
     int i, drop_dom_ref;
     struct domain *d = page_get_owner(pg);
-    struct exec_domain *ed;
-    int cpu_mask = 0;
 
     ASSERT(!in_irq());
 
@@ -557,14 +555,11 @@ void free_domheap_pages(struct pfn_info
         /* NB. May recursively lock from domain_relinquish_memory(). */
         spin_lock_recursive(&d->page_alloc_lock);
 
-        for_each_exec_domain ( d, ed )
-            cpu_mask |= 1 << ed->processor;
-
         for ( i = 0; i < (1 << order); i++ )
         {
             ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
             pg[i].tlbflush_timestamp = tlbflush_current_time();
-            pg[i].u.free.cpu_mask = cpu_mask;
+            pg[i].u.free.cpu_mask = d->cpuset;
             list_del(&pg[i].list);
         }
 
--- a/xen/common/schedule.c  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/common/schedule.c  Tue Mar 29 21:10:08 2005 +0000
@@ -192,7 +192,6 @@ void sched_add_domain(struct exec_domain
 
 void sched_rem_domain(struct exec_domain *ed)
 {
-
     rem_ac_timer(&ed->timer);
     SCHED_OP(rem_task, ed);
     TRACE_3D(TRC_SCHED_DOM_REM, ed->domain->id, ed->eid, ed);
--- a/xen/include/asm-x86/mm.h  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/include/asm-x86/mm.h  Tue Mar 29 21:10:08 2005 +0000
@@ -206,12 +206,6 @@ static inline int get_page_and_type(stru
 int check_descriptor(struct desc_struct *d);
 
 /*
- * Use currently-executing domain's pagetables on the specified CPUs.
- * i.e., stop borrowing someone else's tables if you are the idle domain.
- */
-void synchronise_pagetables(unsigned long cpu_mask);
-
-/*
  * The MPT (machine->physical mapping table) is an array of word-sized
  * values, indexed on machine frame number. It is expected that guest OSes
  * will use it to store a "physical" frame number to give the appearance of
--- a/xen/include/public/xen.h  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/include/public/xen.h  Tue Mar 29 21:10:08 2005 +0000
@@ -124,11 +124,11 @@
  * ptr[:2] -- Machine address of new page-table base to install in MMU
  *            when in user space.
  *
- * val[7:0] == MMUEXT_TLB_FLUSH:
- *   No additional arguments.
+ * val[7:0] == MMUEXT_TLB_FLUSH_LOCAL:
+ *   No additional arguments. Flushes local TLB.
  *
- * val[7:0] == MMUEXT_INVLPG:
- *   ptr[:2] -- Linear address to be flushed from the TLB.
+ * val[7:0] == MMUEXT_INVLPG_LOCAL:
+ *   ptr[:2] -- Linear address to be flushed from the local TLB.
  *
  * val[7:0] == MMUEXT_FLUSH_CACHE:
  *   No additional arguments. Writes back and flushes cache contents.
@@ -154,6 +154,12 @@
  * val[7:0] == MMUEXT_REASSIGN_PAGE:
  *   ptr[:2] -- A machine address within the page to be reassigned to the FD.
  *   (NB. page must currently belong to the calling domain).
+ *
+ * val[7:0] == MMUEXT_TLB_FLUSH_MULTI:
+ *   Flush TLBs of VCPUs specified in @mask.
+ *
+ * val[7:0] == MMUEXT_INVLPG_MULTI:
+ *   ptr[:2] -- Linear address to be flushed from TLB of VCPUs in @mask.
  */
 #define MMU_NORMAL_PT_UPDATE     0 /* checked '*ptr = val'. ptr is MA.      */
 #define MMU_MACHPHYS_UPDATE      2 /* ptr = MA of frame to modify entry for */
@@ -164,8 +170,8 @@
 #define MMUEXT_PIN_L4_TABLE      3 /* ptr = MA of frame to pin              */
 #define MMUEXT_UNPIN_TABLE       4 /* ptr = MA of frame to unpin            */
 #define MMUEXT_NEW_BASEPTR       5 /* ptr = MA of new pagetable base        */
-#define MMUEXT_TLB_FLUSH         6 /* ptr = NULL                            */
-#define MMUEXT_INVLPG            7 /* ptr = VA to invalidate                */
+#define MMUEXT_TLB_FLUSH_LOCAL   6 /* ptr = NULL                            */
+#define MMUEXT_INVLPG_LOCAL      7 /* ptr = VA to invalidate                */
 #define MMUEXT_FLUSH_CACHE       8
 #define MMUEXT_SET_LDT           9 /* ptr = VA of table; val = # entries    */
 #define MMUEXT_SET_FOREIGNDOM   10 /* val[31:16] = dom                      */
@@ -173,6 +179,8 @@
 #define MMUEXT_TRANSFER_PAGE    12 /* ptr = MA of frame; val[31:16] = dom   */
 #define MMUEXT_REASSIGN_PAGE    13
 #define MMUEXT_NEW_USER_BASEPTR 14
+#define MMUEXT_TLB_FLUSH_MULTI  15 /* ptr = NULL; mask = VCPUs to flush     */
+#define MMUEXT_INVLPG_MULTI     16 /* ptr = VA to inval.; mask = VCPUs      */
 #define MMUEXT_CMD_MASK        255
 #define MMUEXT_CMD_SHIFT         8
 
@@ -180,6 +188,9 @@
 #define UVMF_FLUSH_TLB          1 /* Flush entire TLB.                      */
 #define UVMF_INVLPG             2 /* Flush the VA mapping being updated.    */
 
+/* Backwards source compatibility. */
+#define MMUEXT_TLB_FLUSH MMUEXT_TLB_FLUSH_LOCAL
+#define MMUEXT_INVLPG    MMUEXT_INVLPG_LOCAL
 
 /*
  * Commands to HYPERVISOR_sched_op().
@@ -257,8 +268,9 @@ typedef u16 domid_t;
  */
 typedef struct
 {
-    memory_t ptr;    /* Machine address of PTE. */
-    memory_t val;    /* New contents of PTE.    */
+    memory_t ptr;           /* Machine address of PTE.                */
+    memory_t val;           /* New contents of PTE.                   */
+    /*unsigned long mask;*/ /* VCPU mask (certain extended commands). */
 } PACKED mmu_update_t;
 
 /*
--- a/xen/include/xen/sched.h  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/include/xen/sched.h  Tue Mar 29 21:10:08 2005 +0000
@@ -143,6 +143,9 @@ struct domain
 
     struct exec_domain *exec_domain[MAX_VIRT_CPUS];
 
+    /* Bitmask of CPUs on which this domain is running. */
+    unsigned long cpuset;
+
     struct arch_domain arch;
 };
 
@@ -250,6 +253,12 @@ void init_idle_task(void);
 void domain_wake(struct exec_domain *d);
 void domain_sleep(struct exec_domain *d);
 
+/*
+ * Force loading of currently-executing domain state on the specified set
+ * of CPUs. This is used to counteract lazy state switching where required.
+ */
+void synchronise_lazy_execstate(unsigned long cpuset);
+
 extern void context_switch(
     struct exec_domain *prev,
     struct exec_domain *next);
@@ -330,14 +339,21 @@ static inline void exec_domain_pause(str
     ASSERT(ed != current);
     atomic_inc(&ed->pausecnt);
     domain_sleep(ed);
+    synchronise_lazy_execstate(ed->domain->cpuset & (1UL << ed->processor));
 }
 
 static inline void domain_pause(struct domain *d)
 {
     struct exec_domain *ed;
 
-    for_each_exec_domain(d, ed)
-        exec_domain_pause(ed);
+    for_each_exec_domain ( d, ed )
+    {
+        ASSERT(ed != current);
+        atomic_inc(&ed->pausecnt);
+        domain_sleep(ed);
+    }
+
+    synchronise_lazy_execstate(d->cpuset);
 }
 
 static inline void exec_domain_unpause(struct exec_domain *ed)
@@ -351,7 +367,7 @@ static inline void domain_unpause(struct
 {
     struct exec_domain *ed;
 
-    for_each_exec_domain(d, ed)
+    for_each_exec_domain ( d, ed )
         exec_domain_unpause(ed);
 }
 
@@ -361,30 +377,26 @@ static inline void exec_domain_unblock(s
     domain_wake(ed);
 }
 
-static inline void domain_unblock(struct domain *d)
-{
-    struct exec_domain *ed;
-
-    for_each_exec_domain(d, ed)
-        exec_domain_unblock(ed);
-}
-
 static inline void domain_pause_by_systemcontroller(struct domain *d)
 {
     struct exec_domain *ed;
 
-    for_each_exec_domain(d, ed) {
+    for_each_exec_domain ( d, ed )
+    {
         ASSERT(ed != current);
         if ( !test_and_set_bit(EDF_CTRLPAUSE, &ed->ed_flags) )
             domain_sleep(ed);
     }
+
+    synchronise_lazy_execstate(d->cpuset);
 }
 
 static inline void domain_unpause_by_systemcontroller(struct domain *d)
 {
     struct exec_domain *ed;
 
-    for_each_exec_domain(d, ed) {
+    for_each_exec_domain ( d, ed )
+    {
         if ( test_and_clear_bit(EDF_CTRLPAUSE, &ed->ed_flags) )
             domain_wake(ed);
     }
--- a/xen/include/xen/smp.h  Tue Mar 29 14:52:44 2005 +0000
+++ b/xen/include/xen/smp.h  Tue Mar 29 21:10:08 2005 +0000
@@ -43,8 +43,10 @@ extern void smp_commence(void);
 /*
  * Call a function on all other processors
  */
-extern int smp_call_function (void (*func) (void *info), void *info,
-                              int retry, int wait);
+extern int smp_call_function(
+    void (*func) (void *info), void *info, int retry, int wait);
+extern int smp_subset_call_function(
+    void (*func) (void *info), void *info, int wait, unsigned long cpuset);
 
 /*
  * True once the per process idle is forked
@@ -84,7 +86,8 @@ extern volatile int smp_msg_id;
 #define kernel_lock()
 #define cpu_logical_map(cpu)                 0
 #define cpu_number_map(cpu)                  0
-#define smp_call_function(func,info,retry,wait)    ({ 0; })
+#define smp_call_function(func,info,retry,wait)  0
+#define smp_subset_call_function(f,i,w,c)        ({ if ( (c&1) ) (*f)(i); 0; })
 #define cpu_online_map                       1
 
 #endif