ia64/xen-unstable

changeset 13469:d12ea0bfecce

[IA64] Implement eager save, lazy restore FP algorithm

Signed-off-by: Anthony Xu <anthony.xu@intel.com>
author awilliam@xenbuild2.aw
date Tue Jan 23 10:52:07 2007 -0700 (2007-01-23)
parents 8bfc6e85eeba
children b194a9f3eba2
files xen/arch/ia64/asm-offsets.c xen/arch/ia64/asm-xsi-offsets.c xen/arch/ia64/vmx/optvfault.S xen/arch/ia64/vmx/vmx_process.c xen/arch/ia64/vmx/vmx_vcpu.c xen/arch/ia64/xen/domain.c xen/arch/ia64/xen/faults.c xen/arch/ia64/xen/hyperprivop.S xen/arch/ia64/xen/vcpu.c xen/include/asm-ia64/domain.h xen/include/asm-ia64/linux-xen/asm/percpu.h xen/include/asm-ia64/vcpu.h xen/include/public/arch-ia64.h
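Editor's note, before the line diff: this changeset drops the unconditional __ia64_save_fpu()/__ia64_load_fpu() pair on every context switch and replaces it with an eager-save, lazy-restore scheme. The outgoing vcpu's high FP registers (fph) are saved only if it actually modified them (tracked by a per-vcpu "mfh" flag), and the incoming vcpu starts with psr.dfh set so that its first FP access raises the disabled-FP-register fault (vector 25); the handler then reloads fph unless the per-CPU fp_owner shows the registers are already live on this CPU. The guest-visible psr.dfh (vpsr_dfh) is kept separate from the hypervisor-imposed one (hpsr_dfh) so the guest still reads back the value it programmed. The sketch below is an illustrative, self-contained model of that state machine, not the real Xen code; toy_vcpu, save_fph(), load_fph() and the fp_owner variable are stand-in names (the real logic lives in lazy_fp_switch() in domain.c and the vector-25 handlers in faults.c and vmx_process.c).

    #include <stddef.h>
    #include <stdio.h>

    struct toy_vcpu {
        int fp_dirty;              /* "mfh": FP registers modified since switch-in   */
        int fp_disabled;           /* "dfh": the next FP access will fault           */
        double fph[96];            /* stand-in for the high FP register file f32-f127 */
    };

    static struct toy_vcpu *fp_owner;    /* a per-CPU variable in the real code       */

    static void save_fph(struct toy_vcpu *v) { (void)v; /* __ia64_save_fpu() in Xen */ }
    static void load_fph(struct toy_vcpu *v) { (void)v; /* __ia64_load_fpu() in Xen */ }

    /* Context-switch side: eager save of a dirty state, restore deferred. */
    static void toy_fp_switch(struct toy_vcpu *prev, struct toy_vcpu *next)
    {
        if (prev->fp_dirty) {            /* save only if prev really touched FP      */
            save_fph(prev);
            fp_owner = prev;             /* this CPU still holds prev's registers    */
        }
        next->fp_disabled = 1;           /* restore waits for next's first FP access */
        next->fp_dirty = 0;
    }

    /* Disabled-FP-register fault (vector 25): restore only when necessary. */
    static void toy_fp_fault(struct toy_vcpu *v)
    {
        if (fp_owner != v)               /* registers not already live on this CPU   */
            load_fph(v);
        v->fp_disabled = 0;              /* let FP instructions proceed              */
        v->fp_dirty = 1;                 /* the next switch-out must save again      */
    }

    int main(void)
    {
        struct toy_vcpu a = {0}, b = {0};
        toy_fp_switch(&a, &b);           /* b comes in with FP disabled              */
        toy_fp_fault(&b);                /* b touches FP: lazy restore happens here  */
        toy_fp_switch(&b, &a);           /* b is dirty, so it is eagerly saved       */
        printf("fp_owner is %s\n", fp_owner == &b ? "b" : "not b");
        return 0;
    }

A vcpu that never touches the FP registers during its timeslice therefore pays neither a save nor a restore, which is the point of the change.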
line diff
     1.1 --- a/xen/arch/ia64/asm-offsets.c	Tue Jan 23 09:45:26 2007 -0700
     1.2 +++ b/xen/arch/ia64/asm-offsets.c	Tue Jan 23 10:52:07 2007 -0700
     1.3 @@ -57,6 +57,7 @@ void foo(void)
     1.4  
     1.5  	DEFINE(IA64_VCPU_DOMAIN_OFFSET, offsetof (struct vcpu, domain));
     1.6  	DEFINE(IA64_VCPU_HYPERCALL_CONTINUATION_OFS, offsetof (struct vcpu, arch.hypercall_continuation));
     1.7 +	DEFINE(IA64_VCPU_FP_PSR_OFFSET, offsetof (struct vcpu, arch.fp_psr));
     1.8  	DEFINE(IA64_VCPU_META_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_rr0));
     1.9  	DEFINE(IA64_VCPU_META_SAVED_RR0_OFFSET, offsetof (struct vcpu, arch.metaphysical_saved_rr0));
    1.10  	DEFINE(IA64_VCPU_BREAKIMM_OFFSET, offsetof (struct vcpu, arch.breakimm));
     2.1 --- a/xen/arch/ia64/asm-xsi-offsets.c	Tue Jan 23 09:45:26 2007 -0700
     2.2 +++ b/xen/arch/ia64/asm-xsi-offsets.c	Tue Jan 23 10:52:07 2007 -0700
     2.3 @@ -61,6 +61,8 @@ void foo(void)
     2.4  	DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir);
     2.5  	DEFINE_MAPPED_REG_OFS(XSI_ITV_OFS, itv);
     2.6  	DEFINE_MAPPED_REG_OFS(XSI_PTA_OFS, pta);
     2.7 +	DEFINE_MAPPED_REG_OFS(XSI_VPSR_DFH_OFS, vpsr_dfh);
     2.8 +	DEFINE_MAPPED_REG_OFS(XSI_HPSR_DFH_OFS, hpsr_dfh);
     2.9  	DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled);
    2.10  	DEFINE_MAPPED_REG_OFS(XSI_VPSR_PP_OFS, vpsr_pp);
    2.11  	DEFINE_MAPPED_REG_OFS(XSI_METAPHYS_OFS, metaphysical_mode);
     3.1 --- a/xen/arch/ia64/vmx/optvfault.S	Tue Jan 23 09:45:26 2007 -0700
     3.2 +++ b/xen/arch/ia64/vmx/optvfault.S	Tue Jan 23 10:52:07 2007 -0700
     3.3 @@ -192,6 +192,13 @@ GLOBAL_ENTRY(vmx_asm_rsm)
     3.4      ;;   
     3.5      st8 [r17]=r19
     3.6      and r20=r20,r28
     3.7 +    adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
     3.8 +    ;;
     3.9 +    ld8 r27=[r27]
    3.10 +    ;;
    3.11 +    tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
    3.12 +    ;;
    3.13 +    (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
    3.14      ;;
    3.15      mov cr.ipsr=r20
    3.16      tbit.nz p6,p0=r23,0
    3.17 @@ -360,6 +367,14 @@ vmx_asm_mov_to_psr_1:
    3.18      add r20=r19,r20
    3.19      mov b0=r24
    3.20      ;;
    3.21 +    adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
    3.22 +    ;;
    3.23 +    ld8 r27=[r27]
    3.24 +    ;;
    3.25 +    tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
    3.26 +    ;;
    3.27 +    (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
    3.28 +    ;;
    3.29      mov cr.ipsr=r20
    3.30      cmp.ne p6,p0=r0,r0
    3.31      ;;
     4.1 --- a/xen/arch/ia64/vmx/vmx_process.c	Tue Jan 23 09:45:26 2007 -0700
     4.2 +++ b/xen/arch/ia64/vmx/vmx_process.c	Tue Jan 23 10:52:07 2007 -0700
     4.3 @@ -79,36 +79,56 @@ static u64 vec2off[68] = {0x0,0x400,0x80
     4.4  
     4.5  
     4.6  void vmx_reflect_interruption(u64 ifa, u64 isr, u64 iim,
     4.7 -                              u64 vector, REGS *regs)
     4.8 +                              u64 vec, REGS *regs)
     4.9  {
    4.10 -    u64 status;
    4.11 +    u64 status, vector;
    4.12      VCPU *vcpu = current;
    4.13      u64 vpsr = VCPU(vcpu, vpsr);
    4.14 -    vector=vec2off[vector];
    4.15 +    
    4.16 +    vector = vec2off[vec];
    4.17      if(!(vpsr&IA64_PSR_IC)&&(vector!=IA64_DATA_NESTED_TLB_VECTOR)){
    4.18          panic_domain(regs, "Guest nested fault vector=%lx!\n", vector);
    4.19      }
    4.20 -    else{ // handle fpswa emulation
    4.21 +
    4.22 +    switch (vec) {
    4.23 +
    4.24 +    case 25:	// IA64_DISABLED_FPREG_VECTOR
    4.25 +
    4.26 +        if (FP_PSR(vcpu) & IA64_PSR_DFH) {
    4.27 +            FP_PSR(vcpu) = IA64_PSR_MFH;
    4.28 +            if (__ia64_per_cpu_var(fp_owner) != vcpu)
    4.29 +                __ia64_load_fpu(vcpu->arch._thread.fph);
    4.30 +        }
    4.31 +        if (!(VCPU(vcpu, vpsr) & IA64_PSR_DFH)) {
    4.32 +            regs->cr_ipsr &= ~IA64_PSR_DFH;
    4.33 +            return;
    4.34 +        }
    4.35 +
    4.36 +        break;       
    4.37 +        
    4.38 +    case 32:	// IA64_FP_FAULT_VECTOR
    4.39 +        // handle fpswa emulation
    4.40          // fp fault
    4.41 -        if (vector == IA64_FP_FAULT_VECTOR) {
    4.42 -            status = handle_fpu_swa(1, regs, isr);
    4.43 -            if (!status) {
    4.44 -                vcpu_increment_iip(vcpu);
    4.45 -                return;
    4.46 -            } else if (IA64_RETRY == status)
    4.47 -                return;
    4.48 -        }
    4.49 +        status = handle_fpu_swa(1, regs, isr);
    4.50 +        if (!status) {
    4.51 +            vcpu_increment_iip(vcpu);
    4.52 +            return;
    4.53 +        } else if (IA64_RETRY == status)
    4.54 +            return;
    4.55 +        break;
    4.56 +
    4.57 +    case 33:	// IA64_FP_TRAP_VECTOR
    4.58          //fp trap
    4.59 -        else if (vector == IA64_FP_TRAP_VECTOR) {
    4.60 -            status = handle_fpu_swa(0, regs, isr);
    4.61 -            if (!status)
    4.62 -                return;
    4.63 -            else if (IA64_RETRY == status) {
    4.64 -                vcpu_decrement_iip(vcpu);
    4.65 -                return;
    4.66 -            }
    4.67 +        status = handle_fpu_swa(0, regs, isr);
    4.68 +        if (!status)
    4.69 +            return;
    4.70 +        else if (IA64_RETRY == status) {
    4.71 +            vcpu_decrement_iip(vcpu);
    4.72 +            return;
    4.73          }
    4.74 -    }
    4.75 +        break;
    4.76 +    
    4.77 +    } 
    4.78      VCPU(vcpu,isr)=isr;
    4.79      VCPU(vcpu,iipa) = regs->cr_iip;
    4.80      if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
     5.1 --- a/xen/arch/ia64/vmx/vmx_vcpu.c	Tue Jan 23 09:45:26 2007 -0700
     5.2 +++ b/xen/arch/ia64/vmx/vmx_vcpu.c	Tue Jan 23 10:52:07 2007 -0700
     5.3 @@ -141,6 +141,9 @@ vmx_vcpu_set_psr(VCPU *vcpu, unsigned lo
     5.4  
     5.5      regs->cr_ipsr = (regs->cr_ipsr & mask ) | ( value & (~mask) );
     5.6  
     5.7 +    if (FP_PSR(vcpu) & IA64_PSR_DFH)
     5.8 +        regs->cr_ipsr |= IA64_PSR_DFH;
     5.9 +
    5.10      check_mm_mode_switch(vcpu, old_psr, new_psr);
    5.11      return ;
    5.12  }
     6.1 --- a/xen/arch/ia64/xen/domain.c	Tue Jan 23 09:45:26 2007 -0700
     6.2 +++ b/xen/arch/ia64/xen/domain.c	Tue Jan 23 10:52:07 2007 -0700
     6.3 @@ -68,6 +68,8 @@ static void init_switch_stack(struct vcp
     6.4  DEFINE_PER_CPU(uint8_t *, current_psr_i_addr);
     6.5  DEFINE_PER_CPU(int *, current_psr_ic_addr);
     6.6  
     6.7 +DEFINE_PER_CPU(struct vcpu *, fp_owner);
     6.8 +
     6.9  #include <xen/sched-if.h>
    6.10  
    6.11  static void
    6.12 @@ -135,12 +137,44 @@ static void flush_vtlb_for_context_switc
    6.13  	}
    6.14  }
    6.15  
    6.16 +static void lazy_fp_switch(struct vcpu *prev, struct vcpu *next)
    6.17 +{
    6.18 +	/*
    6.19 +	 * Implement eager save, lazy restore
    6.20 +	 */
    6.21 +	if (!is_idle_vcpu(prev)) {
    6.22 +		if (VMX_DOMAIN(prev)) {
    6.23 +			if (FP_PSR(prev) & IA64_PSR_MFH) {
    6.24 +				__ia64_save_fpu(prev->arch._thread.fph);
    6.25 +				__ia64_per_cpu_var(fp_owner) = prev;
    6.26 +			}
    6.27 +		} else {
    6.28 +			if (PSCB(prev, hpsr_mfh)) {
    6.29 +				__ia64_save_fpu(prev->arch._thread.fph);
    6.30 +				__ia64_per_cpu_var(fp_owner) = prev;
    6.31 +			}
    6.32 +		}
    6.33 +	}
    6.34 +
    6.35 +	if (!is_idle_vcpu(next)) {
    6.36 +		if (VMX_DOMAIN(next)) {
    6.37 +			FP_PSR(next) = IA64_PSR_DFH;
    6.38 +			vcpu_regs(next)->cr_ipsr |= IA64_PSR_DFH;
    6.39 +		} else {
    6.40 +			PSCB(next, hpsr_dfh) = 1;
    6.41 +			PSCB(next, hpsr_mfh) = 0;
    6.42 +			vcpu_regs(next)->cr_ipsr |= IA64_PSR_DFH;
    6.43 +		}
    6.44 +	}
    6.45 +}
    6.46 +
    6.47  void schedule_tail(struct vcpu *prev)
    6.48  {
    6.49  	extern char ia64_ivt;
    6.50 -	context_saved(prev);
    6.51  
    6.52 +	context_saved(prev);
    6.53  	ia64_disable_vhpt_walker();
    6.54 +
    6.55  	if (VMX_DOMAIN(current)) {
    6.56  		vmx_do_launch(current);
    6.57  		migrate_timer(&current->arch.arch_vmx.vtm.vtm_timer,
    6.58 @@ -148,7 +182,7 @@ void schedule_tail(struct vcpu *prev)
    6.59  	} else {
    6.60  		ia64_set_iva(&ia64_ivt);
    6.61  		load_region_regs(current);
    6.62 -        	ia64_set_pta(vcpu_pta(current));
    6.63 +		ia64_set_pta(vcpu_pta(current));
    6.64  		vcpu_load_kernel_regs(current);
    6.65  		__ia64_per_cpu_var(current_psr_i_addr) = &current->domain->
    6.66  		  shared_info->vcpu_info[current->vcpu_id].evtchn_upcall_mask;
    6.67 @@ -165,64 +199,67 @@ void context_switch(struct vcpu *prev, s
    6.68  
    6.69      local_irq_save(spsr);
    6.70  
    6.71 -    if (!is_idle_domain(prev->domain)) 
    6.72 -        __ia64_save_fpu(prev->arch._thread.fph);
    6.73 -    if (!is_idle_domain(next->domain)) 
    6.74 -        __ia64_load_fpu(next->arch._thread.fph);
    6.75 -
    6.76      if (VMX_DOMAIN(prev)) {
    6.77 -	vmx_save_state(prev);
    6.78 -	if (!VMX_DOMAIN(next)) {
    6.79 -	    /* VMX domains can change the physical cr.dcr.
    6.80 -	     * Restore default to prevent leakage. */
    6.81 -	    ia64_setreg(_IA64_REG_CR_DCR, (IA64_DCR_DP | IA64_DCR_DK
    6.82 -	                   | IA64_DCR_DX | IA64_DCR_DR | IA64_DCR_PP
    6.83 -	                   | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
    6.84 -	}
    6.85 +        vmx_save_state(prev);
    6.86 +        if (!VMX_DOMAIN(next)) {
    6.87 +            /* VMX domains can change the physical cr.dcr.
    6.88 +             * Restore default to prevent leakage. */
    6.89 +            ia64_setreg(_IA64_REG_CR_DCR, (IA64_DCR_DP | IA64_DCR_DK
    6.90 +                           | IA64_DCR_DX | IA64_DCR_DR | IA64_DCR_PP
    6.91 +                           | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
    6.92 +        }
    6.93      }
    6.94      if (VMX_DOMAIN(next))
    6.95 -	vmx_load_state(next);
    6.96 +        vmx_load_state(next);
    6.97  
    6.98      ia64_disable_vhpt_walker();
    6.99 -    /*ia64_psr(ia64_task_regs(next))->dfh = !ia64_is_local_fpu_owner(next);*/
   6.100 +    lazy_fp_switch(prev, current);
   6.101 +
   6.102      prev = ia64_switch_to(next);
   6.103  
   6.104      /* Note: ia64_switch_to does not return here at vcpu initialization.  */
   6.105  
   6.106 -    //cpu_set(smp_processor_id(), current->domain->domain_dirty_cpumask);
   6.107 - 
   6.108 -    if (VMX_DOMAIN(current)){
   6.109 -	vmx_load_all_rr(current);
   6.110 -	migrate_timer(&current->arch.arch_vmx.vtm.vtm_timer,
   6.111 -	              current->processor);
   6.112 +    if (VMX_DOMAIN(current)) {
   6.113 +        vmx_load_all_rr(current);
   6.114 +        migrate_timer(&current->arch.arch_vmx.vtm.vtm_timer,
   6.115 +                      current->processor);
   6.116      } else {
   6.117 -	struct domain *nd;
   6.118 -    	extern char ia64_ivt;
   6.119 +        struct domain *nd;
   6.120 +        extern char ia64_ivt;
   6.121  
   6.122 -    	ia64_set_iva(&ia64_ivt);
   6.123 +        ia64_set_iva(&ia64_ivt);
   6.124  
   6.125 -	nd = current->domain;
   6.126 -    	if (!is_idle_domain(nd)) {
   6.127 -	    	load_region_regs(current);
   6.128 -		ia64_set_pta(vcpu_pta(current));
   6.129 -	    	vcpu_load_kernel_regs(current);
   6.130 -		vcpu_set_next_timer(current);
   6.131 -		if (vcpu_timer_expired(current))
   6.132 -			vcpu_pend_timer(current);
   6.133 -		__ia64_per_cpu_var(current_psr_i_addr) = &nd->shared_info->
   6.134 -		  vcpu_info[current->vcpu_id].evtchn_upcall_mask;
   6.135 -		__ia64_per_cpu_var(current_psr_ic_addr) =
   6.136 -		  (int *)(nd->arch.shared_info_va + XSI_PSR_IC_OFS);
   6.137 -    	} else {
   6.138 -		/* When switching to idle domain, only need to disable vhpt
   6.139 -		 * walker. Then all accesses happen within idle context will
   6.140 -		 * be handled by TR mapping and identity mapping.
   6.141 -		 */
   6.142 -		__ia64_per_cpu_var(current_psr_i_addr) = NULL;
   6.143 -		__ia64_per_cpu_var(current_psr_ic_addr) = NULL;
   6.144 +        nd = current->domain;
   6.145 +        if (!is_idle_domain(nd)) {
   6.146 +            load_region_regs(current);
   6.147 +            ia64_set_pta(vcpu_pta(current));
   6.148 +            vcpu_load_kernel_regs(current);
   6.149 +            vcpu_set_next_timer(current);
   6.150 +            if (vcpu_timer_expired(current))
   6.151 +                vcpu_pend_timer(current);
   6.152 +            __ia64_per_cpu_var(current_psr_i_addr) = &nd->shared_info->
   6.153 +                vcpu_info[current->vcpu_id].evtchn_upcall_mask;
   6.154 +            __ia64_per_cpu_var(current_psr_ic_addr) =
   6.155 +                (int *)(nd->arch.shared_info_va + XSI_PSR_IC_OFS);
   6.156 +        } else {
   6.157 +            /* When switching to idle domain, only need to disable vhpt
   6.158 +             * walker. Then all accesses happen within idle context will
   6.159 +             * be handled by TR mapping and identity mapping.
   6.160 +             */
   6.161 +            __ia64_per_cpu_var(current_psr_i_addr) = NULL;
   6.162 +            __ia64_per_cpu_var(current_psr_ic_addr) = NULL;
   6.163          }
   6.164      }
   6.165      local_irq_restore(spsr);
   6.166 +
   6.167 +    /* lazy fp */
   6.168 +    if (current->processor != current->arch.last_processor) {
   6.169 +        unsigned long *addr;
   6.170 +        addr = (unsigned long *)per_cpu_addr(fp_owner,
   6.171 +                                             current->arch.last_processor);
   6.172 +        ia64_cmpxchg(acq, addr, current, 0, 8);
   6.173 +    }
   6.174 +   
   6.175      flush_vtlb_for_context_switch(prev, current);
   6.176      context_saved(prev);
   6.177  }
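A hedged note on the "lazy fp" hunk at the end of context_switch() above: fp_owner is only updated when a vcpu's registers are saved, so after a vcpu migrates, the CPU it last ran on can still record it as owner. If that stale entry survived, a later disabled-FP-register fault on that CPU would skip the reload and hand the vcpu whatever FP contents happen to be live there. The ia64_cmpxchg(acq, addr, current, 0, 8) call therefore clears the old CPU's slot, but only if it still names this vcpu. The fragment below is an illustrative stand-alone model of that clear, using toy names and C11 atomics rather than the real ia64_cmpxchg()/per_cpu_addr() pair.

    #include <stdatomic.h>
    #include <stddef.h>

    struct toy_vcpu;                                            /* opaque in this sketch */

    #define TOY_NR_CPUS 64
    static _Atomic(struct toy_vcpu *) toy_fp_owner[TOY_NR_CPUS];   /* one slot per CPU   */

    /* Clear the previous CPU's owner slot iff it still points at this vcpu,
     * mirroring ia64_cmpxchg(acq, per_cpu_addr(fp_owner, last_cpu), current, 0, 8). */
    static void toy_clear_stale_fp_owner(struct toy_vcpu *v, int last_cpu)
    {
        struct toy_vcpu *expected = v;
        atomic_compare_exchange_strong(&toy_fp_owner[last_cpu], &expected, NULL);
    }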
     7.1 --- a/xen/arch/ia64/xen/faults.c	Tue Jan 23 09:45:26 2007 -0700
     7.2 +++ b/xen/arch/ia64/xen/faults.c	Tue Jan 23 10:52:07 2007 -0700
     7.3 @@ -92,6 +92,9 @@ void reflect_interruption(unsigned long 
     7.4  	regs->cr_iip = ((unsigned long)PSCBX(v, iva) + vector) & ~0xffUL;
     7.5  	regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
     7.6  
     7.7 +	if (PSCB(v, hpsr_dfh))
     7.8 +		regs->cr_ipsr |= IA64_PSR_DFH;  
     7.9 +	PSCB(v, vpsr_dfh) = 0;
    7.10  	v->vcpu_info->evtchn_upcall_mask = 1;
    7.11  	PSCB(v, interrupt_collection_enabled) = 0;
    7.12  
    7.13 @@ -152,6 +155,9 @@ void reflect_event(void)
    7.14  	regs->cr_iip = v->arch.event_callback_ip;
    7.15  	regs->cr_ipsr = (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
    7.16  
    7.17 +	if (PSCB(v, hpsr_dfh))
    7.18 +		regs->cr_ipsr |= IA64_PSR_DFH;
    7.19 +	PSCB(v, vpsr_dfh) = 0;
    7.20  	v->vcpu_info->evtchn_upcall_mask = 1;
    7.21  	PSCB(v, interrupt_collection_enabled) = 0;
    7.22  }
    7.23 @@ -261,6 +267,10 @@ void ia64_do_page_fault(unsigned long ad
    7.24  		    ((unsigned long)PSCBX(current, iva) + fault) & ~0xffUL;
    7.25  		regs->cr_ipsr =
    7.26  		    (regs->cr_ipsr & ~DELIVER_PSR_CLR) | DELIVER_PSR_SET;
    7.27 +
    7.28 +		if (PSCB(current, hpsr_dfh))
    7.29 +			regs->cr_ipsr |= IA64_PSR_DFH;  
    7.30 +		PSCB(current, vpsr_dfh) = 0;
    7.31  		perfc_incra(slow_reflect, fault >> 8);
    7.32  		return;
    7.33  	}
    7.34 @@ -608,6 +618,16 @@ ia64_handle_reflection(unsigned long ifa
    7.35  		vector = IA64_GENEX_VECTOR;
    7.36  		break;
    7.37  	case 25:
    7.38 +		if (PSCB(v, hpsr_dfh)) {
    7.39 +			PSCB(v, hpsr_dfh) = 0;
    7.40 +			PSCB(v, hpsr_mfh) = 1;
    7.41 +			if (__ia64_per_cpu_var(fp_owner) != v)
    7.42 +				__ia64_load_fpu(v->arch._thread.fph);
    7.43 +		}
    7.44 +		if (!PSCB(v, vpsr_dfh)) {
    7.45 +			regs->cr_ipsr &= ~IA64_PSR_DFH;
    7.46 +			return;
    7.47 +		}
    7.48  		vector = IA64_DISABLED_FPREG_VECTOR;
    7.49  		break;
    7.50  	case 26:
     8.1 --- a/xen/arch/ia64/xen/hyperprivop.S	Tue Jan 23 09:45:26 2007 -0700
     8.2 +++ b/xen/arch/ia64/xen/hyperprivop.S	Tue Jan 23 10:52:07 2007 -0700
     8.3 @@ -252,6 +252,10 @@ ENTRY(hyper_ssm_i)
     8.4  	movl r27=~DELIVER_PSR_CLR;;
     8.5  	or r29=r29,r28;;
     8.6  	and r29=r29,r27;;
     8.7 +	// set hpsr_dfh to ipsr
     8.8 +	adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
     8.9 +	ld1 r28=[r28];;
    8.10 +	dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
    8.11  	mov cr.ipsr=r29;;
    8.12  	// set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
    8.13  	extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
    8.14 @@ -269,6 +273,12 @@ ENTRY(hyper_ssm_i)
    8.15  	movl r22=THIS_CPU(current_psr_i_addr)
    8.16  	adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
    8.17  	ld8 r22=[r22]
    8.18 +	;;
    8.19 +	adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
    8.20 +	ld1 r28=[r27];;
    8.21 +	st1 [r27]=r0
    8.22 +	dep r30=r28,r30,IA64_PSR_DFH_BIT,1
    8.23 +	;;
    8.24  	st8 [r21]=r30;;
    8.25  	// set shared_mem interrupt_delivery_enabled to 0
    8.26  	// set shared_mem interrupt_collection_enabled to 0
    8.27 @@ -607,6 +617,10 @@ ENTRY(fast_reflect)
    8.28  	movl r27=~(DELIVER_PSR_CLR|IA64_PSR_CPL0);;
    8.29  	or r29=r29,r28;;
    8.30  	and r29=r29,r27;;
    8.31 +	// set hpsr_dfh to ipsr
    8.32 +	adds r28=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
    8.33 +	ld1 r28=[r28];;
    8.34 +	dep r29=r28,r29,IA64_PSR_DFH_BIT,1;;
    8.35  	mov cr.ipsr=r29;;
    8.36  	// set shared_mem ipsr (from ipsr in r30 with ipsr.ri already set)
    8.37  	extr.u r29=r30,IA64_PSR_CPL0_BIT,2;;
    8.38 @@ -629,7 +643,13 @@ ENTRY(fast_reflect)
    8.39  (p6)	dep r30=0,r30,IA64_PSR_I_BIT,1
    8.40  (p7)	dep r30=-1,r30,IA64_PSR_I_BIT,1 ;;
    8.41  	mov r22=1
    8.42 -	adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 ;;
    8.43 +	adds r21=XSI_IPSR_OFS-XSI_PSR_IC_OFS,r18 
    8.44 +	;;
    8.45 +	adds r27=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
    8.46 +	ld1 r28=[r27];;
    8.47 +	st1 [r27]=r0
    8.48 +	dep r30=r28,r30,IA64_PSR_DFH_BIT,1
    8.49 +	;;
    8.50  	st8 [r21]=r30 ;;
    8.51  	// set shared_mem interrupt_delivery_enabled to 0
    8.52  	// set shared_mem interrupt_collection_enabled to 0
    8.53 @@ -1104,6 +1124,18 @@ just_do_rfi:
    8.54  	;;
    8.55  	or r21=r21,r20
    8.56  	;;
    8.57 +	adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
    8.58 +	tbit.z p8,p9 = r21, IA64_PSR_DFH_BIT
    8.59 +	;;
    8.60 +	(p9) mov r27=1;;
    8.61 +	(p9) st1 [r20]=r27
    8.62 +	;;
    8.63 +	(p8) st1 [r20]=r0
    8.64 +	(p8) adds r20=XSI_HPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
    8.65 +	(p8) ld1 r27=[r20]
    8.66 +	;;
    8.67 +	(p8) dep r21=r27,r21, IA64_PSR_DFH_BIT, 1
    8.68 +	;;
    8.69  	mov cr.ipsr=r21
    8.70  	adds r20=XSI_BANKNUM_OFS-XSI_PSR_IC_OFS,r18 ;;
    8.71  	ld4 r21=[r20];;
    8.72 @@ -1489,6 +1521,11 @@ ENTRY(hyper_get_psr)
    8.73  	ld1 r21=[r20];;
    8.74  	dep r8=r21,r8,IA64_PSR_I_BIT,1
    8.75  	;;
    8.76 +	// set vpsr.dfh
    8.77 +	adds r20=XSI_VPSR_DFH_OFS-XSI_PSR_IC_OFS,r18;;
    8.78 +	ld1 r21=[r20];;
    8.79 +	dep r8=r21,r8,IA64_PSR_DFH_BIT,1
    8.80 +	;;
    8.81  	mov r25=cr.iip
    8.82  	extr.u r26=r24,41,2 ;;
    8.83  	cmp.eq p6,p7=2,r26 ;;
     9.1 --- a/xen/arch/ia64/xen/vcpu.c	Tue Jan 23 09:45:26 2007 -0700
     9.2 +++ b/xen/arch/ia64/xen/vcpu.c	Tue Jan 23 10:52:07 2007 -0700
     9.3 @@ -141,6 +141,9 @@ void vcpu_init_regs(struct vcpu *v)
     9.4  		/* dt/rt/it:1;i/ic:1, si:1, vm/bn:1, ac:1 */
     9.5  		/* Need to be expanded as macro */
     9.6  		regs->cr_ipsr = 0x501008826008;
     9.7 +		/* lazy fp */
     9.8 +		FP_PSR(v) = IA64_PSR_DFH;
     9.9 +		regs->cr_ipsr |= IA64_PSR_DFH;
    9.10  	} else {
    9.11  		regs->cr_ipsr = ia64_getreg(_IA64_REG_PSR)
    9.12  		    | IA64_PSR_BITS_TO_SET | IA64_PSR_BN;
    9.13 @@ -148,6 +151,10 @@ void vcpu_init_regs(struct vcpu *v)
    9.14  				   | IA64_PSR_RI | IA64_PSR_IS);
    9.15  		// domain runs at PL2
    9.16  		regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT;
    9.17 +		// lazy fp 
    9.18 +		PSCB(v, hpsr_dfh) = 1;
    9.19 +		PSCB(v, hpsr_mfh) = 0;
    9.20 +		regs->cr_ipsr |= IA64_PSR_DFH;
    9.21  	}
    9.22  	regs->cr_ifs = 1UL << 63;	/* or clear? */
    9.23  	regs->ar_fpsr = FPSR_DEFAULT;
    9.24 @@ -265,8 +272,10 @@ IA64FAULT vcpu_reset_psr_sm(VCPU * vcpu,
    9.25  		      IA64_PSR_I | IA64_PSR_IC | IA64_PSR_DT |
    9.26  		      IA64_PSR_DFL | IA64_PSR_DFH))
    9.27  		return IA64_ILLOP_FAULT;
    9.28 -	if (imm.dfh)
    9.29 -		ipsr->dfh = 0;
    9.30 +	if (imm.dfh) {
    9.31 +		ipsr->dfh = PSCB(vcpu, hpsr_dfh);
    9.32 +		PSCB(vcpu, vpsr_dfh) = 0;
    9.33 +	}
    9.34  	if (imm.dfl)
    9.35  		ipsr->dfl = 0;
    9.36  	if (imm.pp) {
    9.37 @@ -320,8 +329,10 @@ IA64FAULT vcpu_set_psr_sm(VCPU * vcpu, u
    9.38  	    IA64_PSR_DT | IA64_PSR_DFL | IA64_PSR_DFH;
    9.39  	if (imm24 & ~mask)
    9.40  		return IA64_ILLOP_FAULT;
    9.41 -	if (imm.dfh)
    9.42 +	if (imm.dfh) {
    9.43 +		PSCB(vcpu, vpsr_dfh) = 1;
    9.44  		ipsr->dfh = 1;
    9.45 +	} 
    9.46  	if (imm.dfl)
    9.47  		ipsr->dfl = 1;
    9.48  	if (imm.pp) {
    9.49 @@ -386,8 +397,13 @@ IA64FAULT vcpu_set_psr_l(VCPU * vcpu, u6
    9.50  	//if (val & ~(IA64_PSR_PP | IA64_PSR_UP | IA64_PSR_SP))
    9.51  	//	return IA64_ILLOP_FAULT;
    9.52  	// however trying to set other bits can't be an error as it is in ssm
    9.53 -	if (newpsr.dfh)
    9.54 +	if (newpsr.dfh) {
    9.55  		ipsr->dfh = 1;
    9.56 +		PSCB(vcpu, vpsr_dfh) = 1;
    9.57 +	} else {
    9.58 +		ipsr->dfh = PSCB(vcpu, hpsr_dfh);
    9.59 +		PSCB(vcpu, vpsr_dfh) = 0;
    9.60 +	}       
    9.61  	if (newpsr.dfl)
    9.62  		ipsr->dfl = 1;
    9.63  	if (newpsr.pp) {
    9.64 @@ -466,6 +482,8 @@ IA64FAULT vcpu_get_psr(VCPU * vcpu, u64 
    9.65  		newpsr.pp = 1;
    9.66  	else
    9.67  		newpsr.pp = 0;
    9.68 +	newpsr.dfh = PSCB(vcpu, vpsr_dfh);
    9.69 +
    9.70  	*pval = *(unsigned long *)&newpsr;
    9.71  	*pval &= (MASK(0, 32) | MASK(35, 2));
    9.72  	return IA64_NO_FAULT;
    9.73 @@ -497,6 +515,7 @@ u64 vcpu_get_ipsr_int_state(VCPU * vcpu,
    9.74  	psr.ia64_psr.ic = PSCB(vcpu, interrupt_collection_enabled);
    9.75  	psr.ia64_psr.i = !vcpu->vcpu_info->evtchn_upcall_mask;
    9.76  	psr.ia64_psr.bn = PSCB(vcpu, banknum);
    9.77 +	psr.ia64_psr.dfh = PSCB(vcpu, vpsr_dfh);
    9.78  	psr.ia64_psr.dt = 1;
    9.79  	psr.ia64_psr.it = 1;
    9.80  	psr.ia64_psr.rt = 1;
    9.81 @@ -1343,6 +1362,12 @@ IA64FAULT vcpu_rfi(VCPU * vcpu)
    9.82  	if (psr.ia64_psr.cpl < 3)
    9.83  		psr.ia64_psr.cpl = 2;
    9.84  	int_enable = psr.ia64_psr.i;
    9.85 +	if (psr.ia64_psr.dfh) {
    9.86 +		PSCB(vcpu, vpsr_dfh) = 1;
    9.87 +	} else {
    9.88 +		psr.ia64_psr.dfh = PSCB(vcpu, hpsr_dfh);
    9.89 +		PSCB(vcpu, vpsr_dfh) = 0;
    9.90 +	}
    9.91  	if (psr.ia64_psr.ic)
    9.92  		PSCB(vcpu, interrupt_collection_enabled) = 1;
    9.93  	if (psr.ia64_psr.dt && psr.ia64_psr.rt && psr.ia64_psr.it)
    10.1 --- a/xen/include/asm-ia64/domain.h	Tue Jan 23 09:45:26 2007 -0700
    10.2 +++ b/xen/include/asm-ia64/domain.h	Tue Jan 23 10:52:07 2007 -0700
    10.3 @@ -175,6 +175,7 @@ struct arch_vcpu {
    10.4      unsigned long metaphysical_rr4;		// from arch_domain (so is pinned)
    10.5      unsigned long metaphysical_saved_rr0;	// from arch_domain (so is pinned)
    10.6      unsigned long metaphysical_saved_rr4;	// from arch_domain (so is pinned)
    10.7 +    unsigned long fp_psr;       // used for lazy float register
    10.8      int breakimm;			// from arch_domain (so is pinned)
    10.9      int starting_rid;		/* first RID assigned to domain */
   10.10      int ending_rid;		/* one beyond highest RID assigned to domain */
    11.1 --- a/xen/include/asm-ia64/linux-xen/asm/percpu.h	Tue Jan 23 09:45:26 2007 -0700
    11.2 +++ b/xen/include/asm-ia64/linux-xen/asm/percpu.h	Tue Jan 23 10:52:07 2007 -0700
    11.3 @@ -43,6 +43,9 @@ DECLARE_PER_CPU(unsigned long, local_per
    11.4  
    11.5  #define per_cpu(var, cpu)  (*RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
    11.6  #define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, __ia64_per_cpu_var(local_per_cpu_offset)))
    11.7 +#ifdef XEN
    11.8 +#define per_cpu_addr(var, cpu)  (RELOC_HIDE(&per_cpu__##var, __per_cpu_offset[cpu]))
    11.9 +#endif
   11.10  
   11.11  extern void percpu_modcopy(void *pcpudst, const void *src, unsigned long size);
   11.12  extern void setup_per_cpu_areas (void);
   11.13 @@ -67,6 +70,8 @@ extern void *per_cpu_init(void);
   11.14   */
   11.15  #define __ia64_per_cpu_var(var)	(per_cpu__##var)
   11.16  
   11.17 +DECLARE_PER_CPU(struct vcpu *, fp_owner);
   11.18 +
   11.19  #endif /* !__ASSEMBLY__ */
   11.20  
   11.21  #endif /* _ASM_IA64_PERCPU_H */
    12.1 --- a/xen/include/asm-ia64/vcpu.h	Tue Jan 23 09:45:26 2007 -0700
    12.2 +++ b/xen/include/asm-ia64/vcpu.h	Tue Jan 23 10:52:07 2007 -0700
    12.3 @@ -20,6 +20,7 @@ extern u64 cycle_to_ns(u64 cycle);
    12.4  #define VCPU(_v,_x)	(_v->arch.privregs->_x)
    12.5  #define PSCB(_v,_x)	VCPU(_v,_x)
    12.6  #define PSCBX(_v,_x)	(_v->arch._x)
    12.7 +#define FP_PSR(_v)	PSCBX(_v, fp_psr)
    12.8  
    12.9  #define SPURIOUS_VECTOR 0xf
   12.10  
    13.1 --- a/xen/include/public/arch-ia64.h	Tue Jan 23 09:45:26 2007 -0700
    13.2 +++ b/xen/include/public/arch-ia64.h	Tue Jan 23 10:52:07 2007 -0700
    13.3 @@ -288,7 +288,9 @@ struct mapped_regs {
    13.4              unsigned char *interrupt_mask_addr;
    13.5              int pending_interruption;
    13.6              unsigned char vpsr_pp;
    13.7 -            unsigned char reserved5_2[3];
    13.8 +            unsigned char vpsr_dfh;
    13.9 +            unsigned char hpsr_dfh;
   13.10 +            unsigned char hpsr_mfh;
   13.11              unsigned long reserved5_1[4];
   13.12              int metaphysical_mode; // 1 = use metaphys mapping, 0 = use virtual
   13.13              int banknum; // 0 or 1, which virtual register bank is active