ia64/xen-unstable

changeset 16334:74b40a9f4c0a

[IA64] vti save-restore: clean up arch_get/set_info_guest()

- Update comment in copy_rbs()
- Don't warn when rbs_size = 0 for cpu initialization case.
- Remove struct vcpu_guest_context_regs::rbs_nat member which isn't used.
and add num_phys_stacked to struct vcpu_guest_context_regs.
So far rbs_nat and rbs_rnat aren't used, so it is allowed to change the
offset of rbs_rnat.
- Add check when setting vRR[].
- Don't set vRR[] if val is zero.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Alex Williamson <alex.williamson@hp.com>
date Wed Nov 07 10:19:21 2007 -0700 (2007-11-07)
parents 828cb584c1cc
children 91575bb23d07
files xen/arch/ia64/xen/domain.c xen/include/public/arch-ia64.h
line diff
     1.1 --- a/xen/arch/ia64/xen/domain.c	Wed Nov 07 10:10:20 2007 -0700
     1.2 +++ b/xen/arch/ia64/xen/domain.c	Wed Nov 07 10:19:21 2007 -0700
     1.3 @@ -636,6 +636,7 @@ int arch_vcpu_reset(struct vcpu *v)
     1.4  	return 0;
     1.5  }
     1.6  
     1.7 +/* Here it is assumed that all of the CPUs have the same RSE.N_STACKED_PHYS */
     1.8  static unsigned long num_phys_stacked;
     1.9  static int __init
    1.10  init_num_phys_stacked(void)
    1.11 @@ -822,8 +823,9 @@ void arch_get_info_guest(struct vcpu *v,
    1.12  	COPY_FPREG(&c.nat->regs.f[30], &sw->f30);
    1.13  	COPY_FPREG(&c.nat->regs.f[31], &sw->f31);
    1.14  
    1.15 -	for (i = 0; i < 96; i++)
    1.16 -		COPY_FPREG(&c.nat->regs.f[i + 32], &v->arch._thread.fph[i]);
    1.17 +	// f32 - f127
    1.18 +	memcpy(&c.nat->regs.f[32], &v->arch._thread.fph[0],
    1.19 +	       sizeof(v->arch._thread.fph));
    1.20  
    1.21  #define NATS_UPDATE(reg)						\
    1.22  	nats_update(&c.nat->regs.nats, (reg),				\
    1.23 @@ -939,6 +941,8 @@ void arch_get_info_guest(struct vcpu *v,
    1.24  		c.nat->regs.rbs_rnat &= ~((1UL << bottom_slot) - 1);
    1.25  	}
    1.26  
    1.27 +	c.nat->regs.num_phys_stacked = num_phys_stacked;
    1.28 +
    1.29  	if (VMX_DOMAIN(v))
    1.30  		c.nat->privregs_pfn = VGC_PRIVREGS_HVM;
    1.31  	else
    1.32 @@ -1101,7 +1105,6 @@ copy_rbs(struct vcpu* v, unsigned long* 
    1.33  	if (((unsigned long)dst_bsp & ~PAGE_MASK) > KERNEL_STACK_SIZE / 2)
    1.34  		goto out;
    1.35  
    1.36 -	//XXX TODO
    1.37  	// ia64_copy_rbs() uses real cpu's stack register.
    1.38  	// So it may fault with an Illigal Operation fault resulting
    1.39  	// in panic if rbs_size is too large to load compared to
    1.40 @@ -1113,10 +1116,9 @@ copy_rbs(struct vcpu* v, unsigned long* 
    1.41  	// we need to copy them by hand without loadrs and flushrs
    1.42  	// However even if we implement that, similar issue still occurs
    1.43  	// when running guest. CPU context restore routine issues loadrs
    1.44 -	// resulting in Illegal Operation fault. For such a case,
    1.45 -	// we need to emulate RSE store.
    1.46 -	// So it would be better to implement only RSE store emulation
    1.47 -	// and copy stacked registers directly into guest RBS.
    1.48 +	// resulting in Illegal Operation fault. And what if the vRSE is in
    1.49 +	// enforced lazy mode? We can't store any dirty stacked registers
    1.50 +	// into RBS without cover or br.call.
    1.51  	if (num_regs > num_phys_stacked) {
    1.52  		rc = -ENOSYS;
    1.53  		gdprintk(XENLOG_WARNING,
    1.54 @@ -1240,10 +1242,6 @@ int arch_set_info_guest(struct vcpu *v, 
    1.55  	
    1.56  	uregs->pr = c.nat->regs.pr;
    1.57  	uregs->b0 = c.nat->regs.b[0];
    1.58 -	if (((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff)
    1.59 -		gdprintk(XENLOG_INFO,
    1.60 -			 "rbs stack offset is different! xen 0x%x given 0x%x",
    1.61 -			 (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff);
    1.62  	num_regs = ia64_rse_num_regs((unsigned long*)c.nat->regs.ar.bspstore,
    1.63  				     (unsigned long*)c.nat->regs.ar.bsp);
    1.64  	rbs_size = (unsigned long)ia64_rse_skip_regs(rbs_bottom, num_regs) -
    1.65 @@ -1254,6 +1252,11 @@ int arch_set_info_guest(struct vcpu *v, 
    1.66  			 rbs_size, sizeof (c.nat->regs.rbs));
    1.67  		return -EINVAL;
    1.68  	}
    1.69 +	if (rbs_size > 0 &&
    1.70 +	    ((IA64_RBS_OFFSET / 8) % 64) != c.nat->regs.rbs_voff)
    1.71 +		gdprintk(XENLOG_INFO,
    1.72 +			 "rbs stack offset is different! xen 0x%x given 0x%x",
    1.73 +			 (IA64_RBS_OFFSET / 8) % 64, c.nat->regs.rbs_voff);
    1.74  	
    1.75  	/* Protection against crazy user code.  */
    1.76  	if (!was_initialised)
    1.77 @@ -1281,6 +1284,49 @@ int arch_set_info_guest(struct vcpu *v, 
    1.78  		}
    1.79  	}
    1.80  
    1.81 +	// inhibit save/restore between cpus of different RSE.N_STACKED_PHYS.
    1.82 +	// to avoid nasty issues.
    1.83 +	// 
     1.84 +	// The number of physical stacked general registers (RSE.N_STACKED_PHYS)
    1.85 +	// isn't virtualized. Guest OS utilizes it via PAL_RSE_INFO call and
    1.86 +	// the value might be exported to user/user process.
    1.87 +	// (Linux does via /proc/cpuinfo)
    1.88 +	// The SDM says only that the number is cpu implementation specific.
    1.89 +	//
     1.90 +	// If the number of the restoring cpu is different from that of the saving cpu,
    1.91 +	// the following, or something worse, might happen.
    1.92 +	// - Xen VMM itself may panic when issuing loadrs to run guest with
    1.93 +	//   illegal operation fault
    1.94 +	//   When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of
    1.95 +	//   restoring CPU
     1.96 +	//   This case is detected to refuse restore by copy_rbs()
    1.97 +	// - guest kernel may panic with illegal operation fault
    1.98 +	//   When RSE.N_STACKED_PHYS of saving CPU > RSE.N_STACKED_PHYS of
    1.99 +	//   restoring CPU
    1.100 +	// - information leak from guest kernel to user process
   1.101 +	//   When RSE.N_STACKED_PHYS of saving CPU < RSE.N_STACKED_PHYS of
   1.102 +	//   restoring CPU
   1.103 +	//   Before returning to user process, kernel should zero clear all
    1.104 +	//   physical stacked registers to prevent kernel bits leak.
   1.105 +	//   It would be based on RSE.N_STACKED_PHYS (Linux does.).
    1.106 +	//   On the restored environment the kernel clears only a part
   1.107 +	//   of the physical stacked registers.
   1.108 +	// - user processes or human operators would be confused.
   1.109 +	//   RSE.N_STACKED_PHYS might be exported to user process or human
   1.110 +	//   operators. Actually on linux it is exported via /proc/cpuinfo.
   1.111 +	//   user processes might use it.
   1.112 +	//   I don't know any concrete example, but it's possible in theory.
    1.113 +	//   e.g. thread library may allocate RBS area based on the value.
   1.114 +	//        (Fortunately glibc nptl doesn't)
   1.115 +	if (c.nat->regs.num_phys_stacked != 0 && /* COMPAT */
   1.116 +	    c.nat->regs.num_phys_stacked != num_phys_stacked) {
   1.117 +		gdprintk(XENLOG_WARNING,
   1.118 +			 "num phys stacked is different! "
   1.119 +			 "xen 0x%lx given 0x%lx",
   1.120 +			 num_phys_stacked, c.nat->regs.num_phys_stacked);
   1.121 +		return -EINVAL;
   1.122 +	}
   1.123 +
   1.124  	uregs->r1 = c.nat->regs.r[1];
   1.125  	uregs->r12 = c.nat->regs.r[12];
   1.126  	uregs->r13 = c.nat->regs.r[13];
   1.127 @@ -1342,9 +1388,9 @@ int arch_set_info_guest(struct vcpu *v, 
   1.128  	COPY_FPREG(&sw->f30, &c.nat->regs.f[30]);
   1.129  	COPY_FPREG(&sw->f31, &c.nat->regs.f[31]);
   1.130  
   1.131 -	for (i = 0; i < 96; i++)
   1.132 -		COPY_FPREG(&v->arch._thread.fph[i], &c.nat->regs.f[i + 32]);
   1.133 -
   1.134 +	// f32 - f127
   1.135 +	memcpy(&v->arch._thread.fph[0], &c.nat->regs.f[32],
   1.136 +	       sizeof(v->arch._thread.fph));
   1.137  
   1.138  #define UNAT_UPDATE(reg)					\
   1.139  	unat_update(&uregs->eml_unat, &uregs->r ## reg,		\
   1.140 @@ -1439,20 +1485,21 @@ int arch_set_info_guest(struct vcpu *v, 
   1.141  
   1.142  	/* rr[] must be set before setting itrs[] dtrs[] */
   1.143  	for (i = 0; i < 8; i++) {
   1.144 -		//XXX TODO integrity check.
   1.145 -		//    if invalid value is given, 
   1.146 -		//    vmx_load_all_rr() and load_region_regs()
   1.147 -		//    result in General exception, reserved register/field
   1.148 -		//    failt causing panicing xen.
   1.149 +		unsigned long rrval = c.nat->regs.rr[i];
   1.150 +		unsigned long reg = (unsigned long)i << 61;
   1.151 +		IA64FAULT fault = IA64_NO_FAULT;
   1.152 +
   1.153 +		if (rrval == 0)
   1.154 +			continue;
   1.155  		if (d->arch.is_vti) {
   1.156  			//without VGCF_EXTRA_REGS check,
   1.157  			//VTi domain doesn't boot.
   1.158  			if (c.nat->flags & VGCF_EXTRA_REGS)
   1.159 -				vmx_vcpu_set_rr(v, (unsigned long)i << 61,
   1.160 -						c.nat->regs.rr[i]);
   1.161 +				fault = vmx_vcpu_set_rr(v, reg, rrval);
   1.162  		} else
   1.163 -			vcpu_set_rr(v, (unsigned long)i << 61,
   1.164 -				    c.nat->regs.rr[i]);
   1.165 +			fault = vcpu_set_rr(v, reg, rrval);
   1.166 +		if (fault != IA64_NO_FAULT)
   1.167 +			return -EINVAL;
   1.168  	}
   1.169  
   1.170  	if (c.nat->flags & VGCF_EXTRA_REGS) {
     2.1 --- a/xen/include/public/arch-ia64.h	Wed Nov 07 10:10:20 2007 -0700
     2.2 +++ b/xen/include/public/arch-ia64.h	Wed Nov 07 10:19:21 2007 -0700
     2.3 @@ -417,8 +417,14 @@ struct vcpu_guest_context_regs {
     2.4           */
     2.5          unsigned int rbs_voff;
     2.6          unsigned long rbs[2048];
     2.7 -        unsigned long rbs_nat;
     2.8          unsigned long rbs_rnat;
     2.9 +
    2.10 +        /*
    2.11 +         * RSE.N_STACKED_PHYS via PAL_RSE_INFO
    2.12 +         * Strictly this isn't cpu context, but this value is necessary
    2.13 +         * for domain save/restore. So is here.
    2.14 +         */
    2.15 +        unsigned long num_phys_stacked;
    2.16  };
    2.17  
    2.18  struct vcpu_guest_context {