ia64/xen-unstable

view xen/arch/x86/mm/guest_walk.c @ 18830:c2a018cdb45d

Fix PSE PAT handling in guest walk.

The guest walk code was checking for the _PAGE_PSE_PAT flag in
guest_l2e_get_flags(). The problem is that this function only covers
the first 12 bits of the PDE, while _PAGE_PSE_PAT is actually bit 12
(i.e. the 13th bit). This caused the _PAGE_PAT bit never to be set on
splintered L1s.

Signed-off-by: Gianluca Guida <gianluca.guida@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Nov 25 11:17:57 2008 +0000 (2008-11-25)
parents 7fb33d15dc9b
children
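
For context: in a PSE PDE the PAT bit sits at bit 12, one bit above the low 12
flag bits that guest_l2e_get_flags() exposes, so it is visible instead as bit 0
of the frame number returned by guest_l2e_get_gfn(). A minimal sketch of that
relationship, using the guest_pt.h accessors already included by this file (the
helper name is hypothetical and not part of the patch):

    /* Illustration only: recover the PSE PAT bit (PDE bit 12) from a PSE
     * l2e.  guest_l2e_get_flags() covers only the low 12 bits, so bit 12
     * shows up as bit 0 of the gfn instead. */
    static inline int guest_l2e_pse_pat(guest_l2e_t l2e)
    {
        return (gfn_x(guest_l2e_get_gfn(l2e)) & 1) != 0;
    }

The fix below applies exactly this gfn-bit-0 test when deciding whether to keep
_PAGE_PAT in the flags of the l1e it fabricates for a splintered superpage.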
line source
/******************************************************************************
 * arch/x86/mm/guest_walk.c
 *
 * Pagetable walker for guest memory accesses.
 *
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/paging.h>
#include <xen/domain_page.h>
#include <xen/sched.h>
#include <asm/page.h>
#include <asm/guest_pt.h>

/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
{
    static uint32_t flags[] = {
        /* I/F - Usr Wr */
        /* 0 0 0 0 */ _PAGE_PRESENT,
        /* 0 0 0 1 */ _PAGE_PRESENT|_PAGE_RW,
        /* 0 0 1 0 */ _PAGE_PRESENT|_PAGE_USER,
        /* 0 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
        /* 0 1 0 0 */ _PAGE_PRESENT,
        /* 0 1 0 1 */ _PAGE_PRESENT|_PAGE_RW,
        /* 0 1 1 0 */ _PAGE_PRESENT|_PAGE_USER,
        /* 0 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
        /* 1 0 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
        /* 1 0 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
        /* 1 0 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
        /* 1 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
        /* 1 1 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
        /* 1 1 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
        /* 1 1 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
        /* 1 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
    };

    /* Don't demand not-NX if the CPU wouldn't enforce it. */
    if ( !guest_supports_nx(v) )
        pfec &= ~PFEC_insn_fetch;

    /* Don't demand R/W if the CPU wouldn't enforce it. */
    if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v))
         && !(pfec & PFEC_user_mode) )
        pfec &= ~PFEC_write_access;

    /* Index by the fault bits above Present: insn-fetch, reserved, user, write. */
    return flags[(pfec & 0x1f) >> 1];
}

/* Modify a guest pagetable entry to set the Accessed and Dirty bits.
 * Returns non-zero if it actually writes to guest memory. */
static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
{
    guest_intpte_t old, new;

    old = *(guest_intpte_t *)walk_p;
    new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
    if ( old != new )
    {
        /* Write the new entry into the walk, and try to write it back
         * into the guest table as well.  If the guest table has changed
         * under our feet then leave it alone. */
        *(guest_intpte_t *)walk_p = new;
        if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
            return 1;
    }
    return 0;
}

/* Walk the guest pagetables, after the manner of a hardware walker. */
uint32_t
guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
                  uint32_t pfec, mfn_t top_mfn, void *top_map)
{
    struct domain *d = v->domain;
    p2m_type_t p2mt;
    guest_l1e_t *l1p = NULL;
    guest_l2e_t *l2p = NULL;
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
    guest_l3e_t *l3p = NULL;
    guest_l4e_t *l4p;
#endif
    uint32_t gflags, mflags, rc = 0;
    int pse;

    perfc_incr(guest_walk);
    memset(gw, 0, sizeof(*gw));
    gw->va = va;

    /* Mandatory bits that must be set in every entry.  We invert NX, to
     * calculate as if there were an "X" bit that allowed access.
     * We will accumulate, in rc, the set of flags that are missing. */
    mflags = mandatory_flags(v, pfec);

#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */

    /* Get the l4e from the top level table and check its flags */
    gw->l4mfn = top_mfn;
    l4p = (guest_l4e_t *) top_map;
    gw->l4e = l4p[guest_l4_table_offset(va)];
    gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
    rc |= ((gflags & mflags) ^ mflags);
    if ( rc & _PAGE_PRESENT ) goto out;

    /* Map the l3 table */
    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
    if ( !p2m_is_ram(p2mt) )
    {
        rc |= _PAGE_PRESENT;
        goto out;
    }
    ASSERT(mfn_valid(mfn_x(gw->l3mfn)));

    /* Get the l3e and check its flags */
    l3p = map_domain_page(mfn_x(gw->l3mfn));
    gw->l3e = l3p[guest_l3_table_offset(va)];
    gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
    rc |= ((gflags & mflags) ^ mflags);
    if ( rc & _PAGE_PRESENT )
        goto out;

#else /* PAE only... */

    /* Get the l3e and check its flag */
    gw->l3e = ((guest_l3e_t *) top_map)[guest_l3_table_offset(va)];
    if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) )
    {
        rc |= _PAGE_PRESENT;
        goto out;
    }

#endif /* PAE or 64... */

    /* Map the l2 table */
    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
    if ( !p2m_is_ram(p2mt) )
    {
        rc |= _PAGE_PRESENT;
        goto out;
    }
    ASSERT(mfn_valid(mfn_x(gw->l2mfn)));

    /* Get the l2e */
    l2p = map_domain_page(mfn_x(gw->l2mfn));
    gw->l2e = l2p[guest_l2_table_offset(va)];

#else /* 32-bit only... */

    /* Get l2e from the top level table */
    gw->l2mfn = top_mfn;
    l2p = (guest_l2e_t *) top_map;
    gw->l2e = l2p[guest_l2_table_offset(va)];

#endif /* All levels... */

    gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
    rc |= ((gflags & mflags) ^ mflags);
    if ( rc & _PAGE_PRESENT )
        goto out;

    pse = (guest_supports_superpages(v) &&
           (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE));

    if ( pse )
    {
        /* Special case: this guest VA is in a PSE superpage, so there's
         * no guest l1e.  We make one up so that the propagation code
         * can generate a shadow l1 table.  Start with the gfn of the
         * first 4k-page of the superpage. */
        gfn_t start = guest_l2e_get_gfn(gw->l2e);
        /* Grant full access in the l1e, since all the guest entry's
         * access controls are enforced in the shadow l2e. */
        int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
                     _PAGE_ACCESSED|_PAGE_DIRTY);
        /* Import cache-control bits.  Note that _PAGE_PAT is actually
         * _PAGE_PSE, and it is always set.  We will clear it in case
         * _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. */
        flags |= (guest_l2e_get_flags(gw->l2e)
                  & (_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
        if ( !(gfn_x(start) & 1) )
            /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
            flags &= ~_PAGE_PAT;

        /* Increment the pfn by the right number of 4k pages.
         * The ~0x1 is to mask out the PAT bit mentioned above. */
        start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
        gw->l1e = guest_l1e_from_gfn(start, flags);
        gw->l1mfn = _mfn(INVALID_MFN);
    }
    else
    {
        /* Not a superpage: carry on and find the l1e. */
        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
        if ( !p2m_is_ram(p2mt) )
        {
            rc |= _PAGE_PRESENT;
            goto out;
        }
        ASSERT(mfn_valid(mfn_x(gw->l1mfn)));
        l1p = map_domain_page(mfn_x(gw->l1mfn));
        gw->l1e = l1p[guest_l1_table_offset(va)];
        gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
        rc |= ((gflags & mflags) ^ mflags);
    }

    /* Go back and set accessed and dirty bits only if the walk was a
     * success.  Although the PRMs say higher-level _PAGE_ACCESSED bits
     * get set whenever a lower-level PT is used, at least some hardware
     * walkers behave this way. */
    if ( rc == 0 )
    {
#if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
        if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
            paging_mark_dirty(d, mfn_x(gw->l4mfn));
        if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
            paging_mark_dirty(d, mfn_x(gw->l3mfn));
#endif
        if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
                         (pse && (pfec & PFEC_write_access))) )
            paging_mark_dirty(d, mfn_x(gw->l2mfn));
        if ( !pse )
        {
            if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
                             (pfec & PFEC_write_access)) )
                paging_mark_dirty(d, mfn_x(gw->l1mfn));
        }
    }

 out:
#if GUEST_PAGING_LEVELS == 4
    if ( l3p ) unmap_domain_page(l3p);
#endif
#if GUEST_PAGING_LEVELS >= 3
    if ( l2p ) unmap_domain_page(l2p);
#endif
    if ( l1p ) unmap_domain_page(l1p);

    return rc;
}