put_page(page);
}
+static bool_t inc_linear_entries(struct page_info *pg)
+{
+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
+
+ do {
+ /*
+ * The check below catches the "linear use" count being non-zero
+ * as well as overflow. Signed integer overflow is undefined behavior
+ * according to the C spec. However, as long as linear_pt_count is
+ * smaller in size than 'int', the arithmetic operation of the
+ * increment below won't overflow; rather the result will be truncated
+ * when stored. Ensure that this is always true.
+ */
+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
+ oc = nc++;
+ if ( nc <= 0 )
+ return 0;
+ nc = cmpxchg(&pg->linear_pt_count, oc, nc);
+ } while ( oc != nc );
+
+ return 1;
+}
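For illustration, a standalone sketch (not part of the patch) of the truncation argument made in the comment above, assuming a 16-bit counter and the usual two's-complement conversion behaviour of the compilers Xen targets: the increment is carried out in 'int' and therefore cannot overflow; the wrapped value only appears once the result is truncated on store, where the "nc <= 0" test rejects it.

#include <assert.h>
#include <stdint.h>

int main(void)
{
    int16_t nc = INT16_MAX;  /* stand-in for linear_pt_count at its maximum */
    int16_t oc = nc++;       /* arithmetic promoted to int, truncated on store */

    assert(oc == INT16_MAX);
    assert(nc == INT16_MIN); /* truncation wrapped the value negative ...   */
    assert(nc <= 0);         /* ... so the overflow check above rejects it  */
    return 0;
}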
+
+static void dec_linear_entries(struct page_info *pg)
+{
+ typeof(pg->linear_pt_count) oc;
+
+ oc = arch_fetch_and_add(&pg->linear_pt_count, -1);
+ ASSERT(oc > 0);
+}
+
+static bool_t inc_linear_uses(struct page_info *pg)
+{
+ typeof(pg->linear_pt_count) nc = read_atomic(&pg->linear_pt_count), oc;
+
+ do {
+ /* See the respective comment in inc_linear_entries(). */
+ BUILD_BUG_ON(sizeof(nc) >= sizeof(int));
+ oc = nc--;
+ if ( nc >= 0 )
+ return 0;
+ nc = cmpxchg(&pg->linear_pt_count, oc, nc);
+ } while ( oc != nc );
+
+ return 1;
+}
+
+static void dec_linear_uses(struct page_info *pg)
+{
+ typeof(pg->linear_pt_count) oc;
+
+ oc = arch_fetch_and_add(&pg->linear_pt_count, 1);
+ ASSERT(oc < 0);
+}
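To make the sign convention of these four helpers easier to follow: a positive linear_pt_count means the page table itself contains linear (same-level) entries, a negative value means the page is in use as a linear page table elsewhere, and the two states exclude one another. A toy, single-threaded model of that protocol (hypothetical code, not from the patch):

#include <assert.h>
#include <stdbool.h>

static short count;                 /* stands in for page->linear_pt_count */

static bool model_inc_entries(void) /* cf. inc_linear_entries() */
{
    if ( count < 0 )                /* already used as a linear page table */
        return false;
    ++count;
    return true;
}

static bool model_inc_uses(void)    /* cf. inc_linear_uses() */
{
    if ( count > 0 )                /* already contains linear entries */
        return false;
    --count;
    return true;
}

int main(void)
{
    assert(model_inc_entries());    /* page gains a linear entry ...        */
    assert(!model_inc_uses());      /* ... so it may not be mapped linearly */
    --count;                        /* drop the entry again                 */
    assert(model_inc_uses());       /* now the opposite direction works     */
    assert(!model_inc_entries());
    return 0;
}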
+
/*
* We allow root tables to map each other (a.k.a. linear page tables). It
* needs some special care with reference counts and access permissions:
\
if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \
{ \
+ struct page_info *ptpg = mfn_to_page(pde_pfn); \
+ \
+ /* Make sure the page table belongs to the correct domain. */ \
+ if ( unlikely(page_get_owner(ptpg) != d) ) \
+ return 0; \
+ \
/* Make sure the mapped frame belongs to the correct domain. */ \
if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \
return 0; \
\
/* \
- * Ensure that the mapped frame is an already-validated page table. \
+ * Ensure that the mapped frame is an already-validated page table \
+ * and does not itself contain linear entries, and that the \
+ * containing page table is not itself in use as a linear page \
+ * table elsewhere. \
* If so, atomically increment the count (checking for overflow). \
*/ \
page = mfn_to_page(pfn); \
+ if ( !inc_linear_entries(ptpg) ) \
+ { \
+ put_page(page); \
+ return 0; \
+ } \
+ if ( !inc_linear_uses(page) ) \
+ { \
+ dec_linear_entries(ptpg); \
+ put_page(page); \
+ return 0; \
+ } \
y = page->u.inuse.type_info; \
do { \
x = y; \
unlikely((x & (PGT_type_mask|PGT_validated)) != \
(PGT_##level##_page_table|PGT_validated)) ) \
{ \
+ dec_linear_uses(page); \
+ dec_linear_entries(ptpg); \
put_page(page); \
return 0; \
} \
l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
} while ( 0 )
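Since the macro form with its line continuations is hard to follow, here is a non-macro restatement of the ordering the updated get_linear_pagetable logic enforces, shown for the L4 case only; the helper name and the unwind labels are hypothetical, everything else mirrors the hunk above:

static int get_linear_l4_page(unsigned long pfn, unsigned long pde_pfn,
                              struct domain *d)
{
    struct page_info *ptpg = mfn_to_page(pde_pfn);   /* containing table */
    struct page_info *page;
    unsigned long x, y;

    if ( unlikely(page_get_owner(ptpg) != d) )        /* wrong owner */
        return 0;
    if ( unlikely(!get_page_from_pagenr(pfn, d)) )    /* general ref on target */
        return 0;

    page = mfn_to_page(pfn);
    if ( !inc_linear_entries(ptpg) )                  /* container gains an entry */
        goto drop_page;
    if ( !inc_linear_uses(page) )                     /* target gains a "use" */
        goto drop_entry;

    /* Target must already be a validated page table of the same level. */
    y = page->u.inuse.type_info;
    do {
        x = y;
        if ( unlikely((x & PGT_count_mask) == PGT_count_mask) ||
             unlikely((x & (PGT_type_mask | PGT_validated)) !=
                      (PGT_l4_page_table | PGT_validated)) )
            goto drop_use;
    } while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x );

    return 1;

 drop_use:
    dec_linear_uses(page);
 drop_entry:
    dec_linear_entries(ptpg);
 drop_page:
    put_page(page);
    return 0;
}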
+static int _put_page_type(struct page_info *page, bool_t preemptible,
+ struct page_info *ptpg);
+
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
{
unsigned long pfn = l1e_get_pfn(l1e);
if ( l2e_get_flags(l2e) & _PAGE_PSE )
put_superpage(l2e_get_pfn(l2e));
else
- put_page_and_type(l2e_get_page(l2e));
+ {
+ struct page_info *pg = l2e_get_page(l2e);
+ int rc = _put_page_type(pg, 0, mfn_to_page(pfn));
+
+ ASSERT(!rc);
+ put_page(pg);
+ }
return 0;
}
-static int __put_page_type(struct page_info *, int preemptible);
-
static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
int partial, bool_t defer)
{
struct page_info *pg;
+ int rc;
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
return 1;
if ( unlikely(partial > 0) )
{
ASSERT(!defer);
- return __put_page_type(pg, 1);
+ return _put_page_type(pg, 1, mfn_to_page(pfn));
}
if ( defer )
{
+ current->arch.old_guest_ptpg = mfn_to_page(pfn);
current->arch.old_guest_table = pg;
return 0;
}
- return put_page_and_type_preemptible(pg);
+ rc = _put_page_type(pg, 1, mfn_to_page(pfn));
+ if ( likely(!rc) )
+ put_page(pg);
+
+ return rc;
}
static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
int partial, bool_t defer)
{
+ int rc = 1;
+
if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
(l4e_get_pfn(l4e) != pfn) )
{
if ( unlikely(partial > 0) )
{
ASSERT(!defer);
- return __put_page_type(pg, 1);
+ return _put_page_type(pg, 1, mfn_to_page(pfn));
}
if ( defer )
{
+ current->arch.old_guest_ptpg = mfn_to_page(pfn);
current->arch.old_guest_table = pg;
return 0;
}
- return put_page_and_type_preemptible(pg);
+ rc = _put_page_type(pg, 1, mfn_to_page(pfn));
+ if ( likely(!rc) )
+ put_page(pg);
}
- return 1;
+
+ return rc;
}
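The put side now splits the former put_page_and_type_preemptible() into an explicit _put_page_type(), which is told which page table the entry lived in, followed by a plain put_page(). A minimal sketch of that pairing (hypothetical helper name; non-preemptible variant shown):

static int put_pt_backed_entry(struct page_info *pg, unsigned long container_pfn)
{
    /* Drop the type reference first, passing the containing page table so
     * the linear_pt_count bookkeeping can be undone along with it. */
    int rc = _put_page_type(pg, 0, mfn_to_page(container_pfn));

    if ( likely(!rc) )
        put_page(pg); /* then drop the general reference */

    return rc;
}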
static int alloc_l1_table(struct page_info *page)
{
page->nr_validated_ptes = i;
page->partial_pte = 0;
+ current->arch.old_guest_ptpg = NULL;
current->arch.old_guest_table = page;
}
while ( i-- > 0 )
{
if ( current->arch.old_guest_table )
page->nr_validated_ptes++;
+ current->arch.old_guest_ptpg = NULL;
current->arch.old_guest_table = page;
}
}
}
-static int __put_final_page_type(
- struct page_info *page, unsigned long type, int preemptible)
+static int _put_final_page_type(struct page_info *page, unsigned long type,
+ bool_t preemptible, struct page_info *ptpg)
{
int rc = free_page_type(page, type, preemptible);
/* No need for atomic update of type_info here: noone else updates it. */
if ( rc == 0 )
{
+ if ( ptpg && PGT_type_equal(type, ptpg->u.inuse.type_info) )
+ {
+ dec_linear_uses(page);
+ dec_linear_entries(ptpg);
+ }
+ ASSERT(!page->linear_pt_count || page_get_owner(page)->is_dying);
/*
* Record TLB information for flush later. We do not stamp page tables
* when running in shadow mode:
}
-static int __put_page_type(struct page_info *page,
- int preemptible)
+static int _put_page_type(struct page_info *page, bool_t preemptible,
+ struct page_info *ptpg)
{
unsigned long nx, x, y = page->u.inuse.type_info;
int rc = 0;
x, nx)) != x) )
continue;
/* We cleared the 'valid bit' so we do the clean up. */
- rc = __put_final_page_type(page, x, preemptible);
+ rc = _put_final_page_type(page, x, preemptible, ptpg);
+ ptpg = NULL; /* avoid adjusting the linear counts again below */
if ( x & PGT_partial )
put_page(page);
break;
}
+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
+ {
+ /*
+ * page_set_tlbflush_timestamp() accesses the same union
+ * linear_pt_count lives in. Unvalidated page table pages,
+ * however, should only occur during domain destruction
+ * anyway. Fortunately, updating linear_pt_count is no longer
+ * necessary for a dying domain.
+ */
+ ASSERT(page_get_owner(page)->is_dying);
+ ASSERT(page->linear_pt_count < 0);
+ ASSERT(ptpg->linear_pt_count > 0);
+ ptpg = NULL;
+ }
+
/*
* Record TLB information for flush later. We do not stamp page
* tables when running in shadow mode:
return -EINTR;
}
+ if ( ptpg && PGT_type_equal(x, ptpg->u.inuse.type_info) )
+ {
+ ASSERT(!rc);
+ dec_linear_uses(page);
+ dec_linear_entries(ptpg);
+ }
+
return rc;
}
page->nr_validated_ptes = 0;
page->partial_pte = 0;
}
+ page->linear_pt_count = 0; /* no linear entries or uses yet for the new type */
rc = alloc_page_type(page, type, preemptible);
}
void put_page_type(struct page_info *page)
{
- int rc = __put_page_type(page, 0);
+ int rc = _put_page_type(page, 0, NULL);
ASSERT(rc == 0);
(void)rc;
}
int put_page_type_preemptible(struct page_info *page)
{
- return __put_page_type(page, 1);
+ return _put_page_type(page, 1, NULL);
}
int get_page_type_preemptible(struct page_info *page, unsigned long type)
if ( !v->arch.old_guest_table )
return 0;
- switch ( rc = put_page_and_type_preemptible(v->arch.old_guest_table) )
+ switch ( rc = _put_page_type(v->arch.old_guest_table, 1,
+ v->arch.old_guest_ptpg) )
{
case -EINTR:
case -ERESTART:
return -ERESTART;
+ case 0:
+ put_page(v->arch.old_guest_table);
}
v->arch.old_guest_table = NULL;
case -EINTR:
rc = -ERESTART;
case -ERESTART:
+ curr->arch.old_guest_ptpg = NULL;
curr->arch.old_guest_table = page;
break;
default:
if ( type == PGT_l1_page_table )
put_page_and_type(page);
else
+ {
+ curr->arch.old_guest_ptpg = NULL;
curr->arch.old_guest_table = page;
+ }
}
}
{
case -EINTR:
case -ERESTART:
+ curr->arch.old_guest_ptpg = NULL;
curr->arch.old_guest_table = page;
rc = 0;
break;
case -EINTR:
rc = -ERESTART;
case -ERESTART:
+ curr->arch.old_guest_ptpg = NULL;
curr->arch.old_guest_table = page;
okay = 0;
break;
u32 tlbflush_timestamp;
/*
- * When PGT_partial is true then this field is valid and indicates
- * that PTEs in the range [0, @nr_validated_ptes) have been validated.
- * An extra page reference must be acquired (or not dropped) whenever
- * PGT_partial gets set, and it must be dropped when the flag gets
- * cleared. This is so that a get() leaving a page in partially
+ * When PGT_partial is true then the first two fields are valid and
+ * indicate that PTEs in the range [0, @nr_validated_ptes) have been
+ * validated. An extra page reference must be acquired (or not dropped)
+ * whenever PGT_partial gets set, and it must be dropped when the flag
+ * gets cleared. This is so that a get() leaving a page in partially
* validated state (where the caller would drop the reference acquired
* due to the getting of the type [apparently] failing [-ERESTART])
* would not accidentally result in a page left with zero general
* put_page_from_lNe() (due to the apparent failure), and hence it
* must be dropped when the put operation is resumed (and completes),
* but it must not be acquired if picking up the page for validation.
+ *
+ * The 3rd field, @linear_pt_count, indicates
+ * - by a positive value, how many same-level page table entries a page
+ * table has,
+ * - by a negative value, in how many same-level page tables a page is
+ * in use.
*/
struct {
- u16 nr_validated_ptes;
- s8 partial_pte;
+ u16 nr_validated_ptes:PAGETABLE_ORDER + 1;
+ u16 :16 - PAGETABLE_ORDER - 1 - 2;
+ s16 partial_pte:2;
+ s16 linear_pt_count;
};
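A hypothetical, standalone sanity check (not part of the patch) of the new layout, assuming PAGETABLE_ORDER == 9 and GCC's packing of non-int bit-field types: the three fields still fit in 32 bits, i.e. the union slot shared with tlbflush_timestamp does not grow.

#include <assert.h>
#include <stdint.h>

#define PAGETABLE_ORDER 9

struct pt_fields {
    uint16_t nr_validated_ptes:PAGETABLE_ORDER + 1;  /* 10 bits, enough for 512  */
    uint16_t :16 - PAGETABLE_ORDER - 1 - 2;          /* 4 unused padding bits    */
    int16_t  partial_pte:2;                          /* holds -1, 0 or 1         */
    int16_t  linear_pt_count;                        /* full 16-bit signed count */
};

static_assert(sizeof(struct pt_fields) == sizeof(uint32_t),
              "must fit the 32-bit slot shared with tlbflush_timestamp");

int main(void) { return 0; }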
/*
#define PGT_count_width PG_shift(9)
#define PGT_count_mask ((1UL<<PGT_count_width)-1)
+/* Are the 'type mask' bits identical? */
+#define PGT_type_equal(x, y) (!(((x) ^ (y)) & PGT_type_mask))
+
/* Cleared when the owning guest 'frees' this page. */
#define _PGC_allocated PG_shift(1)
#define PGC_allocated PG_mask(1, 1)