Locking
~~~~~~~
Xen uses several locks to serialize access to the internal grant table state.
- grant_table->lock : lock used to prevent readers from accessing
+ grant_table->lock : rwlock used to prevent readers from accessing
inconsistent grant table state such as current
version, partially initialized active table pages,
etc.
active_grant_entry->lock : spinlock used to serialize modifications to
active entries
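For reference, the two locks live roughly as follows (an abridged sketch, not
the exact declarations; unrelated members are omitted):

    struct grant_table {
        rwlock_t      lock;           /* guards table-wide state (version, frames) */
        spinlock_t    maptrack_lock;  /* guards the maptrack free list             */
        /* ... other members ... */
    };

    struct active_grant_entry {
        spinlock_t    lock;           /* serializes updates to this active entry   */
        /* ... other members ... */
    };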
- The primary lock for the grant table is a spinlock. All functions
- that access members of struct grant_table must acquire the lock
- around critical sections.
+ The primary lock for the grant table is a read/write spinlock. All
+ functions that access members of struct grant_table must acquire a
+ read lock around critical sections. Any modification to the members
+ of struct grant_table (e.g., nr_status_frames, nr_grant_frames,
+ active frames, etc.) must only be made if the write lock is
+ held. These elements are read-mostly, and read critical sections can
+ be large, which makes a rwlock a good choice.
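For illustration only (a sketch, not part of the patch): readers of table-wide
state take the read lock, while anything that changes that state, such as
growing the table, takes the write lock.

    /* Reader: consult table-wide state without modifying it. */
    read_lock(&gt->lock);
    /* ... read gt->gt_version, look up entries, etc. ... */
    read_unlock(&gt->lock);

    /* Writer: change table-wide state, e.g. around gnttab_grow_table(). */
    write_lock(&gt->lock);
    /* ... update nr_grant_frames, nr_status_frames, active frames ... */
    write_unlock(&gt->lock);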
The maptrack free list is protected by its own spinlock. The maptrack
lock may be locked while holding the grant table lock.
Active entries are obtained by calling active_entry_acquire(gt, ref).
This function returns a pointer to the active entry after locking its
- spinlock. The caller must hold the grant table lock for the gt in
- question before calling active_entry_acquire(). This is because the
- grant table can be dynamically extended via gnttab_grow_table() while
- a domain is running and must be fully initialized. Once all access to
- the active entry is complete, release the lock by calling
- active_entry_release(act).
+ spinlock. The caller must hold the grant table read lock before
+ calling active_entry_acquire(). This is because the grant table can
+ be dynamically extended via gnttab_grow_table() while a domain is
+ running and must be fully initialized. Once all access to the active
+ entry is complete, release the lock by calling active_entry_release(act).
Summary of rules for locking:
active_entry_acquire() and active_entry_release() can only be
- called when holding the relevant grant table's lock. I.e.:
- spin_lock(&gt->lock);
+ called when holding the relevant grant table's read lock. I.e.:
+ read_lock(&gt->lock);
act = active_entry_acquire(gt, ref);
...
active_entry_release(act);
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
Active entries cannot be acquired while holding the maptrack lock.
Multiple active entries can be acquired while holding the grant table
- lock.
+ _write_ lock.
+
+ Maptrack entries are protected by the corresponding active entry
+ lock. As an exception, new maptrack entries may be populated without
+ holding the lock, provided the flags field is written last. This
+ requires that any user of a maptrack entry validate the flags field
+ as non-zero first.
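In code, the publish/validate pattern looks roughly like this (a sketch; the
real producer and consumer sites appear in the map and unmap paths further
down):

    /* Publisher: fill in the maptrack entry, making flags visible last. */
    mt->domid = op->dom;
    mt->ref   = op->ref;
    wmb();                                 /* order the stores above before flags */
    write_atomic(&mt->flags, op->flags);

    /* Consumer: a zero flags field means the entry is not yet published. */
    if ( !read_atomic(&mt->flags) )
        return GNTST_bad_handle;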
********************************************************************************
{
struct active_grant_entry *act;
- ASSERT(spin_is_locked(&t->lock));
+ ASSERT(rw_is_locked(&t->lock));
act = &_active_entry(t, e);
spin_lock(&act->lock);
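The release counterpart simply drops the per-entry spinlock (a sketch of the
expected shape, matching the acquire path above):

    static inline void
    active_entry_release(struct active_grant_entry *act)
    {
        spin_unlock(&act->lock);
    }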
static inline void
double_gt_lock(struct grant_table *lgt, struct grant_table *rgt)
{
+ /*
+ * See mapcount() for why the write lock is also required for the
+ * remote domain.
+ */
if ( lgt < rgt )
{
- spin_lock(&lgt->lock);
- spin_lock(&rgt->lock);
+ write_lock(&lgt->lock);
+ write_lock(&rgt->lock);
}
else
{
if ( lgt != rgt )
- spin_lock(&rgt->lock);
- spin_lock(&lgt->lock);
+ write_lock(&rgt->lock);
+ write_lock(&lgt->lock);
}
}
static inline void
double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
{
- spin_unlock(&lgt->lock);
+ write_unlock(&lgt->lock);
if ( lgt != rgt )
- spin_unlock(&rgt->lock);
+ write_unlock(&rgt->lock);
}
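Because both branches take the two write locks in ascending address order, two
CPUs operating on the same pair of grant tables in opposite roles cannot
deadlock. A typical (sketched) caller brackets the IOMMU accounting:

    double_gt_lock(lgt, rgt);
    mapcount(lgt, rd, frame, &wrc, &rdc);
    /* ... adjust IOMMU mappings based on the write/read counts ... */
    double_gt_unlock(lgt, rgt);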
static inline int
{
unsigned int ref, max_iter;
- ASSERT(spin_is_locked(&rgt->lock));
+ ASSERT(rw_is_locked(&rgt->lock));
max_iter = min(*ref_count + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
nr_grant_entries(rgt));
*wrc = *rdc = 0;
/*
- * Must have the local domain's grant table lock when iterating
- * over its maptrack entries.
+ * Must have the local domain's grant table write lock when
+ * iterating over its maptrack entries.
*/
- ASSERT(spin_is_locked(&lgt->lock));
+ ASSERT(rw_is_write_locked(&lgt->lock));
/*
- * Must have the remote domain's grant table lock while counting
- * its active entries.
+ * Must have the remote domain's grant table write lock while
+ * counting its active entries.
*/
- ASSERT(spin_is_locked(&rd->grant_table->lock));
+ ASSERT(rw_is_write_locked(&rd->grant_table->lock));
for ( handle = 0; handle < lgt->maptrack_limit; handle++ )
{
grant_entry_v2_t *sha2;
grant_entry_header_t *shah;
uint16_t *status;
+ bool_t need_iommu;
led = current;
ld = led->domain;
}
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
PIN_FAIL(unlock_out, GNTST_general_error,
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
/* pg may be set, with a refcount included, from __get_paged_frame */
if ( !pg )
goto undo_out;
}
- double_gt_lock(lgt, rgt);
-
- if ( gnttab_need_iommu_mapping(ld) )
+ need_iommu = gnttab_need_iommu_mapping(ld);
+ if ( need_iommu )
{
unsigned int wrc, rdc;
int err = 0;
+
+ double_gt_lock(lgt, rgt);
+
/* We're not translated, so we know that gmfns and mfns are
the same things, so the IOMMU entry is always 1-to-1. */
mapcount(lgt, rd, frame, &wrc, &rdc);
TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);
+ /*
+ * All maptrack entry users check mt->flags first before using the
+ * other fields so just ensure the flags field is stored last.
+ *
+ * However, if gnttab_need_iommu_mapping() then this would race
+ * with a concurrent mapcount() call (on an unmap, for example)
+ * and a lock is required.
+ */
mt = &maptrack_entry(lgt, handle);
mt->domid = op->dom;
mt->ref = op->ref;
- mt->flags = op->flags;
+ wmb();
+ write_atomic(&mt->flags, op->flags);
- double_gt_unlock(lgt, rgt);
+ if ( need_iommu )
+ double_gt_unlock(lgt, rgt);
op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
op->handle = handle;
put_page(pg);
}
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, op->ref);
active_entry_release(act);
unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
op->status = rc;
put_maptrack_handle(lgt, handle);
rcu_unlock_domain(rd);
}
op->map = &maptrack_entry(lgt, op->handle);
- spin_lock(&lgt->lock);
- if ( unlikely(!op->map->flags) )
+ read_lock(&lgt->lock);
+
+ if ( unlikely(!read_atomic(&op->map->flags)) )
{
- spin_unlock(&lgt->lock);
+ read_unlock(&lgt->lock);
gdprintk(XENLOG_INFO, "Zero flags for handle (%d).\n", op->handle);
op->status = GNTST_bad_handle;
return;
}
dom = op->map->domid;
- spin_unlock(&lgt->lock);
+ read_unlock(&lgt->lock);
if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
{
TRACE_1D(TRC_MEM_PAGE_GRANT_UNMAP, dom);
rgt = rd->grant_table;
- double_gt_lock(lgt, rgt);
- op->flags = op->map->flags;
+ read_lock(&rgt->lock);
+
+ op->flags = read_atomic(&op->map->flags);
if ( unlikely(!op->flags) || unlikely(op->map->domid != dom) )
{
gdprintk(XENLOG_WARNING, "Unstable handle %u\n", op->handle);
act->pin -= GNTPIN_hstw_inc;
}
- if ( gnttab_need_iommu_mapping(ld) )
+ act_release_out:
+ active_entry_release(act);
+ unmap_out:
+ read_unlock(&rgt->lock);
+
+ if ( rc == GNTST_okay && gnttab_need_iommu_mapping(ld) )
{
unsigned int wrc, rdc;
int err = 0;
+
+ double_gt_lock(lgt, rgt);
+
mapcount(lgt, rd, op->frame, &wrc, &rdc);
if ( (wrc + rdc) == 0 )
err = iommu_unmap_page(ld, op->frame);
else if ( wrc == 0 )
err = iommu_map_page(ld, op->frame, op->frame, IOMMUF_readable);
+
+ double_gt_unlock(lgt, rgt);
+
if ( err )
- {
rc = GNTST_general_error;
- goto act_release_out;
- }
}
/* If just unmapped a writable mapping, mark as dirtied */
- if ( !(op->flags & GNTMAP_readonly) )
+ if ( rc == GNTST_okay && !(op->flags & GNTMAP_readonly) )
gnttab_mark_dirty(rd, op->frame);
- act_release_out:
- active_entry_release(act);
- unmap_out:
- double_gt_unlock(lgt, rgt);
-
op->status = rc;
rcu_unlock_domain(rd);
}
rcu_lock_domain(rd);
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
goto unlock_out;
act_release_out:
active_entry_release(act);
unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
if ( put_handle )
{
gt->nr_status_frames = 0;
}
+/*
+ * Grow the grant table. The caller must hold the grant table's
+ * write lock before calling this function.
+ */
int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
{
- /* d's grant table lock must be held by the caller */
-
struct grant_table *gt = d->grant_table;
unsigned int i, j;
}
gt = d->grant_table;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
if ( gt->gt_version == 0 )
gt->gt_version = 1;
}
out3:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
out2:
rcu_unlock_domain(d);
out1:
goto query_out_unlock;
}
- spin_lock(&d->grant_table->lock);
+ read_lock(&d->grant_table->lock);
op.nr_frames = nr_grant_frames(d->grant_table);
op.max_nr_frames = max_grant_frames;
op.status = GNTST_okay;
- spin_unlock(&d->grant_table->lock);
+ read_unlock(&d->grant_table->lock);
query_out_unlock:
union grant_combo scombo, prev_scombo, new_scombo;
int retries = 0;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
{
scombo = prev_scombo;
}
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return 1;
fail:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return 0;
}
struct gnttab_transfer gop;
unsigned long mfn;
unsigned int max_bitsize;
+ struct active_grant_entry *act;
for ( i = 0; i < count; i++ )
{
TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
/* Tell the guest about its new page frame. */
- spin_lock(&e->grant_table->lock);
+ read_lock(&e->grant_table->lock);
+ act = active_entry_acquire(e->grant_table, gop.ref);
if ( e->grant_table->gt_version == 1 )
{
shared_entry_header(e->grant_table, gop.ref)->flags |=
GTF_transfer_completed;
- spin_unlock(&e->grant_table->lock);
+ active_entry_release(act);
+ read_unlock(&e->grant_table->lock);
rcu_unlock_domain(e);
released_read = 0;
released_write = 0;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
}
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
if ( td != rd )
{
*page = NULL;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
if ( rgt->gt_version == 0 )
PIN_FAIL(gt_unlock_out, GNTST_general_error,
* here and reacquire
*/
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
rc = __acquire_grant_for_copy(td, trans_gref, rd->domain_id,
readonly, &grant_frame, page,
&trans_page_off, &trans_length, 0);
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, gref);
if ( rc != GNTST_okay ) {
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
}
__fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
put_page(*page);
return __acquire_grant_for_copy(rd, gref, ldom, readonly,
frame, page, page_off, length,
*frame = act->frame;
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
unlock_out_clear:
active_entry_release(act);
gt_unlock_out:
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
return rc;
}
if ( gt->gt_version == op.version )
goto out;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
/* Make sure that the grant table isn't currently in use when we
change the version number, except for the first 8 entries which
are allowed to be in use (xenstore/xenconsole keeps them mapped).
gt->gt_version = op.version;
out_unlock:
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
out:
op.version = gt->gt_version;
op.status = GNTST_okay;
- spin_lock(&gt->lock);
+ read_lock(&gt->lock);
for ( i = 0; i < op.nr_frames; i++ )
{
op.status = GNTST_bad_virt_addr;
}
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
out2:
rcu_unlock_domain(d);
out1:
struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
- spin_lock(&gt->lock);
+ write_lock(&gt->lock);
if ( gt->gt_version == 0 )
PIN_FAIL(out, GNTST_general_error, "grant table not yet set up\n");
active_entry_release(act_b);
if ( act_a != NULL )
active_entry_release(act_a);
- spin_unlock(&gt->lock);
+ write_unlock(&gt->lock);
rcu_unlock_domain(d);
if ( d != owner )
{
- spin_lock(&owner->grant_table->lock);
+ read_lock(&owner->grant_table->lock);
ret = grant_map_exists(d, owner->grant_table, mfn, ref_count);
if ( ret != 0 )
{
- spin_unlock(&owner->grant_table->lock);
+ read_unlock(&owner->grant_table->lock);
rcu_unlock_domain(d);
put_page(page);
return ret;
ret = 0;
if ( d != owner )
- spin_unlock(&owner->grant_table->lock);
+ read_unlock(&owner->grant_table->lock);
unmap_domain_page(v);
put_page(page);
goto no_mem_0;
/* Simple stuff. */
- spin_lock_init(&t->lock);
+ rwlock_init(&t->lock);
spin_lock_init(&t->maptrack_lock);
t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES;
}
rgt = rd->grant_table;
- spin_lock(&rgt->lock);
+ read_lock(&rgt->lock);
act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
gnttab_clear_flag(_GTF_reading, status);
active_entry_release(act);
- spin_unlock(&rgt->lock);
+ read_unlock(&rgt->lock);
rcu_unlock_domain(rd);
printk(" -------- active -------- -------- shared --------\n");
printk("[ref] localdom mfn pin localdom gmfn flags\n");
- spin_lock(&gt->lock);
+ read_lock(&gt->lock);
if ( gt->gt_version == 0 )
goto out;
}
out:
- spin_unlock(&gt->lock);
+ read_unlock(&gt->lock);
if ( first )
printk("grant-table for remote domain:%5d ... "