Replace redundant code with a few new vm_page_grab facilities:
- VM_ALLOC_NOCREAT will grab without creating a page.
- vm_page_grab_valid() will grab and page in if necessary.
- vm_page_busy_acquire() automates some busy acquire loops.
Discussed with: alc, kib, markj
Tested by: pho (part of larger branch)
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D21546
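For illustration, a minimal caller-side sketch of the new convention (a
hypothetical call site, using only the functions and flags introduced by
this change):

	/*
	 * Grab the page at pindex and make it valid, paging it in from
	 * the backing store if needed.  With VM_ALLOC_NOCREAT a missing
	 * or invalid page yields VM_PAGER_FAIL instead of a freshly
	 * allocated page; VM_ALLOC_NOBUSY returns the page unbusied.
	 */
	vm_page_t m;
	int rv;

	VM_OBJECT_WLOCK(object);
	rv = vm_page_grab_valid(&m, object, pindex,
	    VM_ALLOC_NOCREAT | VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
	VM_OBJECT_WUNLOCK(object);
	if (rv != VM_PAGER_OK)
		return (NULL);	/* No resident, fully valid page. */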
obj = vp->v_object;
zfs_vmobject_assert_wlocked(obj);
- for (;;) {
- if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
- pp->valid) {
- if (vm_page_xbusied(pp)) {
- /*
- * Reference the page before unlocking and
- * sleeping so that the page daemon is less
- * likely to reclaim it.
- */
- vm_page_reference(pp);
- vm_page_sleep_if_xbusy(pp, "zfsmwb");
- continue;
- }
- vm_page_sbusy(pp);
- } else if (pp != NULL) {
- ASSERT(!pp->valid);
- pp = NULL;
- }
-
- if (pp != NULL) {
- ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
- vm_object_pip_add(obj, 1);
- pmap_remove_write(pp);
- if (nbytes != 0)
- vm_page_clear_dirty(pp, off, nbytes);
- }
- break;
+ vm_page_grab_valid(&pp, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT |
+ VM_ALLOC_SBUSY | VM_ALLOC_NORMAL | VM_ALLOC_IGN_SBUSY);
+ if (pp != NULL) {
+ ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
+ vm_object_pip_add(obj, 1);
+ pmap_remove_write(pp);
+ if (nbytes != 0)
+ vm_page_clear_dirty(pp, off, nbytes);
}
return (pp);
}
page_wire(vnode_t *vp, int64_t start)
{
vm_object_t obj;
- vm_page_t pp;
+ vm_page_t m;
obj = vp->v_object;
zfs_vmobject_assert_wlocked(obj);
- for (;;) {
- if ((pp = vm_page_lookup(obj, OFF_TO_IDX(start))) != NULL &&
- pp->valid) {
- if (vm_page_xbusied(pp)) {
- /*
- * Reference the page before unlocking and
- * sleeping so that the page daemon is less
- * likely to reclaim it.
- */
- vm_page_reference(pp);
- vm_page_sleep_if_xbusy(pp, "zfsmwb");
- continue;
- }
-
- ASSERT3U(pp->valid, ==, VM_PAGE_BITS_ALL);
- vm_page_wire(pp);
- } else
- pp = NULL;
- break;
- }
- return (pp);
+ vm_page_grab_valid(&m, obj, OFF_TO_IDX(start), VM_ALLOC_NOCREAT |
+ VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY | VM_ALLOC_NOBUSY);
+ return (m);
}
static void
panic("GFP_NOWAIT is unimplemented");
VM_OBJECT_WLOCK(obj);
- page = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
- VM_ALLOC_WIRED);
- if (page->valid != VM_PAGE_BITS_ALL) {
- vm_page_xbusy(page);
- if (vm_pager_has_page(obj, pindex, NULL, NULL)) {
- rv = vm_pager_get_pages(obj, &page, 1, NULL, NULL);
- if (rv != VM_PAGER_OK) {
- vm_page_unwire_noq(page);
- vm_page_free(page);
- VM_OBJECT_WUNLOCK(obj);
- return (ERR_PTR(-EINVAL));
- }
- MPASS(page->valid == VM_PAGE_BITS_ALL);
- } else {
- pmap_zero_page(page);
- page->valid = VM_PAGE_BITS_ALL;
- page->dirty = 0;
- }
- vm_page_xunbusy(page);
- }
+ rv = vm_page_grab_valid(&page, obj, pindex, VM_ALLOC_NORMAL |
+ VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
VM_OBJECT_WUNLOCK(obj);
+ if (rv != VM_PAGER_OK)
+ return (ERR_PTR(-EINVAL));
return (page);
}
}
VM_OBJECT_WLOCK(vm_obj);
- if (vm_page_busied(m)) {
- vm_page_sleep_if_busy(m, "ttmpbs");
+ if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0) {
ttm_mem_io_unlock(man);
ttm_bo_unreserve(bo);
goto retry;
m1 = vm_page_lookup(vm_obj, OFF_TO_IDX(offset));
if (m1 == NULL) {
if (vm_page_insert(m, vm_obj, OFF_TO_IDX(offset))) {
+ vm_page_xunbusy(m);
VM_OBJECT_WUNLOCK(vm_obj);
vm_wait(vm_obj);
VM_OBJECT_WLOCK(vm_obj);
bo, m, m1, (uintmax_t)offset));
}
m->valid = VM_PAGE_BITS_ALL;
- vm_page_xbusy(m);
if (*mres != NULL) {
KASSERT(*mres != m, ("losing %p %p", *mres, m));
vm_page_free(*mres);
m = vm_page_lookup(vm_obj, i);
if (m == NULL)
continue;
- if (vm_page_sleep_if_busy(m, "ttm_unm"))
+ if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
goto retry;
cdev_pager_free_page(vm_obj, m);
}
VM_OBJECT_WLOCK(obj);
vm_object_pip_add(obj, 1);
for (i = 0; i < ttm->num_pages; ++i) {
- from_page = vm_page_grab(obj, i, VM_ALLOC_NORMAL);
- if (from_page->valid != VM_PAGE_BITS_ALL) {
- if (vm_pager_has_page(obj, i, NULL, NULL)) {
- rv = vm_pager_get_pages(obj, &from_page, 1,
- NULL, NULL);
- if (rv != VM_PAGER_OK) {
- vm_page_free(from_page);
- ret = -EIO;
- goto err_ret;
- }
- } else
- vm_page_zero_invalid(from_page, TRUE);
+ rv = vm_page_grab_valid(&from_page, obj, i,
+ VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY);
+ if (rv != VM_PAGER_OK) {
+ ret = -EIO;
+ goto err_ret;
}
- vm_page_xunbusy(from_page);
to_page = ttm->pages[i];
if (unlikely(to_page == NULL)) {
ret = -ENOMEM;
m = vm_page_lookup(gmap->map->mem, i);
if (m == NULL)
continue;
- if (vm_page_sleep_if_busy(m, "pcmdum"))
+ if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
goto retry;
cdev_pager_free_page(gmap->map->mem, m);
}
m = vm_page_lookup(map->mem, i);
if (m == NULL)
continue;
- if (vm_page_sleep_if_busy(m, "pcmdum"))
+ if (vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
goto retry;
cdev_pager_free_page(map->mem, m);
}
* lock to page out tobj's pages because tobj is a OBJT_SWAP
* type object.
*/
- m = vm_page_grab(obj, idx, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
- VM_ALLOC_WIRED);
- if (m->valid != VM_PAGE_BITS_ALL) {
- vm_page_xbusy(m);
- if (vm_pager_has_page(obj, idx, NULL, NULL)) {
- rv = vm_pager_get_pages(obj, &m, 1, NULL, NULL);
- if (rv != VM_PAGER_OK) {
- printf(
- "uiomove_object: vm_obj %p idx %jd valid %x pager error %d\n",
- obj, idx, m->valid, rv);
- vm_page_unwire_noq(m);
- vm_page_free(m);
- VM_OBJECT_WUNLOCK(obj);
- return (EIO);
- }
- } else
- vm_page_zero_invalid(m, TRUE);
- vm_page_xunbusy(m);
+ rv = vm_page_grab_valid(&m, obj, idx,
+ VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_NOBUSY);
+ if (rv != VM_PAGER_OK) {
+ VM_OBJECT_WUNLOCK(obj);
+ printf("uiomove_object: vm_obj %p idx %jd pager error %d\n",
+ obj, idx, rv);
+ return (EIO);
}
VM_OBJECT_WUNLOCK(obj);
error = uiomove_fromphys(&m, offset, tlen, uio);
{
vm_page_t m;
vm_pindex_t pindex;
- int rv;
- VM_OBJECT_WLOCK(object);
pindex = OFF_TO_IDX(offset);
- m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY |
- VM_ALLOC_WIRED);
- if (m->valid != VM_PAGE_BITS_ALL) {
- vm_page_xbusy(m);
- rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
- if (rv != VM_PAGER_OK) {
- vm_page_unwire_noq(m);
- vm_page_free(m);
- m = NULL;
- goto out;
- }
- vm_page_xunbusy(m);
- }
-out:
+ VM_OBJECT_WLOCK(object);
+ (void)vm_page_grab_valid(&m, object, pindex,
+ VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | VM_ALLOC_WIRED);
VM_OBJECT_WUNLOCK(object);
return (m);
}
VM_OBJECT_ASSERT_WLOCKED(object);
for (pindex = start; pindex < end; pindex++) {
- m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL);
- if (m->valid != VM_PAGE_BITS_ALL) {
- rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
- if (rv != VM_PAGER_OK) {
- vm_page_free(m);
- break;
- }
- }
+ rv = vm_page_grab_valid(&m, object, pindex, VM_ALLOC_NORMAL);
+ if (rv != VM_PAGER_OK)
+ break;
+
/*
* Keep "m" busy because a subsequent iteration may unlock
* the object.
vm_page_aflag_set(m, PGA_REFERENCED);
}
+/*
+ * vm_page_busy_acquire:
+ *
+ * Acquire the busy lock as described by VM_ALLOC_* flags. Will loop
+ * and drop the object lock if necessary.
+ */
+int
+vm_page_busy_acquire(vm_page_t m, int allocflags)
+{
+ vm_object_t obj;
+ u_int x;
+ bool locked;
+
+	/*
+	 * The page-specific object must be cached because page
+	 * identity can change during sleep, causing us to re-lock
+	 * a different object.  The caller is assumed to already
+	 * hold a reference to the object.
+	 */
+ obj = m->object;
+ for (;;) {
+ if ((allocflags & VM_ALLOC_SBUSY) == 0) {
+ if (vm_page_tryxbusy(m))
+ return (TRUE);
+ } else {
+ if (vm_page_trysbusy(m))
+ return (TRUE);
+ }
+ if ((allocflags & VM_ALLOC_NOWAIT) != 0)
+ return (FALSE);
+ if (obj != NULL) {
+ locked = VM_OBJECT_WOWNED(obj);
+ } else {
+ MPASS(vm_page_wired(m));
+ locked = FALSE;
+ }
+ sleepq_lock(m);
+ x = m->busy_lock;
+ if (x == VPB_UNBUSIED ||
+ ((allocflags & VM_ALLOC_SBUSY) != 0 &&
+ (x & VPB_BIT_SHARED) != 0) ||
+ ((x & VPB_BIT_WAITERS) == 0 &&
+ !atomic_cmpset_int(&m->busy_lock, x,
+ x | VPB_BIT_WAITERS))) {
+ sleepq_release(m);
+ continue;
+ }
+ if (locked)
+ VM_OBJECT_WUNLOCK(obj);
+ sleepq_add(m, NULL, "vmpba", 0, 0);
+ sleepq_wait(m, PVM);
+ if (locked)
+ VM_OBJECT_WLOCK(obj);
+ MPASS(m->object == obj || m->object == NULL);
+ if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
+ return (FALSE);
+ }
+}
+
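A hedged usage sketch (hypothetical call site, mirroring the ttm and xen
conversions above); with VM_ALLOC_WAITFAIL the function returns 0 once it
has slept and dropped the object lock, so the caller must revalidate its
lookup rather than trust a cached page pointer:

	m = vm_page_lookup(obj, pindex);
	if (m != NULL &&
	    vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL) == 0)
		goto retry;	/* Lock was dropped; look the page up again. */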
/*
* vm_page_busy_downgrade:
*
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
- vm_page_aflag_set(m, PGA_REFERENCED);
+ if ((allocflags & VM_ALLOC_NOCREAT) == 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
vm_page_busy_sleep(m, "pgrbwt", (allocflags &
VM_ALLOC_IGN_SBUSY) != 0);
VM_OBJECT_WLOCK(object);
+ if ((allocflags & VM_ALLOC_WAITFAIL) != 0)
+ return (NULL);
goto retrylookup;
} else {
if ((allocflags & VM_ALLOC_WIRED) != 0)
if ((allocflags &
(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY)) == 0)
vm_page_xbusy(m);
- if ((allocflags & VM_ALLOC_SBUSY) != 0)
+ else if ((allocflags & VM_ALLOC_SBUSY) != 0)
vm_page_sbusy(m);
return (m);
}
}
+ if ((allocflags & VM_ALLOC_NOCREAT) != 0)
+ return (NULL);
m = vm_page_alloc(object, pindex, pflags);
if (m == NULL) {
if ((allocflags & VM_ALLOC_NOWAIT) != 0)
return (m);
}
+/*
+ * Grab a page and make it valid, paging in if necessary. Pages missing from
+ * their pager are zero filled and validated.
+ */
+int
+vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
+    int allocflags)
+{
+ vm_page_t m;
+ bool sleep, xbusy;
+ int pflags;
+ int rv;
+
+ KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 ||
+ (allocflags & VM_ALLOC_IGN_SBUSY) != 0,
+ ("vm_page_grab_valid: VM_ALLOC_SBUSY/VM_ALLOC_IGN_SBUSY mismatch"));
+ KASSERT((allocflags &
+ (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL | VM_ALLOC_ZERO)) == 0,
+ ("vm_page_grab_valid: Invalid flags 0x%X", allocflags));
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ pflags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY);
+ pflags |= VM_ALLOC_WAITFAIL;
+
+retrylookup:
+ xbusy = false;
+ if ((m = vm_page_lookup(object, pindex)) != NULL) {
+ /*
+ * If the page is fully valid it can only become invalid
+ * with the object lock held. If it is not valid it can
+ * become valid with the busy lock held. Therefore, we
+ * may unnecessarily lock the exclusive busy here if we
+ * race with I/O completion not using the object lock.
+ * However, we will not end up with an invalid page and a
+ * shared lock.
+ */
+ if (m->valid != VM_PAGE_BITS_ALL ||
+ (allocflags & (VM_ALLOC_IGN_SBUSY | VM_ALLOC_SBUSY)) == 0) {
+ sleep = !vm_page_tryxbusy(m);
+ xbusy = true;
+ } else
+ sleep = !vm_page_trysbusy(m);
+ if (sleep) {
+ /*
+ * Reference the page before unlocking and
+ * sleeping so that the page daemon is less
+ * likely to reclaim it.
+ */
+ if ((allocflags & VM_ALLOC_NOCREAT) == 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ vm_page_busy_sleep(m, "pgrbwt", (allocflags &
+ VM_ALLOC_IGN_SBUSY) != 0);
+ VM_OBJECT_WLOCK(object);
+ goto retrylookup;
+ }
+ if ((allocflags & VM_ALLOC_NOCREAT) != 0 &&
+ m->valid != VM_PAGE_BITS_ALL) {
+ if (xbusy)
+ vm_page_xunbusy(m);
+ else
+ vm_page_sunbusy(m);
+ *mp = NULL;
+ return (VM_PAGER_FAIL);
+ }
+ if ((allocflags & VM_ALLOC_WIRED) != 0)
+ vm_page_wire(m);
+ if (m->valid == VM_PAGE_BITS_ALL)
+ goto out;
+ } else if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
+ *mp = NULL;
+ return (VM_PAGER_FAIL);
+ } else if ((m = vm_page_alloc(object, pindex, pflags)) != NULL) {
+ xbusy = true;
+ } else {
+ goto retrylookup;
+ }
+
+ vm_page_assert_xbusied(m);
+ MPASS(xbusy);
+ if (vm_pager_has_page(object, pindex, NULL, NULL)) {
+ rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
+ if (rv != VM_PAGER_OK) {
+ if (allocflags & VM_ALLOC_WIRED)
+ vm_page_unwire_noq(m);
+ vm_page_free(m);
+ *mp = NULL;
+ return (rv);
+ }
+ MPASS(m->valid == VM_PAGE_BITS_ALL);
+ } else {
+ vm_page_zero_invalid(m, TRUE);
+ }
+out:
+ if ((allocflags & VM_ALLOC_NOBUSY) != 0) {
+ if (xbusy)
+ vm_page_xunbusy(m);
+ else
+ vm_page_sunbusy(m);
+ }
+ if ((allocflags & VM_ALLOC_SBUSY) != 0 && xbusy)
+ vm_page_busy_downgrade(m);
+ *mp = m;
+ return (VM_PAGER_OK);
+}
+
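Return values follow the vm_pager convention; a hedged sketch of the
caller side (hypothetical, with the error code chosen by the caller):

	rv = vm_page_grab_valid(&m, object, pindex,
	    VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_NOBUSY);
	if (rv != VM_PAGER_OK) {
		/*
		 * VM_PAGER_FAIL is returned only for VM_ALLOC_NOCREAT
		 * when no valid page exists; any other failure is a
		 * pager error and the page has been freed.  Either
		 * way *mp has been set to NULL.
		 */
		return (EIO);
	}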
/*
* Return the specified range of pages from the given object. For each
* page offset within the range, if a page already exists within the object
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
- vm_page_aflag_set(m, PGA_REFERENCED);
+ if ((allocflags & VM_ALLOC_NOCREAT) == 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
vm_page_busy_sleep(m, "grbmaw", (allocflags &
VM_ALLOC_IGN_SBUSY) != 0);
VM_OBJECT_WLOCK(object);
if ((allocflags & VM_ALLOC_SBUSY) != 0)
vm_page_sbusy(m);
} else {
+ if ((allocflags & VM_ALLOC_NOCREAT) != 0)
+ break;
m = vm_page_alloc_after(object, pindex + i,
pflags | VM_ALLOC_COUNT(count - i), mpred);
if (m == NULL) {
#define VM_ALLOC_ZERO 0x0040 /* (acfgp) Allocate a prezeroed page */
#define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */
#define VM_ALLOC_NOBUSY 0x0200 /* (acgp) Do not excl busy the page */
+#define VM_ALLOC_NOCREAT 0x0400 /* (gp) Don't create a page */
#define VM_ALLOC_IGN_SBUSY 0x1000 /* (gp) Ignore shared busy flag */
#define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */
#define VM_ALLOC_SBUSY 0x4000 /* (acgp) Shared busy the page */
#define PS_ALL_VALID 0x2
#define PS_NONE_BUSY 0x4
+int vm_page_busy_acquire(vm_page_t m, int allocflags);
void vm_page_busy_downgrade(vm_page_t m);
void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared);
void vm_page_free(vm_page_t m);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
vm_page_t *ma, int count);
+int vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
+ int allocflags);
void vm_page_deactivate(vm_page_t);
void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);