bp, bp->b_vp, bp->b_flags);
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
+ KASSERT((bp->b_flags & B_VMIO) != 0 || (bp->b_flags & B_NOREUSE) == 0,
+ ("brelse: non-VMIO buffer marked NOREUSE"));
if (BUF_LOCKRECURSED(bp)) {
/*
allocbuf(bp, 0);
}
- if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0) {
+ if ((bp->b_flags & (B_INVAL | B_RELBUF)) != 0 ||
+ (bp->b_flags & (B_DELWRI | B_NOREUSE)) == B_NOREUSE) {
allocbuf(bp, 0);
+ bp->b_flags &= ~B_NOREUSE;
if (bp->b_vp != NULL)
brelvp(bp);
}
if ((bp->b_flags & B_DELWRI) == 0 &&
(bp->b_xflags & BX_VNDIRTY))
panic("bqrelse: not dirty");
+ if ((bp->b_flags & B_NOREUSE) != 0) {
+ brelse(bp);
+ return;
+ }
qindex = QUEUE_CLEAN;
}
binsfree(bp, qindex);
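
The test added to brelse() above uses the (flags & (B_DELWRI | B_NOREUSE)) == B_NOREUSE idiom, which is true exactly when B_NOREUSE is set and B_DELWRI is clear, i.e. the buffer is clean and its contents will not be reused, so its pages can be released right away. A minimal standalone sketch of the idiom; B_NOREUSE matches the new definition in sys/buf.h, and B_DELWRI's value here is illustrative (consistent with the \10delwri entry in PRINT_BUF_FLAGS below):

    #include <stdio.h>

    #define B_DELWRI  0x00000080   /* illustrative; bit 8 ("\10delwri") */
    #define B_NOREUSE 0x00000800   /* new flag added in this change */

    int
    main(void)
    {
            int b_flags = B_NOREUSE;

            /* True only when B_NOREUSE is set and B_DELWRI is clear. */
            if ((b_flags & (B_DELWRI | B_NOREUSE)) == B_NOREUSE)
                    printf("clean NOREUSE buffer: release its pages now\n");

            b_flags |= B_DELWRI;
            if ((b_flags & (B_DELWRI | B_NOREUSE)) == B_NOREUSE)
                    printf("not reached: delayed-write buffers keep their pages\n");
            return (0);
    }
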
freed = false;
if (!freed) {
/*
- * In order to maintain LRU page ordering, put
- * the page at the tail of the inactive queue.
+ * If the page is unlikely to be reused, let the
+ * VM know. Otherwise, maintain LRU page
+ * ordering and put the page at the tail of the
+ * inactive queue.
*/
- vm_page_deactivate(m);
+ if ((bp->b_flags & B_NOREUSE) != 0)
+ vm_page_deactivate_noreuse(m);
+ else
+ vm_page_deactivate(m);
}
}
vm_page_unlock(m);
* Note: we no longer distinguish between VMIO and non-VMIO
* buffers.
*/
- KASSERT((bp->b_flags & B_DELWRI) == 0,
- ("delwri buffer %p found in queue %d", bp, qindex));
+ KASSERT((bp->b_flags & (B_DELWRI | B_NOREUSE)) == 0,
+ ("invalid buffer %p flags %#x found in queue %d", bp, bp->b_flags,
+ qindex));
/*
* When recycling a clean buffer we have to truncate it and
int
vop_stdadvise(struct vop_advise_args *ap)
{
+ struct buf *bp;
+ struct buflists *bl;
struct vnode *vp;
+ daddr_t bn, startn, endn;
off_t start, end;
- int error;
+ int bsize, error;
vp = ap->a_vp;
switch (ap->a_advice) {
error = 0;
break;
case POSIX_FADV_DONTNEED:
- /*
- * Flush any open FS buffers and then remove pages
- * from the backing VM object. Using vinvalbuf() here
- * is a bit heavy-handed as it flushes all buffers for
- * the given vnode, not just the buffers covering the
- * requested range.
- */
error = 0;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_iflag & VI_DOOMED) {
VOP_UNLOCK(vp, 0);
break;
}
- vinvalbuf(vp, V_CLEANONLY, 0, 0);
+
+ /*
+ * Deactivate pages in the specified range from the backing VM
+ * object. Pages that are resident in the buffer cache will
+ * remain wired until their corresponding buffers are released
+ * below.
+ */
if (vp->v_object != NULL) {
start = trunc_page(ap->a_start);
end = round_page(ap->a_end);
VM_OBJECT_WLOCK(vp->v_object);
- vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
+ vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start),
OFF_TO_IDX(end));
VM_OBJECT_WUNLOCK(vp->v_object);
}
+
+ BO_RLOCK(&vp->v_bufobj);
+ bsize = vp->v_bufobj.bo_bsize;
+ startn = ap->a_start / bsize;
+ if (ap->a_end == OFF_MAX) {
+ endn = -1;
+ bl = &vp->v_bufobj.bo_clean.bv_hd;
+ if (!TAILQ_EMPTY(bl))
+ endn = TAILQ_LAST(bl, buflists)->b_lblkno;
+ bl = &vp->v_bufobj.bo_dirty.bv_hd;
+ if (!TAILQ_EMPTY(bl) &&
+ endn < TAILQ_LAST(bl, buflists)->b_lblkno)
+ endn = TAILQ_LAST(bl, buflists)->b_lblkno;
+ } else
+ endn = ap->a_end / bsize;
+ BO_RUNLOCK(&vp->v_bufobj);
+ /*
+ * In the VMIO case, use the B_NOREUSE flag to hint that the
+ * pages backing each buffer in the range are unlikely to be
+ * reused. Dirty buffers will have the hint applied once
+ * they've been written.
+ */
+ for (bn = startn; bn <= endn; bn++) {
+ bp = getblk(vp, bn, bsize, 0, 0, GB_NOCREAT |
+ GB_UNMAPPED);
+ if (bp == NULL)
+ continue;
+ bp->b_flags |= B_RELBUF;
+ if (vp->v_object != NULL)
+ bp->b_flags |= B_NOREUSE;
+ brelse(bp);
+ }
VOP_UNLOCK(vp, 0);
break;
default:
new->fa_advice = advice;
new->fa_start = offset;
new->fa_end = end;
- new->fa_prevstart = 0;
- new->fa_prevend = 0;
fp->f_advice = new;
new = fa;
}
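
For reference, the POSIX_FADV_DONTNEED path in vop_stdadvise() above is reached from userland through posix_fadvise(2). A minimal sketch using only the standard POSIX API (the file path is hypothetical); a length of zero covers all data following the offset, corresponding to the a_end == OFF_MAX case handled above:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int
    main(void)
    {
            char buf[65536];
            int error, fd;

            fd = open("/tmp/bigfile", O_RDONLY);    /* hypothetical path */
            if (fd < 0)
                    return (1);
            while (read(fd, buf, sizeof(buf)) > 0)
                    ;                               /* scan the file once */

            /*
             * Hint that the cached data will not be needed again.  For
             * filesystems using the default VOP_ADVISE() implementation,
             * vop_stdadvise() then releases each buffer in the range
             * with B_RELBUF | B_NOREUSE.
             */
            error = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
            if (error != 0)
                    fprintf(stderr, "posix_fadvise: %s\n", strerror(error));
            close(fd);
            return (0);
    }
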
struct thread *td;
{
struct vnode *vp;
- struct mtx *mtxp;
+ off_t orig_offset;
int error, ioflag;
int advice;
- off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
/* Disable read-ahead for random I/O. */
break;
}
- offset = uio->uio_offset;
+ orig_offset = uio->uio_offset;
#ifdef MAC
error = mac_vnode_check_read(active_cred, fp->f_cred, vp);
fp->f_nextoff = uio->uio_offset;
VOP_UNLOCK(vp, 0);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
- offset != uio->uio_offset) {
+ orig_offset != uio->uio_offset)
/*
- * Use POSIX_FADV_DONTNEED to flush clean pages and
- * buffers for the backing file after a
- * POSIX_FADV_NOREUSE read(2). To optimize the common
- * case of using POSIX_FADV_NOREUSE with sequential
- * access, track the previous implicit DONTNEED
- * request and grow this request to include the
- * current read(2) in addition to the previous
- * DONTNEED. With purely sequential access this will
- * cause the DONTNEED requests to continously grow to
- * cover all of the previously read regions of the
- * file. This allows filesystem blocks that are
- * accessed by multiple calls to read(2) to be flushed
- * once the last read(2) finishes.
+ * Use POSIX_FADV_DONTNEED to flush pages and buffers
+ * for the backing file after a POSIX_FADV_NOREUSE
+ * read(2).
*/
- start = offset;
- end = uio->uio_offset - 1;
- mtxp = mtx_pool_find(mtxpool_sleep, fp);
- mtx_lock(mtxp);
- if (fp->f_advice != NULL &&
- fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
- if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
- start = fp->f_advice->fa_prevstart;
- else if (fp->f_advice->fa_prevstart != 0 &&
- fp->f_advice->fa_prevstart == end + 1)
- end = fp->f_advice->fa_prevend;
- fp->f_advice->fa_prevstart = start;
- fp->f_advice->fa_prevend = end;
- }
- mtx_unlock(mtxp);
- error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
- }
+ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1,
+ POSIX_FADV_DONTNEED);
return (error);
}
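
The simplified NOREUSE handling above now issues one implicit DONTNEED per read(2), covering exactly the bytes transferred by that call, instead of coalescing ranges through fa_prevstart/fa_prevend. From userland the common pattern is to set the advice once and then read sequentially; a minimal sketch (path hypothetical):

    #include <fcntl.h>
    #include <unistd.h>

    int
    main(void)
    {
            char buf[131072];
            int fd;

            fd = open("/tmp/logfile", O_RDONLY);    /* hypothetical path */
            if (fd < 0)
                    return (1);

            /* Ask that data read through this descriptor not be cached. */
            (void)posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE);

            /*
             * After each read(2), vn_read() calls VOP_ADVISE(vp,
             * orig_offset, uio->uio_offset - 1, POSIX_FADV_DONTNEED),
             * so the pages and buffers backing the bytes just read are
             * queued for early reclamation.
             */
            while (read(fd, buf, sizeof(buf)) > 0)
                    ;
            close(fd);
            return (0);
    }
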
{
struct vnode *vp;
struct mount *mp;
- struct mtx *mtxp;
+ off_t orig_offset;
int error, ioflag, lock_flags;
int advice;
- off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
/* XXX: Is this correct? */
break;
}
- offset = uio->uio_offset;
+ orig_offset = uio->uio_offset;
#ifdef MAC
error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
if (vp->v_type != VCHR)
vn_finished_write(mp);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
- offset != uio->uio_offset) {
+ orig_offset != uio->uio_offset)
/*
- * Use POSIX_FADV_DONTNEED to flush clean pages and
- * buffers for the backing file after a
- * POSIX_FADV_NOREUSE write(2). To optimize the
- * common case of using POSIX_FADV_NOREUSE with
- * sequential access, track the previous implicit
- * DONTNEED request and grow this request to include
- * the current write(2) in addition to the previous
- * DONTNEED. With purely sequential access this will
- * cause the DONTNEED requests to continously grow to
- * cover all of the previously written regions of the
- * file.
- *
- * Note that the blocks just written are almost
- * certainly still dirty, so this only works when
- * VOP_ADVISE() calls from subsequent writes push out
- * the data written by this write(2) once the backing
- * buffers are clean. However, as compared to forcing
- * IO_DIRECT, this gives much saner behavior. Write
- * clustering is still allowed, and clean pages are
- * merely moved to the cache page queue rather than
- * outright thrown away. This means a subsequent
- * read(2) can still avoid hitting the disk if the
- * pages have not been reclaimed.
- *
- * This does make POSIX_FADV_NOREUSE largely useless
- * with non-sequential access. However, sequential
- * access is the more common use case and the flag is
- * merely advisory.
+ * Use POSIX_FADV_DONTNEED to flush pages and buffers
+ * for the backing file after a POSIX_FADV_NOREUSE
+ * write(2).
*/
- start = offset;
- end = uio->uio_offset - 1;
- mtxp = mtx_pool_find(mtxpool_sleep, fp);
- mtx_lock(mtxp);
- if (fp->f_advice != NULL &&
- fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
- if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
- start = fp->f_advice->fa_prevstart;
- else if (fp->f_advice->fa_prevstart != 0 &&
- fp->f_advice->fa_prevstart == end + 1)
- end = fp->f_advice->fa_prevend;
- fp->f_advice->fa_prevstart = start;
- fp->f_advice->fa_prevend = end;
- }
- mtx_unlock(mtxp);
- error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
- }
-
+ error = VOP_ADVISE(vp, orig_offset, uio->uio_offset - 1,
+ POSIX_FADV_DONTNEED);
unlock:
return (error);
}
#define B_PERSISTENT 0x00000100 /* Perm. ref'ed while EXT2FS mounted. */
#define B_DONE 0x00000200 /* I/O completed. */
#define B_EINTR 0x00000400 /* I/O was interrupted */
-#define B_00000800 0x00000800 /* Available flag. */
+#define B_NOREUSE 0x00000800 /* Contents not reused once released. */
#define B_00001000 0x00001000 /* Available flag. */
#define B_INVAL 0x00002000 /* Does not contain valid info. */
#define B_BARRIER 0x00004000 /* Write this and all preceding first. */
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
"\33paging\32infreecnt\31nocopy\30b23\27relbuf\26dirty\25b20" \
"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
- "\15b12\14b11\13eintr\12done\11persist\10delwri" \
+ "\15b12\14noreuse\13eintr\12done\11persist\10delwri" \
"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
/*
int fa_advice; /* (f) FADV_* type. */
off_t fa_start; /* (f) Region start. */
off_t fa_end; /* (f) Region end. */
- off_t fa_prevstart; /* (f) Previous NOREUSE start. */
- off_t fa_prevend; /* (f) Previous NOREUSE end. */
};
struct file {
}
/*
- * vm_object_page_cache:
+ * vm_object_page_noreuse:
*
- * For the given object, attempt to move the specified clean
- * pages to the cache queue. If a page is wired for any reason,
- * then it will not be changed. Pages are specified by the given
- * range ["start", "end"). As a special case, if "end" is zero,
- * then the range extends from "start" to the end of the object.
- * Any mappings to the specified pages are removed before the
- * pages are moved to the cache queue.
+ * For the given object, attempt to move the specified pages to
+ * the head of the inactive queue. This bypasses regular LRU
+ * operation and allows the pages to be reused quickly under memory
+ * pressure. If a page is wired for any reason, then it will not
+ * be queued. Pages are specified by the range ["start", "end").
+ * As a special case, if "end" is zero, then the range extends from
+ * "start" to the end of the object.
*
* This operation should only be performed on objects that
* contain non-fictitious, managed pages.
* The object must be locked.
*/
void
-vm_object_page_cache(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
+vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
struct mtx *mtx, *new_mtx;
vm_page_t p, next;
VM_OBJECT_ASSERT_WLOCKED(object);
KASSERT((object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0,
- ("vm_object_page_cache: illegal object %p", object));
+ ("vm_object_page_noreuse: illegal object %p", object));
if (object->resident_page_count == 0)
return;
p = vm_page_find_least(object, start);
mtx = new_mtx;
mtx_lock(mtx);
}
- vm_page_try_to_cache(p);
+ vm_page_deactivate_noreuse(p);
}
if (mtx != NULL)
mtx_unlock(mtx);
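
The calling convention for vm_object_page_noreuse() matches the vop_stdadvise() hunk above: the caller holds the object's write lock and passes page indices, truncating the start offset down and rounding the end offset up to page boundaries. A condensed restatement of that caller-side pattern (variable names abbreviated):

    off_t start, end;

    start = trunc_page(a_start);    /* byte offsets in ... */
    end = round_page(a_end);
    VM_OBJECT_WLOCK(vp->v_object);
    /* ... page indices out; an end of 0 means "to the end of the object". */
    vm_object_page_noreuse(vp->v_object, OFF_TO_IDX(start), OFF_TO_IDX(end));
    VM_OBJECT_WUNLOCK(vp->v_object);
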
void vm_object_set_writeable_dirty (vm_object_t);
void vm_object_init (void);
void vm_object_madvise(vm_object_t, vm_pindex_t, vm_pindex_t, int);
-void vm_object_page_cache(vm_object_t object, vm_pindex_t start,
- vm_pindex_t end);
boolean_t vm_object_page_clean(vm_object_t object, vm_ooffset_t start,
vm_ooffset_t end, int flags);
+void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start,
+ vm_pindex_t end);
void vm_object_page_remove(vm_object_t object, vm_pindex_t start,
vm_pindex_t end, int options);
boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t);
_vm_page_deactivate(m, 0);
}
+/*
+ * Move the specified page to the inactive queue with the expectation
+ * that it is unlikely to be reused.
+ *
+ * The page must be locked.
+ */
+void
+vm_page_deactivate_noreuse(vm_page_t m)
+{
+
+ _vm_page_deactivate(m, 1);
+}
+
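
As the comment above states, the page lock must be held across the call; a minimal sketch of the calling pattern for a page m that is no longer wired:

    vm_page_lock(m);
    /* Queue m at the head of the inactive queue for early reclamation. */
    vm_page_deactivate_noreuse(m);
    vm_page_unlock(m);
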
/*
* vm_page_try_to_cache:
*
/*
* vm_page_advise
*
- * Deactivate or do nothing, as appropriate. This routine is used
- * by madvise() and vop_stdadvise().
+ * Deactivate or do nothing, as appropriate.
*
* The object and page must be locked.
*/
int vm_page_try_to_cache (vm_page_t);
int vm_page_try_to_free (vm_page_t);
void vm_page_deactivate (vm_page_t);
+void vm_page_deactivate_noreuse(vm_page_t);
void vm_page_dequeue(vm_page_t m);
void vm_page_dequeue_locked(vm_page_t m);
vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t);