]> xenbits.xensource.com Git - freebsd.git/commitdiff
The VFS-level clustering code collects together sequential blocks
authormckusick <mckusick@FreeBSD.org>
Tue, 17 Sep 2019 17:44:50 +0000 (17:44 +0000)
committermckusick <mckusick@FreeBSD.org>
Tue, 17 Sep 2019 17:44:50 +0000 (17:44 +0000)
by issuing delayed-writes (bdwrite()) until a non-sequential block
is written or the maximum cluster size is reached. At that point
it collects the delayed buffers together (using bread()) to write
them in a single operation. The assumption was that since we just
looked at them they will still be in memory so there is no need to
check for a read error from bread(). Very occasionally (apparently
every 10 hours or so when being pounded by Peter Holm's tests)
this assumption is wrong.

The fix is to check for errors from bread() and fail the cluster
write, thus falling back to the default individual flushing of any
still-dirty buffers.

Reported by: Peter Holm and Chuck Silvers
Reviewed by: kib
MFC after:   3 days

sys/kern/vfs_cluster.c

index 6ff0b1c3b327207d7ef2c1f38d2f1d6d881e75ba..21efe900eea0363f52e2dcfda4dbd8853e61dfbf 100644 (file)
@@ -718,6 +718,14 @@ cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount,
                                struct cluster_save *buflist;
 
                                buflist = cluster_collectbufs(vp, bp, gbflags);
+                               if (buflist == NULL) {
+                                       /*
+                                        * Cluster build failed so just write
+                                        * it now.
+                                        */
+                                       bawrite(bp);
+                                       return;
+                               }
                                endbp = &buflist->bs_children
                                    [buflist->bs_nchildren - 1];
                                if (VOP_REALLOCBLKS(vp, buflist)) {
@@ -1056,7 +1064,7 @@ cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags)
        struct cluster_save *buflist;
        struct buf *bp;
        daddr_t lbn;
-       int i, len;
+       int i, j, len, error;
 
        len = vp->v_lastw - vp->v_cstart + 1;
        buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
@@ -1064,8 +1072,18 @@ cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags)
        buflist->bs_nchildren = 0;
        buflist->bs_children = (struct buf **) (buflist + 1);
        for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) {
-               (void)bread_gb(vp, lbn, last_bp->b_bcount, NOCRED,
+               error = bread_gb(vp, lbn, last_bp->b_bcount, NOCRED,
                    gbflags, &bp);
+               if (error != 0) {
+                       /*
+                        * If read fails, release collected buffers
+                        * and return failure.
+                        */
+                       for (j = 0; j < i; j++)
+                               brelse(buflist->bs_children[j]);
+                       free(buflist, M_SEGMENT);
+                       return (NULL);
+               }
                buflist->bs_children[i] = bp;
                if (bp->b_blkno == bp->b_lblkno)
                        VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,