int freeclusterchain(struct msdosfsmount *pmp, u_long startchain);
int extendfile(struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, int flags);
void fc_purge(struct denode *dep, u_int frcn);
-int markvoldirty(struct msdosfsmount *pmp, int dirty);
+int markvoldirty_upgrade(struct msdosfsmount *pmp, bool dirty, bool rw_upgrade);
+/*
+ * Convenience form of markvoldirty_upgrade() for callers that are not
+ * performing a ro->rw upgrade: rw_upgrade is always passed as false, so a
+ * mount flagged MSDOSFSMNT_RONLY gets EROFS back instead of a FAT write.
+ */
+static inline int
+markvoldirty(struct msdosfsmount *pmp, bool dirty)
+{
+ return (markvoldirty_upgrade(pmp, dirty, false));
+}
#endif /* _KERNEL || MAKEFS */
#endif /* !_FS_MSDOSFS_FAT_H_ */
* ? (other errors from called routines)
*/
int
-markvoldirty(struct msdosfsmount *pmp, int dirty)
+markvoldirty_upgrade(struct msdosfsmount *pmp, bool dirty, bool rw_upgrade)
{
struct buf *bp;
u_long bn, bo, bsize, byteoffset, fatval;
if (FAT12(pmp))
return (0);
- /* Can't change the bit on a read-only filesystem. */
- if (pmp->pm_flags & MSDOSFSMNT_RONLY)
+ /*
+ * Can't change the bit on a read-only filesystem, except as part of
+ * ro->rw upgrade.
+ */
+ if ((pmp->pm_flags & MSDOSFSMNT_RONLY) != 0 && !rw_upgrade)
return (EROFS);
/*
putushort(&bp->b_data[bo], fatval);
}
+ /*
+ * The concern here is that a devvp may be read-only without reporting
+ * itself as such through the usual channels. In that case, we'd like
+ * an attempt to mount msdosfs rw not to panic the system.
+ *
+ * markvoldirty is invoked as the first write on backing devvps when
+ * either msdosfs is mounted for the first time, or a ro mount is
+ * upgraded to rw.
+ *
+ * In either event, if a write error occurs dirtying the volume:
+ * - No user data has been permitted to be written to cache yet.
+ * - We can abort the high-level operation (mount, or ro->rw) safely.
+ * - We don't derive any benefit from leaving a zombie dirty buf in
+ * the cache that cannot be cleaned or evicted.
+ *
+ * So, set B_INVALONERR on the buffer to have bwrite() -> brelse() detect
+ * that condition and force-invalidate our write to the block if it occurs.
+ *
+ * PR 210316 provides more context on the discovery and diagnosis of
+ * the problem, as well as earlier attempts to solve it.
+ */
+ bp->b_flags |= B_INVALONERR;
+
/* Write out the modified FAT block synchronously. */
return (bwrite(bp));
}
if (error)
return (error);
+ /* Now that the volume is modifiable, mark it dirty. */
+ error = markvoldirty_upgrade(pmp, true, true);
+ if (error) {
+ /*
+ * If dirtying the superblock failed, drop GEOM
+ * 'w' refs (we're still RO).
+ */
+ g_topology_lock();
+ (void)g_access(pmp->pm_cp, 0, -1, 0);
+ g_topology_unlock();
+
+ return (error);
+ }
+
pmp->pm_fmod = 1;
pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
MNT_ILOCK(mp);
mp->mnt_flag &= ~MNT_RDONLY;
MNT_IUNLOCK(mp);
-
- /* Now that the volume is modifiable, mark it dirty. */
- error = markvoldirty(pmp, 1);
- if (error)
- return (error);
}
}
/*
if (ronly)
pmp->pm_flags |= MSDOSFSMNT_RONLY;
else {
- if ((error = markvoldirty(pmp, 1)) != 0) {
- (void)markvoldirty(pmp, 0);
+ if ((error = markvoldirty(pmp, 1)) != 0)
goto error_exit;
- }
pmp->pm_fmod = 1;
}
mp->mnt_data = pmp;
BO_UNLOCK(bp->b_bufobj);
bdirty(bp);
}
+
+ if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
+ (bp->b_flags & B_INVALONERR)) {
+ /*
+ * Forced invalidation of dirty buffer contents, to be used
+ * after a failed write in the rare case that the loss of the
+ * contents is acceptable. The buffer is invalidated and
+ * freed.
+ */
+ bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
+ bp->b_flags &= ~(B_ASYNC | B_CACHE);
+ }
+
if (bp->b_iocmd == BIO_WRITE && (bp->b_ioflags & BIO_ERROR) &&
(bp->b_error != ENXIO || !LIST_EMPTY(&bp->b_dep)) &&
!(bp->b_flags & B_INVAL)) {
* may not be used with the stage 1 data write under NFS
* but may be used for the commit rpc portion.
*
+ * B_INVALONERR This flag is set on dirty buffers. It specifies that a
+ * write error should forcibly invalidate the buffer
+ * contents. This flag should be used with caution, as it
+ * discards data. It is incompatible with B_ASYNC.
+ *
* B_VMIO Indicates that the buffer is tied into an VM object.
* The buffer's data is always PAGE_SIZE aligned even
* if b_bufsize and b_bcount are not. ( b_bufsize is
#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
#define B_MALLOC 0x00010000 /* malloced b_data */
#define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */
-#define B_00040000 0x00040000 /* Available flag. */
+#define B_INVALONERR 0x00040000 /* Invalidate on write error. */
#define B_00080000 0x00080000 /* Available flag. */
#define B_00100000 0x00100000 /* Available flag. */
#define B_00200000 0x00200000 /* Available flag. */
#define PRINT_BUF_FLAGS "\20\40remfree\37cluster\36vmio\35ram\34managed" \
"\33paging\32infreecnt\31nocopy\30b23\27relbuf\26b21\25b20" \
- "\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
+ "\24b19\23invalonerr\22clusterok\21malloc\20nocache\17b14\16inval" \
"\15reuse\14noreuse\13eintr\12done\11b8\10delwri" \
"\7validsuspwrt\6cache\5deferred\4direct\3async\2needcommit\1age"
#include <sys/errno.h>
#include <sys/vnode.h>
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/errno.h>
#include <assert.h>
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <sys/param.h>
#include <sys/errno.h>
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mount.h>
#include <errno.h>
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include <fcntl.h>
+#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <time.h>