/* If the root directory of the 'tmp' file system is not yet
* allocated, this must be the request to do it. */
MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
- KASSERT(tmp->tm_root == NULL || mp->mnt_writeopcount > 0,
- ("creating node not under vn_start_write"));
MPASS(IFF(type == VLNK, target != NULL));
MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));
}
if (vfs_op_thread_enter(mp)) {
- if (mp == vp->v_mount)
- MNT_REF_UNLOCKED(mp);
- else
- mp = NULL;
- vfs_op_thread_exit(mp);
+ if (mp == vp->v_mount) {
+ vfs_mp_count_add_pcpu(mp, ref, 1);
+ vfs_op_thread_exit(mp);
+ } else {
+ vfs_op_thread_exit(mp);
+ mp = NULL;
+ }
lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
mp->mnt_thread_in_ops_pcpu = uma_zalloc_pcpu(pcpu_zone_int,
M_WAITOK | M_ZERO);
+ mp->mnt_ref_pcpu = uma_zalloc_pcpu(pcpu_zone_int,
+ M_WAITOK | M_ZERO);
+ mp->mnt_lockref_pcpu = uma_zalloc_pcpu(pcpu_zone_int,
+ M_WAITOK | M_ZERO);
+ mp->mnt_writeopcount_pcpu = uma_zalloc_pcpu(pcpu_zone_int,
+ M_WAITOK | M_ZERO);
mp->mnt_ref = 0;
mp->mnt_vfs_ops = 1;
return (0);
struct mount *mp;
mp = (struct mount *)mem;
+ uma_zfree_pcpu(pcpu_zone_int, mp->mnt_writeopcount_pcpu);
+ uma_zfree_pcpu(pcpu_zone_int, mp->mnt_lockref_pcpu);
+ uma_zfree_pcpu(pcpu_zone_int, mp->mnt_ref_pcpu);
uma_zfree_pcpu(pcpu_zone_int, mp->mnt_thread_in_ops_pcpu);
lockdestroy(&mp->mnt_explock);
mtx_destroy(&mp->mnt_listmtx);
CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
if (vfs_op_thread_enter(mp)) {
- MNT_REF_UNLOCKED(mp);
+ vfs_mp_count_add_pcpu(mp, ref, 1);
vfs_op_thread_exit(mp);
return;
}
CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
if (vfs_op_thread_enter(mp)) {
- MNT_REL_UNLOCKED(mp);
+ vfs_mp_count_sub_pcpu(mp, ref, 1);
vfs_op_thread_exit(mp);
return;
}
if (mp->mnt_vfs_ops == 0)
panic("%s: entered with zero vfs_ops\n", __func__);
+ vfs_assert_mount_counters(mp);
+
MNT_ILOCK(mp);
mp->mnt_kern_flag |= MNTK_REFEXPIRE;
if (mp->mnt_kern_flag & MNTK_MWAIT) {
void
vfs_op_enter(struct mount *mp)
{
+ int cpu;
+
MNT_ILOCK(mp);
mp->mnt_vfs_ops++;
*/
atomic_thread_fence_seq_cst();
vfs_op_barrier_wait(mp);
+ /*
+ * Paired with a fence in vfs_op_thread_exit().
+ */
+ atomic_thread_fence_acq();
+ CPU_FOREACH(cpu) {
+ mp->mnt_ref +=
+ zpcpu_replace_cpu(mp->mnt_ref_pcpu, 0, cpu);
+ mp->mnt_lockref +=
+ zpcpu_replace_cpu(mp->mnt_lockref_pcpu, 0, cpu);
+ mp->mnt_writeopcount +=
+ zpcpu_replace_cpu(mp->mnt_writeopcount_pcpu, 0, cpu);
+ }
MNT_IUNLOCK(mp);
+ vfs_assert_mount_counters(mp);
}
void
}
}
+#ifdef DIAGNOSTIC
+void
+vfs_assert_mount_counters(struct mount *mp)
+{
+ int cpu;
+
+ if (mp->mnt_vfs_ops == 0)
+ return;
+
+ CPU_FOREACH(cpu) {
+ if (*(int *)zpcpu_get_cpu(mp->mnt_ref_pcpu, cpu) != 0 ||
+ *(int *)zpcpu_get_cpu(mp->mnt_lockref_pcpu, cpu) != 0 ||
+ *(int *)zpcpu_get_cpu(mp->mnt_writeopcount_pcpu, cpu) != 0)
+ vfs_dump_mount_counters(mp);
+ }
+}
+
+void
+vfs_dump_mount_counters(struct mount *mp)
+{
+ int cpu, *count;
+ int ref, lockref, writeopcount;
+
+ printf("%s: mp %p vfs_ops %d\n", __func__, mp, mp->mnt_vfs_ops);
+
+ printf(" ref : ");
+ ref = mp->mnt_ref;
+ CPU_FOREACH(cpu) {
+ count = zpcpu_get_cpu(mp->mnt_ref_pcpu, cpu);
+ printf("%d ", *count);
+ ref += *count;
+ }
+ printf("\n");
+ printf(" lockref : ");
+ lockref = mp->mnt_lockref;
+ CPU_FOREACH(cpu) {
+ count = zpcpu_get_cpu(mp->mnt_lockref_pcpu, cpu);
+ printf("%d ", *count);
+ lockref += *count;
+ }
+ printf("\n");
+ printf("writeopcount: ");
+ writeopcount = mp->mnt_writeopcount;
+ CPU_FOREACH(cpu) {
+ count = zpcpu_get_cpu(mp->mnt_writeopcount_pcpu, cpu);
+ printf("%d ", *count);
+ writeopcount += *count;
+ }
+ printf("\n");
+
+ printf("counter struct total\n");
+ printf("ref %-5d %-5d\n", mp->mnt_ref, ref);
+ printf("lockref %-5d %-5d\n", mp->mnt_lockref, lockref);
+ printf("writeopcount %-5d %-5d\n", mp->mnt_writeopcount, writeopcount);
+
+ panic("invalid counts on struct mount");
+}
+#endif
+
+int
+vfs_mount_fetch_counter(struct mount *mp, enum mount_counter which)
+{
+ int *base, *pcpu;
+ int cpu, sum;
+
+ switch (which) {
+ case MNT_COUNT_REF:
+ base = &mp->mnt_ref;
+ pcpu = mp->mnt_ref_pcpu;
+ break;
+ case MNT_COUNT_LOCKREF:
+ base = &mp->mnt_lockref;
+ pcpu = mp->mnt_lockref_pcpu;
+ break;
+ case MNT_COUNT_WRITEOPCOUNT:
+ base = &mp->mnt_writeopcount;
+ pcpu = mp->mnt_writeopcount_pcpu;
+ break;
+ }
+
+ sum = *base;
+ CPU_FOREACH(cpu) {
+ sum += *(int *)zpcpu_get_cpu(pcpu, cpu);
+ }
+ return (sum);
+}
+
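For reference, vfs_mount_fetch_counter() simply adds each CPU's pending delta to the value cached in the struct, so unless vfs_op_enter() has already folded the per-CPU counters back in, the result is a best-effort snapshot rather than an exact value. A minimal usage sketch (not part of the patch; the wrapper function below is hypothetical):

/*
 * Hypothetical illustration: print best-effort snapshots of the three
 * counters, the same way the DDB and softdep hunks below consume
 * vfs_mount_fetch_counter().
 */
static void
example_show_mount_counters(struct mount *mp)
{
	printf("ref %d lockref %d writeopcount %d\n",
	    vfs_mount_fetch_counter(mp, MNT_COUNT_REF),
	    vfs_mount_fetch_counter(mp, MNT_COUNT_LOCKREF),
	    vfs_mount_fetch_counter(mp, MNT_COUNT_WRITEOPCOUNT));
}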
/*
* Do the actual filesystem unmount.
*/
MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
MPASS((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0);
MPASS((mp->mnt_kern_flag & MNTK_REFEXPIRE) == 0);
- MNT_REF_UNLOCKED(mp);
- atomic_add_int(&mp->mnt_lockref, 1);
+ vfs_mp_count_add_pcpu(mp, ref, 1);
+ vfs_mp_count_add_pcpu(mp, lockref, 1);
vfs_op_thread_exit(mp);
if (flags & MBF_MNTLSTLOCK)
mtx_unlock(&mountlist_mtx);
}
MNT_ILOCK(mp);
+ vfs_assert_mount_counters(mp);
MNT_REF(mp);
/*
* If mount point is currently being unmounted, sleep until the
}
if (flags & MBF_MNTLSTLOCK)
mtx_unlock(&mountlist_mtx);
- atomic_add_int(&mp->mnt_lockref, 1);
+ mp->mnt_lockref++;
MNT_IUNLOCK(mp);
return (0);
}
if (vfs_op_thread_enter(mp)) {
MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
- c = atomic_fetchadd_int(&mp->mnt_lockref, -1) - 1;
- KASSERT(c >= 0, ("%s: negative mnt_lockref %d\n", __func__, c));
- MNT_REL_UNLOCKED(mp);
+ vfs_mp_count_sub_pcpu(mp, lockref, 1);
+ vfs_mp_count_sub_pcpu(mp, ref, 1);
vfs_op_thread_exit(mp);
return;
}
MNT_ILOCK(mp);
+ vfs_assert_mount_counters(mp);
MNT_REL(mp);
- c = atomic_fetchadd_int(&mp->mnt_lockref, -1) - 1;
- KASSERT(c >= 0, ("%s: negative mnt_lockref %d\n", __func__, c));
+ c = --mp->mnt_lockref;
+ if (mp->mnt_vfs_ops == 0) {
+ MPASS((mp->mnt_kern_flag & MNTK_DRAINING) == 0);
+ MNT_IUNLOCK(mp);
+ return;
+ }
+ if (c < 0)
+ vfs_dump_mount_counters(mp);
if (c == 0 && (mp->mnt_kern_flag & MNTK_DRAINING) != 0) {
MPASS(mp->mnt_kern_flag & MNTK_UNMOUNT);
CTR1(KTR_VFS, "%s: waking up waiters", __func__);
if (jailed(mp->mnt_cred))
db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id);
db_printf(" }\n");
- db_printf(" mnt_ref = %d\n", mp->mnt_ref);
+ db_printf(" mnt_ref = %d (with %d in the struct)\n",
+ vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref);
db_printf(" mnt_gen = %d\n", mp->mnt_gen);
db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
db_printf(" mnt_activevnodelistsize = %d\n",
mp->mnt_activevnodelistsize);
- db_printf(" mnt_writeopcount = %d\n", mp->mnt_writeopcount);
+ db_printf(" mnt_writeopcount = %d (with %d in the struct)\n",
+ vfs_mount_fetch_counter(mp, MNT_COUNT_WRITEOPCOUNT),
+ mp->mnt_writeopcount);
db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen);
db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max);
db_printf(" mnt_hashseed = %u\n", mp->mnt_hashseed);
- db_printf(" mnt_lockref = %d\n", mp->mnt_lockref);
+ db_printf(" mnt_lockref = %d (with %d in the struct)\n",
+ vfs_mount_fetch_counter(mp, MNT_COUNT_LOCKREF), mp->mnt_lockref);
db_printf(" mnt_secondary_writes = %d\n", mp->mnt_secondary_writes);
db_printf(" mnt_secondary_accwrites = %d\n",
mp->mnt_secondary_accwrites);
if (__predict_true(!mplocked) && (flags & V_XSLEEP) == 0 &&
vfs_op_thread_enter(mp)) {
MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) == 0);
- atomic_add_int(&mp->mnt_writeopcount, 1);
+ vfs_mp_count_add_pcpu(mp, writeopcount, 1);
vfs_op_thread_exit(mp);
return (0);
}
}
if (flags & V_XSLEEP)
goto unlock;
- atomic_add_int(&mp->mnt_writeopcount, 1);
+ mp->mnt_writeopcount++;
unlock:
if (error != 0 || (flags & V_XSLEEP) != 0)
MNT_REL(mp);
return;
if (vfs_op_thread_enter(mp)) {
- c = atomic_fetchadd_int(&mp->mnt_writeopcount, -1) - 1;
- if (c < 0)
- panic("vn_finished_write: invalid writeopcount %d", c);
- MNT_REL_UNLOCKED(mp);
+ vfs_mp_count_sub_pcpu(mp, writeopcount, 1);
+ vfs_mp_count_sub_pcpu(mp, ref, 1);
vfs_op_thread_exit(mp);
return;
}
MNT_ILOCK(mp);
+ vfs_assert_mount_counters(mp);
MNT_REL(mp);
- c = atomic_fetchadd_int(&mp->mnt_writeopcount, -1) - 1;
+ c = --mp->mnt_writeopcount;
+ if (mp->mnt_vfs_ops == 0) {
+ MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) == 0);
+ MNT_IUNLOCK(mp);
+ return;
+ }
if (c < 0)
- panic("vn_finished_write: invalid writeopcount %d", c);
+ vfs_dump_mount_counters(mp);
if ((mp->mnt_kern_flag & MNTK_SUSPEND) != 0 && c == 0)
wakeup(&mp->mnt_writeopcount);
MNT_IUNLOCK(mp);
vfs_op_enter(mp);
MNT_ILOCK(mp);
+ vfs_assert_mount_counters(mp);
if (mp->mnt_susp_owner == curthread) {
vfs_op_exit_locked(mp);
MNT_IUNLOCK(mp);
curthread->td_pflags &= ~TDP_IGNSUSP;
if ((flags & VR_START_WRITE) != 0) {
MNT_REF(mp);
- atomic_add_int(&mp->mnt_writeopcount, 1);
+ mp->mnt_writeopcount++;
}
MNT_IUNLOCK(mp);
if ((flags & VR_NO_SUSPCLR) == 0)
TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/
int mnt_vfs_ops; /* (i) pending vfs ops */
int *mnt_thread_in_ops_pcpu;
+ int *mnt_ref_pcpu;
+ int *mnt_lockref_pcpu;
+ int *mnt_writeopcount_pcpu;
};
/*
#define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx)
#define MNT_MTX(mp) (&(mp)->mnt_mtx)
-#define MNT_REF_UNLOCKED(mp) do { \
- atomic_add_int(&(mp)->mnt_ref, 1); \
-} while (0)
-#define MNT_REL_UNLOCKED(mp) do { \
- int _c; \
- _c = atomic_fetchadd_int(&(mp)->mnt_ref, -1) - 1; \
- KASSERT(_c >= 0, ("negative mnt_ref %d", _c)); \
-} while (0)
-
#define MNT_REF(mp) do { \
mtx_assert(MNT_MTX(mp), MA_OWNED); \
- atomic_add_int(&(mp)->mnt_ref, 1); \
+ (mp)->mnt_ref++; \
} while (0)
#define MNT_REL(mp) do { \
- int _c; \
mtx_assert(MNT_MTX(mp), MA_OWNED); \
- _c = atomic_fetchadd_int(&(mp)->mnt_ref, -1) - 1; \
- KASSERT(_c >= 0, ("negative mnt_ref %d", _c)); \
- if (_c == 0) \
+ (mp)->mnt_ref--; \
+ if ((mp)->mnt_vfs_ops && (mp)->mnt_ref < 0) \
+ vfs_dump_mount_counters(mp); \
+ if ((mp)->mnt_ref == 0 && (mp)->mnt_vfs_ops) \
wakeup((mp)); \
} while (0)
void vfs_op_exit_locked(struct mount *);
void vfs_op_exit(struct mount *);
+#ifdef DIAGNOSTIC
+void vfs_assert_mount_counters(struct mount *);
+void vfs_dump_mount_counters(struct mount *);
+#else
+#define vfs_assert_mount_counters(mp) do { } while (0)
+#define vfs_dump_mount_counters(mp) do { } while (0)
+#endif
+
+enum mount_counter { MNT_COUNT_REF, MNT_COUNT_LOCKREF, MNT_COUNT_WRITEOPCOUNT };
+int vfs_mount_fetch_counter(struct mount *, enum mount_counter);
+
/*
* We mark ourselves as entering the section and post a sequentially consistent
* fence, meaning the store is completed before we get into the section and
* before we make any changes, or we only make changes that are safe while
* the section is executed.
*/
+#define vfs_op_thread_entered(mp) ({ \
+ MPASS(curthread->td_critnest > 0); \
+ *(int *)zpcpu_get(mp->mnt_thread_in_ops_pcpu) == 1; \
+})
#define vfs_op_thread_enter(mp) ({ \
- struct mount *_mp = (mp); \
bool _retval = true; \
critical_enter(); \
- *(int *)zpcpu_get(_mp->mnt_thread_in_ops_pcpu) = 1; \
+ MPASS(!vfs_op_thread_entered(mp)); \
+ *(int *)zpcpu_get(mp->mnt_thread_in_ops_pcpu) = 1; \
atomic_thread_fence_seq_cst(); \
- if (__predict_false(_mp->mnt_vfs_ops > 0)) { \
- vfs_op_thread_exit(_mp); \
+ if (__predict_false(mp->mnt_vfs_ops > 0)) { \
+ vfs_op_thread_exit(mp); \
_retval = false; \
} \
_retval; \
})
#define vfs_op_thread_exit(mp) do { \
+ MPASS(vfs_op_thread_entered(mp)); \
atomic_thread_fence_rel(); \
*(int *)zpcpu_get(mp->mnt_thread_in_ops_pcpu) = 0; \
critical_exit(); \
} while (0)
+#define vfs_mp_count_add_pcpu(mp, count, val) do { \
+ MPASS(vfs_op_thread_entered(mp)); \
+ (*(int *)zpcpu_get(mp->mnt_##count##_pcpu)) += val; \
+} while (0)
+
+#define vfs_mp_count_sub_pcpu(mp, count, val) do { \
+ MPASS(vfs_op_thread_entered(mp)); \
+ (*(int *)zpcpu_get(mp->mnt_##count##_pcpu)) -= val; \
+} while (0)
+
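Together with vfs_op_thread_enter()/vfs_op_thread_exit(), these macros implement the unlocked fast path used throughout the patch: while no vfs op is pending, a counter update only touches the current CPU's slot, and once mnt_vfs_ops is raised (for example by unmount) callers fall back to the mount interlock and the plain struct fields. A condensed sketch of that pattern, mirroring the vfs_ref() hunk above (the wrapper function itself is illustrative, not part of the patch):

static void
example_mount_ref(struct mount *mp)
{
	/* Fast path: account the reference in this CPU's slot. */
	if (vfs_op_thread_enter(mp)) {
		vfs_mp_count_add_pcpu(mp, ref, 1);
		vfs_op_thread_exit(mp);
		return;
	}
	/* Slow path: a vfs op is in flight, fall back to the interlock. */
	MNT_ILOCK(mp);
	MNT_REF(mp);
	MNT_IUNLOCK(mp);
}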
#else /* !_KERNEL */
#include <sys/cdefs.h>
return ((char *)(base) + UMA_PCPU_ALLOC_SIZE * cpu);
}
+/*
+ * This operation is NOT atomic and does not post any barriers.
+ * If you use this, the assumption is that the target CPU will not
+ * be modifying this variable.
+ * If you need atomicity, use xchg.
+ */
+#define zpcpu_replace_cpu(base, val, cpu) ({ \
+ __typeof(val) _old = *(__typeof(val) *)zpcpu_get_cpu(base, cpu);\
+ *(__typeof(val) *)zpcpu_get_cpu(base, cpu) = val; \
+ _old; \
+})
+
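The only consumer in this patch is vfs_op_enter(), which, once vfs_op_barrier_wait() guarantees that no CPU is inside the fast-path section, swaps each CPU's delta for zero and adds it to the central field under the mount interlock. A condensed restatement of that fold for a single counter (illustrative only; the real loop above handles all three counters):

static void
example_fold_ref_counter(struct mount *mp)
{
	int cpu;

	/* Safe only after vfs_op_barrier_wait(); see vfs_op_enter() above. */
	CPU_FOREACH(cpu)
		mp->mnt_ref += zpcpu_replace_cpu(mp->mnt_ref_pcpu, 0, cpu);
}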
/*
* Machine dependent callouts. cpu_pcpu_init() is responsible for
* initializing machine dependent fields of struct pcpu, and
* (fs_minfree).
*/
if (resource == FLUSH_INODES_WAIT) {
- needed = vp->v_mount->mnt_writeopcount + 2;
+ needed = vfs_mount_fetch_counter(vp->v_mount,
+ MNT_COUNT_WRITEOPCOUNT) + 2;
} else if (resource == FLUSH_BLOCKS_WAIT) {
- needed = (vp->v_mount->mnt_writeopcount + 2) *
- fs->fs_contigsumsize;
+ needed = (vfs_mount_fetch_counter(vp->v_mount,
+ MNT_COUNT_WRITEOPCOUNT) + 2) * fs->fs_contigsumsize;
if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE))
needed += fragstoblks(fs,
roundup((fs->fs_dsize * fs->fs_minfree / 100) -