author		Dave Chinner <dchinner@redhat.com>	2010-08-23 21:40:03 -0400
committer	Dave Chinner <david@fromorbit.com>	2010-08-23 21:40:03 -0400
commit		a44f13edf0ebb4e41942d0f16ca80489dcf6659d (patch)
tree		42bcbee56a62851e969292033efd600cced80ca5
parent		1a387d3be2b30c90f20d49a3497a8fc0693a9d18 (diff)
xfs: Reduce log force overhead for delayed logging
Delayed logging adds some serialisation to the log force process to
ensure that it does not dereference a bad commit context structure
when determining if a CIL push is necessary or not. It does this by
grabbing the CIL context lock exclusively, then dropping it before
pushing the CIL if necessary. This causes serialisation of all log
forces and pushes regardless of whether a force is necessary or not.
As a result fsync heavy workloads (like dbench) can be significantly
slower with delayed logging than without.
To avoid this penalty, copy the current sequence from the context to
the CIL structure when they are swapped. This allows us to do
unlocked checks on the current sequence without having to worry
about dereferencing context structures that may have already been
freed. Hence we can remove the CIL context locking in the forcing
code and only call into the push code if the current context matches
the sequence we need to force.
By passing the sequence into the push code, we can check the
sequence again once we have the CIL lock held exclusive and abort if
the sequence has already been pushed. This avoids a lock round-trip
and unnecessary CIL pushes when we have racing push calls.
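
In outline, the force path now does an unlocked check against a mirrored
sequence value and only takes the exclusive context lock when a push may
actually be needed, rechecking once the lock is held. The sketch below
illustrates that pattern only; it is not the XFS code - the types, the
pthread locking and the helper names (cil_force_seq, cil_push) are
simplified stand-ins, and the mirrored current_sequence would need an
atomic or otherwise annotated access in a production implementation.

/*
 * Simplified user-space sketch of the pattern described above: mirror the
 * current sequence outside the context so it can be read without locking,
 * then recheck under the exclusive lock and abort if a racing push already
 * handled the sequence. Not the XFS code; names and locking are stand-ins.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

struct cil_ctx {
	uint64_t	sequence;	/* sequence this checkpoint commits as */
};

struct cil {
	pthread_rwlock_t ctx_lock;	/* stand-in for xc_ctx_lock */
	struct cil_ctx	*ctx;		/* current context, swapped on push */
	uint64_t	current_sequence; /* mirror of ctx->sequence, readable
					   * without ctx_lock (a real kernel
					   * version would annotate this) */
};

/* Push the CIL if @seq has not already been pushed by a racing caller. */
static void cil_push(struct cil *cil, uint64_t seq)
{
	struct cil_ctx *new_ctx, *old_ctx;

	pthread_rwlock_wrlock(&cil->ctx_lock);

	/* recheck under the exclusive lock: a racing push may have won */
	if (seq < cil->ctx->sequence) {
		pthread_rwlock_unlock(&cil->ctx_lock);
		return;
	}

	/* switch to a new context for the next sequence */
	new_ctx = calloc(1, sizeof(*new_ctx));
	new_ctx->sequence = cil->ctx->sequence + 1;
	old_ctx = cil->ctx;
	cil->ctx = new_ctx;

	/* mirror the new sequence so forces can check it without ctx_lock */
	cil->current_sequence = new_ctx->sequence;

	pthread_rwlock_unlock(&cil->ctx_lock);

	/* ... format and write the checkpoint held in old_ctx ... */
	free(old_ctx);
}

/* Force @seq to stable storage; callable without holding any CIL locks. */
static void cil_force_seq(struct cil *cil, uint64_t seq)
{
	/*
	 * Unlocked check against the mirrored value: no context pointer is
	 * dereferenced, so there is no use-after-free window, and forces
	 * that have nothing to push never serialise on the context lock.
	 */
	if (seq == cil->current_sequence)
		cil_push(cil, seq);

	/* ... then wait for the commit record of @seq, as before ... */
}

int main(void)
{
	struct cil cil = { .current_sequence = 1 };

	pthread_rwlock_init(&cil.ctx_lock, NULL);
	cil.ctx = calloc(1, sizeof(*cil.ctx));
	cil.ctx->sequence = 1;

	cil_force_seq(&cil, 1);	/* pushes sequence 1, switches to sequence 2 */
	cil_force_seq(&cil, 1);	/* no-op: sequence 1 was already pushed */

	free(cil.ctx);
	pthread_rwlock_destroy(&cil.ctx_lock);
	return 0;
}

In the real code the same dance is done with the CIL's xc_ctx_lock
rw_semaphore, and the old context stays on the xc_committing list until
checkpoint completion rather than being freed immediately, as the diff
below shows.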
The result is that the regression in dbench performance goes away -
this change improves dbench performance on a ramdisk from ~2100MB/s
to ~2500MB/s. This compares favourably to not using delayed logging
which returns ~2500MB/s for the same workload.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
-rw-r--r--	fs/xfs/xfs_log.c	|   7
-rw-r--r--	fs/xfs/xfs_log_cil.c	| 245
-rw-r--r--	fs/xfs/xfs_log_priv.h	|  13

3 files changed, 147 insertions, 118 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 925d572bf0f4..33f718f92a48 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3015,7 +3015,8 @@ _xfs_log_force(
 
 	XFS_STATS_INC(xs_log_force);
 
-	xlog_cil_push(log, 1);
+	if (log->l_cilp)
+		xlog_cil_force(log);
 
 	spin_lock(&log->l_icloglock);
 
@@ -3167,7 +3168,7 @@ _xfs_log_force_lsn(
 	XFS_STATS_INC(xs_log_force);
 
 	if (log->l_cilp) {
-		lsn = xlog_cil_push_lsn(log, lsn);
+		lsn = xlog_cil_force_lsn(log, lsn);
 		if (lsn == NULLCOMMITLSN)
 			return 0;
 	}
@@ -3724,7 +3725,7 @@ xfs_log_force_umount(
 	 * call below.
 	 */
	if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
-		xlog_cil_push(log, 1);
+		xlog_cil_force(log);
 
 	/*
 	 * We must hold both the GRANT lock and the LOG lock,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ef8e7d9f445d..9768f2437bb3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -68,6 +68,7 @@ xlog_cil_init(
 	ctx->sequence = 1;
 	ctx->cil = cil;
 	cil->xc_ctx = ctx;
+	cil->xc_current_sequence = ctx->sequence;
 
 	cil->xc_log = log;
 	log->l_cilp = cil;
@@ -321,94 +322,6 @@ xlog_cil_free_logvec(
 }
 
 /*
- * Commit a transaction with the given vector to the Committed Item List.
- *
- * To do this, we need to format the item, pin it in memory if required and
- * account for the space used by the transaction. Once we have done that we
- * need to release the unused reservation for the transaction, attach the
- * transaction to the checkpoint context so we carry the busy extents through
- * to checkpoint completion, and then unlock all the items in the transaction.
- *
- * For more specific information about the order of operations in
- * xfs_log_commit_cil() please refer to the comments in
- * xfs_trans_commit_iclog().
- *
- * Called with the context lock already held in read mode to lock out
- * background commit, returns without it held once background commits are
- * allowed again.
- */
-int
-xfs_log_commit_cil(
-	struct xfs_mount	*mp,
-	struct xfs_trans	*tp,
-	struct xfs_log_vec	*log_vector,
-	xfs_lsn_t		*commit_lsn,
-	int			flags)
-{
-	struct log		*log = mp->m_log;
-	int			log_flags = 0;
-	int			push = 0;
-
-	if (flags & XFS_TRANS_RELEASE_LOG_RES)
-		log_flags = XFS_LOG_REL_PERM_RESERV;
-
-	if (XLOG_FORCED_SHUTDOWN(log)) {
-		xlog_cil_free_logvec(log_vector);
-		return XFS_ERROR(EIO);
-	}
-
-	/* lock out background commit */
-	down_read(&log->l_cilp->xc_ctx_lock);
-	xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
-
-	/* check we didn't blow the reservation */
-	if (tp->t_ticket->t_curr_res < 0)
-		xlog_print_tic_res(log->l_mp, tp->t_ticket);
-
-	/* attach the transaction to the CIL if it has any busy extents */
-	if (!list_empty(&tp->t_busy)) {
-		spin_lock(&log->l_cilp->xc_cil_lock);
-		list_splice_init(&tp->t_busy,
-					&log->l_cilp->xc_ctx->busy_extents);
-		spin_unlock(&log->l_cilp->xc_cil_lock);
-	}
-
-	tp->t_commit_lsn = *commit_lsn;
-	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
-	xfs_trans_unreserve_and_mod_sb(tp);
-
-	/*
-	 * Once all the items of the transaction have been copied to the CIL,
-	 * the items can be unlocked and freed.
-	 *
-	 * This needs to be done before we drop the CIL context lock because we
-	 * have to update state in the log items and unlock them before they go
-	 * to disk. If we don't, then the CIL checkpoint can race with us and
-	 * we can run checkpoint completion before we've updated and unlocked
-	 * the log items. This affects (at least) processing of stale buffers,
-	 * inodes and EFIs.
-	 */
-	xfs_trans_free_items(tp, *commit_lsn, 0);
-
-	/* check for background commit before unlock */
-	if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
-		push = 1;
-
-	up_read(&log->l_cilp->xc_ctx_lock);
-
-	/*
-	 * We need to push CIL every so often so we don't cache more than we
-	 * can fit in the log. The limit really is that a checkpoint can't be
-	 * more than half the log (the current checkpoint is not allowed to
-	 * overwrite the previous checkpoint), but commit latency and memory
-	 * usage limit this to a smaller size in most cases.
-	 */
-	if (push)
-		xlog_cil_push(log, 0);
-	return 0;
-}
-
-/*
  * Mark all items committed and clear busy extents. We free the log vector
  * chains in a separate pass so that we unpin the log items as quickly as
  * possible.
@@ -441,13 +354,23 @@ xlog_cil_committed(
 }
 
 /*
- * Push the Committed Item List to the log. If the push_now flag is not set,
- * then it is a background flush and so we can chose to ignore it.
+ * Push the Committed Item List to the log. If @push_seq is zero, then it
+ * is a background flush and so we can choose to ignore it. Otherwise, if the
+ * current sequence is the same as @push_seq we need to do a flush. If
+ * @push_seq is less than the current sequence, then it has already been
+ * flushed and we don't need to do anything - the caller will wait for it to
+ * complete if necessary.
+ *
+ * @push_seq is a value rather than a flag because that allows us to do an
+ * unlocked check of the sequence number for a match. Hence we can allow log
+ * forces to run racily and not issue pushes for the same sequence twice. If we
+ * get a race between multiple pushes for the same sequence they will block on
+ * the first one and then abort, hence avoiding needless pushes.
  */
-int
+STATIC int
 xlog_cil_push(
 	struct log		*log,
-	int			push_now)
+	xfs_lsn_t		push_seq)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_log_vec	*lv;
@@ -467,12 +390,14 @@ xlog_cil_push(
 	if (!cil)
 		return 0;
 
+	ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
+
 	new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
 	new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
 	/* lock out transaction commit, but don't block on background push */
 	if (!down_write_trylock(&cil->xc_ctx_lock)) {
-		if (!push_now)
+		if (!push_seq)
 			goto out_free_ticket;
 		down_write(&cil->xc_ctx_lock);
 	}
@@ -483,7 +408,11 @@ xlog_cil_push(
 		goto out_skip;
 
 	/* check for spurious background flush */
-	if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+	if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+		goto out_skip;
+
+	/* check for a previously pushed sequence */
+	if (push_seq < cil->xc_ctx->sequence)
 		goto out_skip;
 
 	/*
@@ -529,6 +458,13 @@ xlog_cil_push(
 	cil->xc_ctx = new_ctx;
 
 	/*
+	 * mirror the new sequence into the cil structure so that we can do
+	 * unlocked checks against the current sequence in log forces without
+	 * risking dereferencing a freed context pointer.
+	 */
+	cil->xc_current_sequence = new_ctx->sequence;
+
+	/*
 	 * The switch is now done, so we can drop the context lock and move out
 	 * of a shared context. We can't just go straight to the commit record,
 	 * though - we need to synchronise with previous and future commits so
@@ -640,6 +576,94 @@ out_abort:
 }
 
 /*
+ * Commit a transaction with the given vector to the Committed Item List.
+ *
+ * To do this, we need to format the item, pin it in memory if required and
+ * account for the space used by the transaction. Once we have done that we
+ * need to release the unused reservation for the transaction, attach the
+ * transaction to the checkpoint context so we carry the busy extents through
+ * to checkpoint completion, and then unlock all the items in the transaction.
+ *
+ * For more specific information about the order of operations in
+ * xfs_log_commit_cil() please refer to the comments in
+ * xfs_trans_commit_iclog().
+ *
+ * Called with the context lock already held in read mode to lock out
+ * background commit, returns without it held once background commits are
+ * allowed again.
+ */
+int
+xfs_log_commit_cil(
+	struct xfs_mount	*mp,
+	struct xfs_trans	*tp,
+	struct xfs_log_vec	*log_vector,
+	xfs_lsn_t		*commit_lsn,
+	int			flags)
+{
+	struct log		*log = mp->m_log;
+	int			log_flags = 0;
+	int			push = 0;
+
+	if (flags & XFS_TRANS_RELEASE_LOG_RES)
+		log_flags = XFS_LOG_REL_PERM_RESERV;
+
+	if (XLOG_FORCED_SHUTDOWN(log)) {
+		xlog_cil_free_logvec(log_vector);
+		return XFS_ERROR(EIO);
+	}
+
+	/* lock out background commit */
+	down_read(&log->l_cilp->xc_ctx_lock);
+	xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+	/* check we didn't blow the reservation */
+	if (tp->t_ticket->t_curr_res < 0)
+		xlog_print_tic_res(log->l_mp, tp->t_ticket);
+
+	/* attach the transaction to the CIL if it has any busy extents */
+	if (!list_empty(&tp->t_busy)) {
+		spin_lock(&log->l_cilp->xc_cil_lock);
+		list_splice_init(&tp->t_busy,
+					&log->l_cilp->xc_ctx->busy_extents);
+		spin_unlock(&log->l_cilp->xc_cil_lock);
+	}
+
+	tp->t_commit_lsn = *commit_lsn;
+	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+	xfs_trans_unreserve_and_mod_sb(tp);
+
+	/*
+	 * Once all the items of the transaction have been copied to the CIL,
+	 * the items can be unlocked and freed.
+	 *
+	 * This needs to be done before we drop the CIL context lock because we
+	 * have to update state in the log items and unlock them before they go
+	 * to disk. If we don't, then the CIL checkpoint can race with us and
+	 * we can run checkpoint completion before we've updated and unlocked
+	 * the log items. This affects (at least) processing of stale buffers,
+	 * inodes and EFIs.
+	 */
+	xfs_trans_free_items(tp, *commit_lsn, 0);
+
+	/* check for background commit before unlock */
+	if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
+		push = 1;
+
+	up_read(&log->l_cilp->xc_ctx_lock);
+
+	/*
+	 * We need to push CIL every so often so we don't cache more than we
+	 * can fit in the log. The limit really is that a checkpoint can't be
+	 * more than half the log (the current checkpoint is not allowed to
+	 * overwrite the previous checkpoint), but commit latency and memory
+	 * usage limit this to a smaller size in most cases.
+	 */
+	if (push)
+		xlog_cil_push(log, 0);
+	return 0;
+}
+
+/*
  * Conditionally push the CIL based on the sequence passed in.
  *
  * We only need to push if we haven't already pushed the sequence
@@ -653,39 +677,34 @@ out_abort:
  * commit lsn is there. It'll be empty, so this is broken for now.
  */
 xfs_lsn_t
-xlog_cil_push_lsn(
+xlog_cil_force_lsn(
 	struct log	*log,
-	xfs_lsn_t	push_seq)
+	xfs_lsn_t	sequence)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_cil_ctx	*ctx;
 	xfs_lsn_t		commit_lsn = NULLCOMMITLSN;
 
-restart:
-	down_write(&cil->xc_ctx_lock);
-	ASSERT(push_seq <= cil->xc_ctx->sequence);
-
-	/* check to see if we need to force out the current context */
-	if (push_seq == cil->xc_ctx->sequence) {
-		up_write(&cil->xc_ctx_lock);
-		xlog_cil_push(log, 1);
-		goto restart;
-	}
+	ASSERT(sequence <= cil->xc_current_sequence);
+
+	/*
+	 * check to see if we need to force out the current context.
+	 * xlog_cil_push() handles racing pushes for the same sequence,
+	 * so no need to deal with it here.
+	 */
+	if (sequence == cil->xc_current_sequence)
+		xlog_cil_push(log, sequence);
 
 	/*
 	 * See if we can find a previous sequence still committing.
-	 * We can drop the flush lock as soon as we have the cil lock
-	 * because we are now only comparing contexts protected by
-	 * the cil lock.
-	 *
 	 * We need to wait for all previous sequence commits to complete
 	 * before allowing the force of push_seq to go ahead. Hence block
 	 * on commits for those as well.
 	 */
+restart:
 	spin_lock(&cil->xc_cil_lock);
-	up_write(&cil->xc_ctx_lock);
 	list_for_each_entry(ctx, &cil->xc_committing, committing) {
-		if (ctx->sequence > push_seq)
+		if (ctx->sequence > sequence)
 			continue;
 		if (!ctx->commit_lsn) {
 			/*
@@ -695,7 +714,7 @@ restart:
 			sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
 			goto restart;
 		}
-		if (ctx->sequence != push_seq)
+		if (ctx->sequence != sequence)
 			continue;
 		/* found it! */
 		commit_lsn = ctx->commit_lsn;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8c072618965c..ced52b98b322 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -422,6 +422,7 @@ struct xfs_cil {
 	struct rw_semaphore	xc_ctx_lock;
 	struct list_head	xc_committing;
 	sv_t			xc_commit_wait;
+	xfs_lsn_t		xc_current_sequence;
 };
 
 /*
@@ -562,8 +563,16 @@ int xlog_cil_init(struct log *log);
 void	xlog_cil_init_post_recovery(struct log *log);
 void	xlog_cil_destroy(struct log *log);
 
-int	xlog_cil_push(struct log *log, int push_now);
-xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
+/*
+ * CIL force routines
+ */
+xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+
+static inline void
+xlog_cil_force(struct log *log)
+{
+	xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
+}
 
 /*
  * Unmount record type is used as a pseudo transaction type for the ticket.