aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author David Chinner <dgc@sgi.com> 2008-03-05 21:44:14 -0500
committer Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> 2008-04-17 21:38:10 -0400
commit 155cc6b784a959ed456fe46dca522e1d28b3b718 (patch)
tree e88b9041570f299497a6f9aac7e01797affed205
parent b589334c7a1fff85d2f009d5db4c34fad48925e9 (diff)
[XFS] Use atomics for iclog reference counting
Now that we update the log tail LSN less frequently on transaction completion, we pass the contention straight to the global log state lock (l_icloglock) during transaction completion. We currently have to take this lock to decrement the iclog reference count. There is a reference count on each iclog, so we need to take the global lock for all refcount changes. When large numbers of processes are all doing small transactions, the iclog reference counts will be quite high, and the state change that absolutely requires the l_icloglock is the exception rather than the norm. Change the reference counting on the iclogs to use atomic_inc/dec so that we can use atomic_dec_and_lock during transaction completion and avoid the need for grabbing the l_icloglock for every reference count decrement except the one that matters - the last. SGI-PV: 975671 SGI-Modid: xfs-linux-melb:xfs-kern:30505a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: Tim Shimmin <tes@sgi.com> Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r-- fs/xfs/xfs_log.c 36
-rw-r--r-- fs/xfs/xfs_log_priv.h 2
2 files changed, 21 insertions, 17 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2e35077ff6b2..1fa980933895 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -675,7 +675,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
675 675
676 spin_lock(&log->l_icloglock); 676 spin_lock(&log->l_icloglock);
677 iclog = log->l_iclog; 677 iclog = log->l_iclog;
678 iclog->ic_refcnt++; 678 atomic_inc(&iclog->ic_refcnt);
679 spin_unlock(&log->l_icloglock); 679 spin_unlock(&log->l_icloglock);
680 xlog_state_want_sync(log, iclog); 680 xlog_state_want_sync(log, iclog);
681 (void) xlog_state_release_iclog(log, iclog); 681 (void) xlog_state_release_iclog(log, iclog);
@@ -713,7 +713,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
713 */ 713 */
714 spin_lock(&log->l_icloglock); 714 spin_lock(&log->l_icloglock);
715 iclog = log->l_iclog; 715 iclog = log->l_iclog;
716 iclog->ic_refcnt++; 716 atomic_inc(&iclog->ic_refcnt);
717 spin_unlock(&log->l_icloglock); 717 spin_unlock(&log->l_icloglock);
718 718
719 xlog_state_want_sync(log, iclog); 719 xlog_state_want_sync(log, iclog);
@@ -1405,7 +1405,7 @@ xlog_sync(xlog_t *log,
1405 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); 1405 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
1406 1406
1407 XFS_STATS_INC(xs_log_writes); 1407 XFS_STATS_INC(xs_log_writes);
1408 ASSERT(iclog->ic_refcnt == 0); 1408 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
1409 1409
1410 /* Add for LR header */ 1410 /* Add for LR header */
1411 count_init = log->l_iclog_hsize + iclog->ic_offset; 1411 count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -2309,7 +2309,7 @@ xlog_state_done_syncing(
2309 2309
2310 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || 2310 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
2311 iclog->ic_state == XLOG_STATE_IOERROR); 2311 iclog->ic_state == XLOG_STATE_IOERROR);
2312 ASSERT(iclog->ic_refcnt == 0); 2312 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
2313 ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2); 2313 ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
2314 2314
2315 2315
@@ -2391,7 +2391,7 @@ restart:
2391 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); 2391 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
2392 head = &iclog->ic_header; 2392 head = &iclog->ic_header;
2393 2393
2394 iclog->ic_refcnt++; /* prevents sync */ 2394 atomic_inc(&iclog->ic_refcnt); /* prevents sync */
2395 log_offset = iclog->ic_offset; 2395 log_offset = iclog->ic_offset;
2396 2396
2397 /* On the 1st write to an iclog, figure out lsn. This works 2397 /* On the 1st write to an iclog, figure out lsn. This works
@@ -2423,12 +2423,12 @@ restart:
2423 xlog_state_switch_iclogs(log, iclog, iclog->ic_size); 2423 xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
2424 2424
2425 /* If I'm the only one writing to this iclog, sync it to disk */ 2425 /* If I'm the only one writing to this iclog, sync it to disk */
2426 if (iclog->ic_refcnt == 1) { 2426 if (atomic_read(&iclog->ic_refcnt) == 1) {
2427 spin_unlock(&log->l_icloglock); 2427 spin_unlock(&log->l_icloglock);
2428 if ((error = xlog_state_release_iclog(log, iclog))) 2428 if ((error = xlog_state_release_iclog(log, iclog)))
2429 return error; 2429 return error;
2430 } else { 2430 } else {
2431 iclog->ic_refcnt--; 2431 atomic_dec(&iclog->ic_refcnt);
2432 spin_unlock(&log->l_icloglock); 2432 spin_unlock(&log->l_icloglock);
2433 } 2433 }
2434 goto restart; 2434 goto restart;
@@ -2819,18 +2819,21 @@ xlog_state_release_iclog(
2819{ 2819{
2820 int sync = 0; /* do we sync? */ 2820 int sync = 0; /* do we sync? */
2821 2821
2822 spin_lock(&log->l_icloglock); 2822 if (iclog->ic_state & XLOG_STATE_IOERROR)
2823 return XFS_ERROR(EIO);
2824
2825 ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
2826 if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
2827 return 0;
2828
2823 if (iclog->ic_state & XLOG_STATE_IOERROR) { 2829 if (iclog->ic_state & XLOG_STATE_IOERROR) {
2824 spin_unlock(&log->l_icloglock); 2830 spin_unlock(&log->l_icloglock);
2825 return XFS_ERROR(EIO); 2831 return XFS_ERROR(EIO);
2826 } 2832 }
2827
2828 ASSERT(iclog->ic_refcnt > 0);
2829 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE || 2833 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
2830 iclog->ic_state == XLOG_STATE_WANT_SYNC); 2834 iclog->ic_state == XLOG_STATE_WANT_SYNC);
2831 2835
2832 if (--iclog->ic_refcnt == 0 && 2836 if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
2833 iclog->ic_state == XLOG_STATE_WANT_SYNC) {
2834 /* update tail before writing to iclog */ 2837 /* update tail before writing to iclog */
2835 xlog_assign_tail_lsn(log->l_mp); 2838 xlog_assign_tail_lsn(log->l_mp);
2836 sync++; 2839 sync++;
@@ -2950,7 +2953,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2950 * previous iclog and go to sleep. 2953 * previous iclog and go to sleep.
2951 */ 2954 */
2952 if (iclog->ic_state == XLOG_STATE_DIRTY || 2955 if (iclog->ic_state == XLOG_STATE_DIRTY ||
2953 (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) { 2956 (atomic_read(&iclog->ic_refcnt) == 0
2957 && iclog->ic_offset == 0)) {
2954 iclog = iclog->ic_prev; 2958 iclog = iclog->ic_prev;
2955 if (iclog->ic_state == XLOG_STATE_ACTIVE || 2959 if (iclog->ic_state == XLOG_STATE_ACTIVE ||
2956 iclog->ic_state == XLOG_STATE_DIRTY) 2960 iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2958,14 +2962,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
2958 else 2962 else
2959 goto maybe_sleep; 2963 goto maybe_sleep;
2960 } else { 2964 } else {
2961 if (iclog->ic_refcnt == 0) { 2965 if (atomic_read(&iclog->ic_refcnt) == 0) {
2962 /* We are the only one with access to this 2966 /* We are the only one with access to this
2963 * iclog. Flush it out now. There should 2967 * iclog. Flush it out now. There should
2964 * be a roundoff of zero to show that someone 2968 * be a roundoff of zero to show that someone
2965 * has already taken care of the roundoff from 2969 * has already taken care of the roundoff from
2966 * the previous sync. 2970 * the previous sync.
2967 */ 2971 */
2968 iclog->ic_refcnt++; 2972 atomic_inc(&iclog->ic_refcnt);
2969 lsn = be64_to_cpu(iclog->ic_header.h_lsn); 2973 lsn = be64_to_cpu(iclog->ic_header.h_lsn);
2970 xlog_state_switch_iclogs(log, iclog, 0); 2974 xlog_state_switch_iclogs(log, iclog, 0);
2971 spin_unlock(&log->l_icloglock); 2975 spin_unlock(&log->l_icloglock);
@@ -3097,7 +3101,7 @@ try_again:
3097 already_slept = 1; 3101 already_slept = 1;
3098 goto try_again; 3102 goto try_again;
3099 } else { 3103 } else {
3100 iclog->ic_refcnt++; 3104 atomic_inc(&iclog->ic_refcnt);
3101 xlog_state_switch_iclogs(log, iclog, 0); 3105 xlog_state_switch_iclogs(log, iclog, 0);
3102 spin_unlock(&log->l_icloglock); 3106 spin_unlock(&log->l_icloglock);
3103 if (xlog_state_release_iclog(log, iclog)) 3107 if (xlog_state_release_iclog(log, iclog))
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index c6244cc733c0..01c63db25a1d 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -339,7 +339,7 @@ typedef struct xlog_iclog_fields {
339#endif 339#endif
340 int ic_size; 340 int ic_size;
341 int ic_offset; 341 int ic_offset;
342 int ic_refcnt; 342 atomic_t ic_refcnt;
343 int ic_bwritecnt; 343 int ic_bwritecnt;
344 ushort_t ic_state; 344 ushort_t ic_state;
345 char *ic_datap; /* pointer to iclog data */ 345 char *ic_datap; /* pointer to iclog data */