about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: David Chinner <dgc@sgi.com> 2008-04-09 22:18:39 -0400
committer: Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> 2008-04-17 21:50:22 -0400
commit: 114d23aae51233b2bc62d8e2a632bcb55de1953d (patch)
tree: 39aa3e7e6dd32c39a416e34c4a663cb329315685 /fs
parent: 2abdb8c88110bab78bfe17e51346e735560daa02 (diff)
[XFS] Per iclog callback chain lock
Rather than use the icloglock for protecting the iclog completion callback chain, use a new per-iclog lock so that walking the callback chain doesn't require holding a global lock.

This reduces contention on the icloglock during transaction commit and log I/O completion by reducing the number of times we need to hold the global icloglock during these operations.

SGI-PV: 978729
SGI-Modid: xfs-linux-melb:xfs-kern:30770a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r--  fs/xfs/xfs_log.c       35
-rw-r--r--  fs/xfs/xfs_log_priv.h  33
2 files changed, 45 insertions, 23 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1fa980933895..7a5b12d93537 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -397,12 +397,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
397 void *iclog_hndl, /* iclog to hang callback off */ 397 void *iclog_hndl, /* iclog to hang callback off */
398 xfs_log_callback_t *cb) 398 xfs_log_callback_t *cb)
399{ 399{
400 xlog_t *log = mp->m_log;
401 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl; 400 xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
402 int abortflg; 401 int abortflg;
403 402
404 cb->cb_next = NULL; 403 spin_lock(&iclog->ic_callback_lock);
405 spin_lock(&log->l_icloglock);
406 abortflg = (iclog->ic_state & XLOG_STATE_IOERROR); 404 abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
407 if (!abortflg) { 405 if (!abortflg) {
408 ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) || 406 ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
@@ -411,7 +409,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
411 *(iclog->ic_callback_tail) = cb; 409 *(iclog->ic_callback_tail) = cb;
412 iclog->ic_callback_tail = &(cb->cb_next); 410 iclog->ic_callback_tail = &(cb->cb_next);
413 } 411 }
414 spin_unlock(&log->l_icloglock); 412 spin_unlock(&iclog->ic_callback_lock);
415 return abortflg; 413 return abortflg;
416} /* xfs_log_notify */ 414} /* xfs_log_notify */
417 415
@@ -1257,6 +1255,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1257 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; 1255 iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
1258 iclog->ic_state = XLOG_STATE_ACTIVE; 1256 iclog->ic_state = XLOG_STATE_ACTIVE;
1259 iclog->ic_log = log; 1257 iclog->ic_log = log;
1258 atomic_set(&iclog->ic_refcnt, 0);
1259 spin_lock_init(&iclog->ic_callback_lock);
1260 iclog->ic_callback_tail = &(iclog->ic_callback); 1260 iclog->ic_callback_tail = &(iclog->ic_callback);
1261 iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize; 1261 iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
1262 1262
@@ -1987,7 +1987,7 @@ xlog_state_clean_log(xlog_t *log)
1987 if (iclog->ic_state == XLOG_STATE_DIRTY) { 1987 if (iclog->ic_state == XLOG_STATE_DIRTY) {
1988 iclog->ic_state = XLOG_STATE_ACTIVE; 1988 iclog->ic_state = XLOG_STATE_ACTIVE;
1989 iclog->ic_offset = 0; 1989 iclog->ic_offset = 0;
1990 iclog->ic_callback = NULL; /* don't need to free */ 1990 ASSERT(iclog->ic_callback == NULL);
1991 /* 1991 /*
1992 * If the number of ops in this iclog indicate it just 1992 * If the number of ops in this iclog indicate it just
1993 * contains the dummy transaction, we can 1993 * contains the dummy transaction, we can
@@ -2190,37 +2190,40 @@ xlog_state_do_callback(
2190 be64_to_cpu(iclog->ic_header.h_lsn); 2190 be64_to_cpu(iclog->ic_header.h_lsn);
2191 spin_unlock(&log->l_grant_lock); 2191 spin_unlock(&log->l_grant_lock);
2192 2192
2193 /*
2194 * Keep processing entries in the callback list
2195 * until we come around and it is empty. We
2196 * need to atomically see that the list is
2197 * empty and change the state to DIRTY so that
2198 * we don't miss any more callbacks being added.
2199 */
2200 spin_lock(&log->l_icloglock);
2201 } else { 2193 } else {
2194 spin_unlock(&log->l_icloglock);
2202 ioerrors++; 2195 ioerrors++;
2203 } 2196 }
2204 cb = iclog->ic_callback;
2205 2197
2198 /*
2199 * Keep processing entries in the callback list until
2200 * we come around and it is empty. We need to
2201 * atomically see that the list is empty and change the
2202 * state to DIRTY so that we don't miss any more
2203 * callbacks being added.
2204 */
2205 spin_lock(&iclog->ic_callback_lock);
2206 cb = iclog->ic_callback;
2206 while (cb) { 2207 while (cb) {
2207 iclog->ic_callback_tail = &(iclog->ic_callback); 2208 iclog->ic_callback_tail = &(iclog->ic_callback);
2208 iclog->ic_callback = NULL; 2209 iclog->ic_callback = NULL;
2209 spin_unlock(&log->l_icloglock); 2210 spin_unlock(&iclog->ic_callback_lock);
2210 2211
2211 /* perform callbacks in the order given */ 2212 /* perform callbacks in the order given */
2212 for (; cb; cb = cb_next) { 2213 for (; cb; cb = cb_next) {
2213 cb_next = cb->cb_next; 2214 cb_next = cb->cb_next;
2214 cb->cb_func(cb->cb_arg, aborted); 2215 cb->cb_func(cb->cb_arg, aborted);
2215 } 2216 }
2216 spin_lock(&log->l_icloglock); 2217 spin_lock(&iclog->ic_callback_lock);
2217 cb = iclog->ic_callback; 2218 cb = iclog->ic_callback;
2218 } 2219 }
2219 2220
2220 loopdidcallbacks++; 2221 loopdidcallbacks++;
2221 funcdidcallbacks++; 2222 funcdidcallbacks++;
2222 2223
2224 spin_lock(&log->l_icloglock);
2223 ASSERT(iclog->ic_callback == NULL); 2225 ASSERT(iclog->ic_callback == NULL);
2226 spin_unlock(&iclog->ic_callback_lock);
2224 if (!(iclog->ic_state & XLOG_STATE_IOERROR)) 2227 if (!(iclog->ic_state & XLOG_STATE_IOERROR))
2225 iclog->ic_state = XLOG_STATE_DIRTY; 2228 iclog->ic_state = XLOG_STATE_DIRTY;
2226 2229
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 01c63db25a1d..104b623aa082 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -324,6 +324,19 @@ typedef struct xlog_rec_ext_header {
324 * - ic_offset is the current number of bytes written to in this iclog. 324 * - ic_offset is the current number of bytes written to in this iclog.
325 * - ic_refcnt is bumped when someone is writing to the log. 325 * - ic_refcnt is bumped when someone is writing to the log.
326 * - ic_state is the state of the iclog. 326 * - ic_state is the state of the iclog.
327 *
328 * Because of cacheline contention on large machines, we need to separate
329 * various resources onto different cachelines. To start with, make the
330 * structure cacheline aligned. The following fields can be contended on
331 * by independent processes:
332 *
333 * - ic_callback_*
334 * - ic_refcnt
335 * - fields protected by the global l_icloglock
336 *
337 * so we need to ensure that these fields are located in separate cachelines.
338 * We'll put all the read-only and l_icloglock fields in the first cacheline,
339 * and move everything else out to subsequent cachelines.
327 */ 340 */
328typedef struct xlog_iclog_fields { 341typedef struct xlog_iclog_fields {
329 sv_t ic_forcesema; 342 sv_t ic_forcesema;
@@ -332,18 +345,23 @@ typedef struct xlog_iclog_fields {
332 struct xlog_in_core *ic_prev; 345 struct xlog_in_core *ic_prev;
333 struct xfs_buf *ic_bp; 346 struct xfs_buf *ic_bp;
334 struct log *ic_log; 347 struct log *ic_log;
335 xfs_log_callback_t *ic_callback;
336 xfs_log_callback_t **ic_callback_tail;
337#ifdef XFS_LOG_TRACE
338 struct ktrace *ic_trace;
339#endif
340 int ic_size; 348 int ic_size;
341 int ic_offset; 349 int ic_offset;
342 atomic_t ic_refcnt;
343 int ic_bwritecnt; 350 int ic_bwritecnt;
344 ushort_t ic_state; 351 ushort_t ic_state;
345 char *ic_datap; /* pointer to iclog data */ 352 char *ic_datap; /* pointer to iclog data */
346} xlog_iclog_fields_t; 353#ifdef XFS_LOG_TRACE
354 struct ktrace *ic_trace;
355#endif
356
357 /* Callback structures need their own cacheline */
358 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
359 xfs_log_callback_t *ic_callback;
360 xfs_log_callback_t **ic_callback_tail;
361
362 /* reference counts need their own cacheline */
363 atomic_t ic_refcnt ____cacheline_aligned_in_smp;
364} xlog_iclog_fields_t ____cacheline_aligned_in_smp;
347 365
348typedef union xlog_in_core2 { 366typedef union xlog_in_core2 {
349 xlog_rec_header_t hic_header; 367 xlog_rec_header_t hic_header;
@@ -366,6 +384,7 @@ typedef struct xlog_in_core {
366#define ic_bp hic_fields.ic_bp 384#define ic_bp hic_fields.ic_bp
367#define ic_log hic_fields.ic_log 385#define ic_log hic_fields.ic_log
368#define ic_callback hic_fields.ic_callback 386#define ic_callback hic_fields.ic_callback
387#define ic_callback_lock hic_fields.ic_callback_lock
369#define ic_callback_tail hic_fields.ic_callback_tail 388#define ic_callback_tail hic_fields.ic_callback_tail
370#define ic_trace hic_fields.ic_trace 389#define ic_trace hic_fields.ic_trace
371#define ic_size hic_fields.ic_size 390#define ic_size hic_fields.ic_size