Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--	fs/xfs/xfs_log.c	273
1 file changed, 91 insertions(+), 182 deletions(-)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index a75edca1860f..afaee301b0ee 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -41,6 +41,7 @@
 #include "xfs_inode.h"
 #include "xfs_rw.h"
 
+kmem_zone_t	*xfs_log_ticket_zone;
 
 #define xlog_write_adv_cnt(ptr, len, off, bytes) \
 	{ (ptr) += (bytes); \
@@ -73,8 +74,6 @@ STATIC int xlog_state_get_iclog_space(xlog_t *log,
 				       xlog_ticket_t	*ticket,
 				       int		*continued_write,
 				       int		*logoffsetp);
-STATIC void xlog_state_put_ticket(xlog_t *log,
-				  xlog_ticket_t *tic);
 STATIC int  xlog_state_release_iclog(xlog_t *log,
 				     xlog_in_core_t *iclog);
 STATIC void xlog_state_switch_iclogs(xlog_t *log,
@@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
 
 
 /* local ticket functions */
-STATIC void		xlog_state_ticket_alloc(xlog_t *log);
 STATIC xlog_ticket_t	*xlog_ticket_get(xlog_t *log,
 					 int	unit_bytes,
 					 int	count,
@@ -330,7 +328,7 @@ xfs_log_done(xfs_mount_t *mp,
 		 */
 		xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
 		xlog_ungrant_log_space(log, ticket);
-		xlog_state_put_ticket(log, ticket);
+		xlog_ticket_put(log, ticket);
 	} else {
 		xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
 		xlog_regrant_reserve_log_space(log, ticket);
@@ -384,7 +382,27 @@ _xfs_log_force(
 		return xlog_state_sync_all(log, flags, log_flushed);
 	else
 		return xlog_state_sync(log, lsn, flags, log_flushed);
-}	/* xfs_log_force */
+}	/* _xfs_log_force */
+
+/*
+ * Wrapper for _xfs_log_force(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force(
+	xfs_mount_t	*mp,
+	xfs_lsn_t	lsn,
+	uint		flags)
+{
+	int	error;
+	error = _xfs_log_force(mp, lsn, flags, NULL);
+	if (error) {
+		xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+			"error %d returned.", error);
+	}
+}
+
 
 /*
  * Attaches a new iclog I/O completion callback routine during
@@ -397,12 +415,10 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
 	       void		  *iclog_hndl,	/* iclog to hang callback off */
 	       xfs_log_callback_t *cb)
 {
-	xlog_t *log = mp->m_log;
 	xlog_in_core_t	  *iclog = (xlog_in_core_t *)iclog_hndl;
 	int	abortflg;
 
-	cb->cb_next = NULL;
-	spin_lock(&log->l_icloglock);
+	spin_lock(&iclog->ic_callback_lock);
 	abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
 	if (!abortflg) {
 		ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
@@ -411,7 +427,7 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
 		*(iclog->ic_callback_tail) = cb;
 		iclog->ic_callback_tail = &(cb->cb_next);
 	}
-	spin_unlock(&log->l_icloglock);
+	spin_unlock(&iclog->ic_callback_lock);
 	return abortflg;
 }	/* xfs_log_notify */
 
@@ -471,6 +487,8 @@ xfs_log_reserve(xfs_mount_t *mp,
 		/* may sleep if need to allocate more tickets */
 		internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
 						  client, flags);
+		if (!internal_ticket)
+			return XFS_ERROR(ENOMEM);
 		internal_ticket->t_trans_type = t_type;
 		*ticket = internal_ticket;
 		xlog_trace_loggrant(log, internal_ticket,
@@ -636,7 +654,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return 0;
 
-	xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+	error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
+	ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
 
 #ifdef DEBUG
 	first_iclog = iclog = log->l_iclog;
@@ -675,10 +694,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 
 	spin_lock(&log->l_icloglock);
 	iclog = log->l_iclog;
-	iclog->ic_refcnt++;
+	atomic_inc(&iclog->ic_refcnt);
 	spin_unlock(&log->l_icloglock);
 	xlog_state_want_sync(log, iclog);
-	(void) xlog_state_release_iclog(log, iclog);
+	error = xlog_state_release_iclog(log, iclog);
 
 	spin_lock(&log->l_icloglock);
 	if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
@@ -695,7 +714,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		if (tic) {
 			xlog_trace_loggrant(log, tic, "unmount rec");
 			xlog_ungrant_log_space(log, tic);
-			xlog_state_put_ticket(log, tic);
+			xlog_ticket_put(log, tic);
 		}
 	} else {
 		/*
@@ -713,11 +732,11 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		 */
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 
 		xlog_state_want_sync(log, iclog);
-		(void) xlog_state_release_iclog(log, iclog);
+		error = xlog_state_release_iclog(log, iclog);
 
 		spin_lock(&log->l_icloglock);
 
@@ -732,7 +751,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		}
 	}
 
-	return 0;
+	return error;
 }	/* xfs_log_unmount_write */
 
 /*
@@ -1090,7 +1109,7 @@ xlog_get_iclog_buffer_size(xfs_mount_t *mp,
 			size >>= 1;
 		}
 
-	if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
+	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
 		/* # headers = size / 32K
 		 * one header holds cycles from 32K of data
 		 */
@@ -1186,13 +1205,13 @@ xlog_alloc_log(xfs_mount_t *mp,
 	log->l_grant_reserve_cycle = 1;
 	log->l_grant_write_cycle = 1;
 
-	if (XFS_SB_VERSION_HASSECTOR(&mp->m_sb)) {
+	if (xfs_sb_version_hassector(&mp->m_sb)) {
 		log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
 		ASSERT(log->l_sectbb_log <= mp->m_sectbb_log);
 		/* for larger sector sizes, must have v2 or external log */
 		ASSERT(log->l_sectbb_log == 0 ||
 		       log->l_logBBstart == 0 ||
-		       XFS_SB_VERSION_HASLOGV2(&mp->m_sb));
+		       xfs_sb_version_haslogv2(&mp->m_sb));
 		ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT);
 	}
 	log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
@@ -1210,7 +1229,6 @@ xlog_alloc_log(xfs_mount_t *mp,
 	spin_lock_init(&log->l_icloglock);
 	spin_lock_init(&log->l_grant_lock);
 	initnsema(&log->l_flushsema, 0, "ic-flush");
-	xlog_state_ticket_alloc(log);  /* wait until after icloglock inited */
 
 	/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
 	ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1240,23 +1258,24 @@ xlog_alloc_log(xfs_mount_t *mp,
 	XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
 	iclog->ic_bp = bp;
 	iclog->hic_data = bp->b_addr;
-
+#ifdef DEBUG
 	log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
-
+#endif
 	head = &iclog->ic_header;
 	memset(head, 0, sizeof(xlog_rec_header_t));
 	head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
 	head->h_version = cpu_to_be32(
-		XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) ? 2 : 1);
+		xfs_sb_version_haslogv2(&log->l_mp->m_sb) ? 2 : 1);
 	head->h_size = cpu_to_be32(log->l_iclog_size);
 	/* new fields */
 	head->h_fmt = cpu_to_be32(XLOG_FMT);
 	memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
 
-
 	iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
 	iclog->ic_state = XLOG_STATE_ACTIVE;
 	iclog->ic_log = log;
+	atomic_set(&iclog->ic_refcnt, 0);
+	spin_lock_init(&iclog->ic_callback_lock);
 	iclog->ic_callback_tail = &(iclog->ic_callback);
 	iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
 
@@ -1402,10 +1421,10 @@ xlog_sync(xlog_t *log,
 	int		roundoff;       /* roundoff to BB or stripe */
 	int		split = 0;	/* split write into two regions */
 	int		error;
-	int		v2 = XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb);
+	int		v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
 
 	XFS_STATS_INC(xs_log_writes);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 
 	/* Add for LR header */
 	count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -1538,7 +1557,6 @@ STATIC void
 xlog_dealloc_log(xlog_t *log)
 {
 	xlog_in_core_t	*iclog, *next_iclog;
-	xlog_ticket_t	*tic, *next_tic;
 	int		i;
 
 	iclog = log->l_iclog;
@@ -1559,22 +1577,6 @@ xlog_dealloc_log(xlog_t *log)
 	spinlock_destroy(&log->l_icloglock);
 	spinlock_destroy(&log->l_grant_lock);
 
-	/* XXXsup take a look at this again. */
-	if ((log->l_ticket_cnt != log->l_ticket_tcnt) &&
-	    !XLOG_FORCED_SHUTDOWN(log)) {
-		xfs_fs_cmn_err(CE_WARN, log->l_mp,
-			"xlog_dealloc_log: (cnt: %d, total: %d)",
-			log->l_ticket_cnt, log->l_ticket_tcnt);
-		/* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */
-
-	} else {
-		tic = log->l_unmount_free;
-		while (tic) {
-			next_tic = tic->t_next;
-			kmem_free(tic, PAGE_SIZE);
-			tic = next_tic;
-		}
-	}
 	xfs_buf_free(log->l_xbuf);
 #ifdef XFS_LOG_TRACE
 	if (log->l_trace != NULL) {
@@ -1987,7 +1989,7 @@ xlog_state_clean_log(xlog_t *log)
 		if (iclog->ic_state == XLOG_STATE_DIRTY) {
 			iclog->ic_state	= XLOG_STATE_ACTIVE;
 			iclog->ic_offset       = 0;
-			iclog->ic_callback	= NULL;   /* don't need to free */
+			ASSERT(iclog->ic_callback == NULL);
 			/*
 			 * If the number of ops in this iclog indicate it just
 			 * contains the dummy transaction, we can
@@ -2190,37 +2192,40 @@ xlog_state_do_callback(
 				be64_to_cpu(iclog->ic_header.h_lsn);
 			spin_unlock(&log->l_grant_lock);
 
-			/*
-			 * Keep processing entries in the callback list
-			 * until we come around and it is empty.  We
-			 * need to atomically see that the list is
-			 * empty and change the state to DIRTY so that
-			 * we don't miss any more callbacks being added.
-			 */
-			spin_lock(&log->l_icloglock);
 		} else {
+			spin_unlock(&log->l_icloglock);
 			ioerrors++;
 		}
-		cb = iclog->ic_callback;
 
+		/*
+		 * Keep processing entries in the callback list until
+		 * we come around and it is empty.  We need to
+		 * atomically see that the list is empty and change the
+		 * state to DIRTY so that we don't miss any more
+		 * callbacks being added.
+		 */
+		spin_lock(&iclog->ic_callback_lock);
+		cb = iclog->ic_callback;
 		while (cb) {
 			iclog->ic_callback_tail = &(iclog->ic_callback);
 			iclog->ic_callback = NULL;
-			spin_unlock(&log->l_icloglock);
+			spin_unlock(&iclog->ic_callback_lock);
 
 			/* perform callbacks in the order given */
 			for (; cb; cb = cb_next) {
 				cb_next = cb->cb_next;
 				cb->cb_func(cb->cb_arg, aborted);
 			}
-			spin_lock(&log->l_icloglock);
+			spin_lock(&iclog->ic_callback_lock);
 			cb = iclog->ic_callback;
 		}
 
 		loopdidcallbacks++;
 		funcdidcallbacks++;
 
+		spin_lock(&log->l_icloglock);
 		ASSERT(iclog->ic_callback == NULL);
+		spin_unlock(&iclog->ic_callback_lock);
 		if (!(iclog->ic_state & XLOG_STATE_IOERROR))
 			iclog->ic_state = XLOG_STATE_DIRTY;
 
@@ -2241,7 +2246,7 @@ xlog_state_do_callback(
 			repeats = 0;
 			xfs_fs_cmn_err(CE_WARN, log->l_mp,
 				"%s: possible infinite loop (%d iterations)",
-				__FUNCTION__, flushcnt);
+				__func__, flushcnt);
 		}
 	} while (!ioerrors && loopdidcallbacks);
 
@@ -2309,7 +2314,7 @@ xlog_state_done_syncing(
 
 	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
 	       iclog->ic_state == XLOG_STATE_IOERROR);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
 
 
@@ -2391,7 +2396,7 @@ restart:
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
 	head = &iclog->ic_header;
 
-	iclog->ic_refcnt++;			/* prevents sync */
+	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
 	log_offset = iclog->ic_offset;
 
 	/* On the 1st write to an iclog, figure out lsn.  This works
@@ -2423,12 +2428,12 @@ restart:
 		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
 
 		/* If I'm the only one writing to this iclog, sync it to disk */
-		if (iclog->ic_refcnt == 1) {
+		if (atomic_read(&iclog->ic_refcnt) == 1) {
 			spin_unlock(&log->l_icloglock);
 			if ((error = xlog_state_release_iclog(log, iclog)))
 				return error;
 		} else {
-			iclog->ic_refcnt--;
+			atomic_dec(&iclog->ic_refcnt);
 			spin_unlock(&log->l_icloglock);
 		}
 		goto restart;
@@ -2792,18 +2797,6 @@ xlog_ungrant_log_space(xlog_t *log,
 
 
 /*
- * Atomically put back used ticket.
- */
-STATIC void
-xlog_state_put_ticket(xlog_t	*log,
-		      xlog_ticket_t *tic)
-{
-	spin_lock(&log->l_icloglock);
-	xlog_ticket_put(log, tic);
-	spin_unlock(&log->l_icloglock);
-}	/* xlog_state_put_ticket */
-
-/*
  * Flush iclog to disk if this is the last reference to the given iclog and
  * the WANT_SYNC bit is set.
  *
@@ -2813,33 +2806,35 @@ xlog_state_put_ticket(xlog_t *log,
  *
  */
 STATIC int
-xlog_state_release_iclog(xlog_t		*log,
-			 xlog_in_core_t	*iclog)
+xlog_state_release_iclog(
+	xlog_t		*log,
+	xlog_in_core_t	*iclog)
 {
 	int		sync = 0;	/* do we sync? */
 
-	xlog_assign_tail_lsn(log->l_mp);
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		return XFS_ERROR(EIO);
 
-	spin_lock(&log->l_icloglock);
+	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
+	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
+		return 0;
 
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		spin_unlock(&log->l_icloglock);
 		return XFS_ERROR(EIO);
 	}
-
-	ASSERT(iclog->ic_refcnt > 0);
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
 	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
 
-	if (--iclog->ic_refcnt == 0 &&
-	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+		/* update tail before writing to iclog */
+		xlog_assign_tail_lsn(log->l_mp);
 		sync++;
 		iclog->ic_state = XLOG_STATE_SYNCING;
 		iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
 		xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
 		/* cycle incremented when incrementing curr_block */
 	}
-
 	spin_unlock(&log->l_icloglock);
 
 	/*
@@ -2849,11 +2844,9 @@ xlog_state_release_iclog(xlog_t *log,
 	 * this iclog has consistent data, so we ignore IOERROR
 	 * flags after this point.
 	 */
-	if (sync) {
+	if (sync)
 		return xlog_sync(log, iclog);
-	}
 	return 0;
-
 }	/* xlog_state_release_iclog */
 
 
@@ -2881,7 +2874,7 @@ xlog_state_switch_iclogs(xlog_t *log,
 	log->l_curr_block += BTOBB(eventual_size)+BTOBB(log->l_iclog_hsize);
 
 	/* Round up to next log-sunit */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
 	    log->l_mp->m_sb.sb_logsunit > 1) {
 		__uint32_t sunit_bb = BTOBB(log->l_mp->m_sb.sb_logsunit);
 		log->l_curr_block = roundup(log->l_curr_block, sunit_bb);
@@ -2953,7 +2946,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 		 * previous iclog and go to sleep.
 		 */
 		if (iclog->ic_state == XLOG_STATE_DIRTY ||
-		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
+		    (atomic_read(&iclog->ic_refcnt) == 0
+		     && iclog->ic_offset == 0)) {
 			iclog = iclog->ic_prev;
 			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
 			    iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2961,14 +2955,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 			else
 				goto maybe_sleep;
 		} else {
-			if (iclog->ic_refcnt == 0) {
+			if (atomic_read(&iclog->ic_refcnt) == 0) {
 				/* We are the only one with access to this
 				 * iclog.  Flush it out now.  There should
 				 * be a roundoff of zero to show that someone
 				 * has already taken care of the roundoff from
 				 * the previous sync.
 				 */
-				iclog->ic_refcnt++;
+				atomic_inc(&iclog->ic_refcnt);
 				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 				xlog_state_switch_iclogs(log, iclog, 0);
 				spin_unlock(&log->l_icloglock);
@@ -3100,7 +3094,7 @@ try_again:
 			already_slept = 1;
 			goto try_again;
 		} else {
-			iclog->ic_refcnt++;
+			atomic_inc(&iclog->ic_refcnt);
 			xlog_state_switch_iclogs(log, iclog, 0);
 			spin_unlock(&log->l_icloglock);
 			if (xlog_state_release_iclog(log, iclog))
@@ -3172,92 +3166,19 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
  */
 
 /*
- * Algorithm doesn't take into account page size. ;-(
- */
-STATIC void
-xlog_state_ticket_alloc(xlog_t *log)
-{
-	xlog_ticket_t	*t_list;
-	xlog_ticket_t	*next;
-	xfs_caddr_t	buf;
-	uint		i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2;
-
-	/*
-	 * The kmem_zalloc may sleep, so we shouldn't be holding the
-	 * global lock.  XXXmiken: may want to use zone allocator.
-	 */
-	buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP);
-
-	spin_lock(&log->l_icloglock);
-
-	/* Attach 1st ticket to Q, so we can keep track of allocated memory */
-	t_list = (xlog_ticket_t *)buf;
-	t_list->t_next = log->l_unmount_free;
-	log->l_unmount_free = t_list++;
-	log->l_ticket_cnt++;
-	log->l_ticket_tcnt++;
-
-	/* Next ticket becomes first ticket attached to ticket free list */
-	if (log->l_freelist != NULL) {
-		ASSERT(log->l_tail != NULL);
-		log->l_tail->t_next = t_list;
-	} else {
-		log->l_freelist = t_list;
-	}
-	log->l_ticket_cnt++;
-	log->l_ticket_tcnt++;
-
-	/* Cycle through rest of alloc'ed memory, building up free Q */
-	for ( ; i > 0; i--) {
-		next = t_list + 1;
-		t_list->t_next = next;
-		t_list = next;
-		log->l_ticket_cnt++;
-		log->l_ticket_tcnt++;
-	}
-	t_list->t_next = NULL;
-	log->l_tail = t_list;
-	spin_unlock(&log->l_icloglock);
-}	/* xlog_state_ticket_alloc */
-
-
-/*
- * Put ticket into free list
- *
- * Assumption: log lock is held around this call.
+ * Free a used ticket.
  */
 STATIC void
 xlog_ticket_put(xlog_t		*log,
 		xlog_ticket_t	*ticket)
 {
 	sv_destroy(&ticket->t_sema);
-
-	/*
-	 * Don't think caching will make that much difference.  It's
-	 * more important to make debug easier.
-	 */
-#if 0
-	/* real code will want to use LIFO for caching */
-	ticket->t_next = log->l_freelist;
-	log->l_freelist = ticket;
-	/* no need to clear fields */
-#else
-	/* When we debug, it is easier if tickets are cycled */
-	ticket->t_next = NULL;
-	if (log->l_tail) {
-		log->l_tail->t_next = ticket;
-	} else {
-		ASSERT(log->l_freelist == NULL);
-		log->l_freelist = ticket;
-	}
-	log->l_tail = ticket;
-#endif /* DEBUG */
-	log->l_ticket_cnt++;
+	kmem_zone_free(xfs_log_ticket_zone, ticket);
 }	/* xlog_ticket_put */
 
 
 /*
- * Grab ticket off freelist or allocation some more
+ * Allocate and initialise a new log ticket.
  */
 STATIC xlog_ticket_t *
 xlog_ticket_get(xlog_t *log,
@@ -3269,21 +3190,9 @@ xlog_ticket_get(xlog_t *log,
 	xlog_ticket_t	*tic;
 	uint		num_headers;
 
- alloc:
-	if (log->l_freelist == NULL)
-		xlog_state_ticket_alloc(log);		/* potentially sleep */
-
-	spin_lock(&log->l_icloglock);
-	if (log->l_freelist == NULL) {
-		spin_unlock(&log->l_icloglock);
-		goto alloc;
-	}
-	tic = log->l_freelist;
-	log->l_freelist = tic->t_next;
-	if (log->l_freelist == NULL)
-		log->l_tail = NULL;
-	log->l_ticket_cnt--;
-	spin_unlock(&log->l_icloglock);
+	tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
+	if (!tic)
+		return NULL;
 
 	/*
 	 * Permanent reservations have up to 'cnt'-1 active log operations
@@ -3334,7 +3243,7 @@ xlog_ticket_get(xlog_t *log,
 	unit_bytes += sizeof(xlog_op_header_t) * num_headers;
 
 	/* for roundoff padding for transaction data and one for commit record */
-	if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
 	    log->l_mp->m_sb.sb_logsunit > 1) {
 		/* log su roundoff */
 		unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
@@ -3611,8 +3520,8 @@ xfs_log_force_umount(
 	 * before we mark the filesystem SHUTDOWN and wake
 	 * everybody up to tell the bad news.
 	 */
-	spin_lock(&log->l_grant_lock);
 	spin_lock(&log->l_icloglock);
+	spin_lock(&log->l_grant_lock);
 	mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
 	XFS_BUF_DONE(mp->m_sb_bp);
 	/*
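
Note on the refcounting change above: the new xlog_state_release_iclog() relies on the kernel's atomic_dec_and_lock(), which decrements an atomic_t and returns true, with the spinlock held, only if the count reached zero; every other caller drops its reference without touching l_icloglock. Below is a minimal userspace C sketch of that pattern, not the kernel code itself; the names (obj_t, obj_release, dec_and_lock, want_sync) are illustrative stand-ins for the iclog structures in the patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

typedef struct {
	atomic_int	refcnt;
	pthread_mutex_t	lock;
	bool		want_sync;	/* stands in for XLOG_STATE_WANT_SYNC */
} obj_t;

/* Analogue of the kernel's atomic_dec_and_lock(): drop one reference,
 * returning true with *lock held only if this call took the count to zero. */
static bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* fast path: clearly not the last reference, never take the lock */
	while (old > 1) {
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return false;
	}
	/* slow path: we may be the last holder; decide under the lock */
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) == 1)
		return true;		/* count hit zero; caller must unlock */
	pthread_mutex_unlock(lock);
	return false;
}

static void obj_release(obj_t *obj)
{
	if (!dec_and_lock(&obj->refcnt, &obj->lock))
		return;			/* someone else still holds a reference */
	if (obj->want_sync) {
		/* state transition and write-out would happen here, like
		 * the move to XLOG_STATE_SYNCING and xlog_sync() above */
	}
	pthread_mutex_unlock(&obj->lock);
}

The point of the pattern, and of the patch, is that the common release path stays lock-free; the lock is taken exactly once, by whichever thread must perform the final state transition.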