Diffstat (limited to 'fs/xfs/xfs_log.c')

 fs/xfs/xfs_log.c | 259 ++++++++-----------------
 1 file changed, 84 insertions(+), 175 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 31f2b04f2c97..afaee301b0ee 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -41,6 +41,7 @@
 #include "xfs_inode.h"
 #include "xfs_rw.h"
 
+kmem_zone_t	*xfs_log_ticket_zone;
 
 #define xlog_write_adv_cnt(ptr, len, off, bytes) \
 	{ (ptr) += (bytes); \
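
The hunk above adds the global slab zone that replaces the old hand-rolled ticket free list. The zone itself has to be created before the log code runs; that call is not part of this file, so the following is only a sketch of the assumed init/teardown (placement in xfs_init()/xfs_cleanup() is a guess):

	/* assumed module init -- not part of this diff */
	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
					     "xfs_log_ticket");

	/* assumed module teardown */
	kmem_zone_destroy(xfs_log_ticket_zone);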
@@ -73,8 +74,6 @@ STATIC int xlog_state_get_iclog_space(xlog_t *log,
 				       xlog_ticket_t	*ticket,
 				       int		*continued_write,
 				       int		*logoffsetp);
-STATIC void xlog_state_put_ticket(xlog_t	*log,
-				  xlog_ticket_t	*tic);
 STATIC int  xlog_state_release_iclog(xlog_t		*log,
 				     xlog_in_core_t	*iclog);
 STATIC void xlog_state_switch_iclogs(xlog_t		*log,
@@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
 
 
 /* local ticket functions */
-STATIC void		xlog_state_ticket_alloc(xlog_t *log);
 STATIC xlog_ticket_t	*xlog_ticket_get(xlog_t *log,
 					 int	unit_bytes,
 					 int	count,
@@ -330,7 +328,7 @@ xfs_log_done(xfs_mount_t *mp,
 		 */
 		xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
 		xlog_ungrant_log_space(log, ticket);
-		xlog_state_put_ticket(log, ticket);
+		xlog_ticket_put(log, ticket);
 	} else {
 		xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
 		xlog_regrant_reserve_log_space(log, ticket);
@@ -384,7 +382,27 @@ _xfs_log_force(
 		return xlog_state_sync_all(log, flags, log_flushed);
 	else
 		return xlog_state_sync(log, lsn, flags, log_flushed);
-}	/* xfs_log_force */
+}	/* _xfs_log_force */
+
+/*
+ * Wrapper for _xfs_log_force(), to be used when caller doesn't care
+ * about errors or whether the log was flushed or not. This is the normal
+ * interface to use when trying to unpin items or move the log forward.
+ */
+void
+xfs_log_force(
+	xfs_mount_t	*mp,
+	xfs_lsn_t	lsn,
+	uint		flags)
+{
+	int	error;
+	error = _xfs_log_force(mp, lsn, flags, NULL);
+	if (error) {
+		xfs_fs_cmn_err(CE_WARN, mp, "xfs_log_force: "
+			"error %d returned.", error);
+	}
+}
+
 
 /*
  * Attaches a new iclog I/O completion callback routine during
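
The new void wrapper keeps the common "push the log and move on" call sites to one line, while callers that must act on failure use the underscore variant directly. Both usages below follow patterns found elsewhere in this diff:

	/* fire-and-forget: unpin items / move the log forward */
	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);

	/* error-aware caller, e.g. the unmount path further down */
	error = _xfs_log_force(mp, 0, XFS_LOG_FORCE | XFS_LOG_SYNC, NULL);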
@@ -397,12 +415,10 @@ xfs_log_notify(xfs_mount_t *mp,	/* mount of partition */
 	      void		  *iclog_hndl,	/* iclog to hang callback off */
 	      xfs_log_callback_t  *cb)
 {
-	xlog_t *log = mp->m_log;
 	xlog_in_core_t	  *iclog = (xlog_in_core_t *)iclog_hndl;
 	int	abortflg;
 
-	cb->cb_next = NULL;
-	spin_lock(&log->l_icloglock);
+	spin_lock(&iclog->ic_callback_lock);
 	abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
 	if (!abortflg) {
 		ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
@@ -411,7 +427,7 @@ xfs_log_notify(xfs_mount_t *mp,	/* mount of partition */
 		*(iclog->ic_callback_tail) = cb;
 		iclog->ic_callback_tail = &(cb->cb_next);
 	}
-	spin_unlock(&log->l_icloglock);
+	spin_unlock(&iclog->ic_callback_lock);
 	return abortflg;
 }	/* xfs_log_notify */
 
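xfs_log_notify() now serialises only on the new per-iclog ic_callback_lock rather than the global l_icloglock. The callback chain it guards is a singly linked list with a tail pointer, so appends stay O(1); a stand-alone sketch of that idiom with generic, hypothetical names:

	struct cb {
		void		(*fn)(void *arg, int aborted);
		void		*arg;
		struct cb	*next;
	};

	struct cb_list {
		struct cb	*head;
		struct cb	**tailp;	/* &head while empty, else &last->next */
	};

	static void cb_append(struct cb_list *l, struct cb *item)
	{
		item->next = NULL;
		*l->tailp = item;		/* link behind the current tail */
		l->tailp = &item->next;		/* remember the new tail slot */
	}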
@@ -471,6 +487,8 @@ xfs_log_reserve(xfs_mount_t *mp,
 		/* may sleep if need to allocate more tickets */
 		internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
 						  client, flags);
+		if (!internal_ticket)
+			return XFS_ERROR(ENOMEM);
 		internal_ticket->t_trans_type = t_type;
 		*ticket = internal_ticket;
 		xlog_trace_loggrant(log, internal_ticket,
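
The ENOMEM check is new because xlog_ticket_get() can now fail: as the rewrite near the end of this diff shows, it allocates with KM_SLEEP|KM_MAYFAIL, which (by the assumed XFS kmem convention) may give up and return NULL instead of retrying forever. The contract, restated:

	/*
	 * KM_SLEEP            - may block, but only returns on success
	 * KM_SLEEP|KM_MAYFAIL - may block, yet is allowed to return NULL
	 */
	tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP | KM_MAYFAIL);
	if (!tic)
		return NULL;	/* xfs_log_reserve() maps this to ENOMEM */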
@@ -636,7 +654,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 	if (mp->m_flags & XFS_MOUNT_RDONLY)
 		return 0;
 
-	xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC);
+	error = _xfs_log_force(mp, 0, XFS_LOG_FORCE|XFS_LOG_SYNC, NULL);
+	ASSERT(error || !(XLOG_FORCED_SHUTDOWN(log)));
 
 #ifdef DEBUG
 	first_iclog = iclog = log->l_iclog;
@@ -675,10 +694,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 		xlog_state_want_sync(log, iclog);
-		(void) xlog_state_release_iclog(log, iclog);
+		error = xlog_state_release_iclog(log, iclog);
 
 		spin_lock(&log->l_icloglock);
 		if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
@@ -695,7 +714,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 			if (tic) {
 				xlog_trace_loggrant(log, tic, "unmount rec");
 				xlog_ungrant_log_space(log, tic);
-				xlog_state_put_ticket(log, tic);
+				xlog_ticket_put(log, tic);
 			}
 	} else {
 		/*
@@ -713,11 +732,11 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		 */
 		spin_lock(&log->l_icloglock);
 		iclog = log->l_iclog;
-		iclog->ic_refcnt++;
+		atomic_inc(&iclog->ic_refcnt);
 		spin_unlock(&log->l_icloglock);
 
 		xlog_state_want_sync(log, iclog);
-		(void) xlog_state_release_iclog(log, iclog);
+		error = xlog_state_release_iclog(log, iclog);
 
 		spin_lock(&log->l_icloglock);
 
@@ -732,7 +751,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 		}
 	}
 
-	return 0;
+	return error;
 }	/* xfs_log_unmount_write */
 
 /*
@@ -1210,7 +1229,6 @@ xlog_alloc_log(xfs_mount_t *mp,
 	spin_lock_init(&log->l_icloglock);
 	spin_lock_init(&log->l_grant_lock);
 	initnsema(&log->l_flushsema, 0, "ic-flush");
-	xlog_state_ticket_alloc(log);  /* wait until after icloglock inited */
 
 	/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
 	ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1240,9 +1258,9 @@ xlog_alloc_log(xfs_mount_t *mp,
 		XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
 		iclog->ic_bp = bp;
 		iclog->hic_data = bp->b_addr;
-
+#ifdef DEBUG
 		log->l_iclog_bak[i] = (xfs_caddr_t)&(iclog->ic_header);
-
+#endif
 		head = &iclog->ic_header;
 		memset(head, 0, sizeof(xlog_rec_header_t));
 		head->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM);
@@ -1253,10 +1271,11 @@ xlog_alloc_log(xfs_mount_t *mp,
 		head->h_fmt = cpu_to_be32(XLOG_FMT);
 		memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
 
-
 		iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
 		iclog->ic_state = XLOG_STATE_ACTIVE;
 		iclog->ic_log = log;
+		atomic_set(&iclog->ic_refcnt, 0);
+		spin_lock_init(&iclog->ic_callback_lock);
 		iclog->ic_callback_tail = &(iclog->ic_callback);
 		iclog->ic_datap = (char *)iclog->hic_data + log->l_iclog_hsize;
 
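Each iclog now carries its own callback lock and an atomic reference count, both initialised here. The division of labour this diff appears to establish, summarised as a comment rather than code from the tree:

	/*
	 * log->l_icloglock        - iclog state machine (ic_state changes)
	 * iclog->ic_callback_lock - the ic_callback/ic_callback_tail chain
	 * iclog->ic_refcnt        - atomic_t; inc/dec/read no longer need
	 *                           l_icloglock on their own
	 */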
@@ -1405,7 +1424,7 @@ xlog_sync(xlog_t *log,
 	int		v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
 
 	XFS_STATS_INC(xs_log_writes);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 
 	/* Add for LR header */
 	count_init = log->l_iclog_hsize + iclog->ic_offset;
@@ -1538,7 +1557,6 @@ STATIC void
 xlog_dealloc_log(xlog_t *log)
 {
 	xlog_in_core_t	*iclog, *next_iclog;
-	xlog_ticket_t	*tic, *next_tic;
 	int		i;
 
 	iclog = log->l_iclog;
@@ -1559,22 +1577,6 @@ xlog_dealloc_log(xlog_t *log)
 	spinlock_destroy(&log->l_icloglock);
 	spinlock_destroy(&log->l_grant_lock);
 
-	/* XXXsup take a look at this again. */
-	if ((log->l_ticket_cnt != log->l_ticket_tcnt)  &&
-	    !XLOG_FORCED_SHUTDOWN(log)) {
-		xfs_fs_cmn_err(CE_WARN, log->l_mp,
-			"xlog_dealloc_log: (cnt: %d, total: %d)",
-			log->l_ticket_cnt, log->l_ticket_tcnt);
-		/* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */
-
-	} else {
-		tic = log->l_unmount_free;
-		while (tic) {
-			next_tic = tic->t_next;
-			kmem_free(tic, PAGE_SIZE);
-			tic = next_tic;
-		}
-	}
 	xfs_buf_free(log->l_xbuf);
 #ifdef XFS_LOG_TRACE
 	if (log->l_trace != NULL) {
@@ -1987,7 +1989,7 @@ xlog_state_clean_log(xlog_t *log)
 		if (iclog->ic_state == XLOG_STATE_DIRTY) {
 			iclog->ic_state	= XLOG_STATE_ACTIVE;
 			iclog->ic_offset       = 0;
-			iclog->ic_callback	= NULL;   /* don't need to free */
+			ASSERT(iclog->ic_callback == NULL);
 			/*
 			 * If the number of ops in this iclog indicate it just
 			 * contains the dummy transaction, we can
@@ -2190,37 +2192,40 @@ xlog_state_do_callback(
 					be64_to_cpu(iclog->ic_header.h_lsn);
 				spin_unlock(&log->l_grant_lock);
 
-				/*
-				 * Keep processing entries in the callback list
-				 * until we come around and it is empty.  We
-				 * need to atomically see that the list is
-				 * empty and change the state to DIRTY so that
-				 * we don't miss any more callbacks being added.
-				 */
-				spin_lock(&log->l_icloglock);
 			} else {
+				spin_unlock(&log->l_icloglock);
 				ioerrors++;
 			}
-			cb = iclog->ic_callback;
 
+			/*
+			 * Keep processing entries in the callback list until
+			 * we come around and it is empty.  We need to
+			 * atomically see that the list is empty and change the
+			 * state to DIRTY so that we don't miss any more
+			 * callbacks being added.
+			 */
+			spin_lock(&iclog->ic_callback_lock);
+			cb = iclog->ic_callback;
 			while (cb) {
 				iclog->ic_callback_tail = &(iclog->ic_callback);
 				iclog->ic_callback = NULL;
-				spin_unlock(&log->l_icloglock);
+				spin_unlock(&iclog->ic_callback_lock);
 
 				/* perform callbacks in the order given */
 				for (; cb; cb = cb_next) {
 					cb_next = cb->cb_next;
 					cb->cb_func(cb->cb_arg, aborted);
 				}
-				spin_lock(&log->l_icloglock);
+				spin_lock(&iclog->ic_callback_lock);
 				cb = iclog->ic_callback;
 			}
 
 			loopdidcallbacks++;
 			funcdidcallbacks++;
 
+			spin_lock(&log->l_icloglock);
 			ASSERT(iclog->ic_callback == NULL);
+			spin_unlock(&iclog->ic_callback_lock);
 			if (!(iclog->ic_state & XLOG_STATE_IOERROR))
 				iclog->ic_state = XLOG_STATE_DIRTY;
 
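The loop above detaches the whole chain under ic_callback_lock, runs the callbacks with no locks held, and repeats until it sees the list empty while still holding the lock, so a concurrent xfs_log_notify() cannot slip a callback past the transition to DIRTY. The same drain idiom in miniature, reusing the hypothetical cb_list sketch from earlier, extended with a spinlock:

	struct cb *item, *next;

	spin_lock(&l->lock);
	item = l->head;
	while (item) {
		l->head = NULL;
		l->tailp = &l->head;		/* reset to the empty state */
		spin_unlock(&l->lock);

		for (; item; item = next) {	/* run entries unlocked */
			next = item->next;
			item->fn(item->arg, aborted);
		}
		spin_lock(&l->lock);
		item = l->head;			/* anything appended meanwhile? */
	}
	/* list seen empty under l->lock: safe to flip state */
	spin_unlock(&l->lock);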
@@ -2241,7 +2246,7 @@ xlog_state_do_callback(
 				repeats = 0;
 				xfs_fs_cmn_err(CE_WARN, log->l_mp,
 					"%s: possible infinite loop (%d iterations)",
-					__FUNCTION__, flushcnt);
+					__func__, flushcnt);
 			}
 		} while (!ioerrors && loopdidcallbacks);
 
@@ -2309,7 +2314,7 @@ xlog_state_done_syncing(
 
 	ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
 	       iclog->ic_state == XLOG_STATE_IOERROR);
-	ASSERT(iclog->ic_refcnt == 0);
+	ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
 	ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
 
 
@@ -2391,7 +2396,7 @@ restart:
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
 	head = &iclog->ic_header;
 
-	iclog->ic_refcnt++;			/* prevents sync */
+	atomic_inc(&iclog->ic_refcnt);	/* prevents sync */
 	log_offset = iclog->ic_offset;
 
 	/* On the 1st write to an iclog, figure out lsn.  This works
@@ -2423,12 +2428,12 @@ restart:
 		xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
 
 		/* If I'm the only one writing to this iclog, sync it to disk */
-		if (iclog->ic_refcnt == 1) {
+		if (atomic_read(&iclog->ic_refcnt) == 1) {
 			spin_unlock(&log->l_icloglock);
 			if ((error = xlog_state_release_iclog(log, iclog)))
 				return error;
 		} else {
-			iclog->ic_refcnt--;
+			atomic_dec(&iclog->ic_refcnt);
 			spin_unlock(&log->l_icloglock);
 		}
 		goto restart;
@@ -2792,18 +2797,6 @@ xlog_ungrant_log_space(xlog_t *log,
 
 
 /*
- * Atomically put back used ticket.
- */
-STATIC void
-xlog_state_put_ticket(xlog_t	*log,
-		      xlog_ticket_t *tic)
-{
-	spin_lock(&log->l_icloglock);
-	xlog_ticket_put(log, tic);
-	spin_unlock(&log->l_icloglock);
-}	/* xlog_state_put_ticket */
-
-/*
  * Flush iclog to disk if this is the last reference to the given iclog and
  * the WANT_SYNC bit is set.
 *
@@ -2813,33 +2806,35 @@ xlog_state_put_ticket(xlog_t	*log,
  *
  */
 STATIC int
-xlog_state_release_iclog(xlog_t		*log,
-			 xlog_in_core_t	*iclog)
+xlog_state_release_iclog(
+	xlog_t		*log,
+	xlog_in_core_t	*iclog)
 {
 	int		sync = 0;	/* do we sync? */
 
-	xlog_assign_tail_lsn(log->l_mp);
+	if (iclog->ic_state & XLOG_STATE_IOERROR)
+		return XFS_ERROR(EIO);
 
-	spin_lock(&log->l_icloglock);
+	ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
+	if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
+		return 0;
 
 	if (iclog->ic_state & XLOG_STATE_IOERROR) {
 		spin_unlock(&log->l_icloglock);
 		return XFS_ERROR(EIO);
 	}
-
-	ASSERT(iclog->ic_refcnt > 0);
 	ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
 	       iclog->ic_state == XLOG_STATE_WANT_SYNC);
 
-	if (--iclog->ic_refcnt == 0 &&
-	    iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+	if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+		/* update tail before writing to iclog */
+		xlog_assign_tail_lsn(log->l_mp);
 		sync++;
 		iclog->ic_state = XLOG_STATE_SYNCING;
 		iclog->ic_header.h_tail_lsn = cpu_to_be64(log->l_tail_lsn);
 		xlog_verify_tail_lsn(log, iclog, log->l_tail_lsn);
 		/* cycle incremented when incrementing curr_block */
 	}
-
 	spin_unlock(&log->l_icloglock);
 
 	/*
@@ -2849,11 +2844,9 @@ xlog_state_release_iclog(xlog_t *log,
 	 * this iclog has consistent data, so we ignore IOERROR
 	 * flags after this point.
 	 */
-	if (sync) {
+	if (sync)
 		return xlog_sync(log, iclog);
-	}
 	return 0;
-
 }	/* xlog_state_release_iclog */
 
 
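The rewritten xlog_state_release_iclog() is built on atomic_dec_and_lock(): callers that are not dropping the last reference never touch l_icloglock at all, and the one that takes the count to zero returns with the lock held, so the zero state cannot race with a new grab. The bare idiom, on a hypothetical refcounted object:

	if (!atomic_dec_and_lock(&obj->refcnt, &obj->lock))
		return 0;	/* other references remain; lock untouched */

	/* refcnt == 0 and obj->lock held: safe to examine and tear down */
	/* ... state transition / sync work goes here ... */
	spin_unlock(&obj->lock);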
@@ -2953,7 +2946,8 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 		 * previous iclog and go to sleep.
 		 */
 		if (iclog->ic_state == XLOG_STATE_DIRTY ||
-		    (iclog->ic_refcnt == 0 && iclog->ic_offset == 0)) {
+		    (atomic_read(&iclog->ic_refcnt) == 0
+		      && iclog->ic_offset == 0)) {
 			iclog = iclog->ic_prev;
 			if (iclog->ic_state == XLOG_STATE_ACTIVE ||
 			    iclog->ic_state == XLOG_STATE_DIRTY)
@@ -2961,14 +2955,14 @@ xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
 			else
 				goto maybe_sleep;
 		} else {
-			if (iclog->ic_refcnt == 0) {
+			if (atomic_read(&iclog->ic_refcnt) == 0) {
 				/* We are the only one with access to this
 				 * iclog.  Flush it out now.  There should
 				 * be a roundoff of zero to show that someone
 				 * has already taken care of the roundoff from
 				 * the previous sync.
 				 */
-				iclog->ic_refcnt++;
+				atomic_inc(&iclog->ic_refcnt);
 				lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 				xlog_state_switch_iclogs(log, iclog, 0);
 				spin_unlock(&log->l_icloglock);
@@ -3100,7 +3094,7 @@ try_again:
 			already_slept = 1;
 			goto try_again;
 		} else {
-			iclog->ic_refcnt++;
+			atomic_inc(&iclog->ic_refcnt);
 			xlog_state_switch_iclogs(log, iclog, 0);
 			spin_unlock(&log->l_icloglock);
 			if (xlog_state_release_iclog(log, iclog))
@@ -3172,92 +3166,19 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
  */
 
 /*
- * Algorithm doesn't take into account page size. ;-(
- */
-STATIC void
-xlog_state_ticket_alloc(xlog_t *log)
-{
-	xlog_ticket_t	*t_list;
-	xlog_ticket_t	*next;
-	xfs_caddr_t	buf;
-	uint		i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2;
-
-	/*
-	 * The kmem_zalloc may sleep, so we shouldn't be holding the
-	 * global lock.  XXXmiken: may want to use zone allocator.
-	 */
-	buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP);
-
-	spin_lock(&log->l_icloglock);
-
-	/* Attach 1st ticket to Q, so we can keep track of allocated memory */
-	t_list = (xlog_ticket_t *)buf;
-	t_list->t_next = log->l_unmount_free;
-	log->l_unmount_free = t_list++;
-	log->l_ticket_cnt++;
-	log->l_ticket_tcnt++;
-
-	/* Next ticket becomes first ticket attached to ticket free list */
-	if (log->l_freelist != NULL) {
-		ASSERT(log->l_tail != NULL);
-		log->l_tail->t_next = t_list;
-	} else {
-		log->l_freelist = t_list;
-	}
-	log->l_ticket_cnt++;
-	log->l_ticket_tcnt++;
-
-	/* Cycle through rest of alloc'ed memory, building up free Q */
-	for ( ; i > 0; i--) {
-		next = t_list + 1;
-		t_list->t_next = next;
-		t_list = next;
-		log->l_ticket_cnt++;
-		log->l_ticket_tcnt++;
-	}
-	t_list->t_next = NULL;
-	log->l_tail = t_list;
-	spin_unlock(&log->l_icloglock);
-}	/* xlog_state_ticket_alloc */
-
-
-/*
- * Put ticket into free list
- *
- * Assumption: log lock is held around this call.
+ * Free a used ticket.
  */
 STATIC void
 xlog_ticket_put(xlog_t		*log,
 		xlog_ticket_t	*ticket)
 {
 	sv_destroy(&ticket->t_sema);
-
-	/*
-	 * Don't think caching will make that much difference.  It's
-	 * more important to make debug easier.
-	 */
-#if 0
-	/* real code will want to use LIFO for caching */
-	ticket->t_next = log->l_freelist;
-	log->l_freelist = ticket;
-	/* no need to clear fields */
-#else
-	/* When we debug, it is easier if tickets are cycled */
-	ticket->t_next = NULL;
-	if (log->l_tail) {
-		log->l_tail->t_next = ticket;
-	} else {
-		ASSERT(log->l_freelist == NULL);
-		log->l_freelist = ticket;
-	}
-	log->l_tail = ticket;
-#endif /* DEBUG */
-	log->l_ticket_cnt++;
+	kmem_zone_free(xfs_log_ticket_zone, ticket);
 }	/* xlog_ticket_put */
 
 
 /*
- * Grab ticket off freelist or allocation some more
+ * Allocate and initialise a new log ticket.
  */
 STATIC xlog_ticket_t *
 xlog_ticket_get(xlog_t		*log,
@@ -3269,21 +3190,9 @@ xlog_ticket_get(xlog_t	*log,
 	xlog_ticket_t	*tic;
 	uint		num_headers;
 
- alloc:
-	if (log->l_freelist == NULL)
-		xlog_state_ticket_alloc(log);	/* potentially sleep */
-
-	spin_lock(&log->l_icloglock);
-	if (log->l_freelist == NULL) {
-		spin_unlock(&log->l_icloglock);
-		goto alloc;
-	}
-	tic = log->l_freelist;
-	log->l_freelist = tic->t_next;
-	if (log->l_freelist == NULL)
-		log->l_tail = NULL;
-	log->l_ticket_cnt--;
-	spin_unlock(&log->l_icloglock);
+	tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
+	if (!tic)
+		return NULL;
 
 	/*
 	 * Permanent reservations have up to 'cnt'-1 active log operations
@@ -3611,8 +3520,8 @@ xfs_log_force_umount(
 	 * before we mark the filesystem SHUTDOWN and wake
 	 * everybody up to tell the bad news.
 	 */
-	spin_lock(&log->l_grant_lock);
 	spin_lock(&log->l_icloglock);
+	spin_lock(&log->l_grant_lock);
 	mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
 	XFS_BUF_DONE(mp->m_sb_bp);
 	/*
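
Reordering the two spin_lock() calls makes l_icloglock the outer of the pair, so the shutdown path presumably now matches the nesting used on the hot paths: take l_icloglock before l_grant_lock, never the reverse. As a comment:

	/*
	 * assumed lock ordering after this change (outer first):
	 *	l_icloglock  ->  l_grant_lock
	 */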