diff options
author | David Chinner <dgc@sgi.com> | 2008-04-09 22:18:46 -0400 |
---|---|---|
committer | Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> | 2008-04-17 21:50:39 -0400 |
commit | eb01c9cd87c7a9998c2edf209721ea069e3e3652 (patch) | |
tree | 201fc07dac684d6a5a789995c8c35918ea16de7f | |
parent | 114d23aae51233b2bc62d8e2a632bcb55de1953d (diff) |
[XFS] Remove the xlog_ticket allocator
The ticket allocator is just a simple slab implementation internal to the
log. It requires the icloglock to be held whenever it is manipulated, and
this contributes to contention on that lock.
Remove the entire allocator and use a memory zone instead. While there,
allow ticket allocation to fail gracefully with ENOMEM.
SGI-PV: 978729
SGI-Modid: xfs-linux-melb:xfs-kern:30771a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
-rw-r--r-- | fs/xfs/xfs_log.c | 137 | ||||
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_vfsops.c | 12 |
3 files changed, 21 insertions, 137 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 7a5b12d93537..3cf115d8de75 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include "xfs_inode.h" | 41 | #include "xfs_inode.h" |
42 | #include "xfs_rw.h" | 42 | #include "xfs_rw.h" |
43 | 43 | ||
44 | kmem_zone_t *xfs_log_ticket_zone; | ||
44 | 45 | ||
45 | #define xlog_write_adv_cnt(ptr, len, off, bytes) \ | 46 | #define xlog_write_adv_cnt(ptr, len, off, bytes) \ |
46 | { (ptr) += (bytes); \ | 47 | { (ptr) += (bytes); \ |
@@ -73,8 +74,6 @@ STATIC int xlog_state_get_iclog_space(xlog_t *log, | |||
73 | xlog_ticket_t *ticket, | 74 | xlog_ticket_t *ticket, |
74 | int *continued_write, | 75 | int *continued_write, |
75 | int *logoffsetp); | 76 | int *logoffsetp); |
76 | STATIC void xlog_state_put_ticket(xlog_t *log, | ||
77 | xlog_ticket_t *tic); | ||
78 | STATIC int xlog_state_release_iclog(xlog_t *log, | 77 | STATIC int xlog_state_release_iclog(xlog_t *log, |
79 | xlog_in_core_t *iclog); | 78 | xlog_in_core_t *iclog); |
80 | STATIC void xlog_state_switch_iclogs(xlog_t *log, | 79 | STATIC void xlog_state_switch_iclogs(xlog_t *log, |
@@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log, | |||
101 | 100 | ||
102 | 101 | ||
103 | /* local ticket functions */ | 102 | /* local ticket functions */ |
104 | STATIC void xlog_state_ticket_alloc(xlog_t *log); | ||
105 | STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, | 103 | STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, |
106 | int unit_bytes, | 104 | int unit_bytes, |
107 | int count, | 105 | int count, |
@@ -330,7 +328,7 @@ xfs_log_done(xfs_mount_t *mp, | |||
330 | */ | 328 | */ |
331 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); | 329 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); |
332 | xlog_ungrant_log_space(log, ticket); | 330 | xlog_ungrant_log_space(log, ticket); |
333 | xlog_state_put_ticket(log, ticket); | 331 | xlog_ticket_put(log, ticket); |
334 | } else { | 332 | } else { |
335 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); | 333 | xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); |
336 | xlog_regrant_reserve_log_space(log, ticket); | 334 | xlog_regrant_reserve_log_space(log, ticket); |
@@ -469,6 +467,8 @@ xfs_log_reserve(xfs_mount_t *mp, | |||
469 | /* may sleep if need to allocate more tickets */ | 467 | /* may sleep if need to allocate more tickets */ |
470 | internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, | 468 | internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, |
471 | client, flags); | 469 | client, flags); |
470 | if (!internal_ticket) | ||
471 | return XFS_ERROR(ENOMEM); | ||
472 | internal_ticket->t_trans_type = t_type; | 472 | internal_ticket->t_trans_type = t_type; |
473 | *ticket = internal_ticket; | 473 | *ticket = internal_ticket; |
474 | xlog_trace_loggrant(log, internal_ticket, | 474 | xlog_trace_loggrant(log, internal_ticket, |
@@ -693,7 +693,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) | |||
693 | if (tic) { | 693 | if (tic) { |
694 | xlog_trace_loggrant(log, tic, "unmount rec"); | 694 | xlog_trace_loggrant(log, tic, "unmount rec"); |
695 | xlog_ungrant_log_space(log, tic); | 695 | xlog_ungrant_log_space(log, tic); |
696 | xlog_state_put_ticket(log, tic); | 696 | xlog_ticket_put(log, tic); |
697 | } | 697 | } |
698 | } else { | 698 | } else { |
699 | /* | 699 | /* |
@@ -1208,7 +1208,6 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1208 | spin_lock_init(&log->l_icloglock); | 1208 | spin_lock_init(&log->l_icloglock); |
1209 | spin_lock_init(&log->l_grant_lock); | 1209 | spin_lock_init(&log->l_grant_lock); |
1210 | initnsema(&log->l_flushsema, 0, "ic-flush"); | 1210 | initnsema(&log->l_flushsema, 0, "ic-flush"); |
1211 | xlog_state_ticket_alloc(log); /* wait until after icloglock inited */ | ||
1212 | 1211 | ||
1213 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1212 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
1214 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1213 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
@@ -1538,7 +1537,6 @@ STATIC void | |||
1538 | xlog_dealloc_log(xlog_t *log) | 1537 | xlog_dealloc_log(xlog_t *log) |
1539 | { | 1538 | { |
1540 | xlog_in_core_t *iclog, *next_iclog; | 1539 | xlog_in_core_t *iclog, *next_iclog; |
1541 | xlog_ticket_t *tic, *next_tic; | ||
1542 | int i; | 1540 | int i; |
1543 | 1541 | ||
1544 | iclog = log->l_iclog; | 1542 | iclog = log->l_iclog; |
@@ -1559,22 +1557,6 @@ xlog_dealloc_log(xlog_t *log) | |||
1559 | spinlock_destroy(&log->l_icloglock); | 1557 | spinlock_destroy(&log->l_icloglock); |
1560 | spinlock_destroy(&log->l_grant_lock); | 1558 | spinlock_destroy(&log->l_grant_lock); |
1561 | 1559 | ||
1562 | /* XXXsup take a look at this again. */ | ||
1563 | if ((log->l_ticket_cnt != log->l_ticket_tcnt) && | ||
1564 | !XLOG_FORCED_SHUTDOWN(log)) { | ||
1565 | xfs_fs_cmn_err(CE_WARN, log->l_mp, | ||
1566 | "xlog_dealloc_log: (cnt: %d, total: %d)", | ||
1567 | log->l_ticket_cnt, log->l_ticket_tcnt); | ||
1568 | /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */ | ||
1569 | |||
1570 | } else { | ||
1571 | tic = log->l_unmount_free; | ||
1572 | while (tic) { | ||
1573 | next_tic = tic->t_next; | ||
1574 | kmem_free(tic, PAGE_SIZE); | ||
1575 | tic = next_tic; | ||
1576 | } | ||
1577 | } | ||
1578 | xfs_buf_free(log->l_xbuf); | 1560 | xfs_buf_free(log->l_xbuf); |
1579 | #ifdef XFS_LOG_TRACE | 1561 | #ifdef XFS_LOG_TRACE |
1580 | if (log->l_trace != NULL) { | 1562 | if (log->l_trace != NULL) { |
@@ -2795,18 +2777,6 @@ xlog_ungrant_log_space(xlog_t *log, | |||
2795 | 2777 | ||
2796 | 2778 | ||
2797 | /* | 2779 | /* |
2798 | * Atomically put back used ticket. | ||
2799 | */ | ||
2800 | STATIC void | ||
2801 | xlog_state_put_ticket(xlog_t *log, | ||
2802 | xlog_ticket_t *tic) | ||
2803 | { | ||
2804 | spin_lock(&log->l_icloglock); | ||
2805 | xlog_ticket_put(log, tic); | ||
2806 | spin_unlock(&log->l_icloglock); | ||
2807 | } /* xlog_state_put_ticket */ | ||
2808 | |||
2809 | /* | ||
2810 | * Flush iclog to disk if this is the last reference to the given iclog and | 2780 | * Flush iclog to disk if this is the last reference to the given iclog and |
2811 | * the WANT_SYNC bit is set. | 2781 | * the WANT_SYNC bit is set. |
2812 | * | 2782 | * |
@@ -3176,92 +3146,19 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) | |||
3176 | */ | 3146 | */ |
3177 | 3147 | ||
3178 | /* | 3148 | /* |
3179 | * Algorithm doesn't take into account page size. ;-( | 3149 | * Free a used ticket. |
3180 | */ | ||
3181 | STATIC void | ||
3182 | xlog_state_ticket_alloc(xlog_t *log) | ||
3183 | { | ||
3184 | xlog_ticket_t *t_list; | ||
3185 | xlog_ticket_t *next; | ||
3186 | xfs_caddr_t buf; | ||
3187 | uint i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2; | ||
3188 | |||
3189 | /* | ||
3190 | * The kmem_zalloc may sleep, so we shouldn't be holding the | ||
3191 | * global lock. XXXmiken: may want to use zone allocator. | ||
3192 | */ | ||
3193 | buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP); | ||
3194 | |||
3195 | spin_lock(&log->l_icloglock); | ||
3196 | |||
3197 | /* Attach 1st ticket to Q, so we can keep track of allocated memory */ | ||
3198 | t_list = (xlog_ticket_t *)buf; | ||
3199 | t_list->t_next = log->l_unmount_free; | ||
3200 | log->l_unmount_free = t_list++; | ||
3201 | log->l_ticket_cnt++; | ||
3202 | log->l_ticket_tcnt++; | ||
3203 | |||
3204 | /* Next ticket becomes first ticket attached to ticket free list */ | ||
3205 | if (log->l_freelist != NULL) { | ||
3206 | ASSERT(log->l_tail != NULL); | ||
3207 | log->l_tail->t_next = t_list; | ||
3208 | } else { | ||
3209 | log->l_freelist = t_list; | ||
3210 | } | ||
3211 | log->l_ticket_cnt++; | ||
3212 | log->l_ticket_tcnt++; | ||
3213 | |||
3214 | /* Cycle through rest of alloc'ed memory, building up free Q */ | ||
3215 | for ( ; i > 0; i--) { | ||
3216 | next = t_list + 1; | ||
3217 | t_list->t_next = next; | ||
3218 | t_list = next; | ||
3219 | log->l_ticket_cnt++; | ||
3220 | log->l_ticket_tcnt++; | ||
3221 | } | ||
3222 | t_list->t_next = NULL; | ||
3223 | log->l_tail = t_list; | ||
3224 | spin_unlock(&log->l_icloglock); | ||
3225 | } /* xlog_state_ticket_alloc */ | ||
3226 | |||
3227 | |||
3228 | /* | ||
3229 | * Put ticket into free list | ||
3230 | * | ||
3231 | * Assumption: log lock is held around this call. | ||
3232 | */ | 3150 | */ |
3233 | STATIC void | 3151 | STATIC void |
3234 | xlog_ticket_put(xlog_t *log, | 3152 | xlog_ticket_put(xlog_t *log, |
3235 | xlog_ticket_t *ticket) | 3153 | xlog_ticket_t *ticket) |
3236 | { | 3154 | { |
3237 | sv_destroy(&ticket->t_sema); | 3155 | sv_destroy(&ticket->t_sema); |
3238 | 3156 | kmem_zone_free(xfs_log_ticket_zone, ticket); | |
3239 | /* | ||
3240 | * Don't think caching will make that much difference. It's | ||
3241 | * more important to make debug easier. | ||
3242 | */ | ||
3243 | #if 0 | ||
3244 | /* real code will want to use LIFO for caching */ | ||
3245 | ticket->t_next = log->l_freelist; | ||
3246 | log->l_freelist = ticket; | ||
3247 | /* no need to clear fields */ | ||
3248 | #else | ||
3249 | /* When we debug, it is easier if tickets are cycled */ | ||
3250 | ticket->t_next = NULL; | ||
3251 | if (log->l_tail) { | ||
3252 | log->l_tail->t_next = ticket; | ||
3253 | } else { | ||
3254 | ASSERT(log->l_freelist == NULL); | ||
3255 | log->l_freelist = ticket; | ||
3256 | } | ||
3257 | log->l_tail = ticket; | ||
3258 | #endif /* DEBUG */ | ||
3259 | log->l_ticket_cnt++; | ||
3260 | } /* xlog_ticket_put */ | 3157 | } /* xlog_ticket_put */ |
3261 | 3158 | ||
3262 | 3159 | ||
3263 | /* | 3160 | /* |
3264 | * Grab ticket off freelist or allocation some more | 3161 | * Allocate and initialise a new log ticket. |
3265 | */ | 3162 | */ |
3266 | STATIC xlog_ticket_t * | 3163 | STATIC xlog_ticket_t * |
3267 | xlog_ticket_get(xlog_t *log, | 3164 | xlog_ticket_get(xlog_t *log, |
@@ -3273,21 +3170,9 @@ xlog_ticket_get(xlog_t *log, | |||
3273 | xlog_ticket_t *tic; | 3170 | xlog_ticket_t *tic; |
3274 | uint num_headers; | 3171 | uint num_headers; |
3275 | 3172 | ||
3276 | alloc: | 3173 | tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL); |
3277 | if (log->l_freelist == NULL) | 3174 | if (!tic) |
3278 | xlog_state_ticket_alloc(log); /* potentially sleep */ | 3175 | return NULL; |
3279 | |||
3280 | spin_lock(&log->l_icloglock); | ||
3281 | if (log->l_freelist == NULL) { | ||
3282 | spin_unlock(&log->l_icloglock); | ||
3283 | goto alloc; | ||
3284 | } | ||
3285 | tic = log->l_freelist; | ||
3286 | log->l_freelist = tic->t_next; | ||
3287 | if (log->l_freelist == NULL) | ||
3288 | log->l_tail = NULL; | ||
3289 | log->l_ticket_cnt--; | ||
3290 | spin_unlock(&log->l_icloglock); | ||
3291 | 3176 | ||
3292 | /* | 3177 | /* |
3293 | * Permanent reservations have up to 'cnt'-1 active log operations | 3178 | * Permanent reservations have up to 'cnt'-1 active log operations |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 104b623aa082..c1583960009d 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -242,7 +242,7 @@ typedef struct xlog_res { | |||
242 | 242 | ||
243 | typedef struct xlog_ticket { | 243 | typedef struct xlog_ticket { |
244 | sv_t t_sema; /* sleep on this semaphore : 20 */ | 244 | sv_t t_sema; /* sleep on this semaphore : 20 */ |
245 | struct xlog_ticket *t_next; /* :4|8 */ | 245 | struct xlog_ticket *t_next; /* :4|8 */ |
246 | struct xlog_ticket *t_prev; /* :4|8 */ | 246 | struct xlog_ticket *t_prev; /* :4|8 */ |
247 | xlog_tid_t t_tid; /* transaction identifier : 4 */ | 247 | xlog_tid_t t_tid; /* transaction identifier : 4 */ |
248 | int t_curr_res; /* current reservation in bytes : 4 */ | 248 | int t_curr_res; /* current reservation in bytes : 4 */ |
@@ -406,13 +406,8 @@ typedef struct log { | |||
406 | sema_t l_flushsema; /* iclog flushing semaphore */ | 406 | sema_t l_flushsema; /* iclog flushing semaphore */ |
407 | int l_flushcnt; /* # of procs waiting on this | 407 | int l_flushcnt; /* # of procs waiting on this |
408 | * sema */ | 408 | * sema */ |
409 | int l_ticket_cnt; /* free ticket count */ | ||
410 | int l_ticket_tcnt; /* total ticket count */ | ||
411 | int l_covered_state;/* state of "covering disk | 409 | int l_covered_state;/* state of "covering disk |
412 | * log entries" */ | 410 | * log entries" */ |
413 | xlog_ticket_t *l_freelist; /* free list of tickets */ | ||
414 | xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */ | ||
415 | xlog_ticket_t *l_tail; /* free list of tickets */ | ||
416 | xlog_in_core_t *l_iclog; /* head log queue */ | 411 | xlog_in_core_t *l_iclog; /* head log queue */ |
417 | spinlock_t l_icloglock; /* grab to change iclog state */ | 412 | spinlock_t l_icloglock; /* grab to change iclog state */ |
418 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed | 413 | xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed |
@@ -478,6 +473,8 @@ extern struct xfs_buf *xlog_get_bp(xlog_t *, int); | |||
478 | extern void xlog_put_bp(struct xfs_buf *); | 473 | extern void xlog_put_bp(struct xfs_buf *); |
479 | extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); | 474 | extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); |
480 | 475 | ||
476 | extern kmem_zone_t *xfs_log_ticket_zone; | ||
477 | |||
481 | /* iclog tracing */ | 478 | /* iclog tracing */ |
482 | #define XLOG_TRACE_GRAB_FLUSH 1 | 479 | #define XLOG_TRACE_GRAB_FLUSH 1 |
483 | #define XLOG_TRACE_REL_FLUSH 2 | 480 | #define XLOG_TRACE_REL_FLUSH 2 |
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index c21e4d168297..ea94593b5313 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c | |||
@@ -69,15 +69,17 @@ xfs_init(void) | |||
69 | /* | 69 | /* |
70 | * Initialize all of the zone allocators we use. | 70 | * Initialize all of the zone allocators we use. |
71 | */ | 71 | */ |
72 | xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), | ||
73 | "xfs_log_ticket"); | ||
72 | xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), | 74 | xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), |
73 | "xfs_bmap_free_item"); | 75 | "xfs_bmap_free_item"); |
74 | xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), | 76 | xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), |
75 | "xfs_btree_cur"); | 77 | "xfs_btree_cur"); |
76 | xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); | 78 | xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), |
77 | xfs_da_state_zone = | 79 | "xfs_da_state"); |
78 | kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state"); | ||
79 | xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); | 80 | xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); |
80 | xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); | 81 | xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); |
82 | xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); | ||
81 | xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); | 83 | xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); |
82 | xfs_mru_cache_init(); | 84 | xfs_mru_cache_init(); |
83 | xfs_filestream_init(); | 85 | xfs_filestream_init(); |