aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author David Chinner <dgc@sgi.com> 2008-04-09 22:18:46 -0400
committer Lachlan McIlroy <lachlan@redback.melbourne.sgi.com> 2008-04-17 21:50:39 -0400
commit eb01c9cd87c7a9998c2edf209721ea069e3e3652 (patch)
tree 201fc07dac684d6a5a789995c8c35918ea16de7f /fs
parent 114d23aae51233b2bc62d8e2a632bcb55de1953d (diff)
[XFS] Remove the xlog_ticket allocator
The ticket allocator is just a simple slab implementation internal to the log. It requires the icloglock to be held when manipulating it and this contributes to contention on that lock. Just kill the entire allocator and use a memory zone instead. While there, allow us to gracefully fail allocation with ENOMEM.

SGI-PV: 978729
SGI-Modid: xfs-linux-melb:xfs-kern:30771a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/xfs_log.c 137
-rw-r--r-- fs/xfs/xfs_log_priv.h 9
-rw-r--r-- fs/xfs/xfs_vfsops.c 12
3 files changed, 21 insertions, 137 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7a5b12d93537..3cf115d8de75 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -41,6 +41,7 @@
41#include "xfs_inode.h" 41#include "xfs_inode.h"
42#include "xfs_rw.h" 42#include "xfs_rw.h"
43 43
44kmem_zone_t *xfs_log_ticket_zone;
44 45
45#define xlog_write_adv_cnt(ptr, len, off, bytes) \ 46#define xlog_write_adv_cnt(ptr, len, off, bytes) \
46 { (ptr) += (bytes); \ 47 { (ptr) += (bytes); \
@@ -73,8 +74,6 @@ STATIC int xlog_state_get_iclog_space(xlog_t *log,
73 xlog_ticket_t *ticket, 74 xlog_ticket_t *ticket,
74 int *continued_write, 75 int *continued_write,
75 int *logoffsetp); 76 int *logoffsetp);
76STATIC void xlog_state_put_ticket(xlog_t *log,
77 xlog_ticket_t *tic);
78STATIC int xlog_state_release_iclog(xlog_t *log, 77STATIC int xlog_state_release_iclog(xlog_t *log,
79 xlog_in_core_t *iclog); 78 xlog_in_core_t *iclog);
80STATIC void xlog_state_switch_iclogs(xlog_t *log, 79STATIC void xlog_state_switch_iclogs(xlog_t *log,
@@ -101,7 +100,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
101 100
102 101
103/* local ticket functions */ 102/* local ticket functions */
104STATIC void xlog_state_ticket_alloc(xlog_t *log);
105STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log, 103STATIC xlog_ticket_t *xlog_ticket_get(xlog_t *log,
106 int unit_bytes, 104 int unit_bytes,
107 int count, 105 int count,
@@ -330,7 +328,7 @@ xfs_log_done(xfs_mount_t *mp,
330 */ 328 */
331 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)"); 329 xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
332 xlog_ungrant_log_space(log, ticket); 330 xlog_ungrant_log_space(log, ticket);
333 xlog_state_put_ticket(log, ticket); 331 xlog_ticket_put(log, ticket);
334 } else { 332 } else {
335 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); 333 xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
336 xlog_regrant_reserve_log_space(log, ticket); 334 xlog_regrant_reserve_log_space(log, ticket);
@@ -469,6 +467,8 @@ xfs_log_reserve(xfs_mount_t *mp,
469 /* may sleep if need to allocate more tickets */ 467 /* may sleep if need to allocate more tickets */
470 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt, 468 internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
471 client, flags); 469 client, flags);
470 if (!internal_ticket)
471 return XFS_ERROR(ENOMEM);
472 internal_ticket->t_trans_type = t_type; 472 internal_ticket->t_trans_type = t_type;
473 *ticket = internal_ticket; 473 *ticket = internal_ticket;
474 xlog_trace_loggrant(log, internal_ticket, 474 xlog_trace_loggrant(log, internal_ticket,
@@ -693,7 +693,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
693 if (tic) { 693 if (tic) {
694 xlog_trace_loggrant(log, tic, "unmount rec"); 694 xlog_trace_loggrant(log, tic, "unmount rec");
695 xlog_ungrant_log_space(log, tic); 695 xlog_ungrant_log_space(log, tic);
696 xlog_state_put_ticket(log, tic); 696 xlog_ticket_put(log, tic);
697 } 697 }
698 } else { 698 } else {
699 /* 699 /*
@@ -1208,7 +1208,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1208 spin_lock_init(&log->l_icloglock); 1208 spin_lock_init(&log->l_icloglock);
1209 spin_lock_init(&log->l_grant_lock); 1209 spin_lock_init(&log->l_grant_lock);
1210 initnsema(&log->l_flushsema, 0, "ic-flush"); 1210 initnsema(&log->l_flushsema, 0, "ic-flush");
1211 xlog_state_ticket_alloc(log); /* wait until after icloglock inited */
1212 1211
1213 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1212 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1214 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1213 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1538,7 +1537,6 @@ STATIC void
1538xlog_dealloc_log(xlog_t *log) 1537xlog_dealloc_log(xlog_t *log)
1539{ 1538{
1540 xlog_in_core_t *iclog, *next_iclog; 1539 xlog_in_core_t *iclog, *next_iclog;
1541 xlog_ticket_t *tic, *next_tic;
1542 int i; 1540 int i;
1543 1541
1544 iclog = log->l_iclog; 1542 iclog = log->l_iclog;
@@ -1559,22 +1557,6 @@ xlog_dealloc_log(xlog_t *log)
1559 spinlock_destroy(&log->l_icloglock); 1557 spinlock_destroy(&log->l_icloglock);
1560 spinlock_destroy(&log->l_grant_lock); 1558 spinlock_destroy(&log->l_grant_lock);
1561 1559
1562 /* XXXsup take a look at this again. */
1563 if ((log->l_ticket_cnt != log->l_ticket_tcnt) &&
1564 !XLOG_FORCED_SHUTDOWN(log)) {
1565 xfs_fs_cmn_err(CE_WARN, log->l_mp,
1566 "xlog_dealloc_log: (cnt: %d, total: %d)",
1567 log->l_ticket_cnt, log->l_ticket_tcnt);
1568 /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */
1569
1570 } else {
1571 tic = log->l_unmount_free;
1572 while (tic) {
1573 next_tic = tic->t_next;
1574 kmem_free(tic, PAGE_SIZE);
1575 tic = next_tic;
1576 }
1577 }
1578 xfs_buf_free(log->l_xbuf); 1560 xfs_buf_free(log->l_xbuf);
1579#ifdef XFS_LOG_TRACE 1561#ifdef XFS_LOG_TRACE
1580 if (log->l_trace != NULL) { 1562 if (log->l_trace != NULL) {
@@ -2795,18 +2777,6 @@ xlog_ungrant_log_space(xlog_t *log,
2795 2777
2796 2778
2797/* 2779/*
2798 * Atomically put back used ticket.
2799 */
2800STATIC void
2801xlog_state_put_ticket(xlog_t *log,
2802 xlog_ticket_t *tic)
2803{
2804 spin_lock(&log->l_icloglock);
2805 xlog_ticket_put(log, tic);
2806 spin_unlock(&log->l_icloglock);
2807} /* xlog_state_put_ticket */
2808
2809/*
2810 * Flush iclog to disk if this is the last reference to the given iclog and 2780 * Flush iclog to disk if this is the last reference to the given iclog and
2811 * the WANT_SYNC bit is set. 2781 * the WANT_SYNC bit is set.
2812 * 2782 *
@@ -3176,92 +3146,19 @@ xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3176 */ 3146 */
3177 3147
3178/* 3148/*
3179 * Algorithm doesn't take into account page size. ;-( 3149 * Free a used ticket.
3180 */
3181STATIC void
3182xlog_state_ticket_alloc(xlog_t *log)
3183{
3184 xlog_ticket_t *t_list;
3185 xlog_ticket_t *next;
3186 xfs_caddr_t buf;
3187 uint i = (PAGE_SIZE / sizeof(xlog_ticket_t)) - 2;
3188
3189 /*
3190 * The kmem_zalloc may sleep, so we shouldn't be holding the
3191 * global lock. XXXmiken: may want to use zone allocator.
3192 */
3193 buf = (xfs_caddr_t) kmem_zalloc(PAGE_SIZE, KM_SLEEP);
3194
3195 spin_lock(&log->l_icloglock);
3196
3197 /* Attach 1st ticket to Q, so we can keep track of allocated memory */
3198 t_list = (xlog_ticket_t *)buf;
3199 t_list->t_next = log->l_unmount_free;
3200 log->l_unmount_free = t_list++;
3201 log->l_ticket_cnt++;
3202 log->l_ticket_tcnt++;
3203
3204 /* Next ticket becomes first ticket attached to ticket free list */
3205 if (log->l_freelist != NULL) {
3206 ASSERT(log->l_tail != NULL);
3207 log->l_tail->t_next = t_list;
3208 } else {
3209 log->l_freelist = t_list;
3210 }
3211 log->l_ticket_cnt++;
3212 log->l_ticket_tcnt++;
3213
3214 /* Cycle through rest of alloc'ed memory, building up free Q */
3215 for ( ; i > 0; i--) {
3216 next = t_list + 1;
3217 t_list->t_next = next;
3218 t_list = next;
3219 log->l_ticket_cnt++;
3220 log->l_ticket_tcnt++;
3221 }
3222 t_list->t_next = NULL;
3223 log->l_tail = t_list;
3224 spin_unlock(&log->l_icloglock);
3225} /* xlog_state_ticket_alloc */
3226
3227
3228/*
3229 * Put ticket into free list
3230 *
3231 * Assumption: log lock is held around this call.
3232 */ 3150 */
3233STATIC void 3151STATIC void
3234xlog_ticket_put(xlog_t *log, 3152xlog_ticket_put(xlog_t *log,
3235 xlog_ticket_t *ticket) 3153 xlog_ticket_t *ticket)
3236{ 3154{
3237 sv_destroy(&ticket->t_sema); 3155 sv_destroy(&ticket->t_sema);
3238 3156 kmem_zone_free(xfs_log_ticket_zone, ticket);
3239 /*
3240 * Don't think caching will make that much difference. It's
3241 * more important to make debug easier.
3242 */
3243#if 0
3244 /* real code will want to use LIFO for caching */
3245 ticket->t_next = log->l_freelist;
3246 log->l_freelist = ticket;
3247 /* no need to clear fields */
3248#else
3249 /* When we debug, it is easier if tickets are cycled */
3250 ticket->t_next = NULL;
3251 if (log->l_tail) {
3252 log->l_tail->t_next = ticket;
3253 } else {
3254 ASSERT(log->l_freelist == NULL);
3255 log->l_freelist = ticket;
3256 }
3257 log->l_tail = ticket;
3258#endif /* DEBUG */
3259 log->l_ticket_cnt++;
3260} /* xlog_ticket_put */ 3157} /* xlog_ticket_put */
3261 3158
3262 3159
3263/* 3160/*
3264 * Grab ticket off freelist or allocation some more 3161 * Allocate and initialise a new log ticket.
3265 */ 3162 */
3266STATIC xlog_ticket_t * 3163STATIC xlog_ticket_t *
3267xlog_ticket_get(xlog_t *log, 3164xlog_ticket_get(xlog_t *log,
@@ -3273,21 +3170,9 @@ xlog_ticket_get(xlog_t *log,
3273 xlog_ticket_t *tic; 3170 xlog_ticket_t *tic;
3274 uint num_headers; 3171 uint num_headers;
3275 3172
3276 alloc: 3173 tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
3277 if (log->l_freelist == NULL) 3174 if (!tic)
3278 xlog_state_ticket_alloc(log); /* potentially sleep */ 3175 return NULL;
3279
3280 spin_lock(&log->l_icloglock);
3281 if (log->l_freelist == NULL) {
3282 spin_unlock(&log->l_icloglock);
3283 goto alloc;
3284 }
3285 tic = log->l_freelist;
3286 log->l_freelist = tic->t_next;
3287 if (log->l_freelist == NULL)
3288 log->l_tail = NULL;
3289 log->l_ticket_cnt--;
3290 spin_unlock(&log->l_icloglock);
3291 3176
3292 /* 3177 /*
3293 * Permanent reservations have up to 'cnt'-1 active log operations 3178 * Permanent reservations have up to 'cnt'-1 active log operations
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 104b623aa082..c1583960009d 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -242,7 +242,7 @@ typedef struct xlog_res {
242 242
243typedef struct xlog_ticket { 243typedef struct xlog_ticket {
244 sv_t t_sema; /* sleep on this semaphore : 20 */ 244 sv_t t_sema; /* sleep on this semaphore : 20 */
245 struct xlog_ticket *t_next; /* :4|8 */ 245 struct xlog_ticket *t_next; /* :4|8 */
246 struct xlog_ticket *t_prev; /* :4|8 */ 246 struct xlog_ticket *t_prev; /* :4|8 */
247 xlog_tid_t t_tid; /* transaction identifier : 4 */ 247 xlog_tid_t t_tid; /* transaction identifier : 4 */
248 int t_curr_res; /* current reservation in bytes : 4 */ 248 int t_curr_res; /* current reservation in bytes : 4 */
@@ -406,13 +406,8 @@ typedef struct log {
406 sema_t l_flushsema; /* iclog flushing semaphore */ 406 sema_t l_flushsema; /* iclog flushing semaphore */
407 int l_flushcnt; /* # of procs waiting on this 407 int l_flushcnt; /* # of procs waiting on this
408 * sema */ 408 * sema */
409 int l_ticket_cnt; /* free ticket count */
410 int l_ticket_tcnt; /* total ticket count */
411 int l_covered_state;/* state of "covering disk 409 int l_covered_state;/* state of "covering disk
412 * log entries" */ 410 * log entries" */
413 xlog_ticket_t *l_freelist; /* free list of tickets */
414 xlog_ticket_t *l_unmount_free;/* kmem_free these addresses */
415 xlog_ticket_t *l_tail; /* free list of tickets */
416 xlog_in_core_t *l_iclog; /* head log queue */ 411 xlog_in_core_t *l_iclog; /* head log queue */
417 spinlock_t l_icloglock; /* grab to change iclog state */ 412 spinlock_t l_icloglock; /* grab to change iclog state */
418 xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed 413 xfs_lsn_t l_tail_lsn; /* lsn of 1st LR with unflushed
@@ -478,6 +473,8 @@ extern struct xfs_buf *xlog_get_bp(xlog_t *, int);
478extern void xlog_put_bp(struct xfs_buf *); 473extern void xlog_put_bp(struct xfs_buf *);
479extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *); 474extern int xlog_bread(xlog_t *, xfs_daddr_t, int, struct xfs_buf *);
480 475
476extern kmem_zone_t *xfs_log_ticket_zone;
477
481/* iclog tracing */ 478/* iclog tracing */
482#define XLOG_TRACE_GRAB_FLUSH 1 479#define XLOG_TRACE_GRAB_FLUSH 1
483#define XLOG_TRACE_REL_FLUSH 2 480#define XLOG_TRACE_REL_FLUSH 2
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index c21e4d168297..ea94593b5313 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -69,15 +69,17 @@ xfs_init(void)
69 /* 69 /*
70 * Initialize all of the zone allocators we use. 70 * Initialize all of the zone allocators we use.
71 */ 71 */
72 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
73 "xfs_log_ticket");
72 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), 74 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
73 "xfs_bmap_free_item"); 75 "xfs_bmap_free_item");
74 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), 76 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
75 "xfs_btree_cur"); 77 "xfs_btree_cur");
76 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); 78 xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
77 xfs_da_state_zone = 79 "xfs_da_state");
78 kmem_zone_init(sizeof(xfs_da_state_t), "xfs_da_state");
79 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); 80 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
80 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); 81 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
82 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
81 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); 83 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
82 xfs_mru_cache_init(); 84 xfs_mru_cache_init();
83 xfs_filestream_init(); 85 xfs_filestream_init();