aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2010-12-20 20:29:14 -0500
committerDave Chinner <david@fromorbit.com>2010-12-20 20:29:14 -0500
commitd0eb2f38b250b7d6c993adf81b0e4ded0565497e (patch)
tree660923947f3f7d412bd65c58ba9d2e4c8835320b /fs
parent3f16b9850743b702380f098ab5e0308cd6af1792 (diff)
xfs: convert grant head manipulations to lockless algorithm
The only thing that the grant lock remains to protect is the grant head manipulations when adding or removing space from the log. These calculations are already based on atomic variables, so we can already update them safely without locks. However, the grant head manipulations require atomic multi-step calculations to be executed, which the algorithms currently don't allow. To make these multi-step calculations atomic, convert the algorithms to compare-and-exchange loops on the atomic variables. That is, we sample the old value, perform the calculation and use atomic64_cmpxchg() to attempt to update the head with the new value. If the head has not changed since we sampled it, it will succeed and we are done. Otherwise, we rerun the calculation again from a new sample of the head. This allows us to remove the grant lock from around all the grant head space manipulations, and that effectively removes the grant lock from the log completely. Hence we can remove the grant lock completely from the log at this point. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/xfs_log.c103
-rw-r--r--fs/xfs/xfs_log_priv.h23
2 files changed, 49 insertions, 77 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 6fcc9d0af524..0bf24b11d0c4 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -81,7 +81,6 @@ STATIC void xlog_ungrant_log_space(xlog_t *log,
81 81
82#if defined(DEBUG) 82#if defined(DEBUG)
83STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); 83STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
84STATIC void xlog_verify_grant_head(xlog_t *log, int equals);
85STATIC void xlog_verify_grant_tail(struct log *log); 84STATIC void xlog_verify_grant_tail(struct log *log);
86STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, 85STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
87 int count, boolean_t syncing); 86 int count, boolean_t syncing);
@@ -89,7 +88,6 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
89 xfs_lsn_t tail_lsn); 88 xfs_lsn_t tail_lsn);
90#else 89#else
91#define xlog_verify_dest_ptr(a,b) 90#define xlog_verify_dest_ptr(a,b)
92#define xlog_verify_grant_head(a,b)
93#define xlog_verify_grant_tail(a) 91#define xlog_verify_grant_tail(a)
94#define xlog_verify_iclog(a,b,c,d) 92#define xlog_verify_iclog(a,b,c,d)
95#define xlog_verify_tail_lsn(a,b,c) 93#define xlog_verify_tail_lsn(a,b,c)
@@ -103,17 +101,24 @@ xlog_grant_sub_space(
103 atomic64_t *head, 101 atomic64_t *head,
104 int bytes) 102 int bytes)
105{ 103{
106 int cycle, space; 104 int64_t head_val = atomic64_read(head);
105 int64_t new, old;
107 106
108 xlog_crack_grant_head(head, &cycle, &space); 107 do {
108 int cycle, space;
109 109
110 space -= bytes; 110 xlog_crack_grant_head_val(head_val, &cycle, &space);
111 if (space < 0) {
112 space += log->l_logsize;
113 cycle--;
114 }
115 111
116 xlog_assign_grant_head(head, cycle, space); 112 space -= bytes;
113 if (space < 0) {
114 space += log->l_logsize;
115 cycle--;
116 }
117
118 old = head_val;
119 new = xlog_assign_grant_head_val(cycle, space);
120 head_val = atomic64_cmpxchg(head, old, new);
121 } while (head_val != old);
117} 122}
118 123
119static void 124static void
@@ -122,20 +127,27 @@ xlog_grant_add_space(
122 atomic64_t *head, 127 atomic64_t *head,
123 int bytes) 128 int bytes)
124{ 129{
125 int tmp; 130 int64_t head_val = atomic64_read(head);
126 int cycle, space; 131 int64_t new, old;
127 132
128 xlog_crack_grant_head(head, &cycle, &space); 133 do {
134 int tmp;
135 int cycle, space;
129 136
130 tmp = log->l_logsize - space; 137 xlog_crack_grant_head_val(head_val, &cycle, &space);
131 if (tmp > bytes)
132 space += bytes;
133 else {
134 space = bytes - tmp;
135 cycle++;
136 }
137 138
138 xlog_assign_grant_head(head, cycle, space); 139 tmp = log->l_logsize - space;
140 if (tmp > bytes)
141 space += bytes;
142 else {
143 space = bytes - tmp;
144 cycle++;
145 }
146
147 old = head_val;
148 new = xlog_assign_grant_head_val(cycle, space);
149 head_val = atomic64_cmpxchg(head, old, new);
150 } while (head_val != old);
139} 151}
140 152
141static void 153static void
@@ -318,9 +330,7 @@ xfs_log_reserve(
318 330
319 trace_xfs_log_reserve(log, internal_ticket); 331 trace_xfs_log_reserve(log, internal_ticket);
320 332
321 spin_lock(&log->l_grant_lock);
322 xlog_grant_push_ail(log, internal_ticket->t_unit_res); 333 xlog_grant_push_ail(log, internal_ticket->t_unit_res);
323 spin_unlock(&log->l_grant_lock);
324 retval = xlog_regrant_write_log_space(log, internal_ticket); 334 retval = xlog_regrant_write_log_space(log, internal_ticket);
325 } else { 335 } else {
326 /* may sleep if need to allocate more tickets */ 336 /* may sleep if need to allocate more tickets */
@@ -334,11 +344,9 @@ xfs_log_reserve(
334 344
335 trace_xfs_log_reserve(log, internal_ticket); 345 trace_xfs_log_reserve(log, internal_ticket);
336 346
337 spin_lock(&log->l_grant_lock);
338 xlog_grant_push_ail(log, 347 xlog_grant_push_ail(log,
339 (internal_ticket->t_unit_res * 348 (internal_ticket->t_unit_res *
340 internal_ticket->t_cnt)); 349 internal_ticket->t_cnt));
341 spin_unlock(&log->l_grant_lock);
342 retval = xlog_grant_log_space(log, internal_ticket); 350 retval = xlog_grant_log_space(log, internal_ticket);
343 } 351 }
344 352
@@ -1057,7 +1065,6 @@ xlog_alloc_log(xfs_mount_t *mp,
1057 log->l_xbuf = bp; 1065 log->l_xbuf = bp;
1058 1066
1059 spin_lock_init(&log->l_icloglock); 1067 spin_lock_init(&log->l_icloglock);
1060 spin_lock_init(&log->l_grant_lock);
1061 init_waitqueue_head(&log->l_flush_wait); 1068 init_waitqueue_head(&log->l_flush_wait);
1062 1069
1063 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1070 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
@@ -1135,7 +1142,6 @@ out_free_iclog:
1135 kmem_free(iclog); 1142 kmem_free(iclog);
1136 } 1143 }
1137 spinlock_destroy(&log->l_icloglock); 1144 spinlock_destroy(&log->l_icloglock);
1138 spinlock_destroy(&log->l_grant_lock);
1139 xfs_buf_free(log->l_xbuf); 1145 xfs_buf_free(log->l_xbuf);
1140out_free_log: 1146out_free_log:
1141 kmem_free(log); 1147 kmem_free(log);
@@ -1331,10 +1337,8 @@ xlog_sync(xlog_t *log,
1331 roundoff < BBTOB(1))); 1337 roundoff < BBTOB(1)));
1332 1338
1333 /* move grant heads by roundoff in sync */ 1339 /* move grant heads by roundoff in sync */
1334 spin_lock(&log->l_grant_lock);
1335 xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); 1340 xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
1336 xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); 1341 xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
1337 spin_unlock(&log->l_grant_lock);
1338 1342
1339 /* put cycle number in every block */ 1343 /* put cycle number in every block */
1340 xlog_pack_data(log, iclog, roundoff); 1344 xlog_pack_data(log, iclog, roundoff);
@@ -1455,7 +1459,6 @@ xlog_dealloc_log(xlog_t *log)
1455 iclog = next_iclog; 1459 iclog = next_iclog;
1456 } 1460 }
1457 spinlock_destroy(&log->l_icloglock); 1461 spinlock_destroy(&log->l_icloglock);
1458 spinlock_destroy(&log->l_grant_lock);
1459 1462
1460 xfs_buf_free(log->l_xbuf); 1463 xfs_buf_free(log->l_xbuf);
1461 log->l_mp->m_log = NULL; 1464 log->l_mp->m_log = NULL;
@@ -2574,13 +2577,10 @@ redo:
2574 } 2577 }
2575 2578
2576 /* we've got enough space */ 2579 /* we've got enough space */
2577 spin_lock(&log->l_grant_lock);
2578 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); 2580 xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
2579 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); 2581 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2580 trace_xfs_log_grant_exit(log, tic); 2582 trace_xfs_log_grant_exit(log, tic);
2581 xlog_verify_grant_head(log, 1);
2582 xlog_verify_grant_tail(log); 2583 xlog_verify_grant_tail(log);
2583 spin_unlock(&log->l_grant_lock);
2584 return 0; 2584 return 0;
2585 2585
2586error_return_unlocked: 2586error_return_unlocked:
@@ -2694,12 +2694,9 @@ redo:
2694 } 2694 }
2695 2695
2696 /* we've got enough space */ 2696 /* we've got enough space */
2697 spin_lock(&log->l_grant_lock);
2698 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); 2697 xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
2699 trace_xfs_log_regrant_write_exit(log, tic); 2698 trace_xfs_log_regrant_write_exit(log, tic);
2700 xlog_verify_grant_head(log, 1);
2701 xlog_verify_grant_tail(log); 2699 xlog_verify_grant_tail(log);
2702 spin_unlock(&log->l_grant_lock);
2703 return 0; 2700 return 0;
2704 2701
2705 2702
@@ -2737,7 +2734,6 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2737 if (ticket->t_cnt > 0) 2734 if (ticket->t_cnt > 0)
2738 ticket->t_cnt--; 2735 ticket->t_cnt--;
2739 2736
2740 spin_lock(&log->l_grant_lock);
2741 xlog_grant_sub_space(log, &log->l_grant_reserve_head, 2737 xlog_grant_sub_space(log, &log->l_grant_reserve_head,
2742 ticket->t_curr_res); 2738 ticket->t_curr_res);
2743 xlog_grant_sub_space(log, &log->l_grant_write_head, 2739 xlog_grant_sub_space(log, &log->l_grant_write_head,
@@ -2747,21 +2743,15 @@ xlog_regrant_reserve_log_space(xlog_t *log,
2747 2743
2748 trace_xfs_log_regrant_reserve_sub(log, ticket); 2744 trace_xfs_log_regrant_reserve_sub(log, ticket);
2749 2745
2750 xlog_verify_grant_head(log, 1);
2751
2752 /* just return if we still have some of the pre-reserved space */ 2746 /* just return if we still have some of the pre-reserved space */
2753 if (ticket->t_cnt > 0) { 2747 if (ticket->t_cnt > 0)
2754 spin_unlock(&log->l_grant_lock);
2755 return; 2748 return;
2756 }
2757 2749
2758 xlog_grant_add_space(log, &log->l_grant_reserve_head, 2750 xlog_grant_add_space(log, &log->l_grant_reserve_head,
2759 ticket->t_unit_res); 2751 ticket->t_unit_res);
2760 2752
2761 trace_xfs_log_regrant_reserve_exit(log, ticket); 2753 trace_xfs_log_regrant_reserve_exit(log, ticket);
2762 2754
2763 xlog_verify_grant_head(log, 0);
2764 spin_unlock(&log->l_grant_lock);
2765 ticket->t_curr_res = ticket->t_unit_res; 2755 ticket->t_curr_res = ticket->t_unit_res;
2766 xlog_tic_reset_res(ticket); 2756 xlog_tic_reset_res(ticket);
2767} /* xlog_regrant_reserve_log_space */ 2757} /* xlog_regrant_reserve_log_space */
@@ -2790,7 +2780,6 @@ xlog_ungrant_log_space(xlog_t *log,
2790 if (ticket->t_cnt > 0) 2780 if (ticket->t_cnt > 0)
2791 ticket->t_cnt--; 2781 ticket->t_cnt--;
2792 2782
2793 spin_lock(&log->l_grant_lock);
2794 trace_xfs_log_ungrant_enter(log, ticket); 2783 trace_xfs_log_ungrant_enter(log, ticket);
2795 trace_xfs_log_ungrant_sub(log, ticket); 2784 trace_xfs_log_ungrant_sub(log, ticket);
2796 2785
@@ -2809,8 +2798,6 @@ xlog_ungrant_log_space(xlog_t *log,
2809 2798
2810 trace_xfs_log_ungrant_exit(log, ticket); 2799 trace_xfs_log_ungrant_exit(log, ticket);
2811 2800
2812 xlog_verify_grant_head(log, 1);
2813 spin_unlock(&log->l_grant_lock);
2814 xfs_log_move_tail(log->l_mp, 1); 2801 xfs_log_move_tail(log->l_mp, 1);
2815} /* xlog_ungrant_log_space */ 2802} /* xlog_ungrant_log_space */
2816 2803
@@ -3429,28 +3416,6 @@ xlog_verify_dest_ptr(
3429} 3416}
3430 3417
3431STATIC void 3418STATIC void
3432xlog_verify_grant_head(xlog_t *log, int equals)
3433{
3434 int reserve_cycle, reserve_space;
3435 int write_cycle, write_space;
3436
3437 xlog_crack_grant_head(&log->l_grant_reserve_head,
3438 &reserve_cycle, &reserve_space);
3439 xlog_crack_grant_head(&log->l_grant_write_head,
3440 &write_cycle, &write_space);
3441
3442 if (reserve_cycle == write_cycle) {
3443 if (equals)
3444 ASSERT(reserve_space >= write_space);
3445 else
3446 ASSERT(reserve_space > write_space);
3447 } else {
3448 ASSERT(reserve_cycle - 1 == write_cycle);
3449 ASSERT(write_space >= reserve_space);
3450 }
3451}
3452
3453STATIC void
3454xlog_verify_grant_tail( 3419xlog_verify_grant_tail(
3455 struct log *log) 3420 struct log *log)
3456{ 3421{
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index befb2fc5b027..d5f8be8f4bf6 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -510,9 +510,6 @@ typedef struct log {
510 int l_curr_block; /* current logical log block */ 510 int l_curr_block; /* current logical log block */
511 int l_prev_block; /* previous logical log block */ 511 int l_prev_block; /* previous logical log block */
512 512
513 /* The following block of fields are changed while holding grant_lock */
514 spinlock_t l_grant_lock ____cacheline_aligned_in_smp;
515
516 /* 513 /*
517 * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and 514 * l_last_sync_lsn and l_tail_lsn are atomics so they can be set and
518 * read without needing to hold specific locks. To avoid operations 515 * read without needing to hold specific locks. To avoid operations
@@ -599,23 +596,33 @@ xlog_assign_atomic_lsn(atomic64_t *lsn, uint cycle, uint block)
599} 596}
600 597
601/* 598/*
602 * When we crack the grrant head, we sample it first so that the value will not 599 * When we crack the grant head, we sample it first so that the value will not
603 * change while we are cracking it into the component values. This means we 600 * change while we are cracking it into the component values. This means we
604 * will always get consistent component values to work from. 601 * will always get consistent component values to work from.
605 */ 602 */
606static inline void 603static inline void
607xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space) 604xlog_crack_grant_head_val(int64_t val, int *cycle, int *space)
608{ 605{
609 int64_t val = atomic64_read(head);
610
611 *cycle = val >> 32; 606 *cycle = val >> 32;
612 *space = val & 0xffffffff; 607 *space = val & 0xffffffff;
613} 608}
614 609
615static inline void 610static inline void
611xlog_crack_grant_head(atomic64_t *head, int *cycle, int *space)
612{
613 xlog_crack_grant_head_val(atomic64_read(head), cycle, space);
614}
615
616static inline int64_t
617xlog_assign_grant_head_val(int cycle, int space)
618{
619 return ((int64_t)cycle << 32) | space;
620}
621
622static inline void
616xlog_assign_grant_head(atomic64_t *head, int cycle, int space) 623xlog_assign_grant_head(atomic64_t *head, int cycle, int space)
617{ 624{
618 atomic64_set(head, ((int64_t)cycle << 32) | space); 625 atomic64_set(head, xlog_assign_grant_head_val(cycle, space));
619} 626}
620 627
621/* 628/*