path: root/fs/xfs
author    Dave Chinner <dchinner@redhat.com>  2013-08-12 06:50:07 -0400
committer Ben Myers <bpm@sgi.com>             2013-08-13 17:20:09 -0400
commit    991aaf65ff0addc2692cfa8dc1ff082dcf69d26f
tree      be01d8b5f7ba7e51902a1d8efc8145bb9737948b /fs/xfs
parent    f5baac354db8b6abfe8ed4ff6b6c3438c42ea606
xfs: Combine CIL insert and prepare passes
Now that all the log item preparation and formatting is done under the CIL lock, we can get rid of the intermediate log vector chain used to track items to be inserted into the CIL.

We can already find all the items to be committed from the transaction handle, so as long as we attach the log vectors to the item before we insert the items into the CIL, we don't need to create a log vector chain to pass around.

This means we can move all the item insertion code into xlog_cil_insert_items() and optimise it into a pair of simple passes across all the items in the transaction. The first pass does the formatting and accounting, the second inserts them all into the CIL.

We keep this two-pass split so that we can separate the CIL insertion - which must be done under the CIL spinlock - from the formatting. We could insert each item into the CIL with a single pass, but that massively increases the number of times we have to grab the CIL spinlock. It is much more efficient (and hence scalable) to do a batch operation and insert all objects in a single lock grab.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
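To make the two-pass structure described above concrete, here is a minimal standalone C sketch. It is not the XFS code: the item, list and lock types (struct item, struct cil, format_items(), insert_items()) are invented for illustration. Pass one formats and accounts each dirty item without touching any shared state; pass two takes the lock once and publishes all the items plus the aggregate space delta in a single critical section.

#include <pthread.h>
#include <stdio.h>

/* Hypothetical stand-ins for log items and the committed-item list. */
struct item {
	int		dirty;		/* only dirty items are committed */
	int		bytes;		/* formatted size, filled in by pass 1 */
	struct item	*next;
};

struct cil {
	pthread_mutex_t	lock;
	struct item	*head;		/* committed items, protected by lock */
	int		space_used;	/* aggregate space, protected by lock */
};

/* Pass 1: format and account each dirty item; no lock is held here. */
static int
format_items(struct item *items, int nr)
{
	int	len = 0;

	for (int i = 0; i < nr; i++) {
		if (!items[i].dirty)
			continue;
		items[i].bytes = 64;	/* stand-in for real formatting work */
		len += items[i].bytes;
	}
	return len;
}

/* Pass 2: one lock round trip to insert every item and the length delta. */
static void
insert_items(struct cil *cil, struct item *items, int nr, int len)
{
	pthread_mutex_lock(&cil->lock);
	for (int i = 0; i < nr; i++) {
		if (!items[i].dirty)
			continue;
		items[i].next = cil->head;
		cil->head = &items[i];
	}
	cil->space_used += len;
	pthread_mutex_unlock(&cil->lock);
}

int
main(void)
{
	struct cil	cil = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct item	items[3] = { { .dirty = 1 }, { .dirty = 0 }, { .dirty = 1 } };
	int		len;

	len = format_items(items, 3);		/* pass 1: no lock */
	insert_items(&cil, items, 3, len);	/* pass 2: single lock grab */
	printf("space used: %d\n", cil.space_used);
	return 0;
}

However many items the transaction dirtied, the lock is taken exactly once, which is the scalability property the commit message is after; inserting per item would instead take the lock O(nr_items) times.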
Diffstat (limited to 'fs/xfs')
-rw-r--r--  fs/xfs/xfs_log_cil.c  227
1 file changed, 100 insertions, 127 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b20b15761e9c..c1a3384406fd 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -111,6 +111,53 @@ xlog_cil_lv_item_format(
 }
 
 /*
+ * Prepare the log item for insertion into the CIL. Calculate the difference in
+ * log space and vectors it will consume, and if it is a new item pin it as
+ * well.
+ */
+STATIC void
+xfs_cil_prepare_item(
+	struct xlog		*log,
+	struct xfs_log_vec	*lv,
+	struct xfs_log_vec	*old_lv,
+	int			*diff_len,
+	int			*diff_iovecs)
+{
+	/* Account for the new LV being passed in */
+	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
+		*diff_len += lv->lv_buf_len;
+		*diff_iovecs += lv->lv_niovecs;
+	}
+
+	/*
+	 * If there is no old LV, this is the first time we've seen the item in
+	 * this CIL context and so we need to pin it. If we are replacing the
+	 * old_lv, then remove the space it accounts for and free it.
+	 */
+	if (!old_lv)
+		lv->lv_item->li_ops->iop_pin(lv->lv_item);
+	else if (old_lv != lv) {
+		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
+
+		*diff_len -= old_lv->lv_buf_len;
+		*diff_iovecs -= old_lv->lv_niovecs;
+		kmem_free(old_lv);
+	}
+
+	/* attach new log vector to log item */
+	lv->lv_item->li_lv = lv;
+
+	/*
+	 * If this is the first time the item is being committed to the
+	 * CIL, store the sequence number on the log item so we can
+	 * tell in future commits whether this is the first checkpoint
+	 * the item is being committed into.
+	 */
+	if (!lv->lv_item->li_seq)
+		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+
+/*
  * Format log item into a flat buffers
  *
  * For delayed logging, we need to hold a formatted buffer containing all the
@@ -136,24 +183,26 @@ xlog_cil_lv_item_format(
  * format the regions into the iclog as though they are being formatted
  * directly out of the objects themselves.
  */
-static struct xfs_log_vec *
-xlog_cil_prepare_log_vecs(
-	struct xfs_trans	*tp)
+static void
+xlog_cil_insert_format_items(
+	struct xlog		*log,
+	struct xfs_trans	*tp,
+	int			*diff_len,
+	int			*diff_iovecs)
 {
 	struct xfs_log_item_desc *lidp;
-	struct xfs_log_vec	*prev_lv = NULL;
-	struct xfs_log_vec	*ret_lv = NULL;
 
 
 	/* Bail out if we didn't find a log item. */
 	if (list_empty(&tp->t_items)) {
 		ASSERT(0);
-		return NULL;
+		return;
 	}
 
 	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
 		struct xfs_log_item *lip = lidp->lid_item;
 		struct xfs_log_vec *lv;
+		struct xfs_log_vec *old_lv;
 		int	niovecs = 0;
 		int	nbytes = 0;
 		int	buf_size;
@@ -181,6 +230,9 @@ xlog_cil_prepare_log_vecs(
 			nbytes = 0;
 		}
 
+		/* grab the old item if it exists for reservation accounting */
+		old_lv = lip->li_lv;
+
 		/* calc buffer size */
 		buf_size = sizeof(struct xfs_log_vec) + nbytes +
 				niovecs * sizeof(struct xfs_log_iovec);
@@ -194,9 +246,17 @@ xlog_cil_prepare_log_vecs(
 			if (ordered)
 				goto insert;
 
+			/*
+			 * set the item up as though it is a new insertion so
+			 * that the space reservation accounting is correct.
+			 */
+			*diff_iovecs -= lv->lv_niovecs;
+			*diff_len -= lv->lv_buf_len;
+
 			/* Ensure the lv is set up according to ->iop_size */
 			lv->lv_niovecs = niovecs;
 			lv->lv_buf = (char *)lv + buf_size - nbytes;
+
 			lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
 			goto insert;
 		}
@@ -222,74 +282,8 @@ xlog_cil_prepare_log_vecs(
 		lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv);
 insert:
 		ASSERT(lv->lv_buf_len <= nbytes);
-		if (!ret_lv)
-			ret_lv = lv;
-		else
-			prev_lv->lv_next = lv;
-		prev_lv = lv;
-	}
-
-	return ret_lv;
-}
-
-/*
- * Prepare the log item for insertion into the CIL. Calculate the difference in
- * log space and vectors it will consume, and if it is a new item pin it as
- * well.
- */
-STATIC void
-xfs_cil_prepare_item(
-	struct xlog		*log,
-	struct xfs_log_vec	*lv,
-	int			*len,
-	int			*diff_iovecs)
-{
-	struct xfs_log_vec	*old = lv->lv_item->li_lv;
-
-	if (!old) {
-		/* new lv, must pin the log item */
-		ASSERT(!lv->lv_item->li_lv);
-
-		if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
-			*len += lv->lv_buf_len;
-			*diff_iovecs += lv->lv_niovecs;
-		}
-		lv->lv_item->li_ops->iop_pin(lv->lv_item);
-
-	} else if (old != lv) {
-		/* existing lv on log item, space used is a delta */
-		ASSERT((old->lv_buf && old->lv_buf_len && old->lv_niovecs) ||
-		       old->lv_buf_len == XFS_LOG_VEC_ORDERED);
-
-		/*
-		 * If the new item is ordered, keep the old one that is already
-		 * tracking dirty or ordered regions
-		 */
-		if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
-			ASSERT(!lv->lv_buf);
-			kmem_free(lv);
-			return;
-		}
-
-		*len += lv->lv_buf_len - old->lv_buf_len;
-		*diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
-		kmem_free(old);
-	} else {
-		/* re-used lv */
-		/* XXX: can't account for len/diff_iovecs yet */
+		xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
 	}
-
-	/* attach new log vector to log item */
-	lv->lv_item->li_lv = lv;
-
-	/*
-	 * If this is the first time the item is being committed to the
-	 * CIL, store the sequence number on the log item so we can
-	 * tell in future commits whether this is the first checkpoint
-	 * the item is being committed into.
-	 */
-	if (!lv->lv_item->li_seq)
-		lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
 }
 
 /*
@@ -302,53 +296,47 @@ xfs_cil_prepare_item(
 static void
 xlog_cil_insert_items(
 	struct xlog		*log,
-	struct xfs_log_vec	*log_vector,
-	struct xlog_ticket	*ticket)
+	struct xfs_trans	*tp)
 {
 	struct xfs_cil		*cil = log->l_cilp;
 	struct xfs_cil_ctx	*ctx = cil->xc_ctx;
-	struct xfs_log_vec	*lv;
+	struct xfs_log_item_desc *lidp;
 	int			len = 0;
 	int			diff_iovecs = 0;
 	int			iclog_space;
 
-	ASSERT(log_vector);
+	ASSERT(tp);
 
 	/*
-	 * Do all the accounting aggregation and switching of log vectors
-	 * around in a separate loop to the insertion of items into the CIL.
-	 * Then we can do a separate loop to update the CIL within a single
-	 * lock/unlock pair. This reduces the number of round trips on the CIL
-	 * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
-	 * hold time for the transaction commit.
-	 *
-	 * If this is the first time the item is being placed into the CIL in
-	 * this context, pin it so it can't be written to disk until the CIL is
-	 * flushed to the iclog and the iclog written to disk.
-	 *
 	 * We can do this safely because the context can't checkpoint until we
 	 * are done so it doesn't matter exactly how we update the CIL.
 	 */
+	xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
+
+	/*
+	 * Now (re-)position everything modified at the tail of the CIL.
+	 * We do this here so we only need to take the CIL lock once during
+	 * the transaction commit.
+	 */
 	spin_lock(&cil->xc_cil_lock);
-	for (lv = log_vector; lv; ) {
-		struct xfs_log_vec *next = lv->lv_next;
+	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+		struct xfs_log_item *lip = lidp->lid_item;
 
-		ASSERT(lv->lv_item->li_lv || list_empty(&lv->lv_item->li_cil));
-		lv->lv_next = NULL;
+		/* Skip items which aren't dirty in this transaction. */
+		if (!(lidp->lid_flags & XFS_LID_DIRTY))
+			continue;
 
-		/*
-		 * xfs_cil_prepare_item() may free the lv, so move the item on
-		 * the CIL first.
-		 */
-		list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
-		xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
-		lv = next;
+		list_move_tail(&lip->li_cil, &cil->xc_cil);
 	}
 
 	/* account for space used by new iovec headers */
 	len += diff_iovecs * sizeof(xlog_op_header_t);
 	ctx->nvecs += diff_iovecs;
 
+	/* attach the transaction to the CIL if it has any busy extents */
+	if (!list_empty(&tp->t_busy))
+		list_splice_init(&tp->t_busy, &ctx->busy_extents);
+
 	/*
 	 * Now transfer enough transaction reservation to the context ticket
 	 * for the checkpoint. The context ticket is special - the unit
@@ -357,10 +345,8 @@ xlog_cil_insert_items(
 	 * during the transaction commit.
 	 */
 	if (ctx->ticket->t_curr_res == 0) {
-		/* first commit in checkpoint, steal the header reservation */
-		ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
 		ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
-		ticket->t_curr_res -= ctx->ticket->t_unit_res;
+		tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
 	}
 
 	/* do we need space for more log record headers? */
@@ -374,10 +360,10 @@ xlog_cil_insert_items(
 		hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
 		ctx->ticket->t_unit_res += hdrs;
 		ctx->ticket->t_curr_res += hdrs;
-		ticket->t_curr_res -= hdrs;
-		ASSERT(ticket->t_curr_res >= len);
+		tp->t_ticket->t_curr_res -= hdrs;
+		ASSERT(tp->t_ticket->t_curr_res >= len);
 	}
-	ticket->t_curr_res -= len;
+	tp->t_ticket->t_curr_res -= len;
 	ctx->space_used += len;
 
 	spin_unlock(&cil->xc_cil_lock);
@@ -746,38 +732,25 @@ xfs_log_commit_cil(
 	int			flags)
 {
 	struct xlog		*log = mp->m_log;
+	struct xfs_cil		*cil = log->l_cilp;
 	int			log_flags = 0;
-	struct xfs_log_vec	*log_vector;
 
 	if (flags & XFS_TRANS_RELEASE_LOG_RES)
 		log_flags = XFS_LOG_REL_PERM_RESERV;
 
 	/* lock out background commit */
-	down_read(&log->l_cilp->xc_ctx_lock);
+	down_read(&cil->xc_ctx_lock);
 
-	log_vector = xlog_cil_prepare_log_vecs(tp);
-	if (!log_vector)
-		return ENOMEM;
-
-	if (commit_lsn)
-		*commit_lsn = log->l_cilp->xc_ctx->sequence;
-
-	/* xlog_cil_insert_items() destroys log_vector list */
-	xlog_cil_insert_items(log, log_vector, tp->t_ticket);
+	xlog_cil_insert_items(log, tp);
 
 	/* check we didn't blow the reservation */
 	if (tp->t_ticket->t_curr_res < 0)
-		xlog_print_tic_res(log->l_mp, tp->t_ticket);
+		xlog_print_tic_res(mp, tp->t_ticket);
 
-	/* attach the transaction to the CIL if it has any busy extents */
-	if (!list_empty(&tp->t_busy)) {
-		spin_lock(&log->l_cilp->xc_cil_lock);
-		list_splice_init(&tp->t_busy,
-					&log->l_cilp->xc_ctx->busy_extents);
-		spin_unlock(&log->l_cilp->xc_cil_lock);
-	}
+	tp->t_commit_lsn = cil->xc_ctx->sequence;
+	if (commit_lsn)
+		*commit_lsn = tp->t_commit_lsn;
 
-	tp->t_commit_lsn = *commit_lsn;
 	xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
 	xfs_trans_unreserve_and_mod_sb(tp);
 
@@ -792,11 +765,11 @@ xfs_log_commit_cil(
 	 * the log items. This affects (at least) processing of stale buffers,
 	 * inodes and EFIs.
 	 */
-	xfs_trans_free_items(tp, *commit_lsn, 0);
+	xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
 
 	xlog_cil_push_background(log);
 
-	up_read(&log->l_cilp->xc_ctx_lock);
+	up_read(&cil->xc_ctx_lock);
 	return 0;
 }
 